xref: /petsc/src/sys/utils/mpimesg.c (revision 5f80ce2ab25dff0f4601e710601cbbcecf323266)
1 
2 #include <petscsys.h>        /*I  "petscsys.h"  I*/
3 #include <petsc/private/mpiutils.h>
4 
5 /*@C
6   PetscGatherNumberOfMessages -  Computes the number of messages a node expects to receive
7 
8   Collective
9 
10   Input Parameters:
11 + comm     - Communicator
. iflags   - an array of integers of length equal to the size of comm. A '1' in iflags[i] represents a
             message from the current node to the ith node. Optionally NULL
14 - ilengths - Non zero ilengths[i] represent a message to i of length ilengths[i].
15              Optionally NULL.
16 
17   Output Parameters:
18 . nrecvs    - number of messages received
19 
20   Level: developer
21 
22   Notes:
23   With this info, the correct message lengths can be determined using
24   PetscGatherMessageLengths()
25 
26   Either iflags or ilengths should be provided.  If iflags is not
27   provided (NULL) it can be computed from ilengths. If iflags is
28   provided, ilengths is not required.
29 
30 .seealso: PetscGatherMessageLengths()
31 @*/
32 PetscErrorCode  PetscGatherNumberOfMessages(MPI_Comm comm,const PetscMPIInt iflags[],const PetscMPIInt ilengths[],PetscMPIInt *nrecvs)
33 {
34   PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;
35 
36   PetscFunctionBegin;
37   CHKERRMPI(MPI_Comm_size(comm,&size));
38   CHKERRMPI(MPI_Comm_rank(comm,&rank));
39 
40   CHKERRQ(PetscMalloc2(size,&recv_buf,size,&iflags_localm));
41 
42   /* If iflags not provided, compute iflags from ilengths */
43   if (!iflags) {
44     PetscCheckFalse(!ilengths,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided");
45     iflags_local = iflags_localm;
46     for (i=0; i<size; i++) {
47       if (ilengths[i]) iflags_local[i] = 1;
48       else iflags_local[i] = 0;
49     }
50   } else iflags_local = (PetscMPIInt*) iflags;
51 
52   /* Post an allreduce to determine the numer of messages the current node will receive */
53   CHKERRMPI(MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm));
54   *nrecvs = recv_buf[rank];
55 
56   CHKERRQ(PetscFree2(recv_buf,iflags_localm));
57   PetscFunctionReturn(0);
58 }
59 
60 /*@C
61   PetscGatherMessageLengths - Computes info about messages that a MPI-node will receive,
62   including (from-id,length) pairs for each message.
63 
64   Collective
65 
66   Input Parameters:
67 + comm      - Communicator
68 . nsends    - number of messages that are to be sent.
69 . nrecvs    - number of messages being received
- ilengths  - an array of integers of length equal to the size of comm;
              a nonzero ilengths[i] represents a message to rank i of length ilengths[i]
72 
73   Output Parameters:
74 + onodes    - list of node-ids from which messages are expected
75 - olengths  - corresponding message lengths
76 
77   Level: developer
78 
79   Notes:
80   With this info, the correct MPI_Irecv() can be posted with the correct
81   from-id, with a buffer with the right amount of memory required.
82 
83   The calling function deallocates the memory in onodes and olengths
84 
85   To determine nrecvs, one can use PetscGatherNumberOfMessages()
86 
87 .seealso: PetscGatherNumberOfMessages()
88 @*/
PetscErrorCode  PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths[],PetscMPIInt **onodes,PetscMPIInt **olengths)
{
  PetscMPIInt    size,rank,tag,i,j;
  MPI_Request    *s_waits  = NULL,*r_waits = NULL;  /* s_waits aliases the tail of r_waits; see below */
  MPI_Status     *w_status = NULL;

  PetscFunctionBegin;
  CHKERRMPI(MPI_Comm_size(comm,&size));
  CHKERRMPI(MPI_Comm_rank(comm,&rank));
  /* A fresh tag keeps these length messages from matching unrelated traffic on comm */
  CHKERRQ(PetscCommGetNewTag(comm,&tag));

  /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
  CHKERRQ(PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status));
  s_waits = r_waits+nrecvs;

  /* Post the Irecv to get the message length-info */
  CHKERRQ(PetscMalloc1(nrecvs,olengths));
  for (i=0; i<nrecvs; i++) {
    /* MPI_ANY_SOURCE: the sender identities are discovered from the statuses afterwards */
    CHKERRMPI(MPI_Irecv((*olengths)+i,1,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i));
  }

  /* Post the Isends with the message length-info */
  for (i=0,j=0; i<size; ++i) {
    if (ilengths[i]) {
      /* const cast: older MPI Isend prototypes take a non-const send buffer */
      CHKERRMPI(MPI_Isend((void*)(ilengths+i),1,MPI_INT,i,tag,comm,s_waits+j));
      j++;
    }
  }

  /* Post waits on sends and receivs */
  if (nrecvs+nsends) CHKERRMPI(MPI_Waitall(nrecvs+nsends,r_waits,w_status));

  /* Pack up the received data */
  CHKERRQ(PetscMalloc1(nrecvs,onodes));
  for (i=0; i<nrecvs; ++i) {
    /* Only the first nrecvs statuses (the receives) carry meaningful MPI_SOURCE */
    (*onodes)[i] = w_status[i].MPI_SOURCE;
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
    /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
       It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
       does not put correct value in recv buffer. See also
       https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
       https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
     */
    if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
#endif
  }
  CHKERRQ(PetscFree2(r_waits,w_status));
  PetscFunctionReturn(0);
}
138 
139 /* Same as PetscGatherNumberOfMessages(), except using PetscInt for ilengths[] */
140 PetscErrorCode  PetscGatherNumberOfMessages_Private(MPI_Comm comm,const PetscMPIInt iflags[],const PetscInt ilengths[],PetscMPIInt *nrecvs)
141 {
142   PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;
143 
144   PetscFunctionBegin;
145   CHKERRMPI(MPI_Comm_size(comm,&size));
146   CHKERRMPI(MPI_Comm_rank(comm,&rank));
147 
148   CHKERRQ(PetscMalloc2(size,&recv_buf,size,&iflags_localm));
149 
150   /* If iflags not provided, compute iflags from ilengths */
151   if (!iflags) {
152     PetscCheckFalse(!ilengths,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided");
153     iflags_local = iflags_localm;
154     for (i=0; i<size; i++) {
155       if (ilengths[i]) iflags_local[i] = 1;
156       else iflags_local[i] = 0;
157     }
158   } else iflags_local = (PetscMPIInt*) iflags;
159 
160   /* Post an allreduce to determine the numer of messages the current node will receive */
161   CHKERRMPI(MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm));
162   *nrecvs = recv_buf[rank];
163 
164   CHKERRQ(PetscFree2(recv_buf,iflags_localm));
165   PetscFunctionReturn(0);
166 }
167 
168 /* Same as PetscGatherMessageLengths(), except using PetscInt for message lengths */
169 PetscErrorCode  PetscGatherMessageLengths_Private(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscInt ilengths[],PetscMPIInt **onodes,PetscInt **olengths)
170 {
171   PetscMPIInt    size,rank,tag,i,j;
172   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
173   MPI_Status     *w_status = NULL;
174 
175   PetscFunctionBegin;
176   CHKERRMPI(MPI_Comm_size(comm,&size));
177   CHKERRMPI(MPI_Comm_rank(comm,&rank));
178   CHKERRQ(PetscCommGetNewTag(comm,&tag));
179 
180   /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
181   CHKERRQ(PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status));
182   s_waits = r_waits+nrecvs;
183 
184   /* Post the Irecv to get the message length-info */
185   CHKERRQ(PetscMalloc1(nrecvs,olengths));
186   for (i=0; i<nrecvs; i++) {
187     CHKERRMPI(MPI_Irecv((*olengths)+i,1,MPIU_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i));
188   }
189 
190   /* Post the Isends with the message length-info */
191   for (i=0,j=0; i<size; ++i) {
192     if (ilengths[i]) {
193       CHKERRMPI(MPI_Isend((void*)(ilengths+i),1,MPIU_INT,i,tag,comm,s_waits+j));
194       j++;
195     }
196   }
197 
198   /* Post waits on sends and receivs */
199   if (nrecvs+nsends) CHKERRMPI(MPI_Waitall(nrecvs+nsends,r_waits,w_status));
200 
201   /* Pack up the received data */
202   CHKERRQ(PetscMalloc1(nrecvs,onodes));
203   for (i=0; i<nrecvs; ++i) {
204     (*onodes)[i] = w_status[i].MPI_SOURCE;
205     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; /* See comments in PetscGatherMessageLengths */
206   }
207   CHKERRQ(PetscFree2(r_waits,w_status));
208   PetscFunctionReturn(0);
209 }
210 
211 /*@C
212   PetscGatherMessageLengths2 - Computes info about messages that a MPI-node will receive,
213   including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths()
214   except it takes TWO ilenths and output TWO olengths.
215 
216   Collective
217 
218   Input Parameters:
219 + comm      - Communicator
220 . nsends    - number of messages that are to be sent.
221 . nrecvs    - number of messages being received
. ilengths1 - first array of integers of length equal to the size of comm
- ilengths2 - second array of integers of length equal to the size of comm
224 
225   Output Parameters:
226 + onodes    - list of node-ids from which messages are expected
227 . olengths1 - first corresponding message lengths
228 - olengths2 - second  message lengths
229 
230   Level: developer
231 
232   Notes:
233   With this info, the correct MPI_Irecv() can be posted with the correct
234   from-id, with a buffer with the right amount of memory required.
235 
236   The calling function deallocates the memory in onodes and olengths
237 
238   To determine nrecvs, one can use PetscGatherNumberOfMessages()
239 
240 .seealso: PetscGatherMessageLengths() and PetscGatherNumberOfMessages()
241 @*/
PetscErrorCode  PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2)
{
  PetscMPIInt    size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL;
  MPI_Request    *s_waits  = NULL,*r_waits = NULL;  /* s_waits aliases the tail of r_waits; see below */
  MPI_Status     *w_status = NULL;

  PetscFunctionBegin;
  CHKERRMPI(MPI_Comm_size(comm,&size));
  /* A fresh tag keeps these length messages from matching unrelated traffic on comm */
  CHKERRQ(PetscCommGetNewTag(comm,&tag));

  /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
  /* buf_r/buf_s hold the (length1,length2) pairs, two ints per message */
  CHKERRQ(PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status));
  s_waits = r_waits + nrecvs;

  /* Post the Irecv to get the message length-info */
  /* +1 so the allocations are nonzero-sized even when nrecvs == 0 */
  CHKERRQ(PetscMalloc1(nrecvs+1,olengths1));
  CHKERRQ(PetscMalloc1(nrecvs+1,olengths2));
  for (i=0; i<nrecvs; i++) {
    buf_j = buf_r + (2*i);
    /* Each receive gets both lengths in one 2-int message; sender identity comes from the status */
    CHKERRMPI(MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i));
  }

  /* Post the Isends with the message length-info */
  /* NOTE(review): a nonzero ilengths1[i] is what marks rank i as a destination;
     ilengths2[i] is sent alongside whatever its value is */
  for (i=0,j=0; i<size; ++i) {
    if (ilengths1[i]) {
      buf_j    = buf_s + (2*j);
      buf_j[0] = *(ilengths1+i);
      buf_j[1] = *(ilengths2+i);
      CHKERRMPI(MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j));
      j++;
    }
  }
  /* Sanity check: the number of posted sends must match the caller-supplied nsends */
  PetscCheckFalse(j != nsends,PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d",j,nsends);

  /* Post waits on sends and receivs */
  if (nrecvs+nsends) CHKERRMPI(MPI_Waitall(nrecvs+nsends,r_waits,w_status));

  /* Pack up the received data */
  CHKERRQ(PetscMalloc1(nrecvs+1,onodes));
  for (i=0; i<nrecvs; ++i) {
    (*onodes)[i]    = w_status[i].MPI_SOURCE;
    buf_j           = buf_r + (2*i);
    (*olengths1)[i] = buf_j[0];
    (*olengths2)[i] = buf_j[1];
  }

  CHKERRQ(PetscFree4(r_waits,buf_r,buf_s,w_status));
  PetscFunctionReturn(0);
}
291 
292 /*
293 
294   Allocate a buffer sufficient to hold messages of size specified in olengths.
295   And post Irecvs on these buffers using node info from onodes
296 
297  */
298 PetscErrorCode  PetscPostIrecvInt(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscInt ***rbuf,MPI_Request **r_waits)
299 {
300   PetscInt       **rbuf_t,i,len = 0;
301   MPI_Request    *r_waits_t;
302 
303   PetscFunctionBegin;
304   /* compute memory required for recv buffers */
305   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */
306 
307   /* allocate memory for recv buffers */
308   CHKERRQ(PetscMalloc1(nrecvs+1,&rbuf_t));
309   CHKERRQ(PetscMalloc1(len,&rbuf_t[0]));
310   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];
311 
312   /* Post the receives */
313   CHKERRQ(PetscMalloc1(nrecvs,&r_waits_t));
314   for (i=0; i<nrecvs; ++i) {
315     CHKERRMPI(MPI_Irecv(rbuf_t[i],olengths[i],MPIU_INT,onodes[i],tag,comm,r_waits_t+i));
316   }
317 
318   *rbuf    = rbuf_t;
319   *r_waits = r_waits_t;
320   PetscFunctionReturn(0);
321 }
322 
323 PetscErrorCode  PetscPostIrecvScalar(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscScalar ***rbuf,MPI_Request **r_waits)
324 {
325   PetscMPIInt    i;
326   PetscScalar    **rbuf_t;
327   MPI_Request    *r_waits_t;
328   PetscInt       len = 0;
329 
330   PetscFunctionBegin;
331   /* compute memory required for recv buffers */
332   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */
333 
334   /* allocate memory for recv buffers */
335   CHKERRQ(PetscMalloc1(nrecvs+1,&rbuf_t));
336   CHKERRQ(PetscMalloc1(len,&rbuf_t[0]));
337   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];
338 
339   /* Post the receives */
340   CHKERRQ(PetscMalloc1(nrecvs,&r_waits_t));
341   for (i=0; i<nrecvs; ++i) {
342     CHKERRMPI(MPI_Irecv(rbuf_t[i],olengths[i],MPIU_SCALAR,onodes[i],tag,comm,r_waits_t+i));
343   }
344 
345   *rbuf    = rbuf_t;
346   *r_waits = r_waits_t;
347   PetscFunctionReturn(0);
348 }
349