xref: /petsc/src/sys/utils/mpimesg.c (revision f97672e55eacc8688507b9471cd7ec2664d7f203)
1 
2 #include <petscsys.h>        /*I  "petscsys.h"  I*/
3 #include <petsc/private/mpiutils.h>
4 
5 /*@C
6   PetscGatherNumberOfMessages -  Computes the number of messages a node expects to receive
7 
8   Collective
9 
10   Input Parameters:
11 + comm     - Communicator
12 . iflags   - an array of integers of length sizeof(comm). A '1' in ilengths[i] represent a
13              message from current node to ith node. Optionally NULL
14 - ilengths - Non zero ilengths[i] represent a message to i of length ilengths[i].
15              Optionally NULL.
16 
17   Output Parameters:
18 . nrecvs    - number of messages received
19 
20   Level: developer
21 
22   Notes:
23   With this info, the correct message lengths can be determined using
24   PetscGatherMessageLengths()
25 
26   Either iflags or ilengths should be provided.  If iflags is not
27   provided (NULL) it can be computed from ilengths. If iflags is
28   provided, ilengths is not required.
29 
30 .seealso: `PetscGatherMessageLengths()`
31 @*/
32 PetscErrorCode  PetscGatherNumberOfMessages(MPI_Comm comm,const PetscMPIInt iflags[],const PetscMPIInt ilengths[],PetscMPIInt *nrecvs)
33 {
34   PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;
35 
36   PetscFunctionBegin;
37   PetscCallMPI(MPI_Comm_size(comm,&size));
38   PetscCallMPI(MPI_Comm_rank(comm,&rank));
39 
40   PetscCall(PetscMalloc2(size,&recv_buf,size,&iflags_localm));
41 
42   /* If iflags not provided, compute iflags from ilengths */
43   if (!iflags) {
44     PetscCheck(ilengths,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided");
45     iflags_local = iflags_localm;
46     for (i=0; i<size; i++) {
47       if (ilengths[i]) iflags_local[i] = 1;
48       else iflags_local[i] = 0;
49     }
50   } else iflags_local = (PetscMPIInt*) iflags;
51 
52   /* Post an allreduce to determine the numer of messages the current node will receive */
53   PetscCall(MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm));
54   *nrecvs = recv_buf[rank];
55 
56   PetscCall(PetscFree2(recv_buf,iflags_localm));
57   PetscFunctionReturn(0);
58 }
59 
60 /*@C
61   PetscGatherMessageLengths - Computes info about messages that a MPI-node will receive,
62   including (from-id,length) pairs for each message.
63 
64   Collective
65 
66   Input Parameters:
67 + comm      - Communicator
68 . nsends    - number of messages that are to be sent.
69 . nrecvs    - number of messages being received
70 - ilengths  - an array of integers of length sizeof(comm)
71               a non zero ilengths[i] represent a message to i of length ilengths[i]
72 
73   Output Parameters:
74 + onodes    - list of node-ids from which messages are expected
75 - olengths  - corresponding message lengths
76 
77   Level: developer
78 
79   Notes:
80   With this info, the correct MPI_Irecv() can be posted with the correct
81   from-id, with a buffer with the right amount of memory required.
82 
83   The calling function deallocates the memory in onodes and olengths
84 
85   To determine nrecvs, one can use PetscGatherNumberOfMessages()
86 
87 .seealso: `PetscGatherNumberOfMessages()`
88 @*/
89 PetscErrorCode  PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths[],PetscMPIInt **onodes,PetscMPIInt **olengths)
90 {
91   PetscMPIInt    size,rank,tag,i,j;
92   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
93   MPI_Status     *w_status = NULL;
94 
95   PetscFunctionBegin;
96   PetscCallMPI(MPI_Comm_size(comm,&size));
97   PetscCallMPI(MPI_Comm_rank(comm,&rank));
98   PetscCall(PetscCommGetNewTag(comm,&tag));
99 
100   /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
101   PetscCall(PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status));
102   s_waits = r_waits+nrecvs;
103 
104   /* Post the Irecv to get the message length-info */
105   PetscCall(PetscMalloc1(nrecvs,olengths));
106   for (i=0; i<nrecvs; i++) {
107     PetscCallMPI(MPI_Irecv((*olengths)+i,1,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i));
108   }
109 
110   /* Post the Isends with the message length-info */
111   for (i=0,j=0; i<size; ++i) {
112     if (ilengths[i]) {
113       PetscCallMPI(MPI_Isend((void*)(ilengths+i),1,MPI_INT,i,tag,comm,s_waits+j));
114       j++;
115     }
116   }
117 
118   /* Post waits on sends and receivs */
119   if (nrecvs+nsends) PetscCallMPI(MPI_Waitall(nrecvs+nsends,r_waits,w_status));
120 
121   /* Pack up the received data */
122   PetscCall(PetscMalloc1(nrecvs,onodes));
123   for (i=0; i<nrecvs; ++i) {
124     (*onodes)[i] = w_status[i].MPI_SOURCE;
125 #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
126     /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
127        It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
128        does not put correct value in recv buffer. See also
129        https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
130        https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
131      */
132     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
133 #endif
134   }
135   PetscCall(PetscFree2(r_waits,w_status));
136   PetscFunctionReturn(0);
137 }
138 
139 /* Same as PetscGatherNumberOfMessages(), except using PetscInt for ilengths[] */
140 PetscErrorCode  PetscGatherNumberOfMessages_Private(MPI_Comm comm,const PetscMPIInt iflags[],const PetscInt ilengths[],PetscMPIInt *nrecvs)
141 {
142   PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;
143 
144   PetscFunctionBegin;
145   PetscCallMPI(MPI_Comm_size(comm,&size));
146   PetscCallMPI(MPI_Comm_rank(comm,&rank));
147 
148   PetscCall(PetscMalloc2(size,&recv_buf,size,&iflags_localm));
149 
150   /* If iflags not provided, compute iflags from ilengths */
151   if (!iflags) {
152     PetscCheck(ilengths,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided");
153     iflags_local = iflags_localm;
154     for (i=0; i<size; i++) {
155       if (ilengths[i]) iflags_local[i] = 1;
156       else iflags_local[i] = 0;
157     }
158   } else iflags_local = (PetscMPIInt*) iflags;
159 
160   /* Post an allreduce to determine the numer of messages the current node will receive */
161   PetscCall(MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm));
162   *nrecvs = recv_buf[rank];
163 
164   PetscCall(PetscFree2(recv_buf,iflags_localm));
165   PetscFunctionReturn(0);
166 }
167 
168 /* Same as PetscGatherMessageLengths(), except using PetscInt for message lengths */
169 PetscErrorCode  PetscGatherMessageLengths_Private(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscInt ilengths[],PetscMPIInt **onodes,PetscInt **olengths)
170 {
171   PetscMPIInt    size,rank,tag,i,j;
172   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
173   MPI_Status     *w_status = NULL;
174 
175   PetscFunctionBegin;
176   PetscCallMPI(MPI_Comm_size(comm,&size));
177   PetscCallMPI(MPI_Comm_rank(comm,&rank));
178   PetscCall(PetscCommGetNewTag(comm,&tag));
179 
180   /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
181   PetscCall(PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status));
182   s_waits = r_waits+nrecvs;
183 
184   /* Post the Irecv to get the message length-info */
185   PetscCall(PetscMalloc1(nrecvs,olengths));
186   for (i=0; i<nrecvs; i++) {
187     PetscCallMPI(MPI_Irecv((*olengths)+i,1,MPIU_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i));
188   }
189 
190   /* Post the Isends with the message length-info */
191   for (i=0,j=0; i<size; ++i) {
192     if (ilengths[i]) {
193       PetscCallMPI(MPI_Isend((void*)(ilengths+i),1,MPIU_INT,i,tag,comm,s_waits+j));
194       j++;
195     }
196   }
197 
198   /* Post waits on sends and receivs */
199   if (nrecvs+nsends) PetscCallMPI(MPI_Waitall(nrecvs+nsends,r_waits,w_status));
200 
201   /* Pack up the received data */
202   PetscCall(PetscMalloc1(nrecvs,onodes));
203   for (i=0; i<nrecvs; ++i) {
204     (*onodes)[i] = w_status[i].MPI_SOURCE;
205     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; /* See comments in PetscGatherMessageLengths */
206   }
207   PetscCall(PetscFree2(r_waits,w_status));
208   PetscFunctionReturn(0);
209 }
210 
211 /*@C
212   PetscGatherMessageLengths2 - Computes info about messages that a MPI-node will receive,
213   including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths()
214   except it takes TWO ilenths and output TWO olengths.
215 
216   Collective
217 
218   Input Parameters:
219 + comm      - Communicator
220 . nsends    - number of messages that are to be sent.
221 . nrecvs    - number of messages being received
222 . ilengths1 - first array of integers of length sizeof(comm)
223 - ilengths2 - second array of integers of length sizeof(comm)
224 
225   Output Parameters:
226 + onodes    - list of node-ids from which messages are expected
227 . olengths1 - first corresponding message lengths
228 - olengths2 - second  message lengths
229 
230   Level: developer
231 
232   Notes:
233   With this info, the correct MPI_Irecv() can be posted with the correct
234   from-id, with a buffer with the right amount of memory required.
235 
236   The calling function deallocates the memory in onodes and olengths
237 
238   To determine nrecvs, one can use PetscGatherNumberOfMessages()
239 
240 .seealso: `PetscGatherMessageLengths()` `and` `PetscGatherNumberOfMessages()`
241 @*/
242 PetscErrorCode  PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2)
243 {
244   PetscMPIInt    size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL;
245   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
246   MPI_Status     *w_status = NULL;
247 
248   PetscFunctionBegin;
249   PetscCallMPI(MPI_Comm_size(comm,&size));
250   PetscCall(PetscCommGetNewTag(comm,&tag));
251 
252   /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
253   PetscCall(PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status));
254   s_waits = r_waits + nrecvs;
255 
256   /* Post the Irecv to get the message length-info */
257   PetscCall(PetscMalloc1(nrecvs+1,olengths1));
258   PetscCall(PetscMalloc1(nrecvs+1,olengths2));
259   for (i=0; i<nrecvs; i++) {
260     buf_j = buf_r + (2*i);
261     PetscCallMPI(MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i));
262   }
263 
264   /* Post the Isends with the message length-info */
265   for (i=0,j=0; i<size; ++i) {
266     if (ilengths1[i]) {
267       buf_j    = buf_s + (2*j);
268       buf_j[0] = *(ilengths1+i);
269       buf_j[1] = *(ilengths2+i);
270       PetscCallMPI(MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j));
271       j++;
272     }
273   }
274   PetscCheck(j == nsends,PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d",j,nsends);
275 
276   /* Post waits on sends and receivs */
277   if (nrecvs+nsends) PetscCallMPI(MPI_Waitall(nrecvs+nsends,r_waits,w_status));
278 
279   /* Pack up the received data */
280   PetscCall(PetscMalloc1(nrecvs+1,onodes));
281   for (i=0; i<nrecvs; ++i) {
282     (*onodes)[i]    = w_status[i].MPI_SOURCE;
283     buf_j           = buf_r + (2*i);
284     (*olengths1)[i] = buf_j[0];
285     (*olengths2)[i] = buf_j[1];
286   }
287 
288   PetscCall(PetscFree4(r_waits,buf_r,buf_s,w_status));
289   PetscFunctionReturn(0);
290 }
291 
292 /*
293 
294   Allocate a buffer sufficient to hold messages of size specified in olengths.
295   And post Irecvs on these buffers using node info from onodes
296 
297  */
298 PetscErrorCode  PetscPostIrecvInt(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscInt ***rbuf,MPI_Request **r_waits)
299 {
300   PetscInt       **rbuf_t,i,len = 0;
301   MPI_Request    *r_waits_t;
302 
303   PetscFunctionBegin;
304   /* compute memory required for recv buffers */
305   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */
306 
307   /* allocate memory for recv buffers */
308   PetscCall(PetscMalloc1(nrecvs+1,&rbuf_t));
309   PetscCall(PetscMalloc1(len,&rbuf_t[0]));
310   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];
311 
312   /* Post the receives */
313   PetscCall(PetscMalloc1(nrecvs,&r_waits_t));
314   for (i=0; i<nrecvs; ++i) {
315     PetscCallMPI(MPI_Irecv(rbuf_t[i],olengths[i],MPIU_INT,onodes[i],tag,comm,r_waits_t+i));
316   }
317 
318   *rbuf    = rbuf_t;
319   *r_waits = r_waits_t;
320   PetscFunctionReturn(0);
321 }
322 
323 PetscErrorCode  PetscPostIrecvScalar(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscScalar ***rbuf,MPI_Request **r_waits)
324 {
325   PetscMPIInt    i;
326   PetscScalar    **rbuf_t;
327   MPI_Request    *r_waits_t;
328   PetscInt       len = 0;
329 
330   PetscFunctionBegin;
331   /* compute memory required for recv buffers */
332   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */
333 
334   /* allocate memory for recv buffers */
335   PetscCall(PetscMalloc1(nrecvs+1,&rbuf_t));
336   PetscCall(PetscMalloc1(len,&rbuf_t[0]));
337   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];
338 
339   /* Post the receives */
340   PetscCall(PetscMalloc1(nrecvs,&r_waits_t));
341   for (i=0; i<nrecvs; ++i) {
342     PetscCallMPI(MPI_Irecv(rbuf_t[i],olengths[i],MPIU_SCALAR,onodes[i],tag,comm,r_waits_t+i));
343   }
344 
345   *rbuf    = rbuf_t;
346   *r_waits = r_waits_t;
347   PetscFunctionReturn(0);
348 }
349