xref: /petsc/src/sys/utils/mpimesg.c (revision dc2eb70d7d162c29244b12c92d19653e4c9defec)
1 
2 #include <petscsys.h>        /*I  "petscsys.h"  I*/
3 #include <petsc/private/mpiutils.h>
4 
5 /*@C
6   PetscGatherNumberOfMessages -  Computes the number of messages a node expects to receive
7 
8   Collective
9 
10   Input Parameters:
11 + comm     - Communicator
12 . iflags   - an array of integers of length sizeof(comm). A '1' in ilengths[i] represent a
13              message from current node to ith node. Optionally NULL
14 - ilengths - Non zero ilengths[i] represent a message to i of length ilengths[i].
15              Optionally NULL.
16 
17   Output Parameters:
18 . nrecvs    - number of messages received
19 
20   Level: developer
21 
22   Notes:
23   With this info, the correct message lengths can be determined using
24   PetscGatherMessageLengths()
25 
26   Either iflags or ilengths should be provided.  If iflags is not
27   provided (NULL) it can be computed from ilengths. If iflags is
28   provided, ilengths is not required.
29 
30 .seealso: PetscGatherMessageLengths()
31 @*/
32 PetscErrorCode  PetscGatherNumberOfMessages(MPI_Comm comm,const PetscMPIInt iflags[],const PetscMPIInt ilengths[],PetscMPIInt *nrecvs)
33 {
34   PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;
35   PetscErrorCode ierr;
36 
37   PetscFunctionBegin;
38   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
39   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
40 
41   ierr = PetscMalloc2(size,&recv_buf,size,&iflags_localm);CHKERRQ(ierr);
42 
43   /* If iflags not provided, compute iflags from ilengths */
44   if (!iflags) {
45     PetscCheckFalse(!ilengths,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided");
46     iflags_local = iflags_localm;
47     for (i=0; i<size; i++) {
48       if (ilengths[i]) iflags_local[i] = 1;
49       else iflags_local[i] = 0;
50     }
51   } else iflags_local = (PetscMPIInt*) iflags;
52 
53   /* Post an allreduce to determine the numer of messages the current node will receive */
54   ierr    = MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm);CHKERRMPI(ierr);
55   *nrecvs = recv_buf[rank];
56 
57   ierr = PetscFree2(recv_buf,iflags_localm);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 
61 /*@C
62   PetscGatherMessageLengths - Computes info about messages that a MPI-node will receive,
63   including (from-id,length) pairs for each message.
64 
65   Collective
66 
67   Input Parameters:
68 + comm      - Communicator
69 . nsends    - number of messages that are to be sent.
70 . nrecvs    - number of messages being received
71 - ilengths  - an array of integers of length sizeof(comm)
72               a non zero ilengths[i] represent a message to i of length ilengths[i]
73 
74   Output Parameters:
75 + onodes    - list of node-ids from which messages are expected
76 - olengths  - corresponding message lengths
77 
78   Level: developer
79 
80   Notes:
81   With this info, the correct MPI_Irecv() can be posted with the correct
82   from-id, with a buffer with the right amount of memory required.
83 
84   The calling function deallocates the memory in onodes and olengths
85 
86   To determine nrecvs, one can use PetscGatherNumberOfMessages()
87 
88 .seealso: PetscGatherNumberOfMessages()
89 @*/
90 PetscErrorCode  PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths[],PetscMPIInt **onodes,PetscMPIInt **olengths)
91 {
92   PetscErrorCode ierr;
93   PetscMPIInt    size,rank,tag,i,j;
94   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
95   MPI_Status     *w_status = NULL;
96 
97   PetscFunctionBegin;
98   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
99   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
100   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
101 
102   /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
103   ierr    = PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status);CHKERRQ(ierr);
104   s_waits = r_waits+nrecvs;
105 
106   /* Post the Irecv to get the message length-info */
107   ierr = PetscMalloc1(nrecvs,olengths);CHKERRQ(ierr);
108   for (i=0; i<nrecvs; i++) {
109     ierr = MPI_Irecv((*olengths)+i,1,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRMPI(ierr);
110   }
111 
112   /* Post the Isends with the message length-info */
113   for (i=0,j=0; i<size; ++i) {
114     if (ilengths[i]) {
115       ierr = MPI_Isend((void*)(ilengths+i),1,MPI_INT,i,tag,comm,s_waits+j);CHKERRMPI(ierr);
116       j++;
117     }
118   }
119 
120   /* Post waits on sends and receivs */
121   if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRMPI(ierr);}
122 
123   /* Pack up the received data */
124   ierr = PetscMalloc1(nrecvs,onodes);CHKERRQ(ierr);
125   for (i=0; i<nrecvs; ++i) {
126     (*onodes)[i] = w_status[i].MPI_SOURCE;
127 #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
128     /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
129        It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
130        does not put correct value in recv buffer. See also
131        https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
132        https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
133      */
134     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
135 #endif
136   }
137   ierr = PetscFree2(r_waits,w_status);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 /* Same as PetscGatherNumberOfMessages(), except using PetscInt for ilengths[] */
142 PetscErrorCode  PetscGatherNumberOfMessages_Private(MPI_Comm comm,const PetscMPIInt iflags[],const PetscInt ilengths[],PetscMPIInt *nrecvs)
143 {
144   PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;
145   PetscErrorCode ierr;
146 
147   PetscFunctionBegin;
148   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
149   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
150 
151   ierr = PetscMalloc2(size,&recv_buf,size,&iflags_localm);CHKERRQ(ierr);
152 
153   /* If iflags not provided, compute iflags from ilengths */
154   if (!iflags) {
155     PetscCheckFalse(!ilengths,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided");
156     iflags_local = iflags_localm;
157     for (i=0; i<size; i++) {
158       if (ilengths[i]) iflags_local[i] = 1;
159       else iflags_local[i] = 0;
160     }
161   } else iflags_local = (PetscMPIInt*) iflags;
162 
163   /* Post an allreduce to determine the numer of messages the current node will receive */
164   ierr    = MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm);CHKERRMPI(ierr);
165   *nrecvs = recv_buf[rank];
166 
167   ierr = PetscFree2(recv_buf,iflags_localm);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 
171 /* Same as PetscGatherMessageLengths(), except using PetscInt for message lengths */
172 PetscErrorCode  PetscGatherMessageLengths_Private(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscInt ilengths[],PetscMPIInt **onodes,PetscInt **olengths)
173 {
174   PetscErrorCode ierr;
175   PetscMPIInt    size,rank,tag,i,j;
176   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
177   MPI_Status     *w_status = NULL;
178 
179   PetscFunctionBegin;
180   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
181   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
182   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
183 
184   /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
185   ierr    = PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status);CHKERRQ(ierr);
186   s_waits = r_waits+nrecvs;
187 
188   /* Post the Irecv to get the message length-info */
189   ierr = PetscMalloc1(nrecvs,olengths);CHKERRQ(ierr);
190   for (i=0; i<nrecvs; i++) {
191     ierr = MPI_Irecv((*olengths)+i,1,MPIU_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRMPI(ierr);
192   }
193 
194   /* Post the Isends with the message length-info */
195   for (i=0,j=0; i<size; ++i) {
196     if (ilengths[i]) {
197       ierr = MPI_Isend((void*)(ilengths+i),1,MPIU_INT,i,tag,comm,s_waits+j);CHKERRMPI(ierr);
198       j++;
199     }
200   }
201 
202   /* Post waits on sends and receivs */
203   if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRMPI(ierr);}
204 
205   /* Pack up the received data */
206   ierr = PetscMalloc1(nrecvs,onodes);CHKERRQ(ierr);
207   for (i=0; i<nrecvs; ++i) {
208     (*onodes)[i] = w_status[i].MPI_SOURCE;
209     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; /* See comments in PetscGatherMessageLengths */
210   }
211   ierr = PetscFree2(r_waits,w_status);CHKERRQ(ierr);
212   PetscFunctionReturn(0);
213 }
214 
215 /*@C
216   PetscGatherMessageLengths2 - Computes info about messages that a MPI-node will receive,
217   including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths()
218   except it takes TWO ilenths and output TWO olengths.
219 
220   Collective
221 
222   Input Parameters:
223 + comm      - Communicator
224 . nsends    - number of messages that are to be sent.
225 . nrecvs    - number of messages being received
226 . ilengths1 - first array of integers of length sizeof(comm)
227 - ilengths2 - second array of integers of length sizeof(comm)
228 
229   Output Parameters:
230 + onodes    - list of node-ids from which messages are expected
231 . olengths1 - first corresponding message lengths
232 - olengths2 - second  message lengths
233 
234   Level: developer
235 
236   Notes:
237   With this info, the correct MPI_Irecv() can be posted with the correct
238   from-id, with a buffer with the right amount of memory required.
239 
240   The calling function deallocates the memory in onodes and olengths
241 
242   To determine nrecvs, one can use PetscGatherNumberOfMessages()
243 
244 .seealso: PetscGatherMessageLengths() and PetscGatherNumberOfMessages()
245 @*/
246 PetscErrorCode  PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2)
247 {
248   PetscErrorCode ierr;
249   PetscMPIInt    size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL;
250   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
251   MPI_Status     *w_status = NULL;
252 
253   PetscFunctionBegin;
254   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
255   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
256 
257   /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
258   ierr = PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status);CHKERRQ(ierr);
259   s_waits = r_waits + nrecvs;
260 
261   /* Post the Irecv to get the message length-info */
262   ierr = PetscMalloc1(nrecvs+1,olengths1);CHKERRQ(ierr);
263   ierr = PetscMalloc1(nrecvs+1,olengths2);CHKERRQ(ierr);
264   for (i=0; i<nrecvs; i++) {
265     buf_j = buf_r + (2*i);
266     ierr  = MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRMPI(ierr);
267   }
268 
269   /* Post the Isends with the message length-info */
270   for (i=0,j=0; i<size; ++i) {
271     if (ilengths1[i]) {
272       buf_j    = buf_s + (2*j);
273       buf_j[0] = *(ilengths1+i);
274       buf_j[1] = *(ilengths2+i);
275       ierr = MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j);CHKERRMPI(ierr);
276       j++;
277     }
278   }
279   PetscCheckFalse(j != nsends,PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d",j,nsends);
280 
281   /* Post waits on sends and receivs */
282   if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRMPI(ierr);}
283 
284   /* Pack up the received data */
285   ierr = PetscMalloc1(nrecvs+1,onodes);CHKERRQ(ierr);
286   for (i=0; i<nrecvs; ++i) {
287     (*onodes)[i]    = w_status[i].MPI_SOURCE;
288     buf_j           = buf_r + (2*i);
289     (*olengths1)[i] = buf_j[0];
290     (*olengths2)[i] = buf_j[1];
291   }
292 
293   ierr = PetscFree4(r_waits,buf_r,buf_s,w_status);CHKERRQ(ierr);
294   PetscFunctionReturn(0);
295 }
296 
297 /*
298 
299   Allocate a buffer sufficient to hold messages of size specified in olengths.
300   And post Irecvs on these buffers using node info from onodes
301 
302  */
303 PetscErrorCode  PetscPostIrecvInt(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscInt ***rbuf,MPI_Request **r_waits)
304 {
305   PetscErrorCode ierr;
306   PetscInt       **rbuf_t,i,len = 0;
307   MPI_Request    *r_waits_t;
308 
309   PetscFunctionBegin;
310   /* compute memory required for recv buffers */
311   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */
312 
313   /* allocate memory for recv buffers */
314   ierr = PetscMalloc1(nrecvs+1,&rbuf_t);CHKERRQ(ierr);
315   ierr = PetscMalloc1(len,&rbuf_t[0]);CHKERRQ(ierr);
316   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];
317 
318   /* Post the receives */
319   ierr = PetscMalloc1(nrecvs,&r_waits_t);CHKERRQ(ierr);
320   for (i=0; i<nrecvs; ++i) {
321     ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_INT,onodes[i],tag,comm,r_waits_t+i);CHKERRMPI(ierr);
322   }
323 
324   *rbuf    = rbuf_t;
325   *r_waits = r_waits_t;
326   PetscFunctionReturn(0);
327 }
328 
329 PetscErrorCode  PetscPostIrecvScalar(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscScalar ***rbuf,MPI_Request **r_waits)
330 {
331   PetscErrorCode ierr;
332   PetscMPIInt    i;
333   PetscScalar    **rbuf_t;
334   MPI_Request    *r_waits_t;
335   PetscInt       len = 0;
336 
337   PetscFunctionBegin;
338   /* compute memory required for recv buffers */
339   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */
340 
341   /* allocate memory for recv buffers */
342   ierr = PetscMalloc1(nrecvs+1,&rbuf_t);CHKERRQ(ierr);
343   ierr = PetscMalloc1(len,&rbuf_t[0]);CHKERRQ(ierr);
344   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];
345 
346   /* Post the receives */
347   ierr = PetscMalloc1(nrecvs,&r_waits_t);CHKERRQ(ierr);
348   for (i=0; i<nrecvs; ++i) {
349     ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_SCALAR,onodes[i],tag,comm,r_waits_t+i);CHKERRMPI(ierr);
350   }
351 
352   *rbuf    = rbuf_t;
353   *r_waits = r_waits_t;
354   PetscFunctionReturn(0);
355 }
356