xref: /petsc/src/sys/utils/mpimesg.c (revision 030f984af8d8bb4c203755d35bded3c05b3d83ce)
1 
2 #include <petscsys.h>        /*I  "petscsys.h"  I*/
3 
4 /*@C
5   PetscGatherNumberOfMessages -  Computes the number of messages a node expects to receive
6 
7   Collective
8 
9   Input Parameters:
10 + comm     - Communicator
11 . iflags   - an array of integers of length sizeof(comm). A '1' in ilengths[i] represent a
12              message from current node to ith node. Optionally NULL
13 - ilengths - Non zero ilengths[i] represent a message to i of length ilengths[i].
14              Optionally NULL.
15 
16   Output Parameters:
17 . nrecvs    - number of messages received
18 
19   Level: developer
20 
21   Notes:
22   With this info, the correct message lengths can be determined using
23   PetscGatherMessageLengths()
24 
25   Either iflags or ilengths should be provided.  If iflags is not
26   provided (NULL) it can be computed from ilengths. If iflags is
27   provided, ilengths is not required.
28 
29 .seealso: PetscGatherMessageLengths()
30 @*/
31 PetscErrorCode  PetscGatherNumberOfMessages(MPI_Comm comm,const PetscMPIInt iflags[],const PetscMPIInt ilengths[],PetscMPIInt *nrecvs)
32 {
33   PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;
34   PetscErrorCode ierr;
35 
36   PetscFunctionBegin;
37   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
38   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
39 
40   ierr = PetscMalloc2(size,&recv_buf,size,&iflags_localm);CHKERRQ(ierr);
41 
42   /* If iflags not provided, compute iflags from ilengths */
43   if (!iflags) {
44     if (!ilengths) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided");
45     iflags_local = iflags_localm;
46     for (i=0; i<size; i++) {
47       if (ilengths[i]) iflags_local[i] = 1;
48       else iflags_local[i] = 0;
49     }
50   } else iflags_local = (PetscMPIInt*) iflags;
51 
52   /* Post an allreduce to determine the numer of messages the current node will receive */
53   ierr    = MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm);CHKERRMPI(ierr);
54   *nrecvs = recv_buf[rank];
55 
56   ierr = PetscFree2(recv_buf,iflags_localm);CHKERRQ(ierr);
57   PetscFunctionReturn(0);
58 }
59 
60 /*@C
61   PetscGatherMessageLengths - Computes info about messages that a MPI-node will receive,
62   including (from-id,length) pairs for each message.
63 
64   Collective
65 
66   Input Parameters:
67 + comm      - Communicator
68 . nsends    - number of messages that are to be sent.
69 . nrecvs    - number of messages being received
70 - ilengths  - an array of integers of length sizeof(comm)
71               a non zero ilengths[i] represent a message to i of length ilengths[i]
72 
73   Output Parameters:
74 + onodes    - list of node-ids from which messages are expected
75 - olengths  - corresponding message lengths
76 
77   Level: developer
78 
79   Notes:
80   With this info, the correct MPI_Irecv() can be posted with the correct
81   from-id, with a buffer with the right amount of memory required.
82 
83   The calling function deallocates the memory in onodes and olengths
84 
85   To determine nrecvs, one can use PetscGatherNumberOfMessages()
86 
87 .seealso: PetscGatherNumberOfMessages()
88 @*/
89 PetscErrorCode  PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths[],PetscMPIInt **onodes,PetscMPIInt **olengths)
90 {
91   PetscErrorCode ierr;
92   PetscMPIInt    size,rank,tag,i,j;
93   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
94   MPI_Status     *w_status = NULL;
95 
96   PetscFunctionBegin;
97   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
98   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
99   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
100 
101   /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
102   ierr    = PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status);CHKERRQ(ierr);
103   s_waits = r_waits+nrecvs;
104 
105   /* Post the Irecv to get the message length-info */
106   ierr = PetscMalloc1(nrecvs,olengths);CHKERRQ(ierr);
107   for (i=0; i<nrecvs; i++) {
108     ierr = MPI_Irecv((*olengths)+i,1,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRMPI(ierr);
109   }
110 
111   /* Post the Isends with the message length-info */
112   for (i=0,j=0; i<size; ++i) {
113     if (ilengths[i]) {
114       ierr = MPI_Isend((void*)(ilengths+i),1,MPI_INT,i,tag,comm,s_waits+j);CHKERRMPI(ierr);
115       j++;
116     }
117   }
118 
119   /* Post waits on sends and receivs */
120   if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRMPI(ierr);}
121 
122   /* Pack up the received data */
123   ierr = PetscMalloc1(nrecvs,onodes);CHKERRQ(ierr);
124   for (i=0; i<nrecvs; ++i) {
125     (*onodes)[i] = w_status[i].MPI_SOURCE;
126 #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
127     /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
128        It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
129        does not put correct value in recv buffer. See also
130        https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
131        https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
132      */
133     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
134 #endif
135   }
136   ierr = PetscFree2(r_waits,w_status);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
/*@C
  PetscGatherMessageLengths2 - Computes info about messages that a MPI-node will receive,
  including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths()
  except it takes TWO ilengths and outputs TWO olengths.

  Collective

  Input Parameters:
+ comm      - Communicator
. nsends    - number of messages that are to be sent.
. nrecvs    - number of messages being received
- ilengths1, ilengths2 - array of integers of length sizeof(comm)
              a non zero ilengths[i] represent a message to i of length ilengths[i]

  Output Parameters:
+ onodes    - list of node-ids from which messages are expected
- olengths1, olengths2 - corresponding message lengths

  Level: developer

  Notes:
  With this info, the correct MPI_Irecv() can be posted with the correct
  from-id, with a buffer with the right amount of memory required.

  The calling function deallocates the memory in onodes and olengths

  To determine nrecvs, one can use PetscGatherNumberOfMessages()

.seealso: PetscGatherMessageLengths() and PetscGatherNumberOfMessages()
@*/
PetscErrorCode  PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL;
  MPI_Request    *s_waits  = NULL,*r_waits = NULL;
  MPI_Status     *w_status = NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);

  /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
  /* each message carries a pair (length1,length2), hence the 2* sized staging buffers */
  ierr = PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status);CHKERRQ(ierr);
  s_waits = r_waits + nrecvs;

  /* Post the Irecv to get the message length-info; each receive lands in its own 2-int slot of buf_r */
  ierr = PetscMalloc1(nrecvs+1,olengths1);CHKERRQ(ierr);
  ierr = PetscMalloc1(nrecvs+1,olengths2);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    buf_j = buf_r + (2*i);
    ierr  = MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRMPI(ierr);
  }

  /* Post the Isends with the message length-info; pack the (length1,length2) pair into buf_s per send */
  for (i=0,j=0; i<size; ++i) {
    if (ilengths1[i]) {
      buf_j    = buf_s + (2*j);
      buf_j[0] = *(ilengths1+i);
      buf_j[1] = *(ilengths2+i);
      ierr = MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j);CHKERRMPI(ierr);
      j++;
    }
  }
  if (j != nsends) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d\n",j,nsends);

  /* Post waits on sends and receives; r_waits and s_waits are contiguous so one Waitall covers both */
  if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRMPI(ierr);}

  /* Pack up the received data: source rank from the status, the two lengths from the paired recv buffer */
  ierr = PetscMalloc1(nrecvs+1,onodes);CHKERRQ(ierr);
  for (i=0; i<nrecvs; ++i) {
    (*onodes)[i]    = w_status[i].MPI_SOURCE;
    buf_j           = buf_r + (2*i);
    (*olengths1)[i] = buf_j[0];
    (*olengths2)[i] = buf_j[1];
  }

  ierr = PetscFree4(r_waits,buf_r,buf_s,w_status);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
220 
221 /*
222 
223   Allocate a bufffer sufficient to hold messages of size specified in olengths.
224   And post Irecvs on these buffers using node info from onodes
225 
226  */
227 PetscErrorCode  PetscPostIrecvInt(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscInt ***rbuf,MPI_Request **r_waits)
228 {
229   PetscErrorCode ierr;
230   PetscInt       **rbuf_t,i,len = 0;
231   MPI_Request    *r_waits_t;
232 
233   PetscFunctionBegin;
234   /* compute memory required for recv buffers */
235   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */
236 
237   /* allocate memory for recv buffers */
238   ierr = PetscMalloc1(nrecvs+1,&rbuf_t);CHKERRQ(ierr);
239   ierr = PetscMalloc1(len,&rbuf_t[0]);CHKERRQ(ierr);
240   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];
241 
242   /* Post the receives */
243   ierr = PetscMalloc1(nrecvs,&r_waits_t);CHKERRQ(ierr);
244   for (i=0; i<nrecvs; ++i) {
245     ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_INT,onodes[i],tag,comm,r_waits_t+i);CHKERRMPI(ierr);
246   }
247 
248   *rbuf    = rbuf_t;
249   *r_waits = r_waits_t;
250   PetscFunctionReturn(0);
251 }
252 
253 PetscErrorCode  PetscPostIrecvScalar(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscScalar ***rbuf,MPI_Request **r_waits)
254 {
255   PetscErrorCode ierr;
256   PetscMPIInt    i;
257   PetscScalar    **rbuf_t;
258   MPI_Request    *r_waits_t;
259   PetscInt       len = 0;
260 
261   PetscFunctionBegin;
262   /* compute memory required for recv buffers */
263   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */
264 
265   /* allocate memory for recv buffers */
266   ierr = PetscMalloc1(nrecvs+1,&rbuf_t);CHKERRQ(ierr);
267   ierr = PetscMalloc1(len,&rbuf_t[0]);CHKERRQ(ierr);
268   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];
269 
270   /* Post the receives */
271   ierr = PetscMalloc1(nrecvs,&r_waits_t);CHKERRQ(ierr);
272   for (i=0; i<nrecvs; ++i) {
273     ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_SCALAR,onodes[i],tag,comm,r_waits_t+i);CHKERRMPI(ierr);
274   }
275 
276   *rbuf    = rbuf_t;
277   *r_waits = r_waits_t;
278   PetscFunctionReturn(0);
279 }
280