xref: /petsc/src/sys/utils/mpimesg.c (revision 8af18dd833b18fc2df02377e75d76d273dc343a9)
1 
2 #include <petscsys.h>        /*I  "petscsys.h"  I*/
3 
4 /*@C
5   PetscGatherNumberOfMessages -  Computes the number of messages a node expects to receive
6 
7   Collective
8 
9   Input Parameters:
10 + comm     - Communicator
11 . iflags   - an array of integers of length sizeof(comm). A '1' in ilengths[i] represent a
12              message from current node to ith node. Optionally NULL
13 - ilengths - Non zero ilengths[i] represent a message to i of length ilengths[i].
14              Optionally NULL.
15 
16   Output Parameters:
17 . nrecvs    - number of messages received
18 
19   Level: developer
20 
21   Notes:
22   With this info, the correct message lengths can be determined using
23   PetscGatherMessageLengths()
24 
25   Either iflags or ilengths should be provided.  If iflags is not
26   provided (NULL) it can be computed from ilengths. If iflags is
27   provided, ilengths is not required.
28 
29 .seealso: PetscGatherMessageLengths()
30 @*/
31 PetscErrorCode  PetscGatherNumberOfMessages(MPI_Comm comm,const PetscMPIInt iflags[],const PetscMPIInt ilengths[],PetscMPIInt *nrecvs)
32 {
33   PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;
34   PetscErrorCode ierr;
35 
36   PetscFunctionBegin;
37   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
38   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
39 
40   ierr = PetscMalloc2(size,&recv_buf,size,&iflags_localm);CHKERRQ(ierr);
41 
42   /* If iflags not provided, compute iflags from ilengths */
43   if (!iflags) {
44     if (!ilengths) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided");
45     iflags_local = iflags_localm;
46     for (i=0; i<size; i++) {
47       if (ilengths[i]) iflags_local[i] = 1;
48       else iflags_local[i] = 0;
49     }
50   } else iflags_local = (PetscMPIInt*) iflags;
51 
52   /* Post an allreduce to determine the numer of messages the current node will receive */
53   ierr    = MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm);CHKERRMPI(ierr);
54   *nrecvs = recv_buf[rank];
55 
56   ierr = PetscFree2(recv_buf,iflags_localm);CHKERRQ(ierr);
57   PetscFunctionReturn(0);
58 }
59 
60 /*@C
61   PetscGatherMessageLengths - Computes info about messages that a MPI-node will receive,
62   including (from-id,length) pairs for each message.
63 
64   Collective
65 
66   Input Parameters:
67 + comm      - Communicator
68 . nsends    - number of messages that are to be sent.
69 . nrecvs    - number of messages being received
70 - ilengths  - an array of integers of length sizeof(comm)
71               a non zero ilengths[i] represent a message to i of length ilengths[i]
72 
73   Output Parameters:
74 + onodes    - list of node-ids from which messages are expected
75 - olengths  - corresponding message lengths
76 
77   Level: developer
78 
79   Notes:
80   With this info, the correct MPI_Irecv() can be posted with the correct
81   from-id, with a buffer with the right amount of memory required.
82 
83   The calling function deallocates the memory in onodes and olengths
84 
85   To determine nrecvs, one can use PetscGatherNumberOfMessages()
86 
87 .seealso: PetscGatherNumberOfMessages()
88 @*/
89 PetscErrorCode  PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths[],PetscMPIInt **onodes,PetscMPIInt **olengths)
90 {
91   PetscErrorCode ierr;
92   PetscMPIInt    size,rank,tag,i,j;
93   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
94   MPI_Status     *w_status = NULL;
95 
96   PetscFunctionBegin;
97   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
98   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
99   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
100 
101   /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
102   ierr    = PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status);CHKERRQ(ierr);
103   s_waits = r_waits+nrecvs;
104 
105   /* Post the Irecv to get the message length-info */
106   ierr = PetscMalloc1(nrecvs,olengths);CHKERRQ(ierr);
107   for (i=0; i<nrecvs; i++) {
108     ierr = MPI_Irecv((*olengths)+i,1,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRMPI(ierr);
109   }
110 
111   /* Post the Isends with the message length-info */
112   for (i=0,j=0; i<size; ++i) {
113     if (ilengths[i]) {
114       ierr = MPI_Isend((void*)(ilengths+i),1,MPI_INT,i,tag,comm,s_waits+j);CHKERRMPI(ierr);
115       j++;
116     }
117   }
118 
119   /* Post waits on sends and receivs */
120   if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRMPI(ierr);}
121 
122   /* Pack up the received data */
123   ierr = PetscMalloc1(nrecvs,onodes);CHKERRQ(ierr);
124   for (i=0; i<nrecvs; ++i) {
125     (*onodes)[i] = w_status[i].MPI_SOURCE;
126 #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
127     /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
128        It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
129        does not put correct value in recv buffer. See also
130        https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
131        https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
132      */
133     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
134 #endif
135   }
136   ierr = PetscFree2(r_waits,w_status);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 /*@C
141   PetscGatherMessageLengths2 - Computes info about messages that a MPI-node will receive,
142   including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths()
143   except it takes TWO ilenths and output TWO olengths.
144 
145   Collective
146 
147   Input Parameters:
148 + comm      - Communicator
149 . nsends    - number of messages that are to be sent.
150 . nrecvs    - number of messages being received
151 . ilengths1 - first array of integers of length sizeof(comm)
152 - ilengths2 - second array of integers of length sizeof(comm)
153 
154   Output Parameters:
155 + onodes    - list of node-ids from which messages are expected
156 . olengths1 - first corresponding message lengths
157 - olengths2 - second  message lengths
158 
159   Level: developer
160 
161   Notes:
162   With this info, the correct MPI_Irecv() can be posted with the correct
163   from-id, with a buffer with the right amount of memory required.
164 
165   The calling function deallocates the memory in onodes and olengths
166 
167   To determine nrecvs, one can use PetscGatherNumberOfMessages()
168 
169 .seealso: PetscGatherMessageLengths() and PetscGatherNumberOfMessages()
170 @*/
171 PetscErrorCode  PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2)
172 {
173   PetscErrorCode ierr;
174   PetscMPIInt    size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL;
175   MPI_Request    *s_waits  = NULL,*r_waits = NULL;
176   MPI_Status     *w_status = NULL;
177 
178   PetscFunctionBegin;
179   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
180   ierr = PetscCommGetNewTag(comm,&tag);CHKERRQ(ierr);
181 
182   /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
183   ierr = PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status);CHKERRQ(ierr);
184   s_waits = r_waits + nrecvs;
185 
186   /* Post the Irecv to get the message length-info */
187   ierr = PetscMalloc1(nrecvs+1,olengths1);CHKERRQ(ierr);
188   ierr = PetscMalloc1(nrecvs+1,olengths2);CHKERRQ(ierr);
189   for (i=0; i<nrecvs; i++) {
190     buf_j = buf_r + (2*i);
191     ierr  = MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);CHKERRMPI(ierr);
192   }
193 
194   /* Post the Isends with the message length-info */
195   for (i=0,j=0; i<size; ++i) {
196     if (ilengths1[i]) {
197       buf_j    = buf_s + (2*j);
198       buf_j[0] = *(ilengths1+i);
199       buf_j[1] = *(ilengths2+i);
200       ierr = MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j);CHKERRMPI(ierr);
201       j++;
202     }
203   }
204   if (j != nsends) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d\n",j,nsends);
205 
206   /* Post waits on sends and receivs */
207   if (nrecvs+nsends) {ierr = MPI_Waitall(nrecvs+nsends,r_waits,w_status);CHKERRMPI(ierr);}
208 
209   /* Pack up the received data */
210   ierr = PetscMalloc1(nrecvs+1,onodes);CHKERRQ(ierr);
211   for (i=0; i<nrecvs; ++i) {
212     (*onodes)[i]    = w_status[i].MPI_SOURCE;
213     buf_j           = buf_r + (2*i);
214     (*olengths1)[i] = buf_j[0];
215     (*olengths2)[i] = buf_j[1];
216   }
217 
218   ierr = PetscFree4(r_waits,buf_r,buf_s,w_status);CHKERRQ(ierr);
219   PetscFunctionReturn(0);
220 }
221 
222 /*
223 
224   Allocate a buffer sufficient to hold messages of size specified in olengths.
225   And post Irecvs on these buffers using node info from onodes
226 
227  */
228 PetscErrorCode  PetscPostIrecvInt(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscInt ***rbuf,MPI_Request **r_waits)
229 {
230   PetscErrorCode ierr;
231   PetscInt       **rbuf_t,i,len = 0;
232   MPI_Request    *r_waits_t;
233 
234   PetscFunctionBegin;
235   /* compute memory required for recv buffers */
236   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */
237 
238   /* allocate memory for recv buffers */
239   ierr = PetscMalloc1(nrecvs+1,&rbuf_t);CHKERRQ(ierr);
240   ierr = PetscMalloc1(len,&rbuf_t[0]);CHKERRQ(ierr);
241   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];
242 
243   /* Post the receives */
244   ierr = PetscMalloc1(nrecvs,&r_waits_t);CHKERRQ(ierr);
245   for (i=0; i<nrecvs; ++i) {
246     ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_INT,onodes[i],tag,comm,r_waits_t+i);CHKERRMPI(ierr);
247   }
248 
249   *rbuf    = rbuf_t;
250   *r_waits = r_waits_t;
251   PetscFunctionReturn(0);
252 }
253 
254 PetscErrorCode  PetscPostIrecvScalar(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscScalar ***rbuf,MPI_Request **r_waits)
255 {
256   PetscErrorCode ierr;
257   PetscMPIInt    i;
258   PetscScalar    **rbuf_t;
259   MPI_Request    *r_waits_t;
260   PetscInt       len = 0;
261 
262   PetscFunctionBegin;
263   /* compute memory required for recv buffers */
264   for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */
265 
266   /* allocate memory for recv buffers */
267   ierr = PetscMalloc1(nrecvs+1,&rbuf_t);CHKERRQ(ierr);
268   ierr = PetscMalloc1(len,&rbuf_t[0]);CHKERRQ(ierr);
269   for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];
270 
271   /* Post the receives */
272   ierr = PetscMalloc1(nrecvs,&r_waits_t);CHKERRQ(ierr);
273   for (i=0; i<nrecvs; ++i) {
274     ierr = MPI_Irecv(rbuf_t[i],olengths[i],MPIU_SCALAR,onodes[i],tag,comm,r_waits_t+i);CHKERRMPI(ierr);
275   }
276 
277   *rbuf    = rbuf_t;
278   *r_waits = r_waits_t;
279   PetscFunctionReturn(0);
280 }
281