xref: /petsc/src/sys/utils/mpimesg.c (revision d71ae5a4db6382e7f06317b8d368875286fe9008)
1 
2 #include <petscsys.h> /*I  "petscsys.h"  I*/
3 #include <petsc/private/mpiutils.h>
4 
5 /*@C
6   PetscGatherNumberOfMessages -  Computes the number of messages an MPI rank expects to receive during a neighbor communication
7 
8   Collective
9 
10   Input Parameters:
11 + comm     - Communicator
12 . iflags   - an array of integers of length sizeof(comm). A '1' in ilengths[i] represent a
13              message from current node to ith node. Optionally NULL
14 - ilengths - Non zero ilengths[i] represent a message to i of length ilengths[i].
15              Optionally NULL.
16 
17   Output Parameters:
18 . nrecvs    - number of messages received
19 
20   Level: developer
21 
22   Notes:
23   With this info, the correct message lengths can be determined using
24   `PetscGatherMessageLengths()`
25 
26   Either iflags or ilengths should be provided.  If iflags is not
27   provided (NULL) it can be computed from ilengths. If iflags is
28   provided, ilengths is not required.
29 
30 .seealso: `PetscGatherMessageLengths()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()`
31 @*/
32 PetscErrorCode PetscGatherNumberOfMessages(MPI_Comm comm, const PetscMPIInt iflags[], const PetscMPIInt ilengths[], PetscMPIInt *nrecvs)
33 {
34   PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm = NULL;
35 
36   PetscFunctionBegin;
37   PetscCallMPI(MPI_Comm_size(comm, &size));
38   PetscCallMPI(MPI_Comm_rank(comm, &rank));
39 
40   PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm));
41 
42   /* If iflags not provided, compute iflags from ilengths */
43   if (!iflags) {
44     PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided");
45     iflags_local = iflags_localm;
46     for (i = 0; i < size; i++) {
47       if (ilengths[i]) iflags_local[i] = 1;
48       else iflags_local[i] = 0;
49     }
50   } else iflags_local = (PetscMPIInt *)iflags;
51 
52   /* Post an allreduce to determine the numer of messages the current node will receive */
53   PetscCall(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm));
54   *nrecvs = recv_buf[rank];
55 
56   PetscCall(PetscFree2(recv_buf, iflags_localm));
57   PetscFunctionReturn(0);
58 }
59 
60 /*@C
61   PetscGatherMessageLengths - Computes infomation about messages that an MPI rank will receive,
62   including (from-id,length) pairs for each message.
63 
64   Collective
65 
66   Input Parameters:
67 + comm      - Communicator
68 . nsends    - number of messages that are to be sent.
69 . nrecvs    - number of messages being received
70 - ilengths  - an array of integers of length sizeof(comm)
71               a non zero ilengths[i] represent a message to i of length ilengths[i]
72 
73   Output Parameters:
74 + onodes    - list of node-ids from which messages are expected
75 - olengths  - corresponding message lengths
76 
77   Level: developer
78 
79   Notes:
80   With this info, the correct `MPI_Irecv()` can be posted with the correct
81   from-id, with a buffer with the right amount of memory required.
82 
83   The calling function deallocates the memory in onodes and olengths
84 
85   To determine nrecvs, one can use `PetscGatherNumberOfMessages()`
86 
87 .seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()`
88 @*/
89 PetscErrorCode PetscGatherMessageLengths(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths[], PetscMPIInt **onodes, PetscMPIInt **olengths)
90 {
91   PetscMPIInt  size, rank, tag, i, j;
92   MPI_Request *s_waits = NULL, *r_waits = NULL;
93   MPI_Status  *w_status = NULL;
94 
95   PetscFunctionBegin;
96   PetscCallMPI(MPI_Comm_size(comm, &size));
97   PetscCallMPI(MPI_Comm_rank(comm, &rank));
98   PetscCall(PetscCommGetNewTag(comm, &tag));
99 
100   /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
101   PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status));
102   s_waits = r_waits + nrecvs;
103 
104   /* Post the Irecv to get the message length-info */
105   PetscCall(PetscMalloc1(nrecvs, olengths));
106   for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((*olengths) + i, 1, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));
107 
108   /* Post the Isends with the message length-info */
109   for (i = 0, j = 0; i < size; ++i) {
110     if (ilengths[i]) {
111       PetscCallMPI(MPI_Isend((void *)(ilengths + i), 1, MPI_INT, i, tag, comm, s_waits + j));
112       j++;
113     }
114   }
115 
116   /* Post waits on sends and receives */
117   if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));
118 
119   /* Pack up the received data */
120   PetscCall(PetscMalloc1(nrecvs, onodes));
121   for (i = 0; i < nrecvs; ++i) {
122     (*onodes)[i] = w_status[i].MPI_SOURCE;
123 #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
124     /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
125        It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
126        does not put correct value in recv buffer. See also
127        https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
128        https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
129      */
130     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
131 #endif
132   }
133   PetscCall(PetscFree2(r_waits, w_status));
134   PetscFunctionReturn(0);
135 }
136 
137 /* Same as PetscGatherNumberOfMessages(), except using PetscInt for ilengths[] */
138 PetscErrorCode PetscGatherNumberOfMessages_Private(MPI_Comm comm, const PetscMPIInt iflags[], const PetscInt ilengths[], PetscMPIInt *nrecvs)
139 {
140   PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm = NULL;
141 
142   PetscFunctionBegin;
143   PetscCallMPI(MPI_Comm_size(comm, &size));
144   PetscCallMPI(MPI_Comm_rank(comm, &rank));
145 
146   PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm));
147 
148   /* If iflags not provided, compute iflags from ilengths */
149   if (!iflags) {
150     PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided");
151     iflags_local = iflags_localm;
152     for (i = 0; i < size; i++) {
153       if (ilengths[i]) iflags_local[i] = 1;
154       else iflags_local[i] = 0;
155     }
156   } else iflags_local = (PetscMPIInt *)iflags;
157 
158   /* Post an allreduce to determine the numer of messages the current node will receive */
159   PetscCall(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm));
160   *nrecvs = recv_buf[rank];
161 
162   PetscCall(PetscFree2(recv_buf, iflags_localm));
163   PetscFunctionReturn(0);
164 }
165 
166 /* Same as PetscGatherMessageLengths(), except using PetscInt for message lengths */
167 PetscErrorCode PetscGatherMessageLengths_Private(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscInt ilengths[], PetscMPIInt **onodes, PetscInt **olengths)
168 {
169   PetscMPIInt  size, rank, tag, i, j;
170   MPI_Request *s_waits = NULL, *r_waits = NULL;
171   MPI_Status  *w_status = NULL;
172 
173   PetscFunctionBegin;
174   PetscCallMPI(MPI_Comm_size(comm, &size));
175   PetscCallMPI(MPI_Comm_rank(comm, &rank));
176   PetscCall(PetscCommGetNewTag(comm, &tag));
177 
178   /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
179   PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status));
180   s_waits = r_waits + nrecvs;
181 
182   /* Post the Irecv to get the message length-info */
183   PetscCall(PetscMalloc1(nrecvs, olengths));
184   for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((*olengths) + i, 1, MPIU_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));
185 
186   /* Post the Isends with the message length-info */
187   for (i = 0, j = 0; i < size; ++i) {
188     if (ilengths[i]) {
189       PetscCallMPI(MPI_Isend((void *)(ilengths + i), 1, MPIU_INT, i, tag, comm, s_waits + j));
190       j++;
191     }
192   }
193 
194   /* Post waits on sends and receives */
195   if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));
196 
197   /* Pack up the received data */
198   PetscCall(PetscMalloc1(nrecvs, onodes));
199   for (i = 0; i < nrecvs; ++i) {
200     (*onodes)[i] = w_status[i].MPI_SOURCE;
201     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; /* See comments in PetscGatherMessageLengths */
202   }
203   PetscCall(PetscFree2(r_waits, w_status));
204   PetscFunctionReturn(0);
205 }
206 
207 /*@C
208   PetscGatherMessageLengths2 - Computes info about messages that a MPI rank will receive,
209   including (from-id,length) pairs for each message. Same functionality as `PetscGatherMessageLengths()`
210   except it takes TWO ilenths and output TWO olengths.
211 
212   Collective
213 
214   Input Parameters:
215 + comm      - Communicator
216 . nsends    - number of messages that are to be sent.
217 . nrecvs    - number of messages being received
218 . ilengths1 - first array of integers of length sizeof(comm)
219 - ilengths2 - second array of integers of length sizeof(comm)
220 
221   Output Parameters:
222 + onodes    - list of node-ids from which messages are expected
223 . olengths1 - first corresponding message lengths
224 - olengths2 - second  message lengths
225 
226   Level: developer
227 
228   Notes:
229   With this info, the correct `MPI_Irecv()` can be posted with the correct
230   from-id, with a buffer with the right amount of memory required.
231 
232   The calling function deallocates the memory in onodes and olengths
233 
234   To determine nrecvs, one can use PetscGatherNumberOfMessages()
235 
236 .seealso: `PetscGatherMessageLengths()`, `PetscGatherNumberOfMessages()`, `PetscCommBuildTwoSided()`
237 @*/
238 PetscErrorCode PetscGatherMessageLengths2(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths1[], const PetscMPIInt ilengths2[], PetscMPIInt **onodes, PetscMPIInt **olengths1, PetscMPIInt **olengths2)
239 {
240   PetscMPIInt  size, tag, i, j, *buf_s = NULL, *buf_r = NULL, *buf_j = NULL;
241   MPI_Request *s_waits = NULL, *r_waits = NULL;
242   MPI_Status  *w_status = NULL;
243 
244   PetscFunctionBegin;
245   PetscCallMPI(MPI_Comm_size(comm, &size));
246   PetscCall(PetscCommGetNewTag(comm, &tag));
247 
248   /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
249   PetscCall(PetscMalloc4(nrecvs + nsends, &r_waits, 2 * nrecvs, &buf_r, 2 * nsends, &buf_s, nrecvs + nsends, &w_status));
250   s_waits = r_waits + nrecvs;
251 
252   /* Post the Irecv to get the message length-info */
253   PetscCall(PetscMalloc1(nrecvs + 1, olengths1));
254   PetscCall(PetscMalloc1(nrecvs + 1, olengths2));
255   for (i = 0; i < nrecvs; i++) {
256     buf_j = buf_r + (2 * i);
257     PetscCallMPI(MPI_Irecv(buf_j, 2, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));
258   }
259 
260   /* Post the Isends with the message length-info */
261   for (i = 0, j = 0; i < size; ++i) {
262     if (ilengths1[i]) {
263       buf_j    = buf_s + (2 * j);
264       buf_j[0] = *(ilengths1 + i);
265       buf_j[1] = *(ilengths2 + i);
266       PetscCallMPI(MPI_Isend(buf_j, 2, MPI_INT, i, tag, comm, s_waits + j));
267       j++;
268     }
269   }
270   PetscCheck(j == nsends, PETSC_COMM_SELF, PETSC_ERR_PLIB, "j %d not equal to expected number of sends %d", j, nsends);
271 
272   /* Post waits on sends and receives */
273   if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));
274 
275   /* Pack up the received data */
276   PetscCall(PetscMalloc1(nrecvs + 1, onodes));
277   for (i = 0; i < nrecvs; ++i) {
278     (*onodes)[i]    = w_status[i].MPI_SOURCE;
279     buf_j           = buf_r + (2 * i);
280     (*olengths1)[i] = buf_j[0];
281     (*olengths2)[i] = buf_j[1];
282   }
283 
284   PetscCall(PetscFree4(r_waits, buf_r, buf_s, w_status));
285   PetscFunctionReturn(0);
286 }
287 
288 /*
289   Allocate a buffer sufficient to hold messages of size specified in olengths.
290   And post Irecvs on these buffers using node info from onodes
291  */
292 PetscErrorCode PetscPostIrecvInt(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscInt ***rbuf, MPI_Request **r_waits)
293 {
294   PetscInt   **rbuf_t, i, len = 0;
295   MPI_Request *r_waits_t;
296 
297   PetscFunctionBegin;
298   /* compute memory required for recv buffers */
299   for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */
300 
301   /* allocate memory for recv buffers */
302   PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t));
303   PetscCall(PetscMalloc1(len, &rbuf_t[0]));
304   for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1];
305 
306   /* Post the receives */
307   PetscCall(PetscMalloc1(nrecvs, &r_waits_t));
308   for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPI_Irecv(rbuf_t[i], olengths[i], MPIU_INT, onodes[i], tag, comm, r_waits_t + i));
309 
310   *rbuf    = rbuf_t;
311   *r_waits = r_waits_t;
312   PetscFunctionReturn(0);
313 }
314 
315 PetscErrorCode PetscPostIrecvScalar(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscScalar ***rbuf, MPI_Request **r_waits)
316 {
317   PetscMPIInt   i;
318   PetscScalar **rbuf_t;
319   MPI_Request  *r_waits_t;
320   PetscInt      len = 0;
321 
322   PetscFunctionBegin;
323   /* compute memory required for recv buffers */
324   for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */
325 
326   /* allocate memory for recv buffers */
327   PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t));
328   PetscCall(PetscMalloc1(len, &rbuf_t[0]));
329   for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1];
330 
331   /* Post the receives */
332   PetscCall(PetscMalloc1(nrecvs, &r_waits_t));
333   for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPI_Irecv(rbuf_t[i], olengths[i], MPIU_SCALAR, onodes[i], tag, comm, r_waits_t + i));
334 
335   *rbuf    = rbuf_t;
336   *r_waits = r_waits_t;
337   PetscFunctionReturn(0);
338 }
339