#include <petscsys.h> /*I  "petscsys.h"  I*/
#include <petsc/private/mpiutils.h>

/*@C
  PetscGatherNumberOfMessages - Computes the number of messages an MPI rank expects to receive during a neighbor communication

  Collective

  Input Parameters:
+ comm     - Communicator
. iflags   - an array of integers of length equal to the size of `comm`. A '1' in `iflags`[i] represents a
             message from the current rank to the ith rank. Optionally `NULL`
- ilengths - a nonzero `ilengths`[i] represents a message to rank i of length `ilengths`[i].
             Optionally `NULL`

  Output Parameter:
. nrecvs - number of messages the current rank will receive

  Level: developer

  Notes:
  With this info, the correct message lengths can be determined using
  `PetscGatherMessageLengths()`

  Either `iflags` or `ilengths` should be provided. If `iflags` is not
  provided (`NULL`) it can be computed from `ilengths`. If `iflags` is
  provided, `ilengths` is not required.

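  Example Usage:
  A minimal sketch, not taken from an actual PETSc example; `lens` is a hypothetical array of per-destination send lengths on this rank\:
.vb
  PetscMPIInt size, *lens, nrecvs;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscCalloc1(size, &lens));
  // fill lens[i] with the length of the message this rank sends to rank i (0 = no message)
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, lens, &nrecvs));
  // nrecvs now holds how many messages this rank will receive
  PetscCall(PetscFree(lens));
.ve
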
.seealso: `PetscGatherMessageLengths()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()`
@*/
PetscErrorCode PetscGatherNumberOfMessages(MPI_Comm comm, const PetscMPIInt iflags[], const PetscMPIInt ilengths[], PetscMPIInt *nrecvs)
{
  PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm = NULL;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm));

  /* If iflags not provided, compute iflags from ilengths */
  if (!iflags) {
    PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided");
    iflags_local = iflags_localm;
    for (i = 0; i < size; i++) {
      if (ilengths[i]) iflags_local[i] = 1;
      else iflags_local[i] = 0;
    }
  } else iflags_local = (PetscMPIInt *)iflags;

  /* Post an allreduce to determine the number of messages the current MPI rank will receive */
  PetscCall(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm));
  *nrecvs = recv_buf[rank];

  PetscCall(PetscFree2(recv_buf, iflags_localm));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscGatherMessageLengths - Computes information about messages that an MPI rank will receive,
  including (from-id,length) pairs for each message.

  Collective

  Input Parameters:
+ comm     - Communicator
. nsends   - number of messages that are to be sent
. nrecvs   - number of messages being received
- ilengths - an array of integers of length equal to the size of `comm`;
             a nonzero `ilengths`[i] represents a message to rank i of length `ilengths`[i]

  Output Parameters:
+ onodes   - list of ranks from which messages are expected
- olengths - corresponding message lengths

  Level: developer

  Notes:
  With this info, the correct `MPI_Irecv()` can be posted with the correct
  from-id, with a buffer of the right size.

  The calling function is responsible for freeing the memory in `onodes` and `olengths`, for example with `PetscFree()`

  To determine `nrecvs`, one can use `PetscGatherNumberOfMessages()`

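  Example Usage:
  A minimal sketch of the typical pairing with `PetscGatherNumberOfMessages()`; `lens` (per-destination send lengths) and `nsends`
  (the number of nonzero entries of `lens`) are hypothetical values computed by the caller\:
.vb
  PetscMPIInt *onodes, *olengths, nrecvs;
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, lens, &nrecvs));
  PetscCall(PetscGatherMessageLengths(comm, nsends, nrecvs, lens, &onodes, &olengths));
  // post receives, e.g. with PetscPostIrecvInt(), using the (onodes[i], olengths[i]) pairs
  PetscCall(PetscFree(onodes));
  PetscCall(PetscFree(olengths));
.ve
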
.seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()`
@*/
PetscErrorCode PetscGatherMessageLengths(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths[], PetscMPIInt **onodes, PetscMPIInt **olengths)
{
  PetscMPIInt  size, rank, tag, i, j;
  MPI_Request *s_waits = NULL, *r_waits = NULL;
  MPI_Status  *w_status = NULL;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscCommGetNewTag(comm, &tag));

  /* cannot use PetscMalloc3() here because r_waits and s_waits MUST be contiguous for the call to MPI_Waitall() */
  PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status));
  if (nrecvs + nsends) s_waits = r_waits + nrecvs;

  /* Post the Irecv to get the message length-info */
  PetscCall(PetscMalloc1(nrecvs, olengths));
  for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((*olengths) + i, 1, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));

  /* Post the Isends with the message length-info */
  for (i = 0, j = 0; i < size; ++i) {
    if (ilengths[i]) {
      PetscCallMPI(MPI_Isend((void *)(ilengths + i), 1, MPI_INT, i, tag, comm, s_waits + j));
      j++;
    }
  }

  /* Post waits on sends and receives */
  if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));

  /* Pack up the received data */
  PetscCall(PetscMalloc1(nrecvs, onodes));
  for (i = 0; i < nrecvs; ++i) {
    (*onodes)[i] = w_status[i].MPI_SOURCE;
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
    /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
       It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
       does not put the correct value in the recv buffer. See also
       https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
       https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
     */
    if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
#endif
  }
  PetscCall(PetscFree2(r_waits, w_status));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Same as PetscGatherNumberOfMessages(), except using PetscInt for ilengths[] */
PetscErrorCode PetscGatherNumberOfMessages_Private(MPI_Comm comm, const PetscMPIInt iflags[], const PetscInt ilengths[], PetscMPIInt *nrecvs)
{
  PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm = NULL;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm));

  /* If iflags not provided, compute iflags from ilengths */
  if (!iflags) {
    PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided");
    iflags_local = iflags_localm;
    for (i = 0; i < size; i++) {
      if (ilengths[i]) iflags_local[i] = 1;
      else iflags_local[i] = 0;
    }
  } else iflags_local = (PetscMPIInt *)iflags;

  /* Post an allreduce to determine the number of messages the current MPI rank will receive */
  PetscCall(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm));
  *nrecvs = recv_buf[rank];

  PetscCall(PetscFree2(recv_buf, iflags_localm));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Same as PetscGatherMessageLengths(), except using PetscInt for message lengths */
PetscErrorCode PetscGatherMessageLengths_Private(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscInt ilengths[], PetscMPIInt **onodes, PetscInt **olengths)
{
  PetscMPIInt  size, rank, tag, i, j;
  MPI_Request *s_waits = NULL, *r_waits = NULL;
  MPI_Status  *w_status = NULL;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscCommGetNewTag(comm, &tag));

  /* cannot use PetscMalloc3() here because r_waits and s_waits MUST be contiguous for the call to MPI_Waitall() */
  PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status));
  if (r_waits) s_waits = r_waits + nrecvs;

  /* Post the Irecv to get the message length-info */
  PetscCall(PetscMalloc1(nrecvs, olengths));
  for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((*olengths) + i, 1, MPIU_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));

  /* Post the Isends with the message length-info */
  for (i = 0, j = 0; i < size; ++i) {
    if (ilengths[i]) {
      PetscCallMPI(MPI_Isend((void *)(ilengths + i), 1, MPIU_INT, i, tag, comm, s_waits + j));
      j++;
    }
  }

  /* Post waits on sends and receives */
  if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));

  /* Pack up the received data */
  PetscCall(PetscMalloc1(nrecvs, onodes));
  for (i = 0; i < nrecvs; ++i) {
    (*onodes)[i] = w_status[i].MPI_SOURCE;
    if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; /* See comments in PetscGatherMessageLengths */
  }
  PetscCall(PetscFree2(r_waits, w_status));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscGatherMessageLengths2 - Computes info about messages that an MPI rank will receive,
  including (from-id,length) pairs for each message. Same functionality as `PetscGatherMessageLengths()`
  except it takes TWO ilengths and outputs TWO olengths.

  Collective

  Input Parameters:
+ comm      - Communicator
. nsends    - number of messages that are to be sent
. nrecvs    - number of messages being received
. ilengths1 - first array of integers of length equal to the size of `comm`
- ilengths2 - second array of integers of length equal to the size of `comm`

  Output Parameters:
+ onodes    - list of ranks from which messages are expected
. olengths1 - first corresponding message lengths
- olengths2 - second corresponding message lengths

  Level: developer

  Notes:
  With this info, the correct `MPI_Irecv()` can be posted with the correct
  from-id, with a buffer of the right size.

  The calling function should `PetscFree()` the memory in `onodes`, `olengths1`, and `olengths2`

  To determine `nrecvs`, one can use `PetscGatherNumberOfMessages()`

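  Example Usage:
  A minimal sketch; `lens1` and `lens2` (per-destination send lengths) and `nsends` are hypothetical values computed by the caller\:
.vb
  PetscMPIInt *onodes, *olens1, *olens2, nrecvs;
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, lens1, &nrecvs));
  PetscCall(PetscGatherMessageLengths2(comm, nsends, nrecvs, lens1, lens2, &onodes, &olens1, &olens2));
  // post receives sized by olens1[i]/olens2[i] from ranks onodes[i]
  PetscCall(PetscFree(onodes));
  PetscCall(PetscFree(olens1));
  PetscCall(PetscFree(olens2));
.ve
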
.seealso: `PetscGatherMessageLengths()`, `PetscGatherNumberOfMessages()`, `PetscCommBuildTwoSided()`
@*/
PetscErrorCode PetscGatherMessageLengths2(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths1[], const PetscMPIInt ilengths2[], PetscMPIInt **onodes, PetscMPIInt **olengths1, PetscMPIInt **olengths2)
{
  PetscMPIInt  size, tag, i, j, *buf_s = NULL, *buf_r = NULL, *buf_j = NULL;
  MPI_Request *s_waits = NULL, *r_waits = NULL;
  MPI_Status  *w_status = NULL;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscCommGetNewTag(comm, &tag));

  /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
  PetscCall(PetscMalloc4(nrecvs + nsends, &r_waits, 2 * nrecvs, &buf_r, 2 * nsends, &buf_s, nrecvs + nsends, &w_status));
  s_waits = r_waits + nrecvs;

  /* Post the Irecv to get the message length-info */
  PetscCall(PetscMalloc1(nrecvs + 1, olengths1));
  PetscCall(PetscMalloc1(nrecvs + 1, olengths2));
  for (i = 0; i < nrecvs; i++) {
    buf_j = buf_r + (2 * i);
    PetscCallMPI(MPI_Irecv(buf_j, 2, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));
  }

  /* Post the Isends with the message length-info */
  for (i = 0, j = 0; i < size; ++i) {
    if (ilengths1[i]) {
      buf_j    = buf_s + (2 * j);
      buf_j[0] = *(ilengths1 + i);
      buf_j[1] = *(ilengths2 + i);
      PetscCallMPI(MPI_Isend(buf_j, 2, MPI_INT, i, tag, comm, s_waits + j));
      j++;
    }
  }
  PetscCheck(j == nsends, PETSC_COMM_SELF, PETSC_ERR_PLIB, "j %d not equal to expected number of sends %d", j, nsends);

  /* Post waits on sends and receives */
  if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));

  /* Pack up the received data */
  PetscCall(PetscMalloc1(nrecvs + 1, onodes));
  for (i = 0; i < nrecvs; ++i) {
    (*onodes)[i]    = w_status[i].MPI_SOURCE;
    buf_j           = buf_r + (2 * i);
    (*olengths1)[i] = buf_j[0];
    (*olengths2)[i] = buf_j[1];
  }

  PetscCall(PetscFree4(r_waits, buf_r, buf_s, w_status));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Allocate buffers sufficient to hold the messages of the sizes specified in olengths,
  and post Irecvs on these buffers using the rank info from onodes
 */
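/*
  A hypothetical usage sketch (assuming onodes/olengths came from PetscGatherMessageLengths()
  and tag from PetscCommGetNewTag()); wait on the returned requests before reading the buffers:

    PetscInt    **rbuf;
    MPI_Request  *r_waits;
    PetscCall(PetscPostIrecvInt(comm, tag, nrecvs, onodes, olengths, &rbuf, &r_waits));
    // ... post the matching sends, then ...
    PetscCallMPI(MPI_Waitall(nrecvs, r_waits, MPI_STATUSES_IGNORE));
    // rbuf[i] now holds the message from rank onodes[i];
    // the caller eventually frees rbuf[0], rbuf, and r_waits
*/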
PetscErrorCode PetscPostIrecvInt(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscInt ***rbuf, MPI_Request **r_waits)
{
  PetscInt   **rbuf_t, i, len = 0;
  MPI_Request *r_waits_t;

  PetscFunctionBegin;
  /* compute memory required for recv buffers */
  for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */

  /* allocate memory for recv buffers */
  PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t));
  PetscCall(PetscMalloc1(len, &rbuf_t[0]));
  for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1];

  /* Post the receives */
  PetscCall(PetscMalloc1(nrecvs, &r_waits_t));
  for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPI_Irecv(rbuf_t[i], olengths[i], MPIU_INT, onodes[i], tag, comm, r_waits_t + i));

  *rbuf    = rbuf_t;
  *r_waits = r_waits_t;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode PetscPostIrecvScalar(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscScalar ***rbuf, MPI_Request **r_waits)
{
  PetscMPIInt   i;
  PetscScalar **rbuf_t;
  MPI_Request  *r_waits_t;
  PetscInt      len = 0;

  PetscFunctionBegin;
  /* compute memory required for recv buffers */
  for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */

  /* allocate memory for recv buffers */
  PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t));
  PetscCall(PetscMalloc1(len, &rbuf_t[0]));
  for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1];

  /* Post the receives */
  PetscCall(PetscMalloc1(nrecvs, &r_waits_t));
  for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPI_Irecv(rbuf_t[i], olengths[i], MPIU_SCALAR, onodes[i], tag, comm, r_waits_t + i));

  *rbuf    = rbuf_t;
  *r_waits = r_waits_t;
  PetscFunctionReturn(PETSC_SUCCESS);
}
338