xref: /petsc/src/sys/utils/mpimesg.c (revision 0619917b5a674bb687c64e7daba2ab22be99af31)
1 
2 #include <petscsys.h> /*I  "petscsys.h"  I*/
3 #include <petsc/private/mpiutils.h>
4 
5 /*@C
6   PetscGatherNumberOfMessages -  Computes the number of messages an MPI rank expects to receive during a neighbor communication
7 
8   Collective
9 
10   Input Parameters:
11 + comm     - Communicator
12 . iflags   - an array of integers of length sizeof(comm). A '1' in `ilengths`[i] represent a
13              message from current node to ith node. Optionally `NULL`
14 - ilengths - Non zero ilengths[i] represent a message to i of length `ilengths`[i].
15              Optionally `NULL`.
16 
17   Output Parameter:
18 . nrecvs - number of messages received
19 
20   Level: developer
21 
22   Notes:
23   With this info, the correct message lengths can be determined using
24   `PetscGatherMessageLengths()`
25 
26   Either `iflags` or `ilengths` should be provided.  If `iflags` is not
27   provided (`NULL`) it can be computed from `ilengths`. If `iflags` is
28   provided, `ilengths` is not required.
29 
30 .seealso: `PetscGatherMessageLengths()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()`
31 @*/
32 PetscErrorCode PetscGatherNumberOfMessages(MPI_Comm comm, const PetscMPIInt iflags[], const PetscMPIInt ilengths[], PetscMPIInt *nrecvs)
33 {
34   PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm = NULL;
35 
36   PetscFunctionBegin;
37   PetscCallMPI(MPI_Comm_size(comm, &size));
38   PetscCallMPI(MPI_Comm_rank(comm, &rank));
39 
40   PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm));
41 
42   /* If iflags not provided, compute iflags from ilengths */
43   if (!iflags) {
44     PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided");
45     iflags_local = iflags_localm;
46     for (i = 0; i < size; i++) {
47       if (ilengths[i]) iflags_local[i] = 1;
48       else iflags_local[i] = 0;
49     }
50   } else iflags_local = (PetscMPIInt *)iflags;
51 
52   /* Post an allreduce to determine the number of messages the current MPI rank will receive */
53   PetscCall(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm));
54   *nrecvs = recv_buf[rank];
55 
56   PetscCall(PetscFree2(recv_buf, iflags_localm));
57   PetscFunctionReturn(PETSC_SUCCESS);
58 }
59 
60 /*@C
61   PetscGatherMessageLengths - Computes information about messages that an MPI rank will receive,
62   including (from-id,length) pairs for each message.
63 
64   Collective
65 
66   Input Parameters:
67 + comm     - Communicator
68 . nsends   - number of messages that are to be sent.
69 . nrecvs   - number of messages being received
70 - ilengths - an array of integers of length sizeof(comm)
71               a non zero `ilengths`[i] represent a message to i of length `ilengths`[i]
72 
73   Output Parameters:
74 + onodes   - list of ranks from which messages are expected
75 - olengths - corresponding message lengths
76 
77   Level: developer
78 
79   Notes:
80   With this info, the correct `MPI_Irecv()` can be posted with the correct
81   from-id, with a buffer with the right amount of memory required.
82 
83   The calling function deallocates the memory in onodes and olengths
84 
85   To determine `nrecvs`, one can use `PetscGatherNumberOfMessages()`
86 
87 .seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()`
88 @*/
89 PetscErrorCode PetscGatherMessageLengths(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths[], PetscMPIInt **onodes, PetscMPIInt **olengths)
90 {
91   PetscMPIInt  size, rank, tag, i, j;
92   MPI_Request *s_waits = NULL, *r_waits = NULL;
93   MPI_Status  *w_status = NULL;
94 
95   PetscFunctionBegin;
96   PetscCallMPI(MPI_Comm_size(comm, &size));
97   PetscCallMPI(MPI_Comm_rank(comm, &rank));
98   PetscCall(PetscCommGetNewTag(comm, &tag));
99 
100   /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
101   PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status));
102   s_waits = r_waits + nrecvs;
103 
104   /* Post the Irecv to get the message length-info */
105   PetscCall(PetscMalloc1(nrecvs, olengths));
106   for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((*olengths) + i, 1, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));
107 
108   /* Post the Isends with the message length-info */
109   for (i = 0, j = 0; i < size; ++i) {
110     if (ilengths[i]) {
111       PetscCallMPI(MPI_Isend((void *)(ilengths + i), 1, MPI_INT, i, tag, comm, s_waits + j));
112       j++;
113     }
114   }
115 
116   /* Post waits on sends and receives */
117   if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));
118 
119   /* Pack up the received data */
120   PetscCall(PetscMalloc1(nrecvs, onodes));
121   for (i = 0; i < nrecvs; ++i) {
122     (*onodes)[i] = w_status[i].MPI_SOURCE;
123 #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
124     /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
125        It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
126        does not put correct value in recv buffer. See also
127        https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
128        https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
129      */
130     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
131 #endif
132   }
133   PetscCall(PetscFree2(r_waits, w_status));
134   PetscFunctionReturn(PETSC_SUCCESS);
135 }
136 
137 /* Same as PetscGatherNumberOfMessages(), except using PetscInt for ilengths[] */
138 PetscErrorCode PetscGatherNumberOfMessages_Private(MPI_Comm comm, const PetscMPIInt iflags[], const PetscInt ilengths[], PetscMPIInt *nrecvs)
139 {
140   PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm = NULL;
141 
142   PetscFunctionBegin;
143   PetscCallMPI(MPI_Comm_size(comm, &size));
144   PetscCallMPI(MPI_Comm_rank(comm, &rank));
145 
146   PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm));
147 
148   /* If iflags not provided, compute iflags from ilengths */
149   if (!iflags) {
150     PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided");
151     iflags_local = iflags_localm;
152     for (i = 0; i < size; i++) {
153       if (ilengths[i]) iflags_local[i] = 1;
154       else iflags_local[i] = 0;
155     }
156   } else iflags_local = (PetscMPIInt *)iflags;
157 
158   /* Post an allreduce to determine the number of messages the current MPI rank will receive */
159   PetscCall(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm));
160   *nrecvs = recv_buf[rank];
161 
162   PetscCall(PetscFree2(recv_buf, iflags_localm));
163   PetscFunctionReturn(PETSC_SUCCESS);
164 }
165 
166 /* Same as PetscGatherMessageLengths(), except using PetscInt for message lengths */
167 PetscErrorCode PetscGatherMessageLengths_Private(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscInt ilengths[], PetscMPIInt **onodes, PetscInt **olengths)
168 {
169   PetscMPIInt  size, rank, tag, i, j;
170   MPI_Request *s_waits = NULL, *r_waits = NULL;
171   MPI_Status  *w_status = NULL;
172 
173   PetscFunctionBegin;
174   PetscCallMPI(MPI_Comm_size(comm, &size));
175   PetscCallMPI(MPI_Comm_rank(comm, &rank));
176   PetscCall(PetscCommGetNewTag(comm, &tag));
177 
178   /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
179   PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status));
180   s_waits = r_waits + nrecvs;
181 
182   /* Post the Irecv to get the message length-info */
183   PetscCall(PetscMalloc1(nrecvs, olengths));
184   for (i = 0; i < nrecvs; i++) PetscCallMPI(MPI_Irecv((*olengths) + i, 1, MPIU_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));
185 
186   /* Post the Isends with the message length-info */
187   for (i = 0, j = 0; i < size; ++i) {
188     if (ilengths[i]) {
189       PetscCallMPI(MPI_Isend((void *)(ilengths + i), 1, MPIU_INT, i, tag, comm, s_waits + j));
190       j++;
191     }
192   }
193 
194   /* Post waits on sends and receives */
195   if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));
196 
197   /* Pack up the received data */
198   PetscCall(PetscMalloc1(nrecvs, onodes));
199   for (i = 0; i < nrecvs; ++i) {
200     (*onodes)[i] = w_status[i].MPI_SOURCE;
201     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; /* See comments in PetscGatherMessageLengths */
202   }
203   PetscCall(PetscFree2(r_waits, w_status));
204   PetscFunctionReturn(PETSC_SUCCESS);
205 }
206 
207 /*@C
208   PetscGatherMessageLengths2 - Computes info about messages that a MPI rank will receive,
209   including (from-id,length) pairs for each message. Same functionality as `PetscGatherMessageLengths()`
210   except it takes TWO ilenths and output TWO olengths.
211 
212   Collective
213 
214   Input Parameters:
215 + comm      - Communicator
216 . nsends    - number of messages that are to be sent.
217 . nrecvs    - number of messages being received
218 . ilengths1 - first array of integers of length sizeof(comm)
219 - ilengths2 - second array of integers of length sizeof(comm)
220 
221   Output Parameters:
222 + onodes    - list of ranks from which messages are expected
223 . olengths1 - first corresponding message lengths
224 - olengths2 - second  message lengths
225 
226   Level: developer
227 
228   Notes:
229   With this info, the correct `MPI_Irecv()` can be posted with the correct
230   from-id, with a buffer with the right amount of memory required.
231 
232   The calling function should `PetscFree()` the memory in `onodes` and `olengths`
233 
234   To determine `nrecvs`, one can use `PetscGatherNumberOfMessages()`
235 
236 .seealso: `PetscGatherMessageLengths()`, `PetscGatherNumberOfMessages()`, `PetscCommBuildTwoSided()`
237 @*/
238 PetscErrorCode PetscGatherMessageLengths2(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths1[], const PetscMPIInt ilengths2[], PetscMPIInt **onodes, PetscMPIInt **olengths1, PetscMPIInt **olengths2)
239 {
240   PetscMPIInt  size, tag, i, j, *buf_s = NULL, *buf_r = NULL, *buf_j = NULL;
241   MPI_Request *s_waits = NULL, *r_waits = NULL;
242   MPI_Status  *w_status = NULL;
243 
244   PetscFunctionBegin;
245   PetscCallMPI(MPI_Comm_size(comm, &size));
246   PetscCall(PetscCommGetNewTag(comm, &tag));
247 
248   /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
249   PetscCall(PetscMalloc4(nrecvs + nsends, &r_waits, 2 * nrecvs, &buf_r, 2 * nsends, &buf_s, nrecvs + nsends, &w_status));
250   s_waits = r_waits + nrecvs;
251 
252   /* Post the Irecv to get the message length-info */
253   PetscCall(PetscMalloc1(nrecvs + 1, olengths1));
254   PetscCall(PetscMalloc1(nrecvs + 1, olengths2));
255   for (i = 0; i < nrecvs; i++) {
256     buf_j = buf_r + (2 * i);
257     PetscCallMPI(MPI_Irecv(buf_j, 2, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));
258   }
259 
260   /* Post the Isends with the message length-info */
261   for (i = 0, j = 0; i < size; ++i) {
262     if (ilengths1[i]) {
263       buf_j    = buf_s + (2 * j);
264       buf_j[0] = *(ilengths1 + i);
265       buf_j[1] = *(ilengths2 + i);
266       PetscCallMPI(MPI_Isend(buf_j, 2, MPI_INT, i, tag, comm, s_waits + j));
267       j++;
268     }
269   }
270   PetscCheck(j == nsends, PETSC_COMM_SELF, PETSC_ERR_PLIB, "j %d not equal to expected number of sends %d", j, nsends);
271 
272   /* Post waits on sends and receives */
273   if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));
274 
275   /* Pack up the received data */
276   PetscCall(PetscMalloc1(nrecvs + 1, onodes));
277   for (i = 0; i < nrecvs; ++i) {
278     (*onodes)[i]    = w_status[i].MPI_SOURCE;
279     buf_j           = buf_r + (2 * i);
280     (*olengths1)[i] = buf_j[0];
281     (*olengths2)[i] = buf_j[1];
282   }
283 
284   PetscCall(PetscFree4(r_waits, buf_r, buf_s, w_status));
285   PetscFunctionReturn(PETSC_SUCCESS);
286 }
287 
288 /*
289   Allocate a buffer sufficient to hold messages of size specified in olengths.
290   And post Irecvs on these buffers using node info from onodes
291  */
292 PetscErrorCode PetscPostIrecvInt(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscInt ***rbuf, MPI_Request **r_waits)
293 {
294   PetscInt   **rbuf_t, i, len = 0;
295   MPI_Request *r_waits_t;
296 
297   PetscFunctionBegin;
298   /* compute memory required for recv buffers */
299   for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */
300 
301   /* allocate memory for recv buffers */
302   PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t));
303   PetscCall(PetscMalloc1(len, &rbuf_t[0]));
304   for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1];
305 
306   /* Post the receives */
307   PetscCall(PetscMalloc1(nrecvs, &r_waits_t));
308   for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPI_Irecv(rbuf_t[i], olengths[i], MPIU_INT, onodes[i], tag, comm, r_waits_t + i));
309 
310   *rbuf    = rbuf_t;
311   *r_waits = r_waits_t;
312   PetscFunctionReturn(PETSC_SUCCESS);
313 }
314 
315 PetscErrorCode PetscPostIrecvScalar(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscScalar ***rbuf, MPI_Request **r_waits)
316 {
317   PetscMPIInt   i;
318   PetscScalar **rbuf_t;
319   MPI_Request  *r_waits_t;
320   PetscInt      len = 0;
321 
322   PetscFunctionBegin;
323   /* compute memory required for recv buffers */
324   for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */
325 
326   /* allocate memory for recv buffers */
327   PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t));
328   PetscCall(PetscMalloc1(len, &rbuf_t[0]));
329   for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1];
330 
331   /* Post the receives */
332   PetscCall(PetscMalloc1(nrecvs, &r_waits_t));
333   for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPI_Irecv(rbuf_t[i], olengths[i], MPIU_SCALAR, onodes[i], tag, comm, r_waits_t + i));
334 
335   *rbuf    = rbuf_t;
336   *r_waits = r_waits_t;
337   PetscFunctionReturn(PETSC_SUCCESS);
338 }
339