xref: /petsc/src/sys/utils/mpimesg.c (revision 2ff79c18c26c94ed8cb599682f680f231dca6444)
1 #include <petscsys.h> /*I  "petscsys.h"  I*/
2 
3 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscGatherNumberOfMessages_Private(MPI_Comm, const PetscMPIInt[], const PetscInt[], PetscMPIInt *);
4 PETSC_SINGLE_LIBRARY_INTERN PetscErrorCode PetscGatherMessageLengths_Private(MPI_Comm, PetscMPIInt, PetscMPIInt, const PetscInt[], PetscMPIInt **, PetscInt **);
5 
/*@C
  PetscGatherNumberOfMessages -  Computes the number of messages an MPI rank expects to receive during a neighbor communication

  Collective, No Fortran Support

  Input Parameters:
+ comm     - Communicator
. iflags   - an array of integers of length equal to the size of `comm`. A '1' in `iflags`[i] represents a
             message from the current rank to the ith rank. Optionally `NULL`
- ilengths - Non zero `ilengths`[i] represent a message to i of length `ilengths`[i].
             Optionally `NULL`.

  Output Parameter:
. nrecvs - number of messages received

  Level: developer

  Notes:
  With this info, the correct message lengths can be determined using
  `PetscGatherMessageLengths()`

  Either `iflags` or `ilengths` should be provided.  If `iflags` is not
  provided (`NULL`) it can be computed from `ilengths`. If `iflags` is
  provided, `ilengths` is not required.

.seealso: `PetscGatherMessageLengths()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()`
@*/
PetscErrorCode PetscGatherNumberOfMessages(MPI_Comm comm, const PetscMPIInt iflags[], const PetscMPIInt ilengths[], PetscMPIInt *nrecvs)
{
  PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm));

  /* If iflags not provided, compute iflags from the nonzero pattern of ilengths */
  if (!iflags) {
    PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided");
    iflags_local = iflags_localm;
    for (i = 0; i < size; i++) {
      if (ilengths[i]) iflags_local[i] = 1;
      else iflags_local[i] = 0;
    }
  } else iflags_local = (PetscMPIInt *)iflags; /* cast away const: the MPI send buffer argument is not const-qualified */

  /* Post an allreduce to determine the number of messages the current MPI rank will receive;
     after the sum, recv_buf[r] holds how many ranks flagged a message destined for rank r */
  PetscCallMPI(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm));
  *nrecvs = recv_buf[rank];

  PetscCall(PetscFree2(recv_buf, iflags_localm));
  PetscFunctionReturn(PETSC_SUCCESS);
}
60 
61 /*@C
62   PetscGatherMessageLengths - Computes information about messages that an MPI rank will receive,
63   including (from-id,length) pairs for each message.
64 
65   Collective, No Fortran Support
66 
67   Input Parameters:
68 + comm     - Communicator
69 . nsends   - number of messages that are to be sent.
70 . nrecvs   - number of messages being received
71 - ilengths - an array of integers of length sizeof(comm)
72               a non zero `ilengths`[i] represent a message to i of length `ilengths`[i]
73 
74   Output Parameters:
75 + onodes   - list of ranks from which messages are expected
76 - olengths - corresponding message lengths
77 
78   Level: developer
79 
80   Notes:
81   With this info, the correct `MPIU_Irecv()` can be posted with the correct
82   from-id, with a buffer with the right amount of memory required.
83 
84   The calling function deallocates the memory in onodes and olengths
85 
86   To determine `nrecvs`, one can use `PetscGatherNumberOfMessages()`
87 
88 .seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths2()`, `PetscCommBuildTwoSided()`
89 @*/
90 PetscErrorCode PetscGatherMessageLengths(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths[], PetscMPIInt **onodes, PetscMPIInt **olengths)
91 {
92   PetscMPIInt  size, rank, tag, i, j;
93   MPI_Request *s_waits, *r_waits;
94   MPI_Status  *w_status;
95 
96   PetscFunctionBegin;
97   PetscCallMPI(MPI_Comm_size(comm, &size));
98   PetscCallMPI(MPI_Comm_rank(comm, &rank));
99   PetscCall(PetscCommGetNewTag(comm, &tag));
100 
101   /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
102   PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status));
103   s_waits = PetscSafePointerPlusOffset(r_waits, nrecvs);
104 
105   /* Post the Irecv to get the message length-info */
106   PetscCall(PetscMalloc1(nrecvs, olengths));
107   for (i = 0; i < nrecvs; i++) PetscCallMPI(MPIU_Irecv((*olengths) + i, 1, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));
108 
109   /* Post the Isends with the message length-info */
110   for (i = 0, j = 0; i < size; ++i) {
111     if (ilengths[i]) {
112       PetscCallMPI(MPIU_Isend((void *)(ilengths + i), 1, MPI_INT, i, tag, comm, s_waits + j));
113       j++;
114     }
115   }
116 
117   /* Post waits on sends and receives */
118   if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));
119 
120   /* Pack up the received data */
121   PetscCall(PetscMalloc1(nrecvs, onodes));
122   for (i = 0; i < nrecvs; ++i) {
123     (*onodes)[i] = w_status[i].MPI_SOURCE;
124 #if defined(PETSC_HAVE_OPENMPI)
125     /* This line is a workaround for a bug in Open MPI 2.1.1 distributed by Ubuntu-18.04.2 LTS.
126        It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. Open MPI
127        does not put correct value in recv buffer. See also
128        https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
129        https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
130      */
131     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
132 #endif
133   }
134   PetscCall(PetscFree2(r_waits, w_status));
135   PetscFunctionReturn(PETSC_SUCCESS);
136 }
137 
138 /* Same as PetscGatherNumberOfMessages(), except using PetscInt for ilengths[] */
139 PetscErrorCode PetscGatherNumberOfMessages_Private(MPI_Comm comm, const PetscMPIInt iflags[], const PetscInt ilengths[], PetscMPIInt *nrecvs)
140 {
141   PetscMPIInt size, rank, *recv_buf, i, *iflags_local = NULL, *iflags_localm;
142 
143   PetscFunctionBegin;
144   PetscCallMPI(MPI_Comm_size(comm, &size));
145   PetscCallMPI(MPI_Comm_rank(comm, &rank));
146 
147   PetscCall(PetscMalloc2(size, &recv_buf, size, &iflags_localm));
148 
149   /* If iflags not provided, compute iflags from ilengths */
150   if (!iflags) {
151     PetscCheck(ilengths, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Either iflags or ilengths should be provided");
152     iflags_local = iflags_localm;
153     for (i = 0; i < size; i++) {
154       if (ilengths[i]) iflags_local[i] = 1;
155       else iflags_local[i] = 0;
156     }
157   } else iflags_local = (PetscMPIInt *)iflags;
158 
159   /* Post an allreduce to determine the number of messages the current MPI rank will receive */
160   PetscCallMPI(MPIU_Allreduce(iflags_local, recv_buf, size, MPI_INT, MPI_SUM, comm));
161   *nrecvs = recv_buf[rank];
162 
163   PetscCall(PetscFree2(recv_buf, iflags_localm));
164   PetscFunctionReturn(PETSC_SUCCESS);
165 }
166 
167 /* Same as PetscGatherMessageLengths(), except using PetscInt for message lengths */
168 PetscErrorCode PetscGatherMessageLengths_Private(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscInt ilengths[], PetscMPIInt **onodes, PetscInt **olengths)
169 {
170   PetscMPIInt  size, rank, tag, i, j;
171   MPI_Request *s_waits, *r_waits;
172   MPI_Status  *w_status;
173 
174   PetscFunctionBegin;
175   PetscCallMPI(MPI_Comm_size(comm, &size));
176   PetscCallMPI(MPI_Comm_rank(comm, &rank));
177   PetscCall(PetscCommGetNewTag(comm, &tag));
178 
179   /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
180   PetscCall(PetscMalloc2(nrecvs + nsends, &r_waits, nrecvs + nsends, &w_status));
181   s_waits = PetscSafePointerPlusOffset(r_waits, nrecvs);
182 
183   /* Post the Irecv to get the message length-info */
184   PetscCall(PetscMalloc1(nrecvs, olengths));
185   for (i = 0; i < nrecvs; i++) PetscCallMPI(MPIU_Irecv((*olengths) + i, 1, MPIU_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));
186 
187   /* Post the Isends with the message length-info */
188   for (i = 0, j = 0; i < size; ++i) {
189     if (ilengths[i]) {
190       PetscCallMPI(MPIU_Isend((void *)(ilengths + i), 1, MPIU_INT, i, tag, comm, s_waits + j));
191       j++;
192     }
193   }
194 
195   /* Post waits on sends and receives */
196   if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));
197 
198   /* Pack up the received data */
199   PetscCall(PetscMalloc1(nrecvs, onodes));
200   for (i = 0; i < nrecvs; ++i) {
201     (*onodes)[i] = w_status[i].MPI_SOURCE;
202     if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank]; /* See comments in PetscGatherMessageLengths */
203   }
204   PetscCall(PetscFree2(r_waits, w_status));
205   PetscFunctionReturn(PETSC_SUCCESS);
206 }
207 
/*@C
  PetscGatherMessageLengths2 - Computes info about messages that an MPI rank will receive,
  including (from-id,length) pairs for each message. Same functionality as `PetscGatherMessageLengths()`
  except it takes TWO ilengths and outputs TWO olengths.

  Collective, No Fortran Support

  Input Parameters:
+ comm      - Communicator
. nsends    - number of messages that are to be sent.
. nrecvs    - number of messages being received
. ilengths1 - first array of integers of length equal to the size of `comm`
- ilengths2 - second array of integers of length equal to the size of `comm`

  Output Parameters:
+ onodes    - list of ranks from which messages are expected
. olengths1 - first corresponding message lengths
- olengths2 - second message lengths

  Level: developer

  Notes:
  With this info, the correct `MPIU_Irecv()` can be posted with the correct
  from-id, with a buffer with the right amount of memory required.

  The calling function should `PetscFree()` the memory in `onodes` and `olengths`

  To determine `nrecvs`, one can use `PetscGatherNumberOfMessages()`

.seealso: `PetscGatherMessageLengths()`, `PetscGatherNumberOfMessages()`, `PetscCommBuildTwoSided()`
@*/
PetscErrorCode PetscGatherMessageLengths2(MPI_Comm comm, PetscMPIInt nsends, PetscMPIInt nrecvs, const PetscMPIInt ilengths1[], const PetscMPIInt ilengths2[], PetscMPIInt **onodes, PetscMPIInt **olengths1, PetscMPIInt **olengths2)
{
  PetscMPIInt  size, tag, i, j, *buf_s, *buf_r, *buf_j = NULL;
  MPI_Request *s_waits, *r_waits;
  MPI_Status  *w_status;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscCommGetNewTag(comm, &tag));

  /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
  PetscCall(PetscMalloc4(nrecvs + nsends, &r_waits, 2 * nrecvs, &buf_r, 2 * nsends, &buf_s, nrecvs + nsends, &w_status));
  s_waits = PetscSafePointerPlusOffset(r_waits, nrecvs);

  /* Post the Irecv to get the message length-info; each message carries a (length1,length2) pair */
  PetscCall(PetscMalloc1(nrecvs + 1, olengths1));
  PetscCall(PetscMalloc1(nrecvs + 1, olengths2));
  for (i = 0; i < nrecvs; i++) {
    buf_j = buf_r + (2 * i);
    PetscCallMPI(MPIU_Irecv(buf_j, 2, MPI_INT, MPI_ANY_SOURCE, tag, comm, r_waits + i));
  }

  /* Post the Isends with the message length-info; a nonzero ilengths1[i] marks a message to rank i */
  for (i = 0, j = 0; i < size; ++i) {
    if (ilengths1[i]) {
      buf_j    = buf_s + (2 * j);
      buf_j[0] = *(ilengths1 + i);
      buf_j[1] = *(ilengths2 + i);
      PetscCallMPI(MPIU_Isend(buf_j, 2, MPI_INT, i, tag, comm, s_waits + j));
      j++;
    }
  }
  PetscCheck(j == nsends, PETSC_COMM_SELF, PETSC_ERR_PLIB, "j %d not equal to expected number of sends %d", j, nsends);

  /* Post waits on sends and receives */
  if (nrecvs + nsends) PetscCallMPI(MPI_Waitall(nrecvs + nsends, r_waits, w_status));

  /* Pack up the received data: source rank from the status, lengths from the paired recv buffer */
  PetscCall(PetscMalloc1(nrecvs + 1, onodes));
  for (i = 0; i < nrecvs; ++i) {
    (*onodes)[i]    = w_status[i].MPI_SOURCE;
    buf_j           = buf_r + (2 * i);
    (*olengths1)[i] = buf_j[0];
    (*olengths2)[i] = buf_j[1];
  }

  PetscCall(PetscFree4(r_waits, buf_r, buf_s, w_status));
  PetscFunctionReturn(PETSC_SUCCESS);
}
288 
289 /*
290   Allocate a buffer sufficient to hold messages of size specified in olengths.
291   And post Irecvs on these buffers using node info from onodes
292  */
293 PetscErrorCode PetscPostIrecvInt(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscInt ***rbuf, MPI_Request **r_waits)
294 {
295   PetscInt   **rbuf_t, i, len = 0;
296   MPI_Request *r_waits_t;
297 
298   PetscFunctionBegin;
299   /* compute memory required for recv buffers */
300   for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */
301 
302   /* allocate memory for recv buffers */
303   PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t));
304   PetscCall(PetscMalloc1(len, &rbuf_t[0]));
305   for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1];
306 
307   /* Post the receives */
308   PetscCall(PetscMalloc1(nrecvs, &r_waits_t));
309   for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPIU_Irecv(rbuf_t[i], olengths[i], MPIU_INT, onodes[i], tag, comm, r_waits_t + i));
310 
311   *rbuf    = rbuf_t;
312   *r_waits = r_waits_t;
313   PetscFunctionReturn(PETSC_SUCCESS);
314 }
315 
316 PetscErrorCode PetscPostIrecvScalar(MPI_Comm comm, PetscMPIInt tag, PetscMPIInt nrecvs, const PetscMPIInt onodes[], const PetscMPIInt olengths[], PetscScalar ***rbuf, MPI_Request **r_waits)
317 {
318   PetscMPIInt   i;
319   PetscScalar **rbuf_t;
320   MPI_Request  *r_waits_t;
321   PetscInt      len = 0;
322 
323   PetscFunctionBegin;
324   /* compute memory required for recv buffers */
325   for (i = 0; i < nrecvs; i++) len += olengths[i]; /* each message length */
326 
327   /* allocate memory for recv buffers */
328   PetscCall(PetscMalloc1(nrecvs + 1, &rbuf_t));
329   PetscCall(PetscMalloc1(len, &rbuf_t[0]));
330   for (i = 1; i < nrecvs; ++i) rbuf_t[i] = rbuf_t[i - 1] + olengths[i - 1];
331 
332   /* Post the receives */
333   PetscCall(PetscMalloc1(nrecvs, &r_waits_t));
334   for (i = 0; i < nrecvs; ++i) PetscCallMPI(MPIU_Irecv(rbuf_t[i], olengths[i], MPIU_SCALAR, onodes[i], tag, comm, r_waits_t + i));
335 
336   *rbuf    = rbuf_t;
337   *r_waits = r_waits_t;
338   PetscFunctionReturn(PETSC_SUCCESS);
339 }
340