#include <petscsys.h> /*I "petscsys.h" I*/
#include <petsc/private/petscimpl.h>

PetscLogEvent PETSC_BuildTwoSided;
PetscLogEvent PETSC_BuildTwoSidedF;

const char *const PetscBuildTwoSidedTypes[] = {"ALLREDUCE", "IBARRIER", "REDSCATTER", "PetscBuildTwoSidedType", "PETSC_BUILDTWOSIDED_", NULL};

static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;

/*@
  PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication

  Logically Collective

  Input Parameters:
+ comm     - communicator on which to set the algorithm (the setting is currently global, so `PETSC_COMM_WORLD` is typical)
- twosided - algorithm to use in subsequent calls to `PetscCommBuildTwoSided()`

  Level: developer

  Note:
  This option is currently global, but could be made per-communicator.
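
  Example Usage:
  A minimal sketch (the algorithm chosen here is illustrative):
.vb
  PetscCall(PetscCommBuildTwoSidedSetType(PETSC_COMM_WORLD, PETSC_BUILDTWOSIDED_ALLREDUCE));
.ve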

.seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedGetType()`, `PetscBuildTwoSidedType`
@*/
PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm, PetscBuildTwoSidedType twosided)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) { /* We don't have a PetscObject so can't use PetscValidLogicalCollectiveEnum */
    PetscMPIInt b1[2], b2[2];
    b1[0] = -(PetscMPIInt)twosided;
    b1[1] = (PetscMPIInt)twosided;
    PetscCallMPI(MPIU_Allreduce(b1, b2, 2, MPI_INT, MPI_MAX, comm));
    PetscCheck(-b2[0] == b2[1], comm, PETSC_ERR_ARG_WRONG, "Enum value must be same on all processes");
  }
  _twosided_type = twosided;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PetscCommBuildTwoSidedGetType - get algorithm used when building two-sided communication

  Logically Collective

  Input Parameter:
. comm - communicator on which to query the algorithm

  Output Parameter:
. twosided - algorithm that will be used by `PetscCommBuildTwoSided()`

  Level: developer
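
  Example Usage:
  A minimal sketch:
.vb
  PetscBuildTwoSidedType type;

  PetscCall(PetscCommBuildTwoSidedGetType(PETSC_COMM_WORLD, &type));
.ve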

.seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedSetType()`, `PetscBuildTwoSidedType`
@*/
PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm, PetscBuildTwoSidedType *twosided)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  *twosided = PETSC_BUILDTWOSIDED_NOTSET;
  if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
    PetscCallMPI(MPI_Comm_size(comm, &size));
    _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE; /* default for small comms, see https://gitlab.com/petsc/petsc/-/merge_requests/2611 */
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
    if (size > 1024) _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
#endif
    PetscCall(PetscOptionsGetEnum(NULL, NULL, "-build_twosided", PetscBuildTwoSidedTypes, (PetscEnum *)&_twosided_type, NULL));
  }
  *twosided = _twosided_type;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata)
{
  PetscMPIInt    nrecvs, tag, done, i;
  MPI_Aint       lb, unitbytes;
  char          *tdata;
  MPI_Request   *sendreqs, barrier;
  PetscSegBuffer segrank, segdata;
  PetscBool      barrier_started;

  PetscFunctionBegin;
  PetscCall(PetscCommDuplicate(comm, &comm, &tag));
  PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
  PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
  tdata = (char *)todata;
  PetscCall(PetscMalloc1(nto, &sendreqs));
  for (i = 0; i < nto; i++) PetscCallMPI(MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i));
  PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank));
  PetscCall(PetscSegBufferCreate(unitbytes, 4 * count, &segdata));

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
   * but we need to work around it. */
  barrier_started = PETSC_FALSE;
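  /* NBX-style consensus (Hoefler, Siebert & Lumsdaine 2010): keep probing for incoming rendezvous
   * messages; once all of this rank's synchronous sends have been matched, start a nonblocking
   * barrier, and terminate when that barrier completes on every process. */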
  for (done = 0; !done;) {
    PetscMPIInt flag;
    MPI_Status  status;
    PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status));
    if (flag) { /* incoming message */
      PetscMPIInt *recvrank;
      void        *buf;
      PetscCall(PetscSegBufferGet(segrank, 1, &recvrank));
      PetscCall(PetscSegBufferGet(segdata, count, &buf));
      *recvrank = status.MPI_SOURCE;
      PetscCallMPI(MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE));
      nrecvs++;
    }
    if (!barrier_started) {
      PetscMPIInt sent, nsends;
      PetscCall(PetscMPIIntCast(nto, &nsends));
      PetscCallMPI(MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE));
      if (sent) {
        PetscCallMPI(MPI_Ibarrier(comm, &barrier));
        barrier_started = PETSC_TRUE;
        PetscCall(PetscFree(sendreqs));
      }
    } else {
      PetscCallMPI(MPI_Test(&barrier, &done, MPI_STATUS_IGNORE));
    }
  }
  *nfrom = nrecvs;
  PetscCall(PetscSegBufferExtractAlloc(segrank, fromranks));
  PetscCall(PetscSegBufferDestroy(&segrank));
  PetscCall(PetscSegBufferExtractAlloc(segdata, fromdata));
  PetscCall(PetscSegBufferDestroy(&segdata));
  PetscCall(PetscCommDestroy(&comm));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata)
{
  PetscMPIInt       size, rank, *iflags, nrecvs, tag, *franks, i, flg;
  MPI_Aint          lb, unitbytes;
  char             *tdata, *fdata;
  MPI_Request      *reqs, *sendreqs;
  MPI_Status       *statuses;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscCommDuplicate(comm, &comm, &tag));
  PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set");
  if (!counter->iflags) {
    PetscCall(PetscCalloc1(size, &counter->iflags));
    iflags = counter->iflags;
  } else {
    iflags = counter->iflags;
    PetscCall(PetscArrayzero(iflags, size));
  }
  for (i = 0; i < nto; i++) iflags[toranks[i]] = 1;
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, iflags, size, MPI_INT, MPI_SUM, comm));
  nrecvs = iflags[rank];
  PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
  PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
  PetscCall(PetscMalloc(nrecvs * count * unitbytes, &fdata));
  tdata = (char *)todata;
  PetscCall(PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses));
  sendreqs = PetscSafePointerPlusOffset(reqs, nrecvs);
  for (i = 0; i < nrecvs; i++) PetscCallMPI(MPIU_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i));
  for (i = 0; i < nto; i++) PetscCallMPI(MPIU_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i));
  PetscCallMPI(MPI_Waitall(nto + nrecvs, reqs, statuses));
  PetscCall(PetscMalloc1(nrecvs, &franks));
  for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  PetscCall(PetscFree2(reqs, statuses));
  PetscCall(PetscCommDestroy(&comm));

  *nfrom             = nrecvs;
  *fromranks         = franks;
  *(void **)fromdata = fdata;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata)
{
  PetscMPIInt       size, *iflags, nrecvs, tag, *franks, i, flg;
  MPI_Aint          lb, unitbytes;
  char             *tdata, *fdata;
  MPI_Request      *reqs, *sendreqs;
  MPI_Status       *statuses;
  PetscCommCounter *counter;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(PetscCommDuplicate(comm, &comm, &tag));
  PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Counter_keyval, &counter, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inner PETSc communicator does not have its tag/name counter attribute set");
  if (!counter->iflags) {
    PetscCall(PetscCalloc1(size, &counter->iflags));
    iflags = counter->iflags;
  } else {
    iflags = counter->iflags;
    PetscCall(PetscArrayzero(iflags, size));
  }
  for (i = 0; i < nto; i++) iflags[toranks[i]] = 1;
  PetscCallMPI(MPI_Reduce_scatter_block(iflags, &nrecvs, 1, MPI_INT, MPI_SUM, comm));
  PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
  PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
  PetscCall(PetscMalloc(nrecvs * count * unitbytes, &fdata));
  tdata = (char *)todata;
  PetscCall(PetscMalloc2(nto + nrecvs, &reqs, nto + nrecvs, &statuses));
  sendreqs = reqs + nrecvs;
  for (i = 0; i < nrecvs; i++) PetscCallMPI(MPIU_Irecv((void *)(fdata + count * unitbytes * i), count, dtype, MPI_ANY_SOURCE, tag, comm, reqs + i));
  for (i = 0; i < nto; i++) PetscCallMPI(MPIU_Isend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i));
  PetscCallMPI(MPI_Waitall(nto + nrecvs, reqs, statuses));
  PetscCall(PetscMalloc1(nrecvs, &franks));
  for (i = 0; i < nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
  PetscCall(PetscFree2(reqs, statuses));
  PetscCall(PetscCommDestroy(&comm));

  *nfrom             = nrecvs;
  *fromranks         = franks;
  *(void **)fromdata = fdata;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/*@C
  PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)

  Collective, No Fortran Support

  Input Parameters:
+ comm    - communicator
. count   - number of entries to send/receive (must match on all ranks)
. dtype   - datatype to send/receive from each rank (must match on all ranks)
. nto     - number of ranks to send data to
. toranks - ranks to send to (array of length `nto`)
- todata  - data to send to each rank (packed)

  Output Parameters:
+ nfrom     - number of ranks from which messages are received
. fromranks - ranks from which messages are received (length `nfrom`; caller should `PetscFree()`)
- fromdata  - packed data from each rank, each with `count` entries of type `dtype` (length `nfrom`; caller responsible for `PetscFree()`)

  Options Database Key:
. -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication. Default is allreduce for communicators with <= 1024 ranks,
  otherwise ibarrier.

  Level: developer

  Notes:
  This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and
  `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other constant-size data, see {cite}`hoeflersiebretlumsdaine10`.

  Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
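
  Example Usage:
  A minimal sketch in which every rank other than 0 sends one `PetscMPIInt` to rank 0 (the ranks and payload are illustrative):
.vb
  PetscMPIInt rank, nto = 0, toranks[1], todata[1], nfrom, *fromranks, *fromdata;

  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  if (rank > 0) { /* everyone except rank 0 sends one entry to rank 0 */
    nto        = 1;
    toranks[0] = 0;
    todata[0]  = rank;
  }
  PetscCall(PetscCommBuildTwoSided(comm, 1, MPI_INT, nto, toranks, todata, &nfrom, &fromranks, &fromdata));
  /* on rank 0, nfrom is now size-1; on all other ranks it is 0 */
  PetscCall(PetscFree(fromranks));
  PetscCall(PetscFree(fromdata));
.ve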

.seealso: `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`, `PetscCommBuildTwoSidedSetType()`, `PetscCommBuildTwoSidedType`
@*/
PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt toranks[], const void *todata, PetscMPIInt *nfrom, PetscMPIInt *fromranks[], void *fromdata)
{
  PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;

  PetscFunctionBegin;
  PetscCall(PetscSysInitializePackage());
  PetscCall(PetscLogEventSync(PETSC_BuildTwoSided, comm));
  PetscCall(PetscLogEventBegin(PETSC_BuildTwoSided, 0, 0, 0, 0));
  PetscCall(PetscCommBuildTwoSidedGetType(comm, &buildtype));
  switch (buildtype) {
  case PETSC_BUILDTWOSIDED_IBARRIER:
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
    PetscCall(PetscCommBuildTwoSided_Ibarrier(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata));
    break;
#else
    SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
#endif
  case PETSC_BUILDTWOSIDED_ALLREDUCE:
    PetscCall(PetscCommBuildTwoSided_Allreduce(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata));
    break;
  case PETSC_BUILDTWOSIDED_REDSCATTER:
#if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
    PetscCall(PetscCommBuildTwoSided_RedScatter(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata));
    break;
#else
    SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
#endif
  default:
    SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication");
  }
  PetscCall(PetscLogEventEnd(PETSC_BuildTwoSided, 0, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), PetscCtx ctx)
{
  PetscMPIInt  i, *tag;
  MPI_Aint     lb, unitbytes;
  MPI_Request *sendreq, *recvreq;

  PetscFunctionBegin;
  PetscCall(PetscMalloc1(ntags, &tag));
  if (ntags > 0) PetscCall(PetscCommDuplicate(comm, &comm, &tag[0]));
  for (i = 1; i < ntags; i++) PetscCall(PetscCommGetNewTag(comm, &tag[i]));

  /* Perform complete initial rendezvous */
  PetscCall(PetscCommBuildTwoSided(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata));

  PetscCall(PetscMalloc1(nto * ntags, &sendreq));
  PetscCall(PetscMalloc1(*nfrom * ntags, &recvreq));

  PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
  PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
  for (i = 0; i < nto; i++) {
    PetscMPIInt k;
    for (k = 0; k < ntags; k++) sendreq[i * ntags + k] = MPI_REQUEST_NULL;
    PetscCall((*send)(comm, tag, i, toranks[i], ((char *)todata) + count * unitbytes * i, sendreq + i * ntags, ctx));
  }
  for (i = 0; i < *nfrom; i++) {
    void       *header = (*(char **)fromdata) + count * unitbytes * i;
    PetscMPIInt k;
    for (k = 0; k < ntags; k++) recvreq[i * ntags + k] = MPI_REQUEST_NULL;
    PetscCall((*recv)(comm, tag, (*fromranks)[i], header, recvreq + i * ntags, ctx));
  }
  PetscCall(PetscFree(tag));
  PetscCall(PetscCommDestroy(&comm));
  *toreqs   = sendreq;
  *fromreqs = recvreq;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)

static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), PetscCtx ctx)
{
  PetscMPIInt    nrecvs, tag, *tags, done, i;
  MPI_Aint       lb, unitbytes;
  char          *tdata;
  MPI_Request   *sendreqs, *usendreqs, *req, barrier;
  PetscSegBuffer segrank, segdata, segreq;
  PetscBool      barrier_started;

  PetscFunctionBegin;
  PetscCall(PetscCommDuplicate(comm, &comm, &tag));
  PetscCall(PetscMalloc1(ntags, &tags));
  for (i = 0; i < ntags; i++) PetscCall(PetscCommGetNewTag(comm, &tags[i]));
  PetscCallMPI(MPI_Type_get_extent(dtype, &lb, &unitbytes));
  PetscCheck(lb == 0, comm, PETSC_ERR_SUP, "Datatype with nonzero lower bound %ld", (long)lb);
  tdata = (char *)todata;
  PetscCall(PetscMalloc1(nto, &sendreqs));
  PetscCall(PetscMalloc1(nto * ntags, &usendreqs));
  /* Post synchronous sends */
  for (i = 0; i < nto; i++) PetscCallMPI(MPI_Issend((void *)(tdata + count * unitbytes * i), count, dtype, toranks[i], tag, comm, sendreqs + i));
  /* Post actual payloads. These are typically larger messages. Hopefully sending these later does not slow down the
   * synchronous messages above. */
  for (i = 0; i < nto; i++) {
    PetscMPIInt k;
    for (k = 0; k < ntags; k++) usendreqs[i * ntags + k] = MPI_REQUEST_NULL;
    PetscCall((*send)(comm, tags, i, toranks[i], tdata + count * unitbytes * i, usendreqs + i * ntags, ctx));
  }

  PetscCall(PetscSegBufferCreate(sizeof(PetscMPIInt), 4, &segrank));
  PetscCall(PetscSegBufferCreate(unitbytes, 4 * count, &segdata));
  PetscCall(PetscSegBufferCreate(sizeof(MPI_Request), 4, &segreq));

  nrecvs  = 0;
  barrier = MPI_REQUEST_NULL;
  /* MPICH-3.2 sometimes does not create a request in some "optimized" cases. This is arguably a standard violation,
   * but we need to work around it. */
  barrier_started = PETSC_FALSE;
  for (done = 0; !done;) {
    PetscMPIInt flag;
    MPI_Status  status;
    PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag, comm, &flag, &status));
    if (flag) { /* incoming message */
      PetscMPIInt *recvrank, k;
      void        *buf;
      PetscCall(PetscSegBufferGet(segrank, 1, &recvrank));
      PetscCall(PetscSegBufferGet(segdata, count, &buf));
      *recvrank = status.MPI_SOURCE;
      PetscCallMPI(MPI_Recv(buf, count, dtype, status.MPI_SOURCE, tag, comm, MPI_STATUS_IGNORE));
      PetscCall(PetscSegBufferGet(segreq, ntags, &req));
      for (k = 0; k < ntags; k++) req[k] = MPI_REQUEST_NULL;
      PetscCall((*recv)(comm, tags, status.MPI_SOURCE, buf, req, ctx));
      nrecvs++;
    }
    if (!barrier_started) {
      PetscMPIInt sent, nsends;
      PetscCall(PetscMPIIntCast(nto, &nsends));
      PetscCallMPI(MPI_Testall(nsends, sendreqs, &sent, MPI_STATUSES_IGNORE));
      if (sent) {
        PetscCallMPI(MPI_Ibarrier(comm, &barrier));
        barrier_started = PETSC_TRUE;
      }
    } else {
      PetscCallMPI(MPI_Test(&barrier, &done, MPI_STATUS_IGNORE));
    }
  }
  *nfrom = nrecvs;
  PetscCall(PetscSegBufferExtractAlloc(segrank, fromranks));
  PetscCall(PetscSegBufferDestroy(&segrank));
  PetscCall(PetscSegBufferExtractAlloc(segdata, fromdata));
  PetscCall(PetscSegBufferDestroy(&segdata));
  *toreqs = usendreqs;
  PetscCall(PetscSegBufferExtractAlloc(segreq, fromreqs));
  PetscCall(PetscSegBufferDestroy(&segreq));
  PetscCall(PetscFree(sendreqs));
  PetscCall(PetscFree(tags));
  PetscCall(PetscCommDestroy(&comm));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/*@C
  PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous

  Collective, No Fortran Support

  Input Parameters:
+ comm    - communicator
. count   - number of entries to send/receive in initial rendezvous (must match on all ranks)
. dtype   - datatype to send/receive from each rank (must match on all ranks)
. nto     - number of ranks to send data to
. toranks - ranks to send to (array of length `nto`)
. todata  - data to send to each rank (packed)
. ntags   - number of tags needed by send/recv callbacks
. send    - callback invoked on sending process when ready to send primary payload
. recv    - callback invoked on receiving process after delivery of rendezvous message
- ctx     - context for callbacks

  Output Parameters:
+ nfrom     - number of ranks from which messages are received
. fromranks - ranks from which messages are received (length `nfrom`; caller should `PetscFree()`)
- fromdata  - packed data from each rank, each with `count` entries of type `dtype` (length `nfrom`; caller responsible for `PetscFree()`)

  Level: developer

  Notes:
  This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and
  `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data, see {cite}`hoeflersiebretlumsdaine10`.

  Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
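
  Example Usage:
  A sketch of the callback shape (the function names and bodies here are hypothetical; each callback may post up to `ntags` nonblocking operations on the given tags and must record them in `req[]`):
.vb
  static PetscErrorCode MySend(MPI_Comm comm, const PetscMPIInt tags[], PetscMPIInt tonum, PetscMPIInt rank, void *todata, MPI_Request req[], void *ctx)
  {
    PetscFunctionBegin;
    /* post nonblocking sends of the primary payload to `rank` using tags[0..ntags-1], storing the requests in req[] */
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  static PetscErrorCode MyRecv(MPI_Comm comm, const PetscMPIInt tags[], PetscMPIInt rank, void *fromdata, MPI_Request req[], void *ctx)
  {
    PetscFunctionBegin;
    /* post matching nonblocking receives from `rank`, e.g., sized from the rendezvous data in `fromdata` */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
.ve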

.seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedFReq()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`
@*/
PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), PetscCtx ctx)
{
  MPI_Request *toreqs, *fromreqs;

  PetscFunctionBegin;
  PetscCall(PetscCommBuildTwoSidedFReq(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, &toreqs, &fromreqs, send, recv, ctx));
  PetscCallMPI(MPI_Waitall(nto * ntags, toreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(*nfrom * ntags, fromreqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree(toreqs));
  PetscCall(PetscFree(fromreqs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests

  Collective, No Fortran Support

  Input Parameters:
+ comm    - communicator
. count   - number of entries to send/receive in initial rendezvous (must match on all ranks)
. dtype   - datatype to send/receive from each rank (must match on all ranks)
. nto     - number of ranks to send data to
. toranks - ranks to send to (array of length `nto`)
. todata  - data to send to each rank (packed)
. ntags   - number of tags needed by send/recv callbacks
. send    - callback invoked on sending process when ready to send primary payload
. recv    - callback invoked on receiving process after delivery of rendezvous message
- ctx     - context for callbacks

  Output Parameters:
+ nfrom     - number of ranks from which messages are received
. fromranks - ranks from which messages are received (length `nfrom`; caller should `PetscFree()`)
. fromdata  - packed data from each rank, each with `count` entries of type `dtype` (length `nfrom`; caller responsible for `PetscFree()`)
. toreqs    - array of `nto`*`ntags` sender requests (caller must wait on these, then `PetscFree()`)
- fromreqs  - array of `nfrom`*`ntags` receiver requests (caller must wait on these, then `PetscFree()`)

  Level: developer

  Notes:
  This memory-scalable interface is an alternative to calling `PetscGatherNumberOfMessages()` and
  `PetscGatherMessageLengths()`, possibly with a subsequent round of communication to send other data, see {cite}`hoeflersiebretlumsdaine10`.

  Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.
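
  Example Usage:
  A sketch of the completion step the caller owes after this call returns (`MySend`, `MyRecv`, and the surrounding variables are hypothetical; see `PetscCommBuildTwoSidedF()` for the callback shape):
.vb
  PetscCall(PetscCommBuildTwoSidedFReq(comm, count, dtype, nto, toranks, todata, &nfrom, &fromranks, &fromdata, ntags, &toreqs, &fromreqs, MySend, MyRecv, ctx));
  /* ... overlap unrelated work with the outstanding payload messages ... */
  PetscCallMPI(MPI_Waitall(nto * ntags, toreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nfrom * ntags, fromreqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree(toreqs));
  PetscCall(PetscFree(fromreqs));
.ve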

.seealso: `PetscCommBuildTwoSided()`, `PetscCommBuildTwoSidedF()`, `PetscGatherNumberOfMessages()`, `PetscGatherMessageLengths()`
@*/
PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm, PetscMPIInt count, MPI_Datatype dtype, PetscMPIInt nto, const PetscMPIInt *toranks, const void *todata, PetscMPIInt *nfrom, PetscMPIInt **fromranks, void *fromdata, PetscMPIInt ntags, MPI_Request **toreqs, MPI_Request **fromreqs, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), PetscCtx ctx)
{
  PetscErrorCode (*f)(MPI_Comm, PetscMPIInt, MPI_Datatype, PetscMPIInt, const PetscMPIInt[], const void *, PetscMPIInt *, PetscMPIInt **, void *, PetscMPIInt, MPI_Request **, MPI_Request **, PetscErrorCode (*send)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, PetscMPIInt, void *, MPI_Request[], void *), PetscErrorCode (*recv)(MPI_Comm, const PetscMPIInt[], PetscMPIInt, void *, MPI_Request[], void *), PetscCtx ctx);
  PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
  PetscMPIInt            i, size;

  PetscFunctionBegin;
  PetscCall(PetscSysInitializePackage());
  PetscCallMPI(MPI_Comm_size(comm, &size));
  for (i = 0; i < nto; i++) PetscCheck(toranks[i] >= 0 && size > toranks[i], comm, PETSC_ERR_ARG_OUTOFRANGE, "toranks[%d] %d not in comm size %d", i, toranks[i], size);
  PetscCall(PetscLogEventSync(PETSC_BuildTwoSidedF, comm));
  PetscCall(PetscLogEventBegin(PETSC_BuildTwoSidedF, 0, 0, 0, 0));
  PetscCall(PetscCommBuildTwoSidedGetType(comm, &buildtype));
  switch (buildtype) {
  case PETSC_BUILDTWOSIDED_IBARRIER:
#if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
    f = PetscCommBuildTwoSidedFReq_Ibarrier;
    break;
#else
    SETERRQ(comm, PETSC_ERR_PLIB, "MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
#endif
  case PETSC_BUILDTWOSIDED_ALLREDUCE:
  case PETSC_BUILDTWOSIDED_REDSCATTER:
    f = PetscCommBuildTwoSidedFReq_Reference;
    break;
  default:
    SETERRQ(comm, PETSC_ERR_PLIB, "Unknown method for building two-sided communication");
  }
  PetscCall((*f)(comm, count, dtype, nto, toranks, todata, nfrom, fromranks, fromdata, ntags, toreqs, fromreqs, send, recv, ctx));
  PetscCall(PetscLogEventEnd(PETSC_BuildTwoSidedF, 0, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}