1 #include <../src/vec/is/sf/impls/basic/sfbasic.h>
2 #include <../src/vec/is/sf/impls/basic/sfpack.h>
3 #include <petsc/private/viewerimpl.h>
4
// Init persistent MPI send/recv requests for the given link and communication direction.
//
// Requests are created lazily, once per (direction, memtype, direct-flag) combination, and the
// corresponding rootreqsinited/leafreqsinited flag is set so subsequent calls are no-ops.
// Only non-distinguished ranks (indices >= nd*ranks) get requests; distinguished ranks are
// presumably handled by local scatter rather than MPI — TODO confirm against PetscSFLinkScatterLocal.
//
// For LEAF2ROOT, roots receive and leaves send; for ROOT2LEAF the roles are swapped.
static PetscErrorCode PetscSFLinkInitMPIRequests_Persistent_Basic(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  PetscInt           cnt;
  PetscMPIInt        nrootranks, ndrootranks, nleafranks, ndleafranks;
  const PetscInt    *rootoffset, *leafoffset;
  MPI_Aint           disp;
  MPI_Comm           comm           = PetscObjectComm((PetscObject)sf);
  MPI_Datatype       unit           = link->unit;
  const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */
  const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;

  PetscFunctionBegin;
  /* Root side: one persistent request per non-distinguished root rank, if not already created */
  if (bas->rootbuflen[PETSCSF_REMOTE] && !link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi]) {
    PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
    if (direction == PETSCSF_LEAF2ROOT) {
      for (PetscMPIInt i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
        /* disp: byte offset of this rank's segment within the remote root buffer */
        disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
        cnt  = rootoffset[i + 1] - rootoffset[i];
        PetscCallMPI(MPIU_Recv_init(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
      }
    } else { /* PETSCSF_ROOT2LEAF */
      for (PetscMPIInt i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
        cnt  = rootoffset[i + 1] - rootoffset[i];
        PetscCallMPI(MPIU_Send_init(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
      }
    }
    link->rootreqsinited[direction][rootmtype_mpi][rootdirect_mpi] = PETSC_TRUE;
  }

  /* Leaf side: mirror image of the root side (send in LEAF2ROOT, receive in ROOT2LEAF) */
  if (sf->leafbuflen[PETSCSF_REMOTE] && !link->leafreqsinited[direction][leafmtype_mpi][leafdirect_mpi]) {
    PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
    if (direction == PETSCSF_LEAF2ROOT) {
      for (PetscMPIInt i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
        cnt  = leafoffset[i + 1] - leafoffset[i];
        PetscCallMPI(MPIU_Send_init(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
      }
    } else { /* PETSCSF_ROOT2LEAF */
      for (PetscMPIInt i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
        cnt  = leafoffset[i + 1] - leafoffset[i];
        PetscCallMPI(MPIU_Recv_init(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
      }
    }
    link->leafreqsinited[direction][leafmtype_mpi][leafdirect_mpi] = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
56
// Start MPI requests. If use non-GPU aware MPI, we might need to copy data from device buf to host buf
//
// Receives are selected/started before sends. The root/leaf roles depend on direction:
// ROOT2LEAF means roots send and leaves receive; LEAF2ROOT is the reverse.
static PetscErrorCode PetscSFLinkStartCommunication_Persistent_Basic(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscMPIInt    nsreqs = 0, nrreqs = 0;
  MPI_Request   *sreqs = NULL, *rreqs = NULL;
  PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
  PetscInt       sbuflen, rbuflen;

  PetscFunctionBegin;
  /* Pick the receive-side buffer length and request array for this direction */
  rbuflen = (direction == PETSCSF_ROOT2LEAF) ? sf->leafbuflen[PETSCSF_REMOTE] : bas->rootbuflen[PETSCSF_REMOTE];
  if (rbuflen) {
    if (direction == PETSCSF_ROOT2LEAF) {
      nrreqs = sf->nleafreqs;
      PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &rreqs));
    } else { /* leaf to root */
      nrreqs = bas->nrootreqs;
      PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &rreqs, NULL));
    }
  }

  /* Pick the send-side buffer length and request array; copy device buffers to host first if MPI is not GPU-aware */
  sbuflen = (direction == PETSCSF_ROOT2LEAF) ? bas->rootbuflen[PETSCSF_REMOTE] : sf->leafbuflen[PETSCSF_REMOTE];
  if (sbuflen) {
    if (direction == PETSCSF_ROOT2LEAF) {
      nsreqs = bas->nrootreqs;
      PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /*device2host before sending */));
      PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, &sreqs, NULL));
    } else { /* leaf to root */
      nsreqs = sf->nleafreqs;
      PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE));
      PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, NULL, NULL, NULL, &sreqs));
    }
  }
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link)); // need to sync the stream to make BOTH sendbuf and recvbuf ready
  if (rbuflen) PetscCallMPI(MPI_Startall_irecv(rbuflen, link->unit, nrreqs, rreqs));
  if (sbuflen) PetscCallMPI(MPI_Startall_isend(sbuflen, link->unit, nsreqs, sreqs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
94
95 #if defined(PETSC_HAVE_MPIX_STREAM)
// issue MPIX_Isend/Irecv_enqueue()
//
// Stream-aware variant: operations are enqueued on sf->stream_comm, so no explicit GPU stream
// synchronization is needed before calling MPI (unlike the persistent path above).
// Non-persistent requests are created fresh on every call; only non-distinguished ranks
// (indices >= nd*ranks) are communicated with.
static PetscErrorCode PetscSFLinkStartCommunication_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  PetscInt           i, j;
  PetscMPIInt        nrootranks, ndrootranks, nleafranks, ndleafranks, cnt;
  const PetscInt    *rootoffset, *leafoffset;
  MPI_Aint           disp;
  MPI_Comm           stream_comm = sf->stream_comm;
  MPI_Datatype       unit        = link->unit;
  const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi; /* Used to select buffers passed to MPI */
  const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;

  PetscFunctionBegin;
  /* Root side: receive in LEAF2ROOT, send in ROOT2LEAF */
  if (bas->rootbuflen[PETSCSF_REMOTE]) {
    PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, NULL, &rootoffset, NULL));
    if (direction == PETSCSF_LEAF2ROOT) {
      for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
        /* disp: byte offset of rank i's segment within the remote root buffer */
        disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
        cnt  = (PetscMPIInt)(rootoffset[i + 1] - rootoffset[i]);
        PetscCallMPI(MPIX_Irecv_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
      }
    } else { // PETSCSF_ROOT2LEAF
      for (i = ndrootranks, j = 0; i < nrootranks; i++, j++) {
        disp = (rootoffset[i] - rootoffset[ndrootranks]) * link->unitbytes;
        cnt  = (PetscMPIInt)(rootoffset[i + 1] - rootoffset[i]);
        // no need to sync the gpu stream!
        PetscCallMPI(MPIX_Isend_enqueue(link->rootbuf[PETSCSF_REMOTE][rootmtype_mpi] + disp, cnt, unit, bas->iranks[i], link->tag, stream_comm, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi] + j));
      }
    }
  }

  /* Leaf side: send in LEAF2ROOT, receive in ROOT2LEAF */
  if (sf->leafbuflen[PETSCSF_REMOTE]) {
    PetscCall(PetscSFGetLeafInfo_Basic(sf, &nleafranks, &ndleafranks, NULL, &leafoffset, NULL, NULL));
    if (direction == PETSCSF_LEAF2ROOT) {
      for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
        cnt  = (PetscMPIInt)(leafoffset[i + 1] - leafoffset[i]);
        // no need to sync the gpu stream!
        PetscCallMPI(MPIX_Isend_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
      }
    } else { // PETSCSF_ROOT2LEAF
      for (i = ndleafranks, j = 0; i < nleafranks; i++, j++) {
        disp = (leafoffset[i] - leafoffset[ndleafranks]) * link->unitbytes;
        cnt  = (PetscMPIInt)(leafoffset[i + 1] - leafoffset[i]);
        PetscCallMPI(MPIX_Irecv_enqueue(link->leafbuf[PETSCSF_REMOTE][leafmtype_mpi] + disp, cnt, unit, sf->ranks[i], link->tag, stream_comm, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi] + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
147
// Finish stream-enqueued communication by enqueueing waits on both the root and leaf
// request arrays for the given direction. The waits are enqueued regardless of buffer
// lengths; presumably unused request slots hold MPI_REQUEST_NULL — TODO confirm.
static PetscErrorCode PetscSFLinkFinishCommunication_MPIX_Stream(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
{
  PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi;
  const PetscInt     rootdirect_mpi = link->rootdirect_mpi, leafdirect_mpi = link->leafdirect_mpi;

  PetscFunctionBegin;
  PetscCallMPI(MPIX_Waitall_enqueue(bas->nrootreqs, link->rootreqs[direction][rootmtype_mpi][rootdirect_mpi], MPI_STATUSES_IGNORE));
  PetscCallMPI(MPIX_Waitall_enqueue(sf->nleafreqs, link->leafreqs[direction][leafmtype_mpi][leafdirect_mpi], MPI_STATUSES_IGNORE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
159 #endif
160
// Select the communication callbacks for this link: default to persistent send/recv,
// but switch to the MPIX stream-enqueued path when the user enabled stream-aware MPI
// and at least one side's MPI buffer lives on the device.
static PetscErrorCode PetscSFSetCommunicationOps_Basic(PetscSF sf, PetscSFLink link)
{
  PetscFunctionBegin;
  link->InitMPIRequests    = PetscSFLinkInitMPIRequests_Persistent_Basic;
  link->StartCommunication = PetscSFLinkStartCommunication_Persistent_Basic;
#if defined(PETSC_HAVE_MPIX_STREAM)
  const PetscMemType rootmtype_mpi = link->rootmtype_mpi, leafmtype_mpi = link->leafmtype_mpi;
  if (sf->use_stream_aware_mpi && (PetscMemTypeDevice(rootmtype_mpi) || PetscMemTypeDevice(leafmtype_mpi))) {
    link->StartCommunication  = PetscSFLinkStartCommunication_MPIX_Stream;
    link->FinishCommunication = PetscSFLinkFinishCommunication_MPIX_Stream;
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
175
176 /*===================================================================================*/
177 /* SF public interface implementations */
178 /*===================================================================================*/
/* Set up the basic SF: discover which ranks send leaves to my roots (incoming side),
   build bas->iranks/ioffset/irootloc, and exchange leaf identities so each root knows
   the local indices of the roots its remote leaves reference. */
PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
{
  PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
  PetscInt      *rlengths, *ilengths;
  PetscMPIInt    nRemoteRootRanks, nRemoteLeafRanks;
  PetscMPIInt    rank, niranks, *iranks, tag;
  MPI_Comm       comm;
  MPI_Group      group;
  MPI_Request   *rootreqs, *leafreqs;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_group(PETSC_COMM_SELF, &group));
  PetscCall(PetscSFSetUpRanks(sf, group));
  PetscCallMPI(MPI_Group_free(&group));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscObjectGetNewTag((PetscObject)sf, &tag));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /*
   * Inform roots about how many leaves and from which ranks
   */
  PetscCall(PetscMalloc1(sf->nranks, &rlengths));
  /* Determine number, sending ranks and length of incoming */
  for (PetscMPIInt i = 0; i < sf->nranks; i++) rlengths[i] = sf->roffset[i + 1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
  nRemoteRootRanks = sf->nranks - sf->ndranks;
  /* Two-sided exchange: everyone announces per-rank lengths to its remote root ranks; we learn
     which ranks (iranks) target us and with how many entries (ilengths) */
  PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, nRemoteRootRanks, PetscSafePointerPlusOffset(sf->ranks, sf->ndranks), PetscSafePointerPlusOffset(rlengths, sf->ndranks), &niranks, &iranks, (void **)&ilengths));

  /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
     We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
     small and the sorting is cheap.
   */
  PetscCall(PetscSortMPIIntWithIntArray(niranks, iranks, ilengths));

  /* Partition into distinguished and non-distinguished incoming ranks */
  bas->ndiranks = sf->ndranks;
  bas->niranks  = bas->ndiranks + niranks;
  PetscCall(PetscMalloc2(bas->niranks, &bas->iranks, bas->niranks + 1, &bas->ioffset));
  bas->ioffset[0] = 0;
  /* Distinguished ranks come first; their lengths were computed locally in rlengths[] */
  for (PetscMPIInt i = 0; i < bas->ndiranks; i++) {
    bas->iranks[i]      = sf->ranks[i];
    bas->ioffset[i + 1] = bas->ioffset[i] + rlengths[i];
  }
  /* At most one distinguished rank is supported and it must be the rank itself */
  PetscCheck(bas->ndiranks <= 1 && (bas->ndiranks != 1 || bas->iranks[0] == rank), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Broken setup for shared ranks");
  for (PetscMPIInt i = bas->ndiranks; i < bas->niranks; i++) {
    bas->iranks[i]      = iranks[i - bas->ndiranks];
    bas->ioffset[i + 1] = bas->ioffset[i] + ilengths[i - bas->ndiranks];
  }
  bas->itotal = bas->ioffset[bas->niranks];
  PetscCall(PetscFree(rlengths));
  PetscCall(PetscFree(iranks));
  PetscCall(PetscFree(ilengths));

  /* Send leaf identities to roots */
  nRemoteLeafRanks = bas->niranks - bas->ndiranks;
  PetscCall(PetscMalloc1(bas->itotal, &bas->irootloc));
  PetscCall(PetscMalloc2(nRemoteLeafRanks, &rootreqs, nRemoteRootRanks, &leafreqs));
  /* Post all receives first, then sends; distinguished (self) entries are memcpy'd instead */
  for (PetscMPIInt i = bas->ndiranks; i < bas->niranks; i++) PetscCallMPI(MPIU_Irecv(bas->irootloc + bas->ioffset[i], bas->ioffset[i + 1] - bas->ioffset[i], MPIU_INT, bas->iranks[i], tag, comm, &rootreqs[i - bas->ndiranks]));
  for (PetscMPIInt i = 0; i < sf->nranks; i++) {
    PetscInt npoints = sf->roffset[i + 1] - sf->roffset[i];
    if (i < sf->ndranks) {
      /* Self-communication: sanity-check the distinguished-rank invariants, then copy locally */
      PetscCheck(sf->ranks[i] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished leaf rank");
      PetscCheck(bas->iranks[0] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished root rank");
      PetscCheck(npoints == bas->ioffset[1] - bas->ioffset[0], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Distinguished rank exchange has mismatched lengths");
      PetscCall(PetscArraycpy(bas->irootloc + bas->ioffset[0], sf->rremote + sf->roffset[i], npoints));
      continue;
    }
    PetscCallMPI(MPIU_Isend(sf->rremote + sf->roffset[i], npoints, MPIU_INT, sf->ranks[i], tag, comm, &leafreqs[i - sf->ndranks]));
  }
  PetscCallMPI(MPI_Waitall(nRemoteLeafRanks, rootreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nRemoteRootRanks, leafreqs, MPI_STATUSES_IGNORE));

  /* Request counts used later when posting persistent sends/recvs */
  sf->nleafreqs  = nRemoteRootRanks;
  bas->nrootreqs = nRemoteLeafRanks;

  /* Setup fields related to packing, such as rootbuflen[] */
  PetscCall(PetscSFSetUpPackFields(sf));
  PetscCall(PetscFree2(rootreqs, leafreqs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
257
/* Free all communication state built by PetscSFSetUp_Basic and destroy all cached links.
   Errors if a communication is still in flight (bas->inuse non-empty). */
PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
{
  PetscSF_Basic *bas  = (PetscSF_Basic *)sf->data;
  PetscSFLink    link = bas->avail, next;

  PetscFunctionBegin;
  PetscCheck(!bas->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed");
  PetscCall(PetscFree2(bas->iranks, bas->ioffset));
  PetscCall(PetscFree(bas->irootloc));

#if defined(PETSC_HAVE_DEVICE)
  /* Free the device-side copies of irootloc (one per index in the 2-element cache) */
  for (int i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i]));
#endif

#if defined(PETSC_HAVE_NVSHMEM)
  PetscCall(PetscSFReset_Basic_NVSHMEM(sf));
#endif

  /* Destroy every link on the available list */
  for (; link; link = next) {
    next = link->next;
    PetscCall(PetscSFLinkDestroy(sf, link));
  }
  bas->avail = NULL;
  PetscCall(PetscSFResetPackFields(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
284
/* Destroy the basic SF: reset all state first (which requires sf->data), then free the data struct */
PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Basic(sf));
  PetscCall(PetscFree(sf->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
292
293 #if defined(PETSC_USE_SINGLE_LIBRARY)
294 #include <petscmat.h>
295
/* View the SF's communication pattern as a size-by-size AIJ matrix: entry (sender, receiver)
   is the number of bytes the sender would ship in a Bcast, treating each vertex as a PetscScalar.
   The matrix is then viewed with the given viewer (e.g. as a spy plot for a draw viewer). */
PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf, PetscViewer viewer)
{
  PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
  PetscMPIInt        nrootranks, ndrootranks;
  const PetscInt    *rootoffset;
  PetscMPIInt        rank, size;
  const PetscMPIInt *rootranks;
  MPI_Comm           comm = PetscObjectComm((PetscObject)sf);
  PetscScalar        unitbytes;
  Mat                A;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do
     PetscSFBcast, i.e., roots send data to leaves. We dump the communication pattern into a matrix
     in senders' view point: how many bytes I will send to my neighbors.

     Looking at a column of the matrix, one can also know how many bytes the rank will receive from others.

     If PetscSFLink bas->inuse is available, we can use that to get tree vertex size. But that would give
     different interpretations for the same SF for different data types. Since we most care about VecScatter,
     we uniformly treat each vertex as a PetscScalar.
  */
  unitbytes = (PetscScalar)sizeof(PetscScalar);

  PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, &rootoffset, NULL));
  /* Each rank owns one row; preallocate 1 diagonal and (nrootranks - ndrootranks) off-diagonal entries */
  PetscCall(MatCreateAIJ(comm, 1, 1, size, size, 1, NULL, nrootranks - ndrootranks, NULL, &A));
  PetscCall(MatSetOptionsPrefix(A, "__petsc_internal__")); /* To prevent the internal A from taking any command line options */
  for (PetscMPIInt i = 0; i < nrootranks; i++) PetscCall(MatSetValue(A, rank, bas->iranks[i], (rootoffset[i + 1] - rootoffset[i]) * unitbytes, INSERT_VALUES));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatView(A, viewer));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
332 #endif
333
/* View the basic SF: plain ASCII prints a one-line summary; MATLAB-format ASCII, draw, and
   binary viewers get the pattern-and-sizes matrix (only available in single-library builds). */
PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf, PetscViewer viewer)
{
  PetscBool isascii;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "  MultiSF sort=%s\n", sf->rankorder ? "rank-order" : "unordered"));
#if defined(PETSC_USE_SINGLE_LIBRARY)
  else {
    PetscBool isdraw, isbinary;
    PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
    PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
    if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) PetscCall(PetscSFView_Basic_PatternAndSizes(sf, viewer));
  }
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
351
/* Begin a broadcast (root -> leaf): pack, start remote communication, and overlap it with the
   local (self-to-self) scatter. Must be matched by PetscSFBcastEnd_Basic with the same data. */
PETSC_INTERN PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link));
  /* Pack rootdata to rootbuf for remote communication */
  PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata));
  /* Start communication, e.g., post MPIU_Isend */
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}
367
/* End a broadcast started by PetscSFBcastBegin_Basic: wait for remote communication,
   unpack into leafdata with op, and return the link to the available pool. */
PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  /* Retrieve the link used in XxxBegin() with root/leafdata as key */
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  /* Finish remote communication, e.g., post MPI_Waitall */
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Unpack data in leafbuf to leafdata for remote communication */
  PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafdata, op));
  /* Recycle the link */
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}
383
/* Shared by ReduceBegin and FetchAndOpBegin: create a link, pack leafdata, and start the
   leaf -> root communication. The created link is returned in *out so the caller can
   perform the local part of the operation before the matching End. */
static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link));
  PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata));
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_LEAF2ROOT));
  *out = link;
  PetscFunctionReturn(PETSC_SUCCESS);
}
396
/* leaf -> root with reduction: start remote communication, then overlap it with the
   local scatter (reducing local leaves into local roots with op). */
PETSC_INTERN PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link));
  PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}
407
/* End a reduction started by PetscSFReduceBegin_Basic: wait for remote data, fold it
   into rootdata with op, and recycle the link. */
PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  /* Retrieve the link used in ReduceBegin() with root/leafdata as key */
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
  PetscCall(PetscSFLinkUnpackRootData(sf, link, PETSCSF_REMOTE, rootdata, op));
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}
419
/* Begin fetch-and-op: start leaf -> root communication, then overlap with the local
   fetch-and-op (local leaves read old root values into leafupdate, then apply op). */
PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link));
  PetscCall(PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op));
  PetscFunctionReturn(PETSC_SUCCESS);
}
429
/* End fetch-and-op: finish the leaf -> root communication, apply the remote fetch-and-op on
   roots, then broadcast the fetched (pre-op) values back to the leaves as leafupdate. */
PETSC_INTERN PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscSFLink link = NULL;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
  /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
  /* Do fetch-and-op, the (remote) update results are in rootbuf */
  PetscCall(PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op));
  /* Bcast rootbuf to leafupdate */
  PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
  PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
  /* Unpack and insert fetched data into leaves */
  PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE));
  PetscCall(PetscSFLinkReclaim(sf, &link));
  PetscFunctionReturn(PETSC_SUCCESS);
}
448
PetscSFGetLeafRanks_Basic(PetscSF sf,PetscMPIInt * niranks,const PetscMPIInt ** iranks,const PetscInt ** ioffset,const PetscInt ** irootloc)449 PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
450 {
451 PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
452
453 PetscFunctionBegin;
454 if (niranks) *niranks = bas->niranks;
455 if (iranks) *iranks = bas->iranks;
456 if (ioffset) *ioffset = bas->ioffset;
457 if (irootloc) *irootloc = bas->irootloc;
458 PetscFunctionReturn(PETSC_SUCCESS);
459 }
460
/* An optimized PetscSFCreateEmbeddedRootSF. We aggressively make use of the established communication on sf.
   We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
   was sorted before calling the routine.
 */
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
{
  PetscSF            esf;
  PetscInt          *esf_roffset, *esf_rmine, *esf_rremote;
  PetscInt           j, p, q, nroots, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
  char              *rootdata, *leafdata, *leafmem; /* Only stores 0 or 1, so we can save memory with char */
  PetscMPIInt       *esf_ranks, nranks, ndranks, niranks, esf_nranks, esf_ndranks, ndiranks;
  const PetscMPIInt *ranks, *iranks;
  const PetscInt    *roffset, *rmine, *rremote, *ioffset, *irootloc;
  PetscBool          connected;
  PetscSFNode       *new_iremote;
  PetscSF_Basic     *bas;

  PetscFunctionBegin;
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), &esf));
  PetscCall(PetscSFSetFromOptions(esf));
  PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */

  /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
  maxlocal = maxleaf - minleaf + 1;
  PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
  /* leafdata is leafmem shifted so it can be indexed directly by leaf indices in [minleaf, maxleaf] */
  leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
  /* Tag selected roots */
  for (PetscInt i = 0; i < nselected; ++i) rootdata[selected[i]] = 1;

  PetscCall(PetscSFBcastBegin(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFGetLeafInfo_Basic(sf, &nranks, &ndranks, &ranks, &roffset, &rmine, &rremote)); /* Get send info */
  /* First pass over the leaves: count surviving leaves and surviving (distinguished) root ranks */
  esf_nranks = esf_ndranks = esf_nleaves = 0;
  for (PetscMPIInt i = 0; i < nranks; i++) {
    connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
    for (j = roffset[i]; j < roffset[i + 1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_nleaves++;
        connected = PETSC_TRUE;
      }
    }
    if (connected) {
      esf_nranks++;
      if (i < ndranks) esf_ndranks++;
    }
  }

  /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
  PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
  PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
  PetscCall(PetscMalloc4(esf_nranks, &esf_ranks, esf_nranks + 1, &esf_roffset, esf_nleaves, &esf_rmine, esf_nleaves, &esf_rremote));
  p              = 0; /* Counter for connected root ranks */
  q              = 0; /* Counter for connected leaves */
  esf_roffset[0] = 0;
  /* Second pass: fill the compacted rank/offset/leaf arrays in the same order as the original */
  for (PetscMPIInt i = 0; i < nranks; i++) { /* Scan leaf data again to fill esf arrays */
    connected = PETSC_FALSE;
    for (j = roffset[i]; j < roffset[i + 1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_rmine[q] = new_ilocal[q] = rmine[j];
        esf_rremote[q]               = rremote[j];
        new_iremote[q].index         = rremote[j];
        new_iremote[q].rank          = ranks[i];
        connected                    = PETSC_TRUE;
        q++;
      }
    }
    if (connected) {
      esf_ranks[p]       = ranks[i];
      esf_roffset[p + 1] = q;
      p++;
    }
  }

  /* SetGraph internally resets the SF, so we only set its fields after the call */
  PetscCall(PetscSFSetGraph(esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
  esf->nranks    = esf_nranks;
  esf->ndranks   = esf_ndranks;
  esf->ranks     = esf_ranks;
  esf->roffset   = esf_roffset;
  esf->rmine     = esf_rmine;
  esf->rremote   = esf_rremote;
  esf->nleafreqs = esf_nranks - esf_ndranks;

  /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
  bas = (PetscSF_Basic *)esf->data;
  PetscCall(PetscSFGetRootInfo_Basic(sf, &niranks, &ndiranks, &iranks, &ioffset, &irootloc)); /* Get recv info */
  /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
     we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once.
   */
  PetscCall(PetscMalloc2(niranks, &bas->iranks, niranks + 1, &bas->ioffset));
  PetscCall(PetscMalloc1(ioffset[niranks], &bas->irootloc));
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  /* Compact the incoming side by keeping only entries whose root was selected */
  for (PetscMPIInt i = 0; i < niranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j = ioffset[i]; j < ioffset[i + 1]; j++) {
      if (rootdata[irootloc[j]]) {
        bas->irootloc[q++] = irootloc[j];
        connected          = PETSC_TRUE;
      }
    }
    if (connected) {
      bas->niranks++;
      if (i < ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
      bas->iranks[p]      = iranks[i];
      bas->ioffset[p + 1] = q;
      p++;
    }
  }
  bas->itotal     = q;
  bas->nrootreqs  = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Setup packing related fields */
  PetscCall(PetscSFSetUpPackFields(esf));

  /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */
#if defined(PETSC_HAVE_CUDA)
  if (esf->backend == PETSCSF_BACKEND_CUDA) {
    esf->ops->Malloc = PetscSFMalloc_CUDA;
    esf->ops->Free   = PetscSFFree_CUDA;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  /* TODO: Needs debugging */
  if (esf->backend == PETSCSF_BACKEND_HIP) {
    esf->ops->Malloc = PetscSFMalloc_HIP;
    esf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
    esf->ops->Malloc = PetscSFMalloc_Kokkos;
    esf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
  PetscCall(PetscFree2(rootdata, leafmem));
  *newsf = esf;
  PetscFunctionReturn(PETSC_SUCCESS);
}
606
/* Constructor for the basic SF type: install the ops table and allocate the
   type-specific data structure. Called via PetscSFSetType(sf, PETSCSFBASIC). */
PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
{
  PetscSF_Basic *dat;

  PetscFunctionBegin;
  sf->ops->SetUp                = PetscSFSetUp_Basic;
  sf->ops->Reset                = PetscSFReset_Basic;
  sf->ops->Destroy              = PetscSFDestroy_Basic;
  sf->ops->View                 = PetscSFView_Basic;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
  sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
  sf->ops->SetCommunicationOps  = PetscSFSetCommunicationOps_Basic;

  sf->persistent = PETSC_TRUE;  // currently SFBASIC always uses persistent send/recv
  sf->collective = PETSC_FALSE;

  PetscCall(PetscNew(&dat));
  sf->data = (void *)dat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
633