xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision df4cd43f92eaa320656440c40edb1046daee8f75)
1 #include <../src/vec/is/sf/impls/basic/sfbasic.h>
2 #include <../src/vec/is/sf/impls/basic/sfpack.h>
3 #include <petsc/private/viewerimpl.h>
4 
5 /*===================================================================================*/
6 /*              SF public interface implementations                                  */
7 /*===================================================================================*/
/*
  PetscSFSetUp_Basic - build the root-side (incoming) communication metadata for an SFBASIC

  Starting from the leaf-side info produced by PetscSFSetUpRanks() (sf->ranks, sf->roffset,
  sf->rremote), performs a two-sided exchange so each rank learns which ranks reference its
  roots and exactly which root indices they reference.  The results are stored in the
  PetscSF_Basic struct: iranks[], ioffset[], itotal and irootloc[] (concatenation, per
  incoming leaf rank, of the local root indices that rank's leaves point to).

  Collective on the SF's communicator.
*/
PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
{
  PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
  PetscInt      *rlengths, *ilengths, i, nRemoteRootRanks, nRemoteLeafRanks;
  PetscMPIInt    rank, niranks, *iranks, tag;
  MPI_Comm       comm;
  MPI_Group      group;
  MPI_Request   *rootreqs, *leafreqs;

  PetscFunctionBegin;
  /* Passing the self group selects (at most) our own rank as the distinguished rank set;
     the PetscCheck below relies on this (bas->ndiranks <= 1 and iranks[0] == rank) */
  PetscCallMPI(MPI_Comm_group(PETSC_COMM_SELF, &group));
  PetscCall(PetscSFSetUpRanks(sf, group));
  PetscCallMPI(MPI_Group_free(&group));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscObjectGetNewTag((PetscObject)sf, &tag));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /*
   * Inform roots about how many leaves and from which ranks
   */
  PetscCall(PetscMalloc1(sf->nranks, &rlengths));
  /* Determine number, sending ranks and length of incoming */
  for (i = 0; i < sf->nranks; i++) { rlengths[i] = sf->roffset[i + 1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ }
  nRemoteRootRanks = sf->nranks - sf->ndranks;
  /* Two-sided handshake over the non-distinguished ranks only: on return, iranks[]/ilengths[]
     (allocated by the call) describe the niranks remote ranks whose leaves reference our roots */
  PetscCall(PetscCommBuildTwoSided(comm, 1, MPIU_INT, nRemoteRootRanks, sf->ranks + sf->ndranks, rlengths + sf->ndranks, &niranks, &iranks, (void **)&ilengths));

  /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
     We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
     small and the sorting is cheap.
   */
  PetscCall(PetscSortMPIIntWithIntArray(niranks, iranks, ilengths));

  /* Partition into distinguished and non-distinguished incoming ranks */
  bas->ndiranks = sf->ndranks;
  bas->niranks  = bas->ndiranks + niranks;
  PetscCall(PetscMalloc2(bas->niranks, &bas->iranks, bas->niranks + 1, &bas->ioffset));
  bas->ioffset[0] = 0;
  /* Distinguished (self) ranks first: lengths come from our own leaf-side data, no exchange needed */
  for (i = 0; i < bas->ndiranks; i++) {
    bas->iranks[i]      = sf->ranks[i];
    bas->ioffset[i + 1] = bas->ioffset[i] + rlengths[i];
  }
  PetscCheck(bas->ndiranks <= 1 && (bas->ndiranks != 1 || bas->iranks[0] == rank), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Broken setup for shared ranks");
  /* NOTE: continues with i == bas->ndiranks from the loop above; appends the exchanged remote ranks */
  for (; i < bas->niranks; i++) {
    bas->iranks[i]      = iranks[i - bas->ndiranks];
    bas->ioffset[i + 1] = bas->ioffset[i] + ilengths[i - bas->ndiranks];
  }
  bas->itotal = bas->ioffset[i]; /* total number of incoming root references (i == bas->niranks here) */
  PetscCall(PetscFree(rlengths));
  PetscCall(PetscFree(iranks));
  PetscCall(PetscFree(ilengths));

  /* Send leaf identities to roots */
  nRemoteLeafRanks = bas->niranks - bas->ndiranks;
  PetscCall(PetscMalloc1(bas->itotal, &bas->irootloc));
  PetscCall(PetscMalloc2(nRemoteLeafRanks, &rootreqs, nRemoteRootRanks, &leafreqs));
  /* Post receives for the root indices each remote leaf rank will reference (fills bas->irootloc) */
  for (i = bas->ndiranks; i < bas->niranks; i++) PetscCallMPI(MPIU_Irecv(bas->irootloc + bas->ioffset[i], bas->ioffset[i + 1] - bas->ioffset[i], MPIU_INT, bas->iranks[i], tag, comm, &rootreqs[i - bas->ndiranks]));
  for (i = 0; i < sf->nranks; i++) {
    PetscInt npoints = sf->roffset[i + 1] - sf->roffset[i];
    if (i < sf->ndranks) {
      /* Distinguished (self) rank: copy locally instead of messaging ourselves */
      PetscCheck(sf->ranks[i] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished leaf rank");
      PetscCheck(bas->iranks[0] == rank, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot interpret distinguished root rank");
      PetscCheck(npoints == bas->ioffset[1] - bas->ioffset[0], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Distinguished rank exchange has mismatched lengths");
      PetscCall(PetscArraycpy(bas->irootloc + bas->ioffset[0], sf->rremote + sf->roffset[i], npoints));
      continue;
    }
    PetscCallMPI(MPIU_Isend(sf->rremote + sf->roffset[i], npoints, MPIU_INT, sf->ranks[i], tag, comm, &leafreqs[i - sf->ndranks]));
  }
  PetscCallMPI(MPI_Waitall(nRemoteLeafRanks, rootreqs, MPI_STATUSES_IGNORE));
  PetscCallMPI(MPI_Waitall(nRemoteRootRanks, leafreqs, MPI_STATUSES_IGNORE));

  /* Record request counts for the persistent communication used by Bcast/Reduce/FetchAndOp */
  sf->nleafreqs  = nRemoteRootRanks;
  bas->nrootreqs = nRemoteLeafRanks;
  sf->persistent = PETSC_TRUE;

  /* Setup fields related to packing, such as rootbuflen[] */
  PetscCall(PetscSFSetUpPackFields(sf));
  PetscCall(PetscFree2(rootreqs, leafreqs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
86 
87 PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
88 {
89   PetscSF_Basic *bas  = (PetscSF_Basic *)sf->data;
90   PetscSFLink    link = bas->avail, next;
91 
92   PetscFunctionBegin;
93   PetscCheck(!bas->inuse, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Outstanding operation has not been completed");
94   PetscCall(PetscFree2(bas->iranks, bas->ioffset));
95   PetscCall(PetscFree(bas->irootloc));
96 
97 #if defined(PETSC_HAVE_DEVICE)
98   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, bas->irootloc_d[i]));
99 #endif
100 
101 #if defined(PETSC_HAVE_NVSHMEM)
102   PetscCall(PetscSFReset_Basic_NVSHMEM(sf));
103 #endif
104 
105   for (; link; link = next) {
106     next = link->next;
107     PetscCall(PetscSFLinkDestroy(sf, link));
108   }
109   bas->avail = NULL;
110   PetscCall(PetscSFResetPackFields(sf));
111   PetscFunctionReturn(PETSC_SUCCESS);
112 }
113 
/* Destroy an SFBASIC: reset (frees ranks/offsets/irootloc, links, pack fields), then free the type-specific struct */
PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
{
  PetscFunctionBegin;
  PetscCall(PetscSFReset_Basic(sf));
  PetscCall(PetscFree(sf->data)); /* the PetscSF_Basic allocated in PetscSFCreate_Basic */
  PetscFunctionReturn(PETSC_SUCCESS);
}
121 
122 #if defined(PETSC_USE_SINGLE_LIBRARY)
123   #include <petscmat.h>
124 
125 PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf, PetscViewer viewer)
126 {
127   PetscSF_Basic     *bas = (PetscSF_Basic *)sf->data;
128   PetscInt           i, nrootranks, ndrootranks;
129   const PetscInt    *rootoffset;
130   PetscMPIInt        rank, size;
131   const PetscMPIInt *rootranks;
132   MPI_Comm           comm = PetscObjectComm((PetscObject)sf);
133   PetscScalar        unitbytes;
134   Mat                A;
135 
136   PetscFunctionBegin;
137   PetscCallMPI(MPI_Comm_size(comm, &size));
138   PetscCallMPI(MPI_Comm_rank(comm, &rank));
139   /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do
140     PetscSFBcast, i.e., roots send data to leaves.  We dump the communication pattern into a matrix
141     in senders' view point: how many bytes I will send to my neighbors.
142 
143     Looking at a column of the matrix, one can also know how many bytes the rank will receive from others.
144 
145     If PetscSFLink bas->inuse is available, we can use that to get tree vertex size. But that would give
146     different interpretations for the same SF for different data types. Since we most care about VecScatter,
147     we uniformly treat each vertex as a PetscScalar.
148   */
149   unitbytes = (PetscScalar)sizeof(PetscScalar);
150 
151   PetscCall(PetscSFGetRootInfo_Basic(sf, &nrootranks, &ndrootranks, &rootranks, &rootoffset, NULL));
152   PetscCall(MatCreateAIJ(comm, 1, 1, size, size, 1, NULL, nrootranks - ndrootranks, NULL, &A));
153   PetscCall(MatSetOptionsPrefix(A, "__petsc_internal__")); /* To prevent the internal A from taking any command line options */
154   for (i = 0; i < nrootranks; i++) PetscCall(MatSetValue(A, (PetscInt)rank, bas->iranks[i], (rootoffset[i + 1] - rootoffset[i]) * unitbytes, INSERT_VALUES));
155   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
156   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
157   PetscCall(MatView(A, viewer));
158   PetscCall(MatDestroy(&A));
159   PetscFunctionReturn(PETSC_SUCCESS);
160 }
161 #endif
162 
163 PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf, PetscViewer viewer)
164 {
165   PetscBool isascii;
166 
167   PetscFunctionBegin;
168   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
169   if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) PetscCall(PetscViewerASCIIPrintf(viewer, "  MultiSF sort=%s\n", sf->rankorder ? "rank-order" : "unordered"));
170 #if defined(PETSC_USE_SINGLE_LIBRARY)
171   else {
172     PetscBool isdraw, isbinary;
173     PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
174     PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
175     if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) PetscCall(PetscSFView_Basic_PatternAndSizes(sf, viewer));
176   }
177 #endif
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
182 {
183   PetscSFLink link = NULL;
184 
185   PetscFunctionBegin;
186   /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */
187   PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link));
188   /* Pack rootdata to rootbuf for remote communication */
189   PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata));
190   /* Start communication, e.g., post MPI_Isend */
191   PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
192   /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */
193   PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_ROOT2LEAF, (void *)rootdata, leafdata, op));
194   PetscFunctionReturn(PETSC_SUCCESS);
195 }
196 
197 PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
198 {
199   PetscSFLink link = NULL;
200 
201   PetscFunctionBegin;
202   /* Retrieve the link used in XxxBegin() with root/leafdata as key */
203   PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
204   /* Finish remote communication, e.g., post MPI_Waitall */
205   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
206   /* Unpack data in leafbuf to leafdata for remote communication */
207   PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafdata, op));
208   /* Recycle the link */
209   PetscCall(PetscSFLinkReclaim(sf, &link));
210   PetscFunctionReturn(PETSC_SUCCESS);
211 }
212 
213 /* Shared by ReduceBegin and FetchAndOpBegin */
214 static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op, PetscSFOperation sfop, PetscSFLink *out)
215 {
216   PetscSFLink link = NULL;
217 
218   PetscFunctionBegin;
219   PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, sfop, &link));
220   PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata));
221   PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_LEAF2ROOT));
222   *out = link;
223   PetscFunctionReturn(PETSC_SUCCESS);
224 }
225 
226 /* leaf -> root with reduction */
227 static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
228 {
229   PetscSFLink link = NULL;
230 
231   PetscFunctionBegin;
232   PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_REDUCE, &link));
233   PetscCall(PetscSFLinkScatterLocal(sf, link, PETSCSF_LEAF2ROOT, rootdata, (void *)leafdata, op));
234   PetscFunctionReturn(PETSC_SUCCESS);
235 }
236 
237 PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
238 {
239   PetscSFLink link = NULL;
240 
241   PetscFunctionBegin;
242   PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
243   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
244   PetscCall(PetscSFLinkUnpackRootData(sf, link, PETSCSF_REMOTE, rootdata, op));
245   PetscCall(PetscSFLinkReclaim(sf, &link));
246   PetscFunctionReturn(PETSC_SUCCESS);
247 }
248 
249 PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, void *leafupdate, MPI_Op op)
250 {
251   PetscSFLink link = NULL;
252 
253   PetscFunctionBegin;
254   PetscCall(PetscSFLeafToRootBegin_Basic(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op, PETSCSF_FETCH, &link));
255   PetscCall(PetscSFLinkFetchAndOpLocal(sf, link, rootdata, leafdata, leafupdate, op));
256   PetscFunctionReturn(PETSC_SUCCESS);
257 }
258 
259 static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
260 {
261   PetscSFLink link = NULL;
262 
263   PetscFunctionBegin;
264   PetscCall(PetscSFLinkGetInUse(sf, unit, rootdata, leafdata, PETSC_OWN_POINTER, &link));
265   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
266   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_LEAF2ROOT));
267   /* Do fetch-and-op, the (remote) update results are in rootbuf */
268   PetscCall(PetscSFLinkFetchAndOpRemote(sf, link, rootdata, op));
269   /* Bcast rootbuf to leafupdate */
270   PetscCall(PetscSFLinkStartCommunication(sf, link, PETSCSF_ROOT2LEAF));
271   PetscCall(PetscSFLinkFinishCommunication(sf, link, PETSCSF_ROOT2LEAF));
272   /* Unpack and insert fetched data into leaves */
273   PetscCall(PetscSFLinkUnpackLeafData(sf, link, PETSCSF_REMOTE, leafupdate, MPI_REPLACE));
274   PetscCall(PetscSFLinkReclaim(sf, &link));
275   PetscFunctionReturn(PETSC_SUCCESS);
276 }
277 
278 PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
279 {
280   PetscSF_Basic *bas = (PetscSF_Basic *)sf->data;
281 
282   PetscFunctionBegin;
283   if (niranks) *niranks = bas->niranks;
284   if (iranks) *iranks = bas->iranks;
285   if (ioffset) *ioffset = bas->ioffset;
286   if (irootloc) *irootloc = bas->irootloc;
287   PetscFunctionReturn(PETSC_SUCCESS);
288 }
289 
290 /* An optimized PetscSFCreateEmbeddedRootSF. We aggressively make use of the established communication on sf.
291    We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
292    was sorted before calling the routine.
293  */
/*
  PetscSFCreateEmbeddedRootSF_Basic - optimized PetscSFCreateEmbeddedRootSF

  We aggressively make use of the established communication on sf: one Bcast on sf marks the
  leaves still connected to a selected root, and no further communication is needed to build
  the embedded sf.  Both the outgoing (send) and incoming (recv) rank arrays of esf are filled
  by scanning sf's arrays, so esf skips PetscSFSetUpRanks/PetscSFSetUp entirely.  Note that
  selected[] was sorted before calling the routine.
 */
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
{
  PetscSF            esf;
  PetscInt           esf_nranks, esf_ndranks, *esf_roffset, *esf_rmine, *esf_rremote;
  PetscInt           i, j, p, q, nroots, esf_nleaves, *new_ilocal, nranks, ndranks, niranks, ndiranks, minleaf, maxleaf, maxlocal;
  char              *rootdata, *leafdata, *leafmem; /* Only stores 0 or 1, so we can save memory with char */
  PetscMPIInt       *esf_ranks;
  const PetscMPIInt *ranks, *iranks;
  const PetscInt    *roffset, *rmine, *rremote, *ioffset, *irootloc;
  PetscBool          connected;
  PetscSFNode       *new_iremote;
  PetscSF_Basic     *bas;

  PetscFunctionBegin;
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), &esf));
  PetscCall(PetscSFSetFromOptions(esf));
  PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */

  /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
  maxlocal = maxleaf - minleaf + 1;
  PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
  /* Shift so leafdata can be indexed directly by leaf ids in [minleaf, maxleaf] */
  leafdata = leafmem - minleaf;
  /* Tag selected roots */
  for (i = 0; i < nselected; ++i) rootdata[selected[i]] = 1;

  /* After this Bcast, leafdata[l] != 0 iff leaf l points at a selected root */
  PetscCall(PetscSFBcastBegin(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPI_CHAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFGetLeafInfo_Basic(sf, &nranks, &ndranks, &ranks, &roffset, &rmine, &rremote)); /* Get send info */
  /* First pass: count surviving leaves and (distinguished) root ranks to size the allocations */
  esf_nranks = esf_ndranks = esf_nleaves = 0;
  for (i = 0; i < nranks; i++) {
    connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
    for (j = roffset[i]; j < roffset[i + 1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_nleaves++;
        connected = PETSC_TRUE;
      }
    }
    if (connected) {
      esf_nranks++;
      if (i < ndranks) esf_ndranks++;
    }
  }

  /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
  PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
  PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
  PetscCall(PetscMalloc4(esf_nranks, &esf_ranks, esf_nranks + 1, &esf_roffset, esf_nleaves, &esf_rmine, esf_nleaves, &esf_rremote));
  p              = 0; /* Counter for connected root ranks */
  q              = 0; /* Counter for connected leaves */
  esf_roffset[0] = 0;
  for (i = 0; i < nranks; i++) { /* Scan leaf data again to fill esf arrays */
    connected = PETSC_FALSE;
    for (j = roffset[i]; j < roffset[i + 1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_rmine[q] = new_ilocal[q] = rmine[j];
        esf_rremote[q]               = rremote[j];
        new_iremote[q].index         = rremote[j];
        new_iremote[q].rank          = ranks[i];
        connected                    = PETSC_TRUE;
        q++;
      }
    }
    if (connected) {
      esf_ranks[p]       = ranks[i];
      esf_roffset[p + 1] = q;
      p++;
    }
  }

  /* SetGraph internally resets the SF, so we only set its fields after the call */
  PetscCall(PetscSFSetGraph(esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
  esf->nranks    = esf_nranks;
  esf->ndranks   = esf_ndranks;
  esf->ranks     = esf_ranks;
  esf->roffset   = esf_roffset;
  esf->rmine     = esf_rmine;
  esf->rremote   = esf_rremote;
  esf->nleafreqs = esf_nranks - esf_ndranks; /* one send request per non-distinguished root rank */

  /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
  bas = (PetscSF_Basic *)esf->data;
  PetscCall(PetscSFGetRootInfo_Basic(sf, &niranks, &ndiranks, &iranks, &ioffset, &irootloc)); /* Get recv info */
  /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
     we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once.
   */
  PetscCall(PetscMalloc2(niranks, &bas->iranks, niranks + 1, &bas->ioffset));
  PetscCall(PetscMalloc1(ioffset[niranks], &bas->irootloc));
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p                                              = 0; /* Counter for connected leaf ranks */
  q                                              = 0; /* Counter for connected roots */
  for (i = 0; i < niranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j = ioffset[i]; j < ioffset[i + 1]; j++) {
      /* Keep only root references that land on a selected root */
      if (rootdata[irootloc[j]]) {
        bas->irootloc[q++] = irootloc[j];
        connected          = PETSC_TRUE;
      }
    }
    if (connected) {
      bas->niranks++;
      if (i < ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
      bas->iranks[p]      = iranks[i];
      bas->ioffset[p + 1] = q;
      p++;
    }
  }
  bas->itotal     = q;
  bas->nrootreqs  = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Setup packing related fields */
  PetscCall(PetscSFSetUpPackFields(esf));

  /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */
#if defined(PETSC_HAVE_CUDA)
  if (esf->backend == PETSCSF_BACKEND_CUDA) {
    esf->ops->Malloc = PetscSFMalloc_CUDA;
    esf->ops->Free   = PetscSFFree_CUDA;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  /* TODO: Needs debugging */
  if (esf->backend == PETSCSF_BACKEND_HIP) {
    esf->ops->Malloc = PetscSFMalloc_HIP;
    esf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
    esf->ops->Malloc = PetscSFMalloc_Kokkos;
    esf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
  PetscCall(PetscFree2(rootdata, leafmem));
  *newsf = esf;
  PetscFunctionReturn(PETSC_SUCCESS);
}
435 
436 PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
437 {
438   PetscSF_Basic *dat;
439 
440   PetscFunctionBegin;
441   sf->ops->SetUp                = PetscSFSetUp_Basic;
442   sf->ops->Reset                = PetscSFReset_Basic;
443   sf->ops->Destroy              = PetscSFDestroy_Basic;
444   sf->ops->View                 = PetscSFView_Basic;
445   sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
446   sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
447   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
448   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
449   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
450   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
451   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
452   sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
453 
454   PetscCall(PetscNew(&dat));
455   sf->data = (void *)dat;
456   PetscFunctionReturn(PETSC_SUCCESS);
457 }
458