xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision 17f48955e3445b92d4f06cdd133e9c1d2d0ea0c8)
1 #include <../src/vec/is/sf/impls/basic/sfbasic.h>
2 #include <../src/vec/is/sf/impls/basic/sfpack.h>
3 #include <petsc/private/viewerimpl.h>
4 
5 /*===================================================================================*/
6 /*              SF public interface implementations                                  */
7 /*===================================================================================*/
/* Build the root-side (incoming) communication metadata for a basic SF.

   On entry, sf already knows its leaf-side (outgoing) picture. This routine:
     1. tells every root rank how many of its roots each leaf rank references
        (via PetscCommBuildTwoSided), and
     2. sends the actual root indices referenced, so each root rank can fill
        bas->irootloc.
   The "distinguished" ranks (self-communication) are handled by direct memcpy
   rather than MPI traffic.
*/
PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
{
  PetscErrorCode ierr;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscInt       *rlengths,*ilengths,i,nRemoteRootRanks,nRemoteLeafRanks;
  PetscMPIInt    rank,niranks,*iranks,tag;
  MPI_Comm       comm;
  MPI_Group      group;
  MPI_Request    *rootreqs,*leafreqs;

  PetscFunctionBegin;
  /* With the self group, PetscSFSetUpRanks marks only the local rank as distinguished */
  ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRMPI(ierr);
  ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
  ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  /*
   * Inform roots about how many leaves and from which ranks
   */
  ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
  /* Determine number, sending ranks and length of incoming */
  for (i=0; i<sf->nranks; i++) {
    rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
  }
  nRemoteRootRanks = sf->nranks-sf->ndranks;
  /* Two-sided handshake: learn which ranks have leaves pointing at my roots (iranks) and how many (ilengths) */
  ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,nRemoteRootRanks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);

  /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
     We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
     small and the sorting is cheap.
   */
  ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);

  /* Partition into distinguished and non-distinguished incoming ranks */
  bas->ndiranks = sf->ndranks;
  bas->niranks = bas->ndiranks + niranks;
  ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
  bas->ioffset[0] = 0;
  /* Distinguished (self) ranks come first; their lengths are known locally from rlengths */
  for (i=0; i<bas->ndiranks; i++) {
    bas->iranks[i] = sf->ranks[i];
    bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
  }
  if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
  /* Remaining incoming ranks/lengths come from the two-sided exchange; i continues from the loop above */
  for (; i<bas->niranks; i++) {
    bas->iranks[i] = iranks[i-bas->ndiranks];
    bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
  }
  bas->itotal = bas->ioffset[i];
  ierr = PetscFree(rlengths);CHKERRQ(ierr);
  ierr = PetscFree(iranks);CHKERRQ(ierr);
  ierr = PetscFree(ilengths);CHKERRQ(ierr);

  /* Send leaf identities to roots */
  nRemoteLeafRanks = bas->niranks-bas->ndiranks;
  ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
  ierr = PetscMalloc2(nRemoteLeafRanks,&rootreqs,nRemoteRootRanks,&leafreqs);CHKERRQ(ierr);
  /* Post receives for the root indices each remote leaf rank references on me */
  for (i=bas->ndiranks; i<bas->niranks; i++) {
    ierr = MPIU_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRMPI(ierr);
  }
  for (i=0; i<sf->nranks; i++) {
    PetscInt npoints = sf->roffset[i+1] - sf->roffset[i];
    if (i < sf->ndranks) {
      /* Self-communication: sanity-check and copy directly, no MPI */
      if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
      if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
      if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
      ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
      continue;
    }
    ierr = MPIU_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRMPI(ierr);
  }
  ierr = MPI_Waitall(nRemoteLeafRanks,rootreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
  ierr = MPI_Waitall(nRemoteRootRanks,leafreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);

  /* Record request counts so links can allocate persistent MPI requests */
  sf->nleafreqs  = nRemoteRootRanks;
  bas->nrootreqs = nRemoteLeafRanks;
  sf->persistent = PETSC_TRUE;

  /* Setup fields related to packing, such as rootbuflen[] */
  ierr = PetscSFSetUpPackFields(sf);CHKERRQ(ierr);
  ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
91 
92 PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
93 {
94   PetscErrorCode    ierr;
95   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
96   PetscSFLink       link = bas->avail,next;
97 
98   PetscFunctionBegin;
99   if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
100   ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
101   ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);
102 
103  #if defined(PETSC_HAVE_DEVICE)
104   for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]);CHKERRQ(ierr);}
105  #endif
106 
107  #if defined(PETSC_HAVE_NVSHMEM)
108   ierr = PetscSFReset_Basic_NVSHMEM(sf);CHKERRQ(ierr);
109  #endif
110 
111   for (; link; link=next) {next = link->next; ierr = PetscSFLinkDestroy(sf,link);CHKERRQ(ierr);}
112   bas->avail = NULL;
113   ierr = PetscSFResetPackFields(sf);CHKERRQ(ierr);
114   PetscFunctionReturn(0);
115 }
116 
117 PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
118 {
119   PetscErrorCode ierr;
120 
121   PetscFunctionBegin;
122   ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr);
123   ierr = PetscFree(sf->data);CHKERRQ(ierr);
124   PetscFunctionReturn(0);
125 }
126 
127 #if defined(PETSC_USE_SINGLE_LIBRARY)
128 #include <petscmat.h>
129 
130 PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf,PetscViewer viewer)
131 {
132   PetscErrorCode       ierr;
133   PetscSF_Basic        *bas = (PetscSF_Basic*)sf->data;
134   PetscInt             i,nrootranks,ndrootranks;
135   const PetscInt       *rootoffset;
136   PetscMPIInt          rank,size;
137   const PetscMPIInt    *rootranks;
138   MPI_Comm             comm = PetscObjectComm((PetscObject)sf);
139   PetscScalar          unitbytes;
140   Mat                  A;
141 
142   PetscFunctionBegin;
143   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
144   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
145   /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do
146     PetscSFBcast, i.e., roots send data to leaves.  We dump the communication pattern into a matrix
147     in senders' view point: how many bytes I will send to my neighbors.
148 
149     Looking at a column of the matrix, one can also know how many bytes the rank will receive from others.
150 
151     If PetscSFLink bas->inuse is available, we can use that to get tree vertex size. But that would give
152     different interpretations for the same SF for different data types. Since we most care about VecScatter,
153     we uniformly treat each vertex as a PetscScalar.
154   */
155   unitbytes = (PetscScalar)sizeof(PetscScalar);
156 
157   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,NULL);CHKERRQ(ierr);
158   ierr = MatCreateAIJ(comm,1,1,size,size,1,NULL,nrootranks-ndrootranks,NULL,&A);CHKERRQ(ierr);
159   ierr = MatSetOptionsPrefix(A,"__petsc_internal__");CHKERRQ(ierr); /* To prevent the internal A from taking any command line options */
160   for (i=0; i<nrootranks; i++) {
161     ierr = MatSetValue(A,(PetscInt)rank,bas->iranks[i],(rootoffset[i+1]-rootoffset[i])*unitbytes,INSERT_VALUES);CHKERRQ(ierr);
162   }
163   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
164   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
165   ierr = MatView(A,viewer);CHKERRQ(ierr);
166   ierr = MatDestroy(&A);CHKERRQ(ierr);
167   PetscFunctionReturn(0);
168 }
169 #endif
170 
/* Viewer dispatch for a basic SF. Plain ASCII prints a one-line summary; MATLAB-format
   ASCII, draw, and binary viewers get the communication-pattern matrix, which requires
   Mat and is therefore only available in single-library builds.
   NOTE: the #if placement keeps the else-branch (and its Mat dependency) out of
   multi-library builds entirely; do not reformat the if/else across the #if. */
PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isascii;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
  if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {ierr = PetscViewerASCIIPrintf(viewer,"  MultiSF sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);}
#if defined(PETSC_USE_SINGLE_LIBRARY)
  else {
    PetscBool  isdraw,isbinary;
    ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
    if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) {
      ierr = PetscSFView_Basic_PatternAndSizes(sf,viewer);CHKERRQ(ierr);
    }
  }
#endif
  PetscFunctionReturn(0);
}
191 
192 static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
193 {
194   PetscErrorCode    ierr;
195   PetscSFLink       link = NULL;
196 
197   PetscFunctionBegin;
198   /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */
199   ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr);
200   /* Pack rootdata to rootbuf for remote communication */
201   ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr);
202   /* Start communcation, e.g., post MPI_Isend */
203   ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
204   /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */
205   ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op);CHKERRQ(ierr);
206   PetscFunctionReturn(0);
207 }
208 
209 PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
210 {
211   PetscErrorCode    ierr;
212   PetscSFLink       link = NULL;
213 
214   PetscFunctionBegin;
215   /* Retrieve the link used in XxxBegin() with root/leafdata as key */
216   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
217   /* Finish remote communication, e.g., post MPI_Waitall */
218   ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
219   /* Unpack data in leafbuf to leafdata for remote communication */
220   ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafdata,op);CHKERRQ(ierr);
221   /* Recycle the link */
222   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
223   PetscFunctionReturn(0);
224 }
225 
226 /* Shared by ReduceBegin and FetchAndOpBegin */
227 PETSC_STATIC_INLINE PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out)
228 {
229   PetscErrorCode    ierr;
230   PetscSFLink       link = NULL;
231 
232   PetscFunctionBegin;
233   ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);CHKERRQ(ierr);
234   ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr);
235   ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
236   *out = link;
237   PetscFunctionReturn(0);
238 }
239 
240 /* leaf -> root with reduction */
241 static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
242 {
243   PetscErrorCode    ierr;
244   PetscSFLink       link = NULL;
245 
246   PetscFunctionBegin;
247   ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr);
248   ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op);CHKERRQ(ierr);
249   PetscFunctionReturn(0);
250 }
251 
252 PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
253 {
254   PetscErrorCode    ierr;
255   PetscSFLink       link = NULL;
256 
257   PetscFunctionBegin;
258   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
259   ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
260   ierr = PetscSFLinkUnpackRootData(sf,link,PETSCSF_REMOTE,rootdata,op);CHKERRQ(ierr);
261   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
262   PetscFunctionReturn(0);
263 }
264 
265 PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
266 {
267   PetscErrorCode    ierr;
268   PetscSFLink       link = NULL;
269 
270   PetscFunctionBegin;
271   ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);CHKERRQ(ierr);
272   ierr = PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
273   PetscFunctionReturn(0);
274 }
275 
276 static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
277 {
278   PetscErrorCode    ierr;
279   PetscSFLink       link = NULL;
280 
281   PetscFunctionBegin;
282   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
283   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
284   ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
285   /* Do fetch-and-op, the (remote) update results are in rootbuf */
286   ierr = PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op);CHKERRQ(ierr);
287   /* Bcast rootbuf to leafupdate */
288   ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
289   ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
290   /* Unpack and insert fetched data into leaves */
291   ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE);CHKERRQ(ierr);
292   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
293   PetscFunctionReturn(0);
294 }
295 
296 PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
297 {
298   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
299 
300   PetscFunctionBegin;
301   if (niranks)  *niranks  = bas->niranks;
302   if (iranks)   *iranks   = bas->iranks;
303   if (ioffset)  *ioffset  = bas->ioffset;
304   if (irootloc) *irootloc = bas->irootloc;
305   PetscFunctionReturn(0);
306 }
307 
/* An optimized PetscSFCreateEmbeddedRootSF. We aggressively make use of the established communication on sf.
   We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
   was sorted before calling the routine.
 */
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscSF           esf;
  PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote;
  PetscInt          i,j,p,q,nroots,esf_nleaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
  char              *rootdata,*leafdata,*leafmem; /* Only stores 0 or 1, so we can save memory with char */
  PetscMPIInt       *esf_ranks;
  const PetscMPIInt *ranks,*iranks;
  const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc;
  PetscBool         connected;
  PetscSFNode       *new_iremote;
  PetscSF_Basic     *bas;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(esf);CHKERRQ(ierr);
  ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */

  /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
  ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
  maxlocal = maxleaf - minleaf + 1;
  ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
  /* Shift leafmem so it can be indexed by the original (possibly offset) leaf indices */
  leafdata = leafmem - minleaf;
  /* Tag selected roots */
  for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;

  /* After the bcast, leafdata[l] is 1 iff leaf l is attached to a selected root */
  ierr = PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */
  /* First pass: count surviving ranks/leaves so exact-size arrays can be allocated */
  esf_nranks = esf_ndranks = esf_nleaves = 0;
  for (i=0; i<nranks; i++) {
    connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
    for (j=roffset[i]; j<roffset[i+1]; j++) {if (leafdata[rmine[j]]) {esf_nleaves++; connected = PETSC_TRUE;}}
    if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
  }

  /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
  ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
  ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,esf_nleaves,&esf_rmine,esf_nleaves,&esf_rremote);CHKERRQ(ierr);
  p    = 0; /* Counter for connected root ranks */
  q    = 0; /* Counter for connected leaves */
  esf_roffset[0] = 0;
  for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
    connected = PETSC_FALSE;
    for (j=roffset[i]; j<roffset[i+1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_rmine[q]         = new_ilocal[q] = rmine[j];
        esf_rremote[q]       = rremote[j];
        new_iremote[q].index = rremote[j];
        new_iremote[q].rank  = ranks[i];
        connected            = PETSC_TRUE;
        q++;
      }
    }
    if (connected) {
      esf_ranks[p]     = ranks[i];
      esf_roffset[p+1] = q;
      p++;
    }
  }

  /* SetGraph internally resets the SF, so we only set its fields after the call */
  ierr           = PetscSFSetGraph(esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  esf->nranks    = esf_nranks;
  esf->ndranks   = esf_ndranks;
  esf->ranks     = esf_ranks;
  esf->roffset   = esf_roffset;
  esf->rmine     = esf_rmine;
  esf->rremote   = esf_rremote;
  esf->nleafreqs = esf_nranks - esf_ndranks;

  /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
  bas  = (PetscSF_Basic*)esf->data;
  ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */
  /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
     we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once.
   */
  ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
  ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i=0; i<niranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j=ioffset[i]; j<ioffset[i+1]; j++) {
      /* rootdata[] still tags selected roots, so no further communication is needed here */
      if (rootdata[irootloc[j]]) {
        bas->irootloc[q++] = irootloc[j];
        connected = PETSC_TRUE;
      }
    }
    if (connected) {
      bas->niranks++;
      if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
      bas->iranks[p]    = iranks[i];
      bas->ioffset[p+1] = q;
      p++;
    }
  }
  bas->itotal     = q;
  bas->nrootreqs  = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Setup packing related fields */
  ierr = PetscSFSetUpPackFields(esf);CHKERRQ(ierr);

  /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */
#if defined(PETSC_HAVE_CUDA)
  if (esf->backend == PETSCSF_BACKEND_CUDA) {
    esf->ops->Malloc = PetscSFMalloc_CUDA;
    esf->ops->Free   = PetscSFFree_CUDA;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  /* TODO: Needs debugging */
  if (esf->backend == PETSCSF_BACKEND_HIP) {
    esf->ops->Malloc = PetscSFMalloc_HIP;
    esf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
    esf->ops->Malloc = PetscSFMalloc_Kokkos;
    esf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
  ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
  *newsf = esf;
  PetscFunctionReturn(0);
}
446 
447 PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
448 {
449   PetscSF_Basic  *dat;
450   PetscErrorCode ierr;
451 
452   PetscFunctionBegin;
453   sf->ops->SetUp                = PetscSFSetUp_Basic;
454   sf->ops->Reset                = PetscSFReset_Basic;
455   sf->ops->Destroy              = PetscSFDestroy_Basic;
456   sf->ops->View                 = PetscSFView_Basic;
457   sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
458   sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
459   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
460   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
461   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
462   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
463   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
464   sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
465 
466   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
467   sf->data = (void*)dat;
468   PetscFunctionReturn(0);
469 }
470