#include <../src/vec/is/sf/impls/basic/sfbasic.h>
#include <../src/vec/is/sf/impls/basic/sfpack.h>

/*===================================================================================*/
/*              SF public interface implementations                                  */
/*===================================================================================*/
PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
{
  PetscErrorCode ierr;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscInt       *rlengths,*ilengths,i,nRemoteRootRanks,nRemoteLeafRanks;
  PetscMPIInt    rank,niranks,*iranks,tag;
  MPI_Comm       comm;
  MPI_Group      group;
  MPI_Request    *rootreqs,*leafreqs;

  PetscFunctionBegin;
  ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRMPI(ierr);
  ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
  ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  /*
   * Inform roots about how many leaves they have and from which ranks
   */
  ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
  /* Determine the number of incoming ranks, which ranks they are, and the lengths of their messages */
  for (i=0; i<sf->nranks; i++) {
    rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves, for rank sf->ranks[i] */
  }
  nRemoteRootRanks = sf->nranks-sf->ndranks;
  ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,nRemoteRootRanks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);

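  /* An illustrative trace of the call above (not from the source), assuming two ranks where rank 0's
     leaves reference 3 roots on rank 1 and rank 1's leaves reference 5 roots on rank 0:

       rank 0 passes nto=1, toranks={1}, todata={3};  rank 1 passes nto=1, toranks={0}, todata={5}

     Afterwards each root owner knows who references its roots and how much:

       rank 0 gets niranks=1, iranks={1}, ilengths={5};  rank 1 gets niranks=1, iranks={0}, ilengths={3}
   */
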
  /* Sort iranks. See the use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() for why.
     We could instead sort the ranks there, at the price of allocating extra work arrays. Presumably niranks
     is small, so sorting here is cheap.
   */
  ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);

  /* Partition into distinguished and non-distinguished incoming ranks */
  bas->ndiranks = sf->ndranks;
  bas->niranks = bas->ndiranks + niranks;
  ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
  bas->ioffset[0] = 0;
  for (i=0; i<bas->ndiranks; i++) {
    bas->iranks[i] = sf->ranks[i];
    bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
  }
  if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
  for (; i<bas->niranks; i++) {
    bas->iranks[i] = iranks[i-bas->ndiranks];
    bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
  }
  bas->itotal = bas->ioffset[i];
  ierr = PetscFree(rlengths);CHKERRQ(ierr);
  ierr = PetscFree(iranks);CHKERRQ(ierr);
  ierr = PetscFree(ilengths);CHKERRQ(ierr);

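  /* The incoming ranks are now stored in a CSR-like layout: bas->iranks[] lists the leaf ranks that
     reference my roots, and bas->ioffset[] gives each rank's span in bas->irootloc[], which is filled
     with the referenced root indices by the exchange below. A hypothetical example with two incoming
     ranks, one referencing 2 of my roots and the other 3:

       iranks   = {1, 3}
       ioffset  = {0, 2, 5}              (so itotal = 5)
       irootloc = {r0, r1, r2, r3, r4}   (roots referenced by rank 1 first, then by rank 3)
   */
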
  /* Send leaf identities to roots */
  nRemoteLeafRanks = bas->niranks-bas->ndiranks;
  ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
  ierr = PetscMalloc2(nRemoteLeafRanks,&rootreqs,nRemoteRootRanks,&leafreqs);CHKERRQ(ierr);
  for (i=bas->ndiranks; i<bas->niranks; i++) {
    ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRMPI(ierr);
  }
  for (i=0; i<sf->nranks; i++) {
    PetscMPIInt npoints;
    ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
    if (i < sf->ndranks) {
      if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
      if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
      if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
      ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
      continue;
    }
    ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRMPI(ierr);
  }
  ierr = MPI_Waitall(nRemoteLeafRanks,rootreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
  ierr = MPI_Waitall(nRemoteRootRanks,leafreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);

  sf->nleafreqs  = nRemoteRootRanks;
  bas->nrootreqs = nRemoteLeafRanks;
  sf->persistent = PETSC_TRUE;

  /* Set up fields related to packing, such as rootbuflen[] */
  ierr = PetscSFSetUpPackFields(sf);CHKERRQ(ierr);
  ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
{
  PetscErrorCode    ierr;
  PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
  PetscSFLink       link = bas->avail,next;

  PetscFunctionBegin;
  if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
  ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
  ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);

 #if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]);CHKERRQ(ierr);}
 #endif

 #if defined(PETSC_HAVE_NVSHMEM)
  ierr = PetscSFReset_Basic_NVSHMEM(sf);CHKERRQ(ierr);
 #endif

  for (; link; link=next) {next = link->next; ierr = PetscSFLinkDestroy(sf,link);CHKERRQ(ierr);}
  bas->avail = NULL;
  ierr = PetscSFResetPackFields(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr);
  ierr = PetscFree(sf->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_USE_SINGLE_LIBRARY)
#include <petscmat.h>

PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode       ierr;
  PetscSF_Basic        *bas = (PetscSF_Basic*)sf->data;
  PetscSFLink          link = bas->avail; /* The most recently used (now idle) link; assumes one exists, so that unitbytes is known */
  PetscInt             i,nrootranks,ndrootranks,myrank;
  const PetscInt       *rootoffset;
  PetscMPIInt          rank,size;
  MPI_Comm             comm = PetscObjectComm((PetscObject)sf);
  Mat                  A;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  myrank = rank;
  if (sf->persistent) {
    /* Amount of data I (the root side) send to each leaf rank, i.e., global-to-local traffic */
    ierr = MatCreateAIJ(comm,1,1,size,size,20,NULL,20,NULL,&A);CHKERRQ(ierr);
    ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);CHKERRQ(ierr);
    for (i=0; i<nrootranks; i++) {
      ierr = MatSetValue(A,myrank,bas->iranks[i],(rootoffset[i+1] - rootoffset[i])*link->unitbytes,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); /* MAT_INITIAL_MATRIX with the same Mat would leak the original matrix */
    ierr = MatView(A,viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif

PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {ierr = PetscViewerASCIIPrintf(viewer,"  MultiSF sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);}
 #if defined(PETSC_USE_SINGLE_LIBRARY)
  {
    PetscBool ibinary;
    ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&ibinary);CHKERRQ(ierr);
    if (ibinary) {ierr = PetscSFView_Basic_PatternAndSizes(sf,viewer);CHKERRQ(ierr);}
  }
 #endif
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode    ierr;
  PetscSFLink       link = NULL;

  PetscFunctionBegin;
  /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */
  ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr);
  /* Pack rootdata into rootbuf for remote communication */
  ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr);
  /* Start communication, e.g., post MPI_Isend */
  ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
  /* Do the local scatter (i.e., self-to-self communication), which overlaps with the remote communication above */
  ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode    ierr;
  PetscSFLink       link = NULL;

  PetscFunctionBegin;
  /* Retrieve the link used in XxxBegin(), with root/leafdata as the key */
  ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  /* Finish the remote communication, e.g., post MPI_Waitall */
  ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
  /* Unpack the data received from remote ranks in leafbuf into leafdata */
  ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafdata,op);CHKERRQ(ierr);
  /* Recycle the link */
  ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

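/* A minimal usage sketch of the Begin/End pair above via the public API; the SF, its graph, and the
   data arrays are hypothetical:

     PetscInt rootdata[nroots], leafdata[nleaves];
     // ... create sf, call PetscSFSetGraph(), fill rootdata ...
     PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE); // other work can overlap here
     PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);   // each leaf now holds its root's value
 */
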
/* Shared by ReduceBegin and FetchAndOpBegin */
PETSC_STATIC_INLINE PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out)
{
  PetscErrorCode    ierr;
  PetscSFLink       link = NULL;

  PetscFunctionBegin;
  ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);CHKERRQ(ierr);
  ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr);
  ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
  *out = link;
  PetscFunctionReturn(0);
}

/* leaf -> root with reduction */
static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
{
  PetscErrorCode    ierr;
  PetscSFLink       link = NULL;

  PetscFunctionBegin;
  ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr);
  ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode    ierr;
  PetscSFLink       link = NULL;

  PetscFunctionBegin;
  ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
  ierr = PetscSFLinkUnpackRootData(sf,link,PETSCSF_REMOTE,rootdata,op);CHKERRQ(ierr);
  ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

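/* A minimal usage sketch of the reduction path via the public API (hypothetical data); with MPIU_SUM,
   the contributions of all leaves attached to a root are accumulated into that root:

     PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPIU_SUM);
     PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPIU_SUM);
 */
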
PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode    ierr;
  PetscSFLink       link = NULL;

  PetscFunctionBegin;
  ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);CHKERRQ(ierr);
  ierr = PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode    ierr;
  PetscSFLink       link = NULL;

  PetscFunctionBegin;
  ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
  /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
  ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
  /* Do the fetch-and-op; the (remote) update results end up in rootbuf */
  ierr = PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op);CHKERRQ(ierr);
  /* Bcast rootbuf back to leafupdate */
  ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
  ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
  /* Unpack and insert the fetched data into the leaves */
  ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

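/* A minimal usage sketch of fetch-and-op via the public API (hypothetical data). Each leaf adds its
   value to its root and receives in leafupdate the root's state just before the leaf's own contribution
   was applied, which makes, e.g., parallel counter/offset generation possible:

     PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPIU_SUM);
     PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,MPIU_SUM);
 */
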
PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;

  PetscFunctionBegin;
  if (niranks)  *niranks  = bas->niranks;
  if (iranks)   *iranks   = bas->iranks;
  if (ioffset)  *ioffset  = bas->ioffset;
  if (irootloc) *irootloc = bas->irootloc;
  PetscFunctionReturn(0);
}

/* An optimized PetscSFCreateEmbeddedRootSF. We aggressively reuse the established communication pattern
   of sf: one bcast on sf suffices, and no further communication is needed to build the embedded sf.
   Note that selected[] has already been sorted by the caller.
 */
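/* A minimal usage sketch (hypothetical input): keep only roots 0 and 7 and the leaves connected to
   them, dropping everything else from the communication graph:

     const PetscInt selected[] = {0, 7};   // must be sorted, see the note above
     PetscSF        esf;
     PetscSFCreateEmbeddedRootSF(sf,2,selected,&esf);
     // ... communicate on esf as usual, then PetscSFDestroy(&esf) ...
 */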
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscSF           esf;
  PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote;
  PetscInt          i,j,p,q,nroots,esf_nleaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
  char              *rootdata,*leafdata,*leafmem; /* Only stores 0 or 1, so we can save memory with char */
  PetscMPIInt       *esf_ranks;
  const PetscMPIInt *ranks,*iranks;
  const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc;
  PetscBool         connected;
  PetscSFNode       *new_iremote;
  PetscSF_Basic     *bas;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(esf);CHKERRQ(ierr);
  ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */

  /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
  ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
  maxlocal = maxleaf - minleaf + 1;
  ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
  leafdata = leafmem - minleaf;
  /* Tag selected roots */
  for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;

  ierr = PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */
  esf_nranks = esf_ndranks = esf_nleaves = 0;
  for (i=0; i<nranks; i++) {
    connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
    for (j=roffset[i]; j<roffset[i+1]; j++) {if (leafdata[rmine[j]]) {esf_nleaves++; connected = PETSC_TRUE;}}
    if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
  }

  /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
  ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
  ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,esf_nleaves,&esf_rmine,esf_nleaves,&esf_rremote);CHKERRQ(ierr);
  p    = 0; /* Counter for connected root ranks */
  q    = 0; /* Counter for connected leaves */
  esf_roffset[0] = 0;
  for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
    connected = PETSC_FALSE;
    for (j=roffset[i]; j<roffset[i+1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_rmine[q]         = new_ilocal[q] = rmine[j];
        esf_rremote[q]       = rremote[j];
        new_iremote[q].index = rremote[j];
        new_iremote[q].rank  = ranks[i];
        connected            = PETSC_TRUE;
        q++;
      }
    }
    if (connected) {
      esf_ranks[p]     = ranks[i];
      esf_roffset[p+1] = q;
      p++;
    }
  }

  /* SetGraph internally resets the SF, so we only set its fields after the call */
  ierr           = PetscSFSetGraph(esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  esf->nranks    = esf_nranks;
  esf->ndranks   = esf_ndranks;
  esf->ranks     = esf_ranks;
  esf->roffset   = esf_roffset;
  esf->rmine     = esf_rmine;
  esf->rremote   = esf_rremote;
  esf->nleafreqs = esf_nranks - esf_ndranks;

  /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
  bas  = (PetscSF_Basic*)esf->data;
  ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */
  /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than
     needed here, but we do not care since these arrays are usually short. The benefit is that we can fill these
     arrays by parsing irootloc just once.
   */
  ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
  ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i=0; i<niranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j=ioffset[i]; j<ioffset[i+1]; j++) {
      if (rootdata[irootloc[j]]) {
        bas->irootloc[q++] = irootloc[j];
        connected = PETSC_TRUE;
      }
    }
    if (connected) {
      bas->niranks++;
      if (i<ndiranks) bas->ndiranks++; /* Note that the order of ranks (including distinguished ranks) is kept */
      bas->iranks[p]    = iranks[i];
      bas->ioffset[p+1] = q;
      p++;
    }
  }
  bas->itotal     = q;
  bas->nrootreqs  = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Set up packing-related fields */
  ierr = PetscSFSetUpPackFields(esf);CHKERRQ(ierr);

  /* Copied from PetscSFSetUp(), since this routine skips calling PetscSFSetUp() */
#if defined(PETSC_HAVE_CUDA)
  if (esf->backend == PETSCSF_BACKEND_CUDA) {
    esf->ops->Malloc = PetscSFMalloc_CUDA;
    esf->ops->Free   = PetscSFFree_CUDA;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  /* TODO: Needs debugging */
  if (esf->backend == PETSCSF_BACKEND_HIP) {
    esf->ops->Malloc = PetscSFMalloc_HIP;
    esf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
    esf->ops->Malloc = PetscSFMalloc_Kokkos;
    esf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
  ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
  *newsf = esf;
  PetscFunctionReturn(0);
}

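/* A minimal sketch of creating an SF of this type explicitly; error checking is omitted, and the comm,
   nroots, and the one-edge graph below are illustrative:

     PetscSF     sf;
     PetscSFNode *iremote;
     PetscSFCreate(comm,&sf);
     PetscSFSetType(sf,PETSCSFBASIC);   // select this implementation
     PetscMalloc1(1,&iremote);
     iremote[0].rank  = 0;              // my leaf 0 references root 3 on rank 0
     iremote[0].index = 3;
     PetscSFSetGraph(sf,nroots,1,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
     PetscSFSetUp(sf);                  // triggers PetscSFSetUp_Basic() above
 */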
PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
{
  PetscSF_Basic  *dat;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  sf->ops->SetUp                = PetscSFSetUp_Basic;
  sf->ops->Reset                = PetscSFReset_Basic;
  sf->ops->Destroy              = PetscSFDestroy_Basic;
  sf->ops->View                 = PetscSFView_Basic;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
  sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
  sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
  sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
  sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
  sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;

  ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
  sf->data = (void*)dat;
  PetscFunctionReturn(0);
}