xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision 84ff8c8b54fd7c9cb88641c01dfe6357ec5f72d0)
1 #include <../src/vec/is/sf/impls/basic/sfbasic.h>
2 #include <../src/vec/is/sf/impls/basic/sfpack.h>
3 #include <petsc/private/viewerimpl.h>
4 
5 /*===================================================================================*/
6 /*              SF public interface implementations                                  */
7 /*===================================================================================*/
PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
{
  PetscErrorCode ierr;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscInt       *rlengths,*ilengths,i,nRemoteRootRanks,nRemoteLeafRanks;
  PetscMPIInt    rank,niranks,*iranks,tag;
  MPI_Comm       comm;
  MPI_Group      group;
  MPI_Request    *rootreqs,*leafreqs;

  PetscFunctionBegin;
  /* Build the leaf-side (outgoing/send) info first: sf->ranks, sf->roffset, sf->rremote, ... */
  ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRMPI(ierr);
  ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr);
  ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  /*
   * Inform roots about how many leaves and from which ranks
   */
  ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr);
  /* Determine number, sending ranks and length of incoming */
  for (i=0; i<sf->nranks; i++) {
    rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
  }
  nRemoteRootRanks = sf->nranks-sf->ndranks;
  /* Two-sided handshake: learn which ranks (iranks) have leaves pointing at my roots and how many (ilengths) */
  ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,nRemoteRootRanks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr);

  /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
     We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
     small and the sorting is cheap.
   */
  ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr);

  /* Partition into distinguished and non-distinguished incoming ranks */
  bas->ndiranks = sf->ndranks;
  bas->niranks = bas->ndiranks + niranks;
  ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr);
  bas->ioffset[0] = 0;
  /* Distinguished (self) ranks come first; their lengths were computed locally in rlengths[] */
  for (i=0; i<bas->ndiranks; i++) {
    bas->iranks[i] = sf->ranks[i];
    bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
  }
  if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
  /* Remote ranks follow, with lengths received through the two-sided exchange */
  for (; i<bas->niranks; i++) {
    bas->iranks[i] = iranks[i-bas->ndiranks];
    bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
  }
  bas->itotal = bas->ioffset[i]; /* Total number of incoming leaf references to my roots */
  ierr = PetscFree(rlengths);CHKERRQ(ierr);
  ierr = PetscFree(iranks);CHKERRQ(ierr);
  ierr = PetscFree(ilengths);CHKERRQ(ierr);

  /* Send leaf identities to roots */
  nRemoteLeafRanks = bas->niranks-bas->ndiranks;
  ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr);
  ierr = PetscMalloc2(nRemoteLeafRanks,&rootreqs,nRemoteRootRanks,&leafreqs);CHKERRQ(ierr);
  /* Post receives for the root indices each remote leaf rank references */
  for (i=bas->ndiranks; i<bas->niranks; i++) {
    ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRMPI(ierr);
  }
  for (i=0; i<sf->nranks; i++) {
    PetscMPIInt npoints;
    ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr);
    if (i < sf->ndranks) {
      /* Distinguished rank is the process itself: copy the root indices directly instead of messaging */
      if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
      if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
      if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
      ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr);
      continue;
    }
    ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRMPI(ierr);
  }
  ierr = MPI_Waitall(nRemoteLeafRanks,rootreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
  ierr = MPI_Waitall(nRemoteRootRanks,leafreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);

  sf->nleafreqs  = nRemoteRootRanks;
  bas->nrootreqs = nRemoteLeafRanks;
  sf->persistent = PETSC_TRUE;

  /* Setup fields related to packing, such as rootbuflen[] */
  ierr = PetscSFSetUpPackFields(sf);CHKERRQ(ierr);
  ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
92 
PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
{
  PetscErrorCode    ierr;
  PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
  PetscSFLink       link = bas->avail,next;

  PetscFunctionBegin;
  /* A link still checked out means an XxxBegin has not been matched by its XxxEnd */
  if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
  /* Free the root-side (incoming) communication pattern built in PetscSFSetUp_Basic */
  ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr);
  ierr = PetscFree(bas->irootloc);CHKERRQ(ierr);

 #if defined(PETSC_HAVE_DEVICE)
  /* Free both device copies of irootloc */
  for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]);CHKERRQ(ierr);}
 #endif

 #if defined(PETSC_HAVE_NVSHMEM)
  ierr = PetscSFReset_Basic_NVSHMEM(sf);CHKERRQ(ierr);
 #endif

  /* Destroy every cached communication link on the available list */
  for (; link; link=next) {next = link->next; ierr = PetscSFLinkDestroy(sf,link);CHKERRQ(ierr);}
  bas->avail = NULL;
  ierr = PetscSFResetPackFields(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
117 
118 PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
119 {
120   PetscErrorCode ierr;
121 
122   PetscFunctionBegin;
123   ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr);
124   ierr = PetscFree(sf->data);CHKERRQ(ierr);
125   PetscFunctionReturn(0);
126 }
127 
128 #if defined(PETSC_USE_SINGLE_LIBRARY)
129 #include <petscmat.h>
130 
131 PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf,PetscViewer viewer)
132 {
133   PetscErrorCode       ierr;
134   PetscSF_Basic        *bas = (PetscSF_Basic*)sf->data;
135   PetscInt             i,nrootranks,ndrootranks;
136   const PetscInt       *rootoffset;
137   PetscMPIInt          rank,size;
138   const PetscMPIInt    *rootranks;
139   MPI_Comm             comm = PetscObjectComm((PetscObject)sf);
140   PetscScalar          unitbytes;
141   Mat                  A;
142 
143   PetscFunctionBegin;
144   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
145   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
146   /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do
147     PetscSFBcast, i.e., roots send data to leaves.  We dump the communication pattern into a matrix
148     in senders' view point: how many bytes I will send to my neighbors.
149 
150     Looking at a column of the matrix, one can also know how many bytes the rank will receive from others.
151 
152     If PetscSFLink bas->inuse is available, we can use that to get tree vertex size. But that would give
153     different interpretations for the same SF for different data types. Since we most care about VecScatter,
154     we uniformly treat each vertex as a PetscScalar.
155   */
156   unitbytes = (PetscScalar)sizeof(PetscScalar);
157 
158   ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,NULL);CHKERRQ(ierr);
159   ierr = MatCreateAIJ(comm,1,1,size,size,1,NULL,nrootranks-ndrootranks,NULL,&A);CHKERRQ(ierr);
160   ierr = MatSetOptionsPrefix(A,"__petsc_internal__");CHKERRQ(ierr); /* To prevent the internal A from taking any command line options */
161   for (i=0; i<nrootranks; i++) {
162     ierr = MatSetValue(A,(PetscInt)rank,bas->iranks[i],(rootoffset[i+1]-rootoffset[i])*unitbytes,INSERT_VALUES);CHKERRQ(ierr);
163   }
164   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
165   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
166   ierr = MatView(A,viewer);CHKERRQ(ierr);
167   ierr = MatDestroy(&A);CHKERRQ(ierr);
168   PetscFunctionReturn(0);
169 }
170 #endif
171 
PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isascii;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
  /* Plain ASCII (non-MATLAB) output: just report whether the multi-SF keeps ranks sorted */
  if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {ierr = PetscViewerASCIIPrintf(viewer,"  MultiSF sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);}
#if defined(PETSC_USE_SINGLE_LIBRARY)
  /* Single-library builds can use Mat: dump the communication pattern for MATLAB/draw/binary viewers.
     NOTE: this else pairs with the if above only when PETSC_USE_SINGLE_LIBRARY is defined. */
  else {
    PetscBool  isdraw,isbinary;
    ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
    if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) {
      ierr = PetscSFView_Basic_PatternAndSizes(sf,viewer);CHKERRQ(ierr);
    }
  }
#endif
  PetscFunctionReturn(0);
}
192 
193 static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
194 {
195   PetscErrorCode    ierr;
196   PetscSFLink       link = NULL;
197 
198   PetscFunctionBegin;
199   /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */
200   ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr);
201   /* Pack rootdata to rootbuf for remote communication */
202   ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr);
203   /* Start communcation, e.g., post MPI_Isend */
204   ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
205   /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */
206   ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op);CHKERRQ(ierr);
207   PetscFunctionReturn(0);
208 }
209 
210 PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
211 {
212   PetscErrorCode    ierr;
213   PetscSFLink       link = NULL;
214 
215   PetscFunctionBegin;
216   /* Retrieve the link used in XxxBegin() with root/leafdata as key */
217   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
218   /* Finish remote communication, e.g., post MPI_Waitall */
219   ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
220   /* Unpack data in leafbuf to leafdata for remote communication */
221   ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafdata,op);CHKERRQ(ierr);
222   /* Recycle the link */
223   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
224   PetscFunctionReturn(0);
225 }
226 
227 /* Shared by ReduceBegin and FetchAndOpBegin */
228 PETSC_STATIC_INLINE PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out)
229 {
230   PetscErrorCode    ierr;
231   PetscSFLink       link = NULL;
232 
233   PetscFunctionBegin;
234   ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);CHKERRQ(ierr);
235   ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr);
236   ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
237   *out = link;
238   PetscFunctionReturn(0);
239 }
240 
241 /* leaf -> root with reduction */
242 static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
243 {
244   PetscErrorCode    ierr;
245   PetscSFLink       link = NULL;
246 
247   PetscFunctionBegin;
248   ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr);
249   ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op);CHKERRQ(ierr);
250   PetscFunctionReturn(0);
251 }
252 
253 PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
254 {
255   PetscErrorCode    ierr;
256   PetscSFLink       link = NULL;
257 
258   PetscFunctionBegin;
259   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
260   ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
261   ierr = PetscSFLinkUnpackRootData(sf,link,PETSCSF_REMOTE,rootdata,op);CHKERRQ(ierr);
262   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
263   PetscFunctionReturn(0);
264 }
265 
266 PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
267 {
268   PetscErrorCode    ierr;
269   PetscSFLink       link = NULL;
270 
271   PetscFunctionBegin;
272   ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);CHKERRQ(ierr);
273   ierr = PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
278 {
279   PetscErrorCode    ierr;
280   PetscSFLink       link = NULL;
281 
282   PetscFunctionBegin;
283   ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr);
284   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
285   ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr);
286   /* Do fetch-and-op, the (remote) update results are in rootbuf */
287   ierr = PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op);CHKERRQ(ierr);
288   /* Bcast rootbuf to leafupdate */
289   ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
290   ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr);
291   /* Unpack and insert fetched data into leaves */
292   ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE);CHKERRQ(ierr);
293   ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr);
294   PetscFunctionReturn(0);
295 }
296 
297 PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
298 {
299   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
300 
301   PetscFunctionBegin;
302   if (niranks)  *niranks  = bas->niranks;
303   if (iranks)   *iranks   = bas->iranks;
304   if (ioffset)  *ioffset  = bas->ioffset;
305   if (irootloc) *irootloc = bas->irootloc;
306   PetscFunctionReturn(0);
307 }
308 
/* An optimized PetscSFCreateEmbeddedRootSF. We aggressively make use of the established communication on sf.
   We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
   was sorted before calling the routine.
 */
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscSF           esf;
  PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote;
  PetscInt          i,j,p,q,nroots,esf_nleaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
  char              *rootdata,*leafdata,*leafmem; /* Only stores 0 or 1, so we can save memory with char */
  PetscMPIInt       *esf_ranks;
  const PetscMPIInt *ranks,*iranks;
  const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc;
  PetscBool         connected;
  PetscSFNode       *new_iremote;
  PetscSF_Basic     *bas;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(esf);CHKERRQ(ierr);
  ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */

  /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
  ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
  maxlocal = maxleaf - minleaf + 1;
  ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
  leafdata = leafmem - minleaf; /* Shifted so leafdata can be indexed with leaf ids in [minleaf,maxleaf] */
  /* Tag selected roots */
  for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;

  /* Broadcast the 0/1 tags so each leaf learns whether its root was selected */
  ierr = PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */
  /* First pass: count surviving leaves and root ranks so we can size the arrays */
  esf_nranks = esf_ndranks = esf_nleaves = 0;
  for (i=0; i<nranks; i++) {
    connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
    for (j=roffset[i]; j<roffset[i+1]; j++) {if (leafdata[rmine[j]]) {esf_nleaves++; connected = PETSC_TRUE;}}
    if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
  }

  /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
  ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
  ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,esf_nleaves,&esf_rmine,esf_nleaves,&esf_rremote);CHKERRQ(ierr);
  p    = 0; /* Counter for connected root ranks */
  q    = 0; /* Counter for connected leaves */
  esf_roffset[0] = 0;
  for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
    connected = PETSC_FALSE;
    for (j=roffset[i]; j<roffset[i+1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_rmine[q]         = new_ilocal[q] = rmine[j];
        esf_rremote[q]       = rremote[j];
        new_iremote[q].index = rremote[j];
        new_iremote[q].rank  = ranks[i];
        connected            = PETSC_TRUE;
        q++;
      }
    }
    if (connected) {
      esf_ranks[p]     = ranks[i];
      esf_roffset[p+1] = q;
      p++;
    }
  }

  /* SetGraph internally resets the SF, so we only set its fields after the call */
  ierr           = PetscSFSetGraph(esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
  esf->nranks    = esf_nranks;
  esf->ndranks   = esf_ndranks;
  esf->ranks     = esf_ranks;
  esf->roffset   = esf_roffset;
  esf->rmine     = esf_rmine;
  esf->rremote   = esf_rremote;
  esf->nleafreqs = esf_nranks - esf_ndranks;

  /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
  bas  = (PetscSF_Basic*)esf->data;
  ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */
  /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
     we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once.
   */
  ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr);
  ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr);
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i=0; i<niranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j=ioffset[i]; j<ioffset[i+1]; j++) {
      if (rootdata[irootloc[j]]) {
        bas->irootloc[q++] = irootloc[j];
        connected = PETSC_TRUE;
      }
    }
    if (connected) {
      bas->niranks++;
      if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
      bas->iranks[p]    = iranks[i];
      bas->ioffset[p+1] = q;
      p++;
    }
  }
  bas->itotal     = q;
  bas->nrootreqs  = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Setup packing related fields */
  ierr = PetscSFSetUpPackFields(esf);CHKERRQ(ierr);

  /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */
#if defined(PETSC_HAVE_CUDA)
  if (esf->backend == PETSCSF_BACKEND_CUDA) {
    esf->ops->Malloc = PetscSFMalloc_CUDA;
    esf->ops->Free   = PetscSFFree_CUDA;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  /* TODO: Needs debugging */
  if (esf->backend == PETSCSF_BACKEND_HIP) {
    esf->ops->Malloc = PetscSFMalloc_HIP;
    esf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
    esf->ops->Malloc = PetscSFMalloc_Kokkos;
    esf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
  ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
  *newsf = esf;
  PetscFunctionReturn(0);
}
447 
448 PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
449 {
450   PetscSF_Basic  *dat;
451   PetscErrorCode ierr;
452 
453   PetscFunctionBegin;
454   sf->ops->SetUp                = PetscSFSetUp_Basic;
455   sf->ops->Reset                = PetscSFReset_Basic;
456   sf->ops->Destroy              = PetscSFDestroy_Basic;
457   sf->ops->View                 = PetscSFView_Basic;
458   sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
459   sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
460   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
461   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
462   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
463   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
464   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
465   sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
466 
467   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
468   sf->data = (void*)dat;
469   PetscFunctionReturn(0);
470 }
471