xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.c (revision 40badf4fbc550ac1f60bd080eaff6de6d55b946d)
1 #include <../src/vec/is/sf/impls/basic/sfbasic.h>
2 #include <../src/vec/is/sf/impls/basic/sfpack.h>
3 #include <petsc/private/viewerimpl.h>
4 
5 /*===================================================================================*/
6 /*              SF public interface implementations                                  */
7 /*===================================================================================*/
/*
  PetscSFSetUp_Basic - build the root-side (incoming) communication metadata for a basic SF

  On entry the leaf-side info (sf->ranks, sf->roffset, sf->rremote, ...) is computed by
  PetscSFSetUpRanks(). This routine then (1) tells each root rank how many of its roots are
  referenced by our leaves, via a two-sided exchange, and (2) sends the referenced root
  indices so each rank can fill bas->irootloc.
*/
PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
{
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscInt       *rlengths,*ilengths,i,nRemoteRootRanks,nRemoteLeafRanks;
  PetscMPIInt    rank,niranks,*iranks,tag;
  MPI_Comm       comm;
  MPI_Group      group;
  MPI_Request    *rootreqs,*leafreqs;

  PetscFunctionBegin;
  /* Set up leaf-side rank info; PETSC_COMM_SELF's group makes only ourself distinguished */
  CHKERRMPI(MPI_Comm_group(PETSC_COMM_SELF,&group));
  CHKERRQ(PetscSFSetUpRanks(sf,group));
  CHKERRMPI(MPI_Group_free(&group));
  CHKERRQ(PetscObjectGetComm((PetscObject)sf,&comm));
  CHKERRQ(PetscObjectGetNewTag((PetscObject)sf,&tag));
  CHKERRMPI(MPI_Comm_rank(comm,&rank));
  /*
   * Inform roots about how many leaves and from which ranks
   */
  CHKERRQ(PetscMalloc1(sf->nranks,&rlengths));
  /* Determine number, sending ranks and length of incoming */
  for (i=0; i<sf->nranks; i++) {
    rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
  }
  nRemoteRootRanks = sf->nranks-sf->ndranks;
  /* Two-sided exchange over the non-distinguished ranks: learn which ranks (iranks) reference
     our roots and how many entries each will send (ilengths) */
  CHKERRQ(PetscCommBuildTwoSided(comm,1,MPIU_INT,nRemoteRootRanks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths));

  /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why.
     We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is
     small and the sorting is cheap.
   */
  CHKERRQ(PetscSortMPIIntWithIntArray(niranks,iranks,ilengths));

  /* Partition into distinguished and non-distinguished incoming ranks */
  bas->ndiranks = sf->ndranks;
  bas->niranks = bas->ndiranks + niranks;
  CHKERRQ(PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset));
  bas->ioffset[0] = 0;
  /* Distinguished (self) ranks come first; their lengths are already known locally via rlengths[] */
  for (i=0; i<bas->ndiranks; i++) {
    bas->iranks[i] = sf->ranks[i];
    bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
  }
  PetscCheckFalse(bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank),PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
  for (; i<bas->niranks; i++) {
    bas->iranks[i] = iranks[i-bas->ndiranks];
    bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
  }
  bas->itotal = bas->ioffset[i]; /* Total number of incoming root references */
  CHKERRQ(PetscFree(rlengths));
  CHKERRQ(PetscFree(iranks));
  CHKERRQ(PetscFree(ilengths));

  /* Send leaf identities to roots */
  nRemoteLeafRanks = bas->niranks-bas->ndiranks;
  CHKERRQ(PetscMalloc1(bas->itotal,&bas->irootloc));
  CHKERRQ(PetscMalloc2(nRemoteLeafRanks,&rootreqs,nRemoteRootRanks,&leafreqs));
  /* Post receives for the root indices referenced by each remote leaf rank */
  for (i=bas->ndiranks; i<bas->niranks; i++) {
    CHKERRMPI(MPIU_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]));
  }
  for (i=0; i<sf->nranks; i++) {
    PetscInt npoints = sf->roffset[i+1] - sf->roffset[i];
    if (i < sf->ndranks) {
      /* Distinguished rank is ourself: sanity-check and copy locally instead of messaging */
      PetscCheckFalse(sf->ranks[i] != rank,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
      PetscCheckFalse(bas->iranks[0] != rank,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
      PetscCheckFalse(npoints != bas->ioffset[1]-bas->ioffset[0],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
      CHKERRQ(PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints));
      continue;
    }
    CHKERRMPI(MPIU_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]));
  }
  CHKERRMPI(MPI_Waitall(nRemoteLeafRanks,rootreqs,MPI_STATUSES_IGNORE));
  CHKERRMPI(MPI_Waitall(nRemoteRootRanks,leafreqs,MPI_STATUSES_IGNORE));

  sf->nleafreqs  = nRemoteRootRanks;
  bas->nrootreqs = nRemoteLeafRanks;
  sf->persistent = PETSC_TRUE;

  /* Setup fields related to packing, such as rootbuflen[] */
  CHKERRQ(PetscSFSetUpPackFields(sf));
  CHKERRQ(PetscFree2(rootreqs,leafreqs));
  PetscFunctionReturn(0);
}
90 
91 PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf)
92 {
93   PetscSF_Basic     *bas = (PetscSF_Basic*)sf->data;
94   PetscSFLink       link = bas->avail,next;
95 
96   PetscFunctionBegin;
97   PetscCheckFalse(bas->inuse,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
98   CHKERRQ(PetscFree2(bas->iranks,bas->ioffset));
99   CHKERRQ(PetscFree(bas->irootloc));
100 
101  #if defined(PETSC_HAVE_DEVICE)
102   for (PetscInt i=0; i<2; i++) CHKERRQ(PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]));
103  #endif
104 
105  #if defined(PETSC_HAVE_NVSHMEM)
106   CHKERRQ(PetscSFReset_Basic_NVSHMEM(sf));
107  #endif
108 
109   for (; link; link=next) {next = link->next; CHKERRQ(PetscSFLinkDestroy(sf,link));}
110   bas->avail = NULL;
111   CHKERRQ(PetscSFResetPackFields(sf));
112   PetscFunctionReturn(0);
113 }
114 
/* Destroy a basic SF: reset all communication state, then free the implementation struct */
PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
{
  PetscFunctionBegin;
  CHKERRQ(PetscSFReset_Basic(sf));
  CHKERRQ(PetscFree(sf->data));
  PetscFunctionReturn(0);
}
122 
123 #if defined(PETSC_USE_SINGLE_LIBRARY)
124 #include <petscmat.h>
125 
126 PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf,PetscViewer viewer)
127 {
128   PetscSF_Basic        *bas = (PetscSF_Basic*)sf->data;
129   PetscInt             i,nrootranks,ndrootranks;
130   const PetscInt       *rootoffset;
131   PetscMPIInt          rank,size;
132   const PetscMPIInt    *rootranks;
133   MPI_Comm             comm = PetscObjectComm((PetscObject)sf);
134   PetscScalar          unitbytes;
135   Mat                  A;
136 
137   PetscFunctionBegin;
138   CHKERRMPI(MPI_Comm_size(comm,&size));
139   CHKERRMPI(MPI_Comm_rank(comm,&rank));
140   /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do
141     PetscSFBcast, i.e., roots send data to leaves.  We dump the communication pattern into a matrix
142     in senders' view point: how many bytes I will send to my neighbors.
143 
144     Looking at a column of the matrix, one can also know how many bytes the rank will receive from others.
145 
146     If PetscSFLink bas->inuse is available, we can use that to get tree vertex size. But that would give
147     different interpretations for the same SF for different data types. Since we most care about VecScatter,
148     we uniformly treat each vertex as a PetscScalar.
149   */
150   unitbytes = (PetscScalar)sizeof(PetscScalar);
151 
152   CHKERRQ(PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,NULL));
153   CHKERRQ(MatCreateAIJ(comm,1,1,size,size,1,NULL,nrootranks-ndrootranks,NULL,&A));
154   CHKERRQ(MatSetOptionsPrefix(A,"__petsc_internal__")); /* To prevent the internal A from taking any command line options */
155   for (i=0; i<nrootranks; i++) {
156     CHKERRQ(MatSetValue(A,(PetscInt)rank,bas->iranks[i],(rootoffset[i+1]-rootoffset[i])*unitbytes,INSERT_VALUES));
157   }
158   CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
159   CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
160   CHKERRQ(MatView(A,viewer));
161   CHKERRQ(MatDestroy(&A));
162   PetscFunctionReturn(0);
163 }
164 #endif
165 
166 PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
167 {
168   PetscBool      isascii;
169 
170   PetscFunctionBegin;
171   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii));
172   if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) CHKERRQ(PetscViewerASCIIPrintf(viewer,"  MultiSF sort=%s\n",sf->rankorder ? "rank-order" : "unordered"));
173 #if defined(PETSC_USE_SINGLE_LIBRARY)
174   else {
175     PetscBool  isdraw,isbinary;
176     CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
177     CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
178     if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) {
179       CHKERRQ(PetscSFView_Basic_PatternAndSizes(sf,viewer));
180     }
181   }
182 #endif
183   PetscFunctionReturn(0);
184 }
185 
186 static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
187 {
188   PetscSFLink       link = NULL;
189 
190   PetscFunctionBegin;
191   /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */
192   CHKERRQ(PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link));
193   /* Pack rootdata to rootbuf for remote communication */
194   CHKERRQ(PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata));
195   /* Start communcation, e.g., post MPI_Isend */
196   CHKERRQ(PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF));
197   /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */
198   CHKERRQ(PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op));
199   PetscFunctionReturn(0);
200 }
201 
202 PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
203 {
204   PetscSFLink       link = NULL;
205 
206   PetscFunctionBegin;
207   /* Retrieve the link used in XxxBegin() with root/leafdata as key */
208   CHKERRQ(PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link));
209   /* Finish remote communication, e.g., post MPI_Waitall */
210   CHKERRQ(PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF));
211   /* Unpack data in leafbuf to leafdata for remote communication */
212   CHKERRQ(PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafdata,op));
213   /* Recycle the link */
214   CHKERRQ(PetscSFLinkReclaim(sf,&link));
215   PetscFunctionReturn(0);
216 }
217 
218 /* Shared by ReduceBegin and FetchAndOpBegin */
219 static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out)
220 {
221   PetscSFLink       link = NULL;
222 
223   PetscFunctionBegin;
224   CHKERRQ(PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link));
225   CHKERRQ(PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata));
226   CHKERRQ(PetscSFLinkStartCommunication(sf,link,PETSCSF_LEAF2ROOT));
227   *out = link;
228   PetscFunctionReturn(0);
229 }
230 
231 /* leaf -> root with reduction */
232 static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
233 {
234   PetscSFLink       link = NULL;
235 
236   PetscFunctionBegin;
237   CHKERRQ(PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link));
238   CHKERRQ(PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op));
239   PetscFunctionReturn(0);
240 }
241 
242 PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
243 {
244   PetscSFLink       link = NULL;
245 
246   PetscFunctionBegin;
247   CHKERRQ(PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link));
248   CHKERRQ(PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT));
249   CHKERRQ(PetscSFLinkUnpackRootData(sf,link,PETSCSF_REMOTE,rootdata,op));
250   CHKERRQ(PetscSFLinkReclaim(sf,&link));
251   PetscFunctionReturn(0);
252 }
253 
254 PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
255 {
256   PetscSFLink       link = NULL;
257 
258   PetscFunctionBegin;
259   CHKERRQ(PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link));
260   CHKERRQ(PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op));
261   PetscFunctionReturn(0);
262 }
263 
264 static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
265 {
266   PetscSFLink       link = NULL;
267 
268   PetscFunctionBegin;
269   CHKERRQ(PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link));
270   /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
271   CHKERRQ(PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT));
272   /* Do fetch-and-op, the (remote) update results are in rootbuf */
273   CHKERRQ(PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op));
274   /* Bcast rootbuf to leafupdate */
275   CHKERRQ(PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF));
276   CHKERRQ(PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF));
277   /* Unpack and insert fetched data into leaves */
278   CHKERRQ(PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE));
279   CHKERRQ(PetscSFLinkReclaim(sf,&link));
280   PetscFunctionReturn(0);
281 }
282 
283 PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
284 {
285   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
286 
287   PetscFunctionBegin;
288   if (niranks)  *niranks  = bas->niranks;
289   if (iranks)   *iranks   = bas->iranks;
290   if (ioffset)  *ioffset  = bas->ioffset;
291   if (irootloc) *irootloc = bas->irootloc;
292   PetscFunctionReturn(0);
293 }
294 
/* An optimized PetscSFCreateEmbeddedRootSF. We aggressively make use of the established communication on sf.
   We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[]
   was sorted before calling the routine.
 */
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscSF           esf;
  PetscInt          esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote;
  PetscInt          i,j,p,q,nroots,esf_nleaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal;
  char              *rootdata,*leafdata,*leafmem; /* Only stores 0 or 1, so we can save memory with char */
  PetscMPIInt       *esf_ranks;
  const PetscMPIInt *ranks,*iranks;
  const PetscInt    *roffset,*rmine,*rremote,*ioffset,*irootloc;
  PetscBool         connected;
  PetscSFNode       *new_iremote;
  PetscSF_Basic     *bas;

  PetscFunctionBegin;
  CHKERRQ(PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf));
  CHKERRQ(PetscSFSetFromOptions(esf));
  CHKERRQ(PetscSFSetType(esf,PETSCSFBASIC)); /* This optimized routine can only create a basic sf */

  /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */
  CHKERRQ(PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL));
  CHKERRQ(PetscSFGetLeafRange(sf,&minleaf,&maxleaf));
  maxlocal = maxleaf - minleaf + 1;
  CHKERRQ(PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem));
  /* Shift the base so leafdata can be indexed directly by leaf ids in [minleaf,maxleaf] */
  leafdata = leafmem - minleaf;
  /* Tag selected roots */
  for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1;

  CHKERRQ(PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE));
  CHKERRQ(PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE));
  CHKERRQ(PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote)); /* Get send info */
  /* First pass: count surviving leaves and (distinguished) root ranks */
  esf_nranks = esf_ndranks = esf_nleaves = 0;
  for (i=0; i<nranks; i++) {
    connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */
    for (j=roffset[i]; j<roffset[i+1]; j++) {if (leafdata[rmine[j]]) {esf_nleaves++; connected = PETSC_TRUE;}}
    if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;}
  }

  /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */
  CHKERRQ(PetscMalloc1(esf_nleaves,&new_ilocal));
  CHKERRQ(PetscMalloc1(esf_nleaves,&new_iremote));
  CHKERRQ(PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,esf_nleaves,&esf_rmine,esf_nleaves,&esf_rremote));
  p    = 0; /* Counter for connected root ranks */
  q    = 0; /* Counter for connected leaves */
  esf_roffset[0] = 0;
  for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */
    connected = PETSC_FALSE;
    for (j=roffset[i]; j<roffset[i+1]; j++) {
      if (leafdata[rmine[j]]) {
        esf_rmine[q]         = new_ilocal[q] = rmine[j];
        esf_rremote[q]       = rremote[j];
        new_iremote[q].index = rremote[j];
        new_iremote[q].rank  = ranks[i];
        connected            = PETSC_TRUE;
        q++;
      }
    }
    if (connected) {
      esf_ranks[p]     = ranks[i];
      esf_roffset[p+1] = q;
      p++;
    }
  }

  /* SetGraph internally resets the SF, so we only set its fields after the call */
  CHKERRQ(PetscSFSetGraph(esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER));
  esf->nranks    = esf_nranks;
  esf->ndranks   = esf_ndranks;
  esf->ranks     = esf_ranks;
  esf->roffset   = esf_roffset;
  esf->rmine     = esf_rmine;
  esf->rremote   = esf_rremote;
  esf->nleafreqs = esf_nranks - esf_ndranks;

  /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */
  bas  = (PetscSF_Basic*)esf->data;
  CHKERRQ(PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc)); /* Get recv info */
  /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we
     we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once.
   */
  CHKERRQ(PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset));
  CHKERRQ(PetscMalloc1(ioffset[niranks],&bas->irootloc));
  bas->niranks = bas->ndiranks = bas->ioffset[0] = 0;
  p = 0; /* Counter for connected leaf ranks */
  q = 0; /* Counter for connected roots */
  for (i=0; i<niranks; i++) {
    connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */
    for (j=ioffset[i]; j<ioffset[i+1]; j++) {
      /* Keep only root locations that were selected */
      if (rootdata[irootloc[j]]) {
        bas->irootloc[q++] = irootloc[j];
        connected = PETSC_TRUE;
      }
    }
    if (connected) {
      bas->niranks++;
      if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */
      bas->iranks[p]    = iranks[i];
      bas->ioffset[p+1] = q;
      p++;
    }
  }
  bas->itotal     = q;
  bas->nrootreqs  = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Setup packing related fields */
  CHKERRQ(PetscSFSetUpPackFields(esf));

  /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */
#if defined(PETSC_HAVE_CUDA)
  if (esf->backend == PETSCSF_BACKEND_CUDA) {
    esf->ops->Malloc = PetscSFMalloc_CUDA;
    esf->ops->Free   = PetscSFFree_CUDA;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  /* TODO: Needs debugging */
  if (esf->backend == PETSCSF_BACKEND_HIP) {
    esf->ops->Malloc = PetscSFMalloc_HIP;
    esf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (esf->backend == PETSCSF_BACKEND_KOKKOS) {
    esf->ops->Malloc = PetscSFMalloc_Kokkos;
    esf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
  CHKERRQ(PetscFree2(rootdata,leafmem));
  *newsf = esf;
  PetscFunctionReturn(0);
}
432 
433 PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
434 {
435   PetscSF_Basic  *dat;
436 
437   PetscFunctionBegin;
438   sf->ops->SetUp                = PetscSFSetUp_Basic;
439   sf->ops->Reset                = PetscSFReset_Basic;
440   sf->ops->Destroy              = PetscSFDestroy_Basic;
441   sf->ops->View                 = PetscSFView_Basic;
442   sf->ops->BcastBegin           = PetscSFBcastBegin_Basic;
443   sf->ops->BcastEnd             = PetscSFBcastEnd_Basic;
444   sf->ops->ReduceBegin          = PetscSFReduceBegin_Basic;
445   sf->ops->ReduceEnd            = PetscSFReduceEnd_Basic;
446   sf->ops->FetchAndOpBegin      = PetscSFFetchAndOpBegin_Basic;
447   sf->ops->FetchAndOpEnd        = PetscSFFetchAndOpEnd_Basic;
448   sf->ops->GetLeafRanks         = PetscSFGetLeafRanks_Basic;
449   sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic;
450 
451   CHKERRQ(PetscNewLog(sf,&dat));
452   sf->data = (void*)dat;
453   PetscFunctionReturn(0);
454 }
455