xref: /petsc/src/vec/is/sf/interface/sf.c (revision 606c028001d6169ccbb8dc4a5aa61aa3bda31a09)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petscctable.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <cuda_runtime.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 #endif
13 
/* PetscSFCheckGraphSet(sf,arg) - debug-only guard that raises PETSC_ERR_ARG_WRONGSTATE if the
   graph of sf has not been set yet.  In optimized builds it compiles away to nothing; for the
   clang static analyzer it is declared as a real function so the analyzer sees the check. */
#if defined(PETSC_CLANG_STATIC_ANALYZER)
void PetscSFCheckGraphSet(PetscSF,int);
#else
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
#endif

/* Printable names for the PetscSFDuplicateOption enum (terminated per PETSc enum-string convention) */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
28 
29 /*@
30    PetscSFCreate - create a star forest communication context
31 
32    Collective
33 
34    Input Parameter:
35 .  comm - communicator on which the star forest will operate
36 
37    Output Parameter:
38 .  sf - new star forest context
39 
40    Options Database Keys:
41 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
42 .  -sf_type window    -Use MPI-3 one-sided window for communication
43 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
44 
45    Level: intermediate
46 
47    Notes:
48    When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
49    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
50    SFs are optimized and they have better performance than general SFs.
51 
52 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
53 @*/
54 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
55 {
56   PetscErrorCode ierr;
57   PetscSF        b;
58 
59   PetscFunctionBegin;
60   PetscValidPointer(sf,2);
61   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
62 
63   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
64 
65   b->nroots    = -1;
66   b->nleaves   = -1;
67   b->minleaf   = PETSC_MAX_INT;
68   b->maxleaf   = PETSC_MIN_INT;
69   b->nranks    = -1;
70   b->rankorder = PETSC_TRUE;
71   b->ingroup   = MPI_GROUP_NULL;
72   b->outgroup  = MPI_GROUP_NULL;
73   b->graphset  = PETSC_FALSE;
74 #if defined(PETSC_HAVE_DEVICE)
75   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
76   b->use_stream_aware_mpi = PETSC_FALSE;
77   b->unknown_input_stream= PETSC_FALSE;
78   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
79     b->backend = PETSCSF_BACKEND_KOKKOS;
80   #elif defined(PETSC_HAVE_CUDA)
81     b->backend = PETSCSF_BACKEND_CUDA;
82   #elif defined(PETSC_HAVE_HIP)
83     b->backend = PETSCSF_BACKEND_HIP;
84   #endif
85 
86   #if defined(PETSC_HAVE_NVSHMEM)
87     b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
88     b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
89     ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);CHKERRQ(ierr);
90     ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);CHKERRQ(ierr);
91   #endif
92 #endif
93   b->vscat.from_n = -1;
94   b->vscat.to_n   = -1;
95   b->vscat.unit   = MPIU_SCALAR;
96  *sf = b;
97   PetscFunctionReturn(0);
98 }
99 
/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Parameter:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Give the type-specific implementation a chance to release its own data first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Return graph fields to the "unset" state established in PetscSFCreate() */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);}
  /* Break the back-reference held by the multi-SF before destroying it, to avoid recursing into ourselves */
  if (sf->multi) sf->multi->multi = NULL;
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);

 #if defined(PETSC_HAVE_DEVICE)
  /* Release the device-side mirrors of the rmine arrays (one per leaf/root direction) */
  for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
 #endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
145 
146 /*@C
147    PetscSFSetType - Set the PetscSF communication implementation
148 
149    Collective on PetscSF
150 
151    Input Parameters:
152 +  sf - the PetscSF context
153 -  type - a known method
154 
155    Options Database Key:
156 .  -sf_type <type> - Sets the method; use -help for a list
157    of available methods (for instance, window, basic, neighbor)
158 
159    Notes:
160    See "include/petscsf.h" for available methods (for instance)
161 +    PETSCSFWINDOW - MPI-2/3 one-sided
162 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
163 
164   Level: intermediate
165 
166 .seealso: PetscSFType, PetscSFCreate()
167 @*/
168 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
169 {
170   PetscErrorCode ierr,(*r)(PetscSF);
171   PetscBool      match;
172 
173   PetscFunctionBegin;
174   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
175   PetscValidCharPointer(type,2);
176 
177   ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
178   if (match) PetscFunctionReturn(0);
179 
180   ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
181   PetscCheckFalse(!r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
182   /* Destroy the previous PetscSF implementation context */
183   if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
184   ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
185   ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
186   ierr = (*r)(sf);CHKERRQ(ierr);
187   PetscFunctionReturn(0);
188 }
189 
190 /*@C
191   PetscSFGetType - Get the PetscSF communication implementation
192 
193   Not Collective
194 
195   Input Parameter:
196 . sf  - the PetscSF context
197 
198   Output Parameter:
199 . type - the PetscSF type name
200 
201   Level: intermediate
202 
203 .seealso: PetscSFSetType(), PetscSFCreate()
204 @*/
205 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
206 {
207   PetscFunctionBegin;
208   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
209   PetscValidPointer(type,2);
210   *type = ((PetscObject)sf)->type_name;
211   PetscFunctionReturn(0);
212 }
213 
/*@C
   PetscSFDestroy - destroy star forest

   Collective

   Input Parameter:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually tears the object down */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr);
  /* vscat.unit is a duplicated datatype only when bs > 1 (see PetscSFDuplicate()); otherwise it is the shared MPIU_SCALAR */
  if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRMPI(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
241 
242 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
243 {
244   PetscInt           i, nleaves;
245   PetscMPIInt        size;
246   const PetscInt    *ilocal;
247   const PetscSFNode *iremote;
248   PetscErrorCode     ierr;
249 
250   PetscFunctionBegin;
251   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
252   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
254   for (i = 0; i < nleaves; i++) {
255     const PetscInt rank = iremote[i].rank;
256     const PetscInt remote = iremote[i].index;
257     const PetscInt leaf = ilocal ? ilocal[i] : i;
258     PetscCheckFalse(rank < 0 || rank >= size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size);
259     PetscCheckFalse(remote < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i);
260     PetscCheckFalse(leaf < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i);
261   }
262   PetscFunctionReturn(0);
263 }
264 
265 /*@
266    PetscSFSetUp - set up communication structures
267 
268    Collective
269 
270    Input Parameter:
271 .  sf - star forest communication object
272 
273    Level: beginner
274 
275 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
276 @*/
277 PetscErrorCode PetscSFSetUp(PetscSF sf)
278 {
279   PetscErrorCode ierr;
280 
281   PetscFunctionBegin;
282   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
283   PetscSFCheckGraphSet(sf,1);
284   if (sf->setupcalled) PetscFunctionReturn(0);
285   ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
286   ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
287   if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */
288   if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
289 #if defined(PETSC_HAVE_CUDA)
290   if (sf->backend == PETSCSF_BACKEND_CUDA) {
291     sf->ops->Malloc = PetscSFMalloc_CUDA;
292     sf->ops->Free   = PetscSFFree_CUDA;
293   }
294 #endif
295 #if defined(PETSC_HAVE_HIP)
296   if (sf->backend == PETSCSF_BACKEND_HIP) {
297     sf->ops->Malloc = PetscSFMalloc_HIP;
298     sf->ops->Free   = PetscSFFree_HIP;
299   }
300 #endif
301 
302 #
303 #if defined(PETSC_HAVE_KOKKOS)
304   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
305     sf->ops->Malloc = PetscSFMalloc_Kokkos;
306     sf->ops->Free   = PetscSFFree_Kokkos;
307   }
308 #endif
309   ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
310   sf->setupcalled = PETSC_TRUE;
311   PetscFunctionReturn(0);
312 }
313 
314 /*@
315    PetscSFSetFromOptions - set PetscSF options using the options database
316 
317    Logically Collective
318 
319    Input Parameter:
320 .  sf - star forest
321 
322    Options Database Keys:
323 +  -sf_type               - implementation type, see PetscSFSetType()
324 .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
325 .  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
326                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
327                             If true, this option only works with -use_gpu_aware_mpi 1.
328 .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
329                                If true, this option only works with -use_gpu_aware_mpi 1.
330 
331 -  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
332                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
333                               the only available is kokkos.
334 
335    Level: intermediate
336 @*/
337 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
338 {
339   PetscSFType    deft;
340   char           type[256];
341   PetscErrorCode ierr;
342   PetscBool      flg;
343 
344   PetscFunctionBegin;
345   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
346   ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
347   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
348   ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
349   ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
350   ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
351  #if defined(PETSC_HAVE_DEVICE)
352   {
353     char        backendstr[32] = {0};
354     PetscBool   isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
355     /* Change the defaults set in PetscSFCreate() with command line options */
356     ierr = PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);CHKERRQ(ierr);
357     ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
358     ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr);
359     ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr);
360     ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr);
361     ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr);
362   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
363     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
364     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
365     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
366     else PetscCheckFalse(set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
367   #elif defined(PETSC_HAVE_KOKKOS)
368     PetscCheckFalse(set && !isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
369    #endif
370   }
371  #endif
372   if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
373   ierr = PetscOptionsEnd();CHKERRQ(ierr);
374   PetscFunctionReturn(0);
375 }
376 
377 /*@
378    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
379 
380    Logically Collective
381 
382    Input Parameters:
383 +  sf - star forest
384 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
385 
386    Level: advanced
387 
388 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
389 @*/
390 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
391 {
392   PetscFunctionBegin;
393   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
394   PetscValidLogicalCollectiveBool(sf,flg,2);
395   PetscCheckFalse(sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
396   sf->rankorder = flg;
397   PetscFunctionReturn(0);
398 }
399 
/*@C
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Parameters:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   Leaf indices in ilocal must be unique, otherwise an error occurs.

   Input arrays ilocal and iremote follow the PetscCopyMode semantics.
   In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
   PETSc might modify the respective array;
   if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().

   Fortran Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

   Developer Notes:
   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
   This also allows to compare leaf sets of two SFs easily.

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscBool       unique, contiguous;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  PetscCheckFalse(nroots  < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots);
  PetscCheckFalse(nleaves < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves);
  PetscCheck(localmode  >= PETSC_COPY_VALUES && localmode  <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong localmode %d",localmode);
  PetscCheck(remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong remotemode %d",remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES, take private copies now; from here on ilocal/iremote refer to SF-owned memory */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    ierr = PetscMalloc1(nleaves,&tlocal);CHKERRQ(ierr);
    ierr = PetscArraycpy(tlocal,ilocal,nleaves);CHKERRQ(ierr);
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    ierr = PetscMalloc1(nleaves,&tremote);CHKERRQ(ierr);
    ierr = PetscArraycpy(tremote,iremote,nleaves);CHKERRQ(ierr);
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode   work;

    /* Sort leaves (permuting iremote alongside) to find min/max leaf, duplicates, and contiguity */
    ierr = PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work);CHKERRQ(ierr);
    ierr = PetscSortedCheckDupsInt(nleaves, ilocal, &unique);CHKERRQ(ierr);
    unique = PetscNot(unique); /* the check reports "has duplicates"; invert to get uniqueness */
    PetscCheck(sf->allow_multi_leaves || unique,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves-1];
    contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1);
  } else {
    /* NULL ilocal means leaves are [0, nleaves); note maxleaf = -1 when nleaves == 0 */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* A contiguous leaf set is canonically stored as ilocal == NULL */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      ierr = PetscFree(ilocal);CHKERRQ(ierr);
    }
  }
  sf->mine            = ilocal;
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc    = NULL;  /* user retains ownership */
  } else {
    sf->mine_alloc    = ilocal;
  }
  sf->remote          = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc  = NULL;  /* user retains ownership */
  } else {
    sf->remote_alloc  = iremote;
  }
  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
513 
/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* map is ignored for ALLTOALL, so only validate it for the other patterns */
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2);
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* Build a size-by-size layout: each rank owns "size" roots */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRMPI(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves for a gather */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
613 
614 /*@
615    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
616 
617    Collective
618 
619    Input Parameter:
620 .  sf - star forest to invert
621 
622    Output Parameter:
623 .  isf - inverse of sf
624 
625    Level: advanced
626 
627    Notes:
628    All roots must have degree 1.
629 
630    The local space may be a permutation, but cannot be sparse.
631 
632 .seealso: PetscSFSetGraph()
633 @*/
634 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
635 {
636   PetscErrorCode ierr;
637   PetscMPIInt    rank;
638   PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
639   const PetscInt *ilocal;
640   PetscSFNode    *roots,*leaves;
641 
642   PetscFunctionBegin;
643   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
644   PetscSFCheckGraphSet(sf,1);
645   PetscValidPointer(isf,2);
646 
647   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
648   maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
649 
650   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
651   ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
652   for (i=0; i<maxlocal; i++) {
653     leaves[i].rank  = rank;
654     leaves[i].index = i;
655   }
656   for (i=0; i <nroots; i++) {
657     roots[i].rank  = -1;
658     roots[i].index = -1;
659   }
660   ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr);
661   ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr);
662 
663   /* Check whether our leaves are sparse */
664   for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
665   if (count == nroots) newilocal = NULL;
666   else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
667     ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
668     for (i=0,count=0; i<nroots; i++) {
669       if (roots[i].rank >= 0) {
670         newilocal[count]   = i;
671         roots[count].rank  = roots[i].rank;
672         roots[count].index = roots[i].index;
673         count++;
674       }
675     }
676   }
677 
678   ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
679   ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
680   ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 
/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Parameters:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Parameter:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;
  MPI_Datatype   dtype=MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      /* Deep-copy the general graph; PETSC_COPY_VALUES keeps the const-qualified originals intact */
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graphs are reconstructed from the layout/pattern pair */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRMPI(ierr);}
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream= sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}
743 
744 /*@C
745    PetscSFGetGraph - Get the graph specifying a parallel star forest
746 
747    Not Collective
748 
749    Input Parameter:
750 .  sf - star forest
751 
752    Output Parameters:
753 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
754 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
755 .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
756 -  iremote - remote locations of root vertices for each leaf on the current process
757 
758    Notes:
759      We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
760 
761      The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
762      see its manpage for details.
763 
764    Fortran Notes:
765      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
766      want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
767 
768      To check for a NULL ilocal use
769 $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
770 
771    Level: intermediate
772 
773 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
774 @*/
775 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
776 {
777   PetscErrorCode ierr;
778 
779   PetscFunctionBegin;
780   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
781   if (sf->ops->GetGraph) {
782     ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
783   } else {
784     if (nroots) *nroots = sf->nroots;
785     if (nleaves) *nleaves = sf->nleaves;
786     if (ilocal) *ilocal = sf->mine;
787     if (iremote) *iremote = sf->remote;
788   }
789   PetscFunctionReturn(0);
790 }
791 
792 /*@
793    PetscSFGetLeafRange - Get the active leaf ranges
794 
795    Not Collective
796 
797    Input Parameter:
798 .  sf - star forest
799 
800    Output Parameters:
801 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
802 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
803 
804    Level: developer
805 
806 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
807 @*/
808 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
809 {
810   PetscFunctionBegin;
811   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
812   PetscSFCheckGraphSet(sf,1);
813   if (minleaf) *minleaf = sf->minleaf;
814   if (maxleaf) *maxleaf = sf->maxleaf;
815   PetscFunctionReturn(0);
816 }
817 
818 /*@C
819    PetscSFViewFromOptions - View from Options
820 
821    Collective on PetscSF
822 
823    Input Parameters:
824 +  A - the star forest
825 .  obj - Optional object
826 -  name - command line option
827 
828    Level: intermediate
829 .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
830 @*/
831 PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
832 {
833   PetscErrorCode ierr;
834 
835   PetscFunctionBegin;
836   PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
837   ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to the communicator's stdout viewer when none was supplied */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Set up first so the rank/offset tables printed below are populated */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  /* Direct access to viewer->format (via viewerimpl.h); MATLAB format is left entirely to the type-specific viewer */
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        /* Nothing to print; pop the tab pushed above before the early return */
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      /* Synchronized printing so per-rank output appears in rank order */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      /* One line per leaf edge: local leaf index <- (owner rank, root index); NULL sf->mine means contiguous leaves */
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* Print edges grouped by (sorted) remote rank; sort a copy so sf->ranks is untouched */
        PetscMPIInt *tmpranks,*perm;
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];  /* index into the original (unsorted) rank tables */
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  /* Give the SF implementation a chance to add type-specific output */
  if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
911 
912 /*@C
913    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
914 
915    Not Collective
916 
917    Input Parameter:
918 .  sf - star forest
919 
920    Output Parameters:
921 +  nranks - number of ranks referenced by local part
922 .  ranks - array of ranks
923 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
924 .  rmine - concatenated array holding local indices referencing each remote rank
925 -  rremote - concatenated array holding remote indices referenced for each remote rank
926 
927    Level: developer
928 
929 .seealso: PetscSFGetLeafRanks()
930 @*/
931 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
932 {
933   PetscErrorCode ierr;
934 
935   PetscFunctionBegin;
936   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
937   PetscCheckFalse(!sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
938   if (sf->ops->GetRootRanks) {
939     ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
940   } else {
941     /* The generic implementation */
942     if (nranks)  *nranks  = sf->nranks;
943     if (ranks)   *ranks   = sf->ranks;
944     if (roffset) *roffset = sf->roffset;
945     if (rmine)   *rmine   = sf->rmine;
946     if (rremote) *rremote = sf->rremote;
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 /*@C
952    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
953 
954    Not Collective
955 
956    Input Parameter:
957 .  sf - star forest
958 
959    Output Parameters:
960 +  niranks - number of leaf ranks referencing roots on this process
961 .  iranks - array of ranks
962 .  ioffset - offset in irootloc for each rank (length niranks+1)
963 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
964 
965    Level: developer
966 
967 .seealso: PetscSFGetRootRanks()
968 @*/
969 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
970 {
971   PetscErrorCode ierr;
972 
973   PetscFunctionBegin;
974   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
975   PetscCheckFalse(!sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
976   if (sf->ops->GetLeafRanks) {
977     ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
978   } else {
979     PetscSFType type;
980     ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
981     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
982   }
983   PetscFunctionReturn(0);
984 }
985 
986 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
987   PetscInt i;
988   for (i=0; i<n; i++) {
989     if (needle == list[i]) return PETSC_TRUE;
990   }
991   return PETSC_FALSE;
992 }
993 
/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Parameters:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Notes:
   Builds sf->ranks (distinguished ranks first, each half sorted), sf->roffset (CSR-style offsets),
   and sf->rmine/sf->rremote (leaf/root indices grouped by remote rank).

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
  /* Count leaves per remote rank using a hash table keyed by rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  /* Flatten the table into parallel arrays: ranks[] and per-rank leaf counts rcount[] */
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's members into ranks of sf's communicator */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);}
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished.
     Two-pointer swap scheme: i scans backward for a distinguished rank, sf->ndranks scans
     forward for a non-distinguished one; swap the mismatched pair and continue. */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each half separately so PetscFindMPIInt() below can binary-search them independently */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Prefix-sum the per-rank counts into CSR offsets; reset rcount[] for reuse as fill cursors */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Bucket each leaf edge by remote rank into rmine/rremote */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: reuse irank while consecutive leaves reference the same remote rank */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks (distinguished half first) */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheckFalse(irank < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1097 
/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Notes:
   The groups are built lazily on first call and cached on the SF (sf->ingroup/sf->outgroup);
   the caller does not own them and must not free them.

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  /* NOTE(review): unlike sibling getters this does not call PetscValidHeaderSpecific(sf,...) — confirm intentional */
  PetscCheckFalse(sf->nranks < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one leaf per
       outgoing rank, all targeting root 0 on the respective remote process, so the
       root degree equals the number of processes referencing me */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks: each process gathers the rank ids of its referencing processes */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing side is easy: sf->ranks already lists the processes my leaves reference */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1160 
/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Parameter:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Parameter:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

   The multi-SF is built lazily on first call and cached on sf; the caller does not own it.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    sf->multi->multi = sf->multi;  /* the multi-SF is its own multi-SF */
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    /* inoffset[i] will be the prefix sum of root degrees: the first multi-root slot belonging to root i */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 per leaf hands each leaf a distinct slot (outoffset) within its root's range,
       while incrementing inoffset by the degree as a side effect */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        PetscCheckFalse(inoffset[i] + indegree[i] != inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    /* Each leaf now targets its private multi-root slot on the same remote rank */
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    sf->multi->multi = sf->multi;
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Bring each multi-root the rank of the leaf that claimed it */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its slot under the rank-sorted ordering, then rebuild the graph */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1254 
1255 /*@C
1256    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1257 
1258    Collective
1259 
1260    Input Parameters:
1261 +  sf - original star forest
1262 .  nselected  - number of selected roots on this process
1263 -  selected   - indices of the selected roots on this process
1264 
1265    Output Parameter:
1266 .  esf - new star forest
1267 
1268    Level: advanced
1269 
1270    Note:
1271    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1272    be done by calling PetscSFGetGraph().
1273 
1274 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1275 @*/
1276 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1277 {
1278   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1279   const PetscInt    *ilocal;
1280   signed char       *rootdata,*leafdata,*leafmem;
1281   const PetscSFNode *iremote;
1282   PetscSFNode       *new_iremote;
1283   MPI_Comm          comm;
1284   PetscErrorCode    ierr;
1285 
1286   PetscFunctionBegin;
1287   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1288   PetscSFCheckGraphSet(sf,1);
1289   if (nselected) PetscValidPointer(selected,3);
1290   PetscValidPointer(esf,4);
1291 
1292   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1293   ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1294   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1295   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
1296 
1297   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1298     PetscBool dups;
1299     ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr);
1300     PetscCheckFalse(dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1301     for (i=0; i<nselected; i++)
1302       PetscCheckFalse(selected[i] < 0 || selected[i] >= nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots);
1303   }
1304 
1305   if (sf->ops->CreateEmbeddedRootSF) {
1306     ierr = (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);CHKERRQ(ierr);
1307   } else {
1308     /* A generic version of creating embedded sf */
1309     ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
1310     maxlocal = maxleaf - minleaf + 1;
1311     ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
1312     leafdata = leafmem - minleaf;
1313     /* Tag selected roots and bcast to leaves */
1314     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1315     ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
1316     ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
1317 
1318     /* Build esf with leaves that are still connected */
1319     esf_nleaves = 0;
1320     for (i=0; i<nleaves; i++) {
1321       j = ilocal ? ilocal[i] : i;
1322       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1323          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1324       */
1325       esf_nleaves += (leafdata[j] ? 1 : 0);
1326     }
1327     ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
1328     ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
1329     for (i=n=0; i<nleaves; i++) {
1330       j = ilocal ? ilocal[i] : i;
1331       if (leafdata[j]) {
1332         new_ilocal[n]        = j;
1333         new_iremote[n].rank  = iremote[i].rank;
1334         new_iremote[n].index = iremote[i].index;
1335         ++n;
1336       }
1337     }
1338     ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr);
1339     ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr);
1340     ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1341     ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
1342   }
1343   ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 /*@C
1348   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1349 
1350   Collective
1351 
1352   Input Parameters:
1353 + sf - original star forest
1354 . nselected  - number of selected leaves on this process
1355 - selected   - indices of the selected leaves on this process
1356 
1357   Output Parameter:
1358 .  newsf - new star forest
1359 
1360   Level: advanced
1361 
1362 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
1363 @*/
1364 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1365 {
1366   const PetscSFNode *iremote;
1367   PetscSFNode       *new_iremote;
1368   const PetscInt    *ilocal;
1369   PetscInt          i,nroots,*leaves,*new_ilocal;
1370   MPI_Comm          comm;
1371   PetscErrorCode    ierr;
1372 
1373   PetscFunctionBegin;
1374   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1375   PetscSFCheckGraphSet(sf,1);
1376   if (nselected) PetscValidPointer(selected,3);
1377   PetscValidPointer(newsf,4);
1378 
1379   /* Uniq selected[] and put results in leaves[] */
1380   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1381   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1382   ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1383   ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1384   PetscCheckFalse(nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves);
1385 
1386   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1387   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1388     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1389   } else {
1390     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1391     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1392     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1393     for (i=0; i<nselected; ++i) {
1394       const PetscInt l     = leaves[i];
1395       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1396       new_iremote[i].rank  = iremote[l].rank;
1397       new_iremote[i].index = iremote[l].index;
1398     }
1399     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
1400     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1401   }
1402   ierr = PetscFree(leaves);CHKERRQ(ierr);
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 /*@C
1407    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1408 
1409    Collective on PetscSF
1410 
1411    Input Parameters:
1412 +  sf - star forest on which to communicate
1413 .  unit - data type associated with each node
1414 .  rootdata - buffer to broadcast
1415 -  op - operation to use for reduction
1416 
1417    Output Parameter:
1418 .  leafdata - buffer to be reduced with values from each leaf's respective root
1419 
1420    Level: intermediate
1421 
1422    Notes:
1423     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1424     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1425     use PetscSFBcastWithMemTypeBegin() instead.
1426 .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
1427 @*/
1428 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1429 {
1430   PetscErrorCode ierr;
1431   PetscMemType   rootmtype,leafmtype;
1432 
1433   PetscFunctionBegin;
1434   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1435   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1436   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1437   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1438   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1439   ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1440   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1441   PetscFunctionReturn(0);
1442 }
1443 
/*@C
   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()

   Collective on PetscSF

   Input Parameters:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootmtype - memory type of rootdata
.  rootdata - buffer to broadcast
.  leafmtype - memory type of leafdata
-  op - operation to use for reduction

   Output Parameter:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

   Notes:
   Unlike PetscSFBcastBegin(), this variant takes the memory types of rootdata and leafdata from the
   caller instead of querying them, avoiding the cost of that lookup.

.seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
@*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Skip SF-level event logging when called on behalf of a VecScatter (sf->vscat.logging set) */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  /* Caller-supplied memory types are forwarded directly; no PetscGetMemType() query here */
  ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
1476 
/*@C
   PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Parameter:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

   Notes:
   The rootdata, leafdata, and op arguments must match those passed to PetscSFBcastBegin()
   or PetscSFBcastWithMemTypeBegin().

.seealso: PetscSFSetGraph(), PetscSFReduceEnd()
@*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Skip SF-level event logging when called on behalf of a VecScatter (sf->vscat.logging set) */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
  /* No PetscSFSetUp() here: the matching Begin call already set the SF up */
  ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
1506 
1507 /*@C
1508    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1509 
1510    Collective
1511 
1512    Input Parameters:
1513 +  sf - star forest
1514 .  unit - data type
1515 .  leafdata - values to reduce
1516 -  op - reduction operation
1517 
1518    Output Parameter:
1519 .  rootdata - result of reduction of values from all leaves of each root
1520 
1521    Level: intermediate
1522 
1523    Notes:
1524     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1525     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1526     use PetscSFReduceWithMemTypeBegin() instead.
1527 
1528 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1529 @*/
1530 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1531 {
1532   PetscErrorCode ierr;
1533   PetscMemType   rootmtype,leafmtype;
1534 
1535   PetscFunctionBegin;
1536   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1537   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1538   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1539   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1540   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1541   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1542   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 /*@C
1547    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1548 
1549    Collective
1550 
1551    Input Parameters:
1552 +  sf - star forest
1553 .  unit - data type
1554 .  leafmtype - memory type of leafdata
1555 .  leafdata - values to reduce
1556 .  rootmtype - memory type of rootdata
1557 -  op - reduction operation
1558 
1559    Output Parameter:
1560 .  rootdata - result of reduction of values from all leaves of each root
1561 
1562    Level: intermediate
1563 
1564 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1565 @*/
1566 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1567 {
1568   PetscErrorCode ierr;
1569 
1570   PetscFunctionBegin;
1571   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1572   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1573   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1574   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1575   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1576   PetscFunctionReturn(0);
1577 }
1578 
/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

   Notes:
   The leafdata, rootdata, and op arguments must match those passed to PetscSFReduceBegin()
   or PetscSFReduceWithMemTypeBegin().

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Skip SF-level event logging when called on behalf of a VecScatter (sf->vscat.logging set) */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  /* No PetscSFSetUp() here: the matching Begin call already set the SF up */
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
1608 
/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Query the memory type (host/device) of each user buffer */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  /* The implementations take a single leaf memory type, so leafdata and leafupdate must live in the same space */
  PetscCheckFalse(leafmtype != leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1651 
/*@C
   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootmtype - memory type of rootdata
.  leafmtype - memory type of leafdata
.  leafdata - leaf values to use in reduction
.  leafupdatemtype - memory type of leafupdate (must equal leafmtype)
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note: See PetscSFFetchAndOpBegin() for more details.

.seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* The implementations take a single leaf memory type, so leafdata and leafupdate must live in the same space */
  PetscCheckFalse(leafmtype != leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1689 
/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Notes:
   The arguments must match those passed to PetscSFFetchAndOpBegin() or PetscSFFetchAndOpWithMemTypeBegin().

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  /* No PetscSFSetUp() here: the matching Begin call already set the SF up */
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1720 
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex; set to NULL here, only valid after PetscSFComputeDegreeEnd()

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  /* Degrees are computed once and cached on the SF; subsequent calls are no-ops */
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree doubles as the "a Begin is in flight" flag until degreeknown is set in End */
    PetscCheckFalse(sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1; /* size of the contiguous local leaf range */
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Reducing a 1 from every leaf with MPI_SUM yields each root's degree; degreetmp is shifted by
       minleaf so that the reduce can index it directly with global local-leaf numbers */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  /* The result is not available until PetscSFComputeDegreeEnd() */
  *degree = NULL;
  PetscFunctionReturn(0);
}
1760 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    PetscCheckFalse(!sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Same minleaf shift as in Begin so the buffer is indexed with global local-leaf numbers */
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE; /* cache the result for all future calls */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1796 
/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Parameters:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Parameters:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

.seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF             msf;
  PetscInt            i, j, k, nroots, nmroots;
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
  /* degree may legitimately be NULL when this rank has no roots */
  if (nroots) PetscValidIntPointer(degree,2);
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
  PetscValidPointer(multiRootsOrigNumbering,4);
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
  /* Each original root i contributes degree[i] consecutive multi-roots, all mapped back to i */
  for (i=0,j=0,k=0; i<nroots; i++) {
    if (!degree[i]) continue;
    for (j=0; j<degree[i]; j++,k++) {
      (*multiRootsOrigNumbering)[k] = i;
    }
  }
  /* Total degree must equal the number of multi-roots reported by the multi-SF */
  PetscCheckFalse(k != nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}
1843 
1844 /*@C
1845    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1846 
1847    Collective
1848 
1849    Input Parameters:
1850 +  sf - star forest
1851 .  unit - data type
1852 -  leafdata - leaf data to gather to roots
1853 
1854    Output Parameter:
1855 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1856 
1857    Level: intermediate
1858 
1859 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1860 @*/
1861 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1862 {
1863   PetscErrorCode ierr;
1864   PetscSF        multi = NULL;
1865 
1866   PetscFunctionBegin;
1867   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1868   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1869   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1870   ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr);
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 /*@C
1875    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1876 
1877    Collective
1878 
1879    Input Parameters:
1880 +  sf - star forest
1881 .  unit - data type
1882 -  leafdata - leaf data to gather to roots
1883 
1884    Output Parameter:
1885 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1886 
1887    Level: intermediate
1888 
1889 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1890 @*/
1891 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1892 {
1893   PetscErrorCode ierr;
1894   PetscSF        multi = NULL;
1895 
1896   PetscFunctionBegin;
1897   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1898   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1899   ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr);
1900   PetscFunctionReturn(0);
1901 }
1902 
/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.  leafdata - leaf data to be update with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* A scatter is a broadcast with MPI_REPLACE over the multi-SF, sending each leaf its personal root slot */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1932 
/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.  leafdata - leaf data to be update with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Finish the MPI_REPLACE broadcast over the multi-SF that PetscSFScatterBegin() started */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1961 
1962 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1963 {
1964   PetscInt        i, n, nleaves;
1965   const PetscInt *ilocal = NULL;
1966   PetscHSetI      seen;
1967   PetscErrorCode  ierr;
1968 
1969   PetscFunctionBegin;
1970   if (PetscDefined(USE_DEBUG)) {
1971     ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
1972     ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
1973     for (i = 0; i < nleaves; i++) {
1974       const PetscInt leaf = ilocal ? ilocal[i] : i;
1975       ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
1976     }
1977     ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
1978     PetscCheckFalse(n != nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1979     ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
1980   }
1981   PetscFunctionReturn(0);
1982 }
1983 
/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Both inputs must be true star forests (unique leaves); checked only in debug builds */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* sfA's leaves are not contiguous: scatter remotePointsA into an array indexed by sfA's
       local leaf number (= sfB's root number), with rank -1 marking "no sfA leaf here" */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf outside sfB's root space: no path */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Broadcast sfA's root locations through sfB so each sfB leaf learns which sfA root (if any) reaches it.
     leafdataB is shifted by minleaf so it can be indexed directly with sfB's local leaf numbers. */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: every sfB leaf is connected (rank != -1) and leaves are contiguous, so leafdataB can be
     used directly as the composite's remote points with no local-point array */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    /* Compact: keep only connected leaves, recording their local indices */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  /* The new SF takes ownership of localPointsBA and remotePointsBA (PETSC_OWN_POINTER) */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2073 
/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Both inputs must be true star forests (unique leaves); checked only in debug builds */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not meet.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPI_REPLACE;
#endif

  /* Scatter remotePointsA into an array indexed by sfB's local leaf number (offset by minleaf),
     with rank -1 marking "no sfA leaf at this index" */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* outside sfB's leaf range: no path */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* localPointsA == NULL means sfA's leaves are contiguous starting at 0 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  /* Initialize to rank -1 so sfB roots receiving nothing are recognizable as unconnected */
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Push each sfA-root location through sfB's inverse: sfB leaves reduce into sfB roots.
     The -minleaf shift lets the reduce index reorderedRemotePointsA with global leaf numbers. */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only connected roots (rank != -1), recording their local indices */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  /* The new SF takes ownership of localPointsBA and remotePointsBA (PETSC_OWN_POINTER) */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2177 
/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF, defined on PETSC_COMM_SELF and set up before return
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt     *ilocal;
  const PetscSFNode  *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode        *liremote;
  PetscSF            lsf;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation build the local SF if it knows a faster way */
  if (sf->ops->CreateLocalSF) {
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count leaves whose root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the intra-process edges, remapping the remote rank to 0 (self) */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    /* The local SF takes ownership of lilocal and liremote (PETSC_OWN_POINTER) */
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}
2229 
/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0.
   Errors with PETSC_ERR_SUP if the SF implementation does not provide a BcastToZero op. */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode     ierr;
  PetscMemType       rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Logged under the generic BcastBegin event; this private routine has no event of its own */
  ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Query whether the user buffers live in host or device memory */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2248 
2249