xref: /petsc/src/vec/is/sf/interface/sf.c (revision 72bf85985f2a1c6c09644c9c2d5850c32b835aeb)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petscctable.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <cuda_runtime.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 #endif
13 
/* PetscSFCheckGraphSet(sf,arg): error out if the graph of sf has not been set yet.
   Declared as a real function for the clang static analyzer so it can model the check;
   an error-raising macro in debug builds; compiled out entirely in optimized builds. */
#if defined(PETSC_CLANG_STATIC_ANALYZER)
void PetscSFCheckGraphSet(PetscSF,int);
#else
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
#endif
26 
27 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
28 
29 /*@
30    PetscSFCreate - create a star forest communication context
31 
32    Collective
33 
34    Input Parameter:
35 .  comm - communicator on which the star forest will operate
36 
37    Output Parameter:
38 .  sf - new star forest context
39 
40    Options Database Keys:
41 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
42 .  -sf_type window    -Use MPI-3 one-sided window for communication
43 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
44 
45    Level: intermediate
46 
47    Notes:
48    When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
49    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
50    SFs are optimized and they have better performance than general SFs.
51 
52 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
53 @*/
54 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
55 {
56   PetscErrorCode ierr;
57   PetscSF        b;
58 
59   PetscFunctionBegin;
60   PetscValidPointer(sf,2);
61   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
62 
63   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
64 
65   b->nroots    = -1;
66   b->nleaves   = -1;
67   b->minleaf   = PETSC_MAX_INT;
68   b->maxleaf   = PETSC_MIN_INT;
69   b->nranks    = -1;
70   b->rankorder = PETSC_TRUE;
71   b->ingroup   = MPI_GROUP_NULL;
72   b->outgroup  = MPI_GROUP_NULL;
73   b->graphset  = PETSC_FALSE;
74 #if defined(PETSC_HAVE_DEVICE)
75   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
76   b->use_stream_aware_mpi = PETSC_FALSE;
77   b->unknown_input_stream= PETSC_FALSE;
78   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
79     b->backend = PETSCSF_BACKEND_KOKKOS;
80   #elif defined(PETSC_HAVE_CUDA)
81     b->backend = PETSCSF_BACKEND_CUDA;
82   #elif defined(PETSC_HAVE_HIP)
83     b->backend = PETSCSF_BACKEND_HIP;
84   #endif
85 
86   #if defined(PETSC_HAVE_NVSHMEM)
87     b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
88     b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
89     ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);CHKERRQ(ierr);
90     ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);CHKERRQ(ierr);
91   #endif
92 #endif
93   b->vscat.from_n = -1;
94   b->vscat.to_n   = -1;
95   b->vscat.unit   = MPIU_SCALAR;
96  *sf = b;
97   PetscFunctionReturn(0);
98 }
99 
/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Parameter:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation release its own data first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Return the graph-describing fields to their just-created state */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* Only the *_alloc pointers are owned; mine/remote may have aliased caller memory */
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);}
  /* Break the multi-SF's back-reference to us before destroying it */
  if (sf->multi) sf->multi->multi = NULL;
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);

 #if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
 #endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
145 
146 /*@C
147    PetscSFSetType - Set the PetscSF communication implementation
148 
149    Collective on PetscSF
150 
151    Input Parameters:
152 +  sf - the PetscSF context
153 -  type - a known method
154 
155    Options Database Key:
156 .  -sf_type <type> - Sets the method; use -help for a list
157    of available methods (for instance, window, basic, neighbor)
158 
159    Notes:
160    See "include/petscsf.h" for available methods (for instance)
161 +    PETSCSFWINDOW - MPI-2/3 one-sided
162 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
163 
164   Level: intermediate
165 
166 .seealso: PetscSFType, PetscSFCreate()
167 @*/
168 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
169 {
170   PetscErrorCode ierr,(*r)(PetscSF);
171   PetscBool      match;
172 
173   PetscFunctionBegin;
174   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
175   PetscValidCharPointer(type,2);
176 
177   ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
178   if (match) PetscFunctionReturn(0);
179 
180   ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
181   PetscCheckFalse(!r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
182   /* Destroy the previous PetscSF implementation context */
183   if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
184   ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
185   ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
186   ierr = (*r)(sf);CHKERRQ(ierr);
187   PetscFunctionReturn(0);
188 }
189 
/*@C
  PetscSFGetType - Get the PetscSF communication implementation

  Not Collective

  Input Parameter:
. sf  - the PetscSF context

  Output Parameter:
. type - the PetscSF type name (owned by the object; do not free)

  Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  /* May be NULL if no type has been set yet */
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}
213 
/*@C
   PetscSFDestroy - destroy star forest

   Collective

   Input Parameter:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually destroys the object */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr);
  /* A bs>1 vscat.unit was created by MPI_Type_dup() (see PetscSFDuplicate), so free it here */
  if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRMPI(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
241 
242 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
243 {
244   PetscInt           i, nleaves;
245   PetscMPIInt        size;
246   const PetscInt    *ilocal;
247   const PetscSFNode *iremote;
248   PetscErrorCode     ierr;
249 
250   PetscFunctionBegin;
251   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
252   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
254   for (i = 0; i < nleaves; i++) {
255     const PetscInt rank = iremote[i].rank;
256     const PetscInt remote = iremote[i].index;
257     const PetscInt leaf = ilocal ? ilocal[i] : i;
258     PetscCheckFalse(rank < 0 || rank >= size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size);
259     PetscCheckFalse(remote < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i);
260     PetscCheckFalse(leaf < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i);
261   }
262   PetscFunctionReturn(0);
263 }
264 
265 /*@
266    PetscSFSetUp - set up communication structures
267 
268    Collective
269 
270    Input Parameter:
271 .  sf - star forest communication object
272 
273    Level: beginner
274 
275 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
276 @*/
277 PetscErrorCode PetscSFSetUp(PetscSF sf)
278 {
279   PetscErrorCode ierr;
280 
281   PetscFunctionBegin;
282   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
283   PetscSFCheckGraphSet(sf,1);
284   if (sf->setupcalled) PetscFunctionReturn(0);
285   ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
286   ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
287   if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */
288   if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
289 #if defined(PETSC_HAVE_CUDA)
290   if (sf->backend == PETSCSF_BACKEND_CUDA) {
291     sf->ops->Malloc = PetscSFMalloc_CUDA;
292     sf->ops->Free   = PetscSFFree_CUDA;
293   }
294 #endif
295 #if defined(PETSC_HAVE_HIP)
296   if (sf->backend == PETSCSF_BACKEND_HIP) {
297     sf->ops->Malloc = PetscSFMalloc_HIP;
298     sf->ops->Free   = PetscSFFree_HIP;
299   }
300 #endif
301 
302 #
303 #if defined(PETSC_HAVE_KOKKOS)
304   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
305     sf->ops->Malloc = PetscSFMalloc_Kokkos;
306     sf->ops->Free   = PetscSFFree_Kokkos;
307   }
308 #endif
309   ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
310   sf->setupcalled = PETSC_TRUE;
311   PetscFunctionReturn(0);
312 }
313 
/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Parameter:
.  sf - star forest

   Options Database Keys:
+  -sf_type               - implementation type, see PetscSFSetType()
.  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
.  -sf_unknown_input_stream - Assume the SF root/leafdata was computed on arbitrary streams unknown to SF (default: false)
.  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                               If true, this option only works with -use_gpu_aware_mpi 1.

-  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
                              On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                              the only available is kokkos.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* PetscObjectOptionsBegin() introduces the PetscOptionsObject used near the end */
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
 #if defined(PETSC_HAVE_DEVICE)
  {
    char        backendstr[32] = {0};
    PetscBool   isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    ierr = PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr);
    /* Case-insensitive match of the requested backend name */
    ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr);
  #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else PetscCheckFalse(set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
  #elif defined(PETSC_HAVE_KOKKOS)
    PetscCheckFalse(set && !isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
   #endif
  }
 #endif
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
376 
/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Parameters:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The ordering is baked into the multi-SF once it exists, so it cannot be changed afterwards */
  PetscCheckFalse(sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}
399 
/*@C
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Parameters:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

   Developer Notes:
   Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed.

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  PetscCheckFalse(nroots  < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots);
  PetscCheckFalse(nleaves < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    /* One pass: compute the leaf range and detect the identity permutation */
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* Identity ilocal encodes contiguous storage: drop it (freeing it if ownership was transferred) */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    /* Contiguous leaves: range is [0,nleaves-1] (empty when nleaves == 0) */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Caller retains ownership; mine_alloc stays NULL so PetscSFReset() never frees it */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    /* Caller retains ownership; remote_alloc stays NULL so PetscSFReset() never frees it */
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
514 
/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2);
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* ALLTOALL ignores the caller's map; build a layout with <size> roots per rank instead */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRMPI(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a GATHER pattern */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
614 
/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Parameter:
.  sf - star forest to invert

   Output Parameter:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf slot records its own (rank,index), so reducing tells every root who points at it */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Roots start as (-1,-1); any root left untouched by the reduction has no incoming leaf */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* Ownership of newilocal transfers to *isf; roots is copied by PetscSFSetGraph() and freed below */
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
684 
/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Parameters:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Parameter:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;
  MPI_Datatype   dtype=MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      /* Deep-copy the general graph into the new SF */
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graphs can be rebuilt from the layout and pattern alone */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRMPI(ierr);}
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream= sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}
744 
745 /*@C
746    PetscSFGetGraph - Get the graph specifying a parallel star forest
747 
748    Not Collective
749 
750    Input Parameter:
751 .  sf - star forest
752 
753    Output Parameters:
754 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
755 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
756 .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
757 -  iremote - remote locations of root vertices for each leaf on the current process
758 
759    Notes:
760      We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
761 
762    Fortran Notes:
763      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
764      want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
765 
766      To check for a NULL ilocal use
767 $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
768 
769    Level: intermediate
770 
771 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
772 @*/
773 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
774 {
775   PetscErrorCode ierr;
776 
777   PetscFunctionBegin;
778   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
779   if (sf->ops->GetGraph) {
780     ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
781   } else {
782     if (nroots) *nroots = sf->nroots;
783     if (nleaves) *nleaves = sf->nleaves;
784     if (ilocal) *ilocal = sf->mine;
785     if (iremote) *iremote = sf->remote;
786   }
787   PetscFunctionReturn(0);
788 }
789 
790 /*@
791    PetscSFGetLeafRange - Get the active leaf ranges
792 
793    Not Collective
794 
795    Input Parameter:
796 .  sf - star forest
797 
798    Output Parameters:
799 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
800 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
801 
802    Level: developer
803 
804 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
805 @*/
806 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
807 {
808   PetscFunctionBegin;
809   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
810   PetscSFCheckGraphSet(sf,1);
811   if (minleaf) *minleaf = sf->minleaf;
812   if (maxleaf) *maxleaf = sf->maxleaf;
813   PetscFunctionReturn(0);
814 }
815 
816 /*@C
817    PetscSFViewFromOptions - View from Options
818 
819    Collective on PetscSF
820 
821    Input Parameters:
822 +  A - the star forest
823 .  obj - Optional object
824 -  name - command line option
825 
826    Level: intermediate
827 .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
828 @*/
829 PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
830 {
831   PetscErrorCode ierr;
832 
833   PetscFunctionBegin;
834   PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
835   ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
836   PetscFunctionReturn(0);
837 }
838 
/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD; if NULL, stdout on the SF's communicator is used

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is supplied */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Set up first so the rank/offset tables used by the detailed output exist */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  /* NOTE(review): reads viewer->format directly (via viewerimpl.h) instead of PetscViewerGetFormat() */
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* Only general graphs store explicit leaf/remote arrays worth printing */
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      /* One line per leaf: local leaf index <- (owner rank, root index); NULL sf->mine means contiguous leaves */
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list so the listing is ordered by rank; perm maps back to storage order */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          /* roffset[i]..roffset[i+1] delimits this rank's slice of rmine/rremote */
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  /* Give the implementation a chance to print type-specific information */
  if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
909 
910 /*@C
911    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
912 
913    Not Collective
914 
915    Input Parameter:
916 .  sf - star forest
917 
918    Output Parameters:
919 +  nranks - number of ranks referenced by local part
920 .  ranks - array of ranks
921 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
922 .  rmine - concatenated array holding local indices referencing each remote rank
923 -  rremote - concatenated array holding remote indices referenced for each remote rank
924 
925    Level: developer
926 
927 .seealso: PetscSFGetLeafRanks()
928 @*/
929 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
930 {
931   PetscErrorCode ierr;
932 
933   PetscFunctionBegin;
934   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
935   PetscCheckFalse(!sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
936   if (sf->ops->GetRootRanks) {
937     ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
938   } else {
939     /* The generic implementation */
940     if (nranks)  *nranks  = sf->nranks;
941     if (ranks)   *ranks   = sf->ranks;
942     if (roffset) *roffset = sf->roffset;
943     if (rmine)   *rmine   = sf->rmine;
944     if (rremote) *rremote = sf->rremote;
945   }
946   PetscFunctionReturn(0);
947 }
948 
949 /*@C
950    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
951 
952    Not Collective
953 
954    Input Parameter:
955 .  sf - star forest
956 
957    Output Parameters:
958 +  niranks - number of leaf ranks referencing roots on this process
959 .  iranks - array of ranks
960 .  ioffset - offset in irootloc for each rank (length niranks+1)
961 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
962 
963    Level: developer
964 
965 .seealso: PetscSFGetRootRanks()
966 @*/
967 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
968 {
969   PetscErrorCode ierr;
970 
971   PetscFunctionBegin;
972   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
973   PetscCheckFalse(!sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
974   if (sf->ops->GetLeafRanks) {
975     ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
976   } else {
977     PetscSFType type;
978     ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
979     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
980   }
981   PetscFunctionReturn(0);
982 }
983 
984 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
985   PetscInt i;
986   for (i=0; i<n; i++) {
987     if (needle == list[i]) return PETSC_TRUE;
988   }
989   return PETSC_FALSE;
990 }
991 
/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Parameters:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Notes:
   Fills sf->nranks, sf->ndranks, sf->ranks, sf->roffset, sf->rmine and sf->rremote.
   Distinguished ranks (members of dgroup) are placed first in sf->ranks (there are
   sf->ndranks of them); each partition is sorted by rank.

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
  /* Count, per remote rank, how many of our leaves reference it */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  /* Drain the table into parallel arrays: ranks[] (who) and rcount[] (how many leaves) */
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate dgroup's members into ranks of this SF's communicator (groupranks) */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);}
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished,
     using a two-pointer sweep that swaps misplaced pairs (ndranks moves up, i moves down) */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition by rank; sorted order is required by PetscFindMPIInt() below */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Prefix-sum the per-rank counts into roffset[], then reuse rcount[] as per-rank write cursors */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Bucket each leaf (local index into rmine, remote root index into rremote) by owning rank */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheckFalse(irank < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1095 
1096 /*@C
1097    PetscSFGetGroups - gets incoming and outgoing process groups
1098 
1099    Collective
1100 
1101    Input Parameter:
1102 .  sf - star forest
1103 
1104    Output Parameters:
1105 +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1106 -  outgoing - group of destination processes for outgoing edges (roots that I reference)
1107 
1108    Level: developer
1109 
1110 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1111 @*/
1112 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1113 {
1114   PetscErrorCode ierr;
1115   MPI_Group      group = MPI_GROUP_NULL;
1116 
1117   PetscFunctionBegin;
1118   PetscCheckFalse(sf->nranks < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
1119   if (sf->ingroup == MPI_GROUP_NULL) {
1120     PetscInt       i;
1121     const PetscInt *indegree;
1122     PetscMPIInt    rank,*outranks,*inranks;
1123     PetscSFNode    *remote;
1124     PetscSF        bgcount;
1125 
1126     /* Compute the number of incoming ranks */
1127     ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
1128     for (i=0; i<sf->nranks; i++) {
1129       remote[i].rank  = sf->ranks[i];
1130       remote[i].index = 0;
1131     }
1132     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
1133     ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1134     ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
1135     ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
1136     /* Enumerate the incoming ranks */
1137     ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
1138     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
1139     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1140     ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
1141     ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
1142     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
1143     ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRMPI(ierr);
1144     ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
1145     ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
1146     ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
1147   }
1148   *incoming = sf->ingroup;
1149 
1150   if (sf->outgroup == MPI_GROUP_NULL) {
1151     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
1152     ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRMPI(ierr);
1153     ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
1154   }
1155   *outgoing = sf->outgroup;
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 /*@
1160    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1161 
1162    Collective
1163 
1164    Input Parameter:
1165 .  sf - star forest that may contain roots with 0 or with more than 1 vertex
1166 
1167    Output Parameter:
1168 .  multi - star forest with split roots, such that each root has degree exactly 1
1169 
1170    Level: developer
1171 
1172    Notes:
1173 
1174    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1175    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1176    edge, it is a candidate for future optimization that might involve its removal.
1177 
1178 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1179 @*/
1180 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1181 {
1182   PetscErrorCode ierr;
1183 
1184   PetscFunctionBegin;
1185   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1186   PetscValidPointer(multi,2);
1187   if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
1188     ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
1189     *multi = sf->multi;
1190     sf->multi->multi = sf->multi;
1191     PetscFunctionReturn(0);
1192   }
1193   if (!sf->multi) {
1194     const PetscInt *indegree;
1195     PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
1196     PetscSFNode    *remote;
1197     maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1198     ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
1199     ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
1200     ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
1201     inoffset[0] = 0;
1202     for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1203     for (i=0; i<maxlocal; i++) outones[i] = 1;
1204     ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
1205     ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
1206     for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1207     if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1208       for (i=0; i<sf->nroots; i++) {
1209         PetscCheckFalse(inoffset[i] + indegree[i] != inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1210       }
1211     }
1212     ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
1213     for (i=0; i<sf->nleaves; i++) {
1214       remote[i].rank  = sf->remote[i].rank;
1215       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1216     }
1217     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
1218     sf->multi->multi = sf->multi;
1219     ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1220     if (sf->rankorder) {        /* Sort the ranks */
1221       PetscMPIInt rank;
1222       PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1223       PetscSFNode *newremote;
1224       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
1225       for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1226       ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
1227       for (i=0; i<maxlocal; i++) outranks[i] = rank;
1228       ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
1229       ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
1230       /* Sort the incoming ranks at each vertex, build the inverse map */
1231       for (i=0; i<sf->nroots; i++) {
1232         PetscInt j;
1233         for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1234         ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
1235         for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1236       }
1237       ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
1238       ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
1239       ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
1240       for (i=0; i<sf->nleaves; i++) {
1241         newremote[i].rank  = sf->remote[i].rank;
1242         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1243       }
1244       ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1245       ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
1246     }
1247     ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
1248   }
1249   *multi = sf->multi;
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 /*@C
1254    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1255 
1256    Collective
1257 
1258    Input Parameters:
1259 +  sf - original star forest
1260 .  nselected  - number of selected roots on this process
1261 -  selected   - indices of the selected roots on this process
1262 
1263    Output Parameter:
1264 .  esf - new star forest
1265 
1266    Level: advanced
1267 
1268    Note:
1269    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1270    be done by calling PetscSFGetGraph().
1271 
1272 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1273 @*/
1274 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1275 {
1276   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1277   const PetscInt    *ilocal;
1278   signed char       *rootdata,*leafdata,*leafmem;
1279   const PetscSFNode *iremote;
1280   PetscSFNode       *new_iremote;
1281   MPI_Comm          comm;
1282   PetscErrorCode    ierr;
1283 
1284   PetscFunctionBegin;
1285   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1286   PetscSFCheckGraphSet(sf,1);
1287   if (nselected) PetscValidPointer(selected,3);
1288   PetscValidPointer(esf,4);
1289 
1290   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1291   ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1292   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1293   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
1294 
1295   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1296     PetscBool dups;
1297     ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr);
1298     PetscCheckFalse(dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1299     for (i=0; i<nselected; i++)
1300       PetscCheckFalse(selected[i] < 0 || selected[i] >= nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots);
1301   }
1302 
1303   if (sf->ops->CreateEmbeddedRootSF) {
1304     ierr = (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);CHKERRQ(ierr);
1305   } else {
1306     /* A generic version of creating embedded sf */
1307     ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
1308     maxlocal = maxleaf - minleaf + 1;
1309     ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
1310     leafdata = leafmem - minleaf;
1311     /* Tag selected roots and bcast to leaves */
1312     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1313     ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
1314     ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
1315 
1316     /* Build esf with leaves that are still connected */
1317     esf_nleaves = 0;
1318     for (i=0; i<nleaves; i++) {
1319       j = ilocal ? ilocal[i] : i;
1320       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1321          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1322       */
1323       esf_nleaves += (leafdata[j] ? 1 : 0);
1324     }
1325     ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
1326     ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
1327     for (i=n=0; i<nleaves; i++) {
1328       j = ilocal ? ilocal[i] : i;
1329       if (leafdata[j]) {
1330         new_ilocal[n]        = j;
1331         new_iremote[n].rank  = iremote[i].rank;
1332         new_iremote[n].index = iremote[i].index;
1333         ++n;
1334       }
1335     }
1336     ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr);
1337     ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr);
1338     ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1339     ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
1340   }
1341   ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 /*@C
1346   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1347 
1348   Collective
1349 
1350   Input Parameters:
1351 + sf - original star forest
1352 . nselected  - number of selected leaves on this process
1353 - selected   - indices of the selected leaves on this process
1354 
1355   Output Parameter:
1356 .  newsf - new star forest
1357 
1358   Level: advanced
1359 
1360 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
1361 @*/
1362 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1363 {
1364   const PetscSFNode *iremote;
1365   PetscSFNode       *new_iremote;
1366   const PetscInt    *ilocal;
1367   PetscInt          i,nroots,*leaves,*new_ilocal;
1368   MPI_Comm          comm;
1369   PetscErrorCode    ierr;
1370 
1371   PetscFunctionBegin;
1372   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1373   PetscSFCheckGraphSet(sf,1);
1374   if (nselected) PetscValidPointer(selected,3);
1375   PetscValidPointer(newsf,4);
1376 
1377   /* Uniq selected[] and put results in leaves[] */
1378   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1379   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1380   ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1381   ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1382   PetscCheckFalse(nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves);
1383 
1384   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1385   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1386     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1387   } else {
1388     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1389     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1390     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1391     for (i=0; i<nselected; ++i) {
1392       const PetscInt l     = leaves[i];
1393       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1394       new_iremote[i].rank  = iremote[l].rank;
1395       new_iremote[i].index = iremote[l].index;
1396     }
1397     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
1398     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1399   }
1400   ierr = PetscFree(leaves);CHKERRQ(ierr);
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 /*@C
1405    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1406 
1407    Collective on PetscSF
1408 
1409    Input Parameters:
1410 +  sf - star forest on which to communicate
1411 .  unit - data type associated with each node
1412 .  rootdata - buffer to broadcast
1413 -  op - operation to use for reduction
1414 
1415    Output Parameter:
1416 .  leafdata - buffer to be reduced with values from each leaf's respective root
1417 
1418    Level: intermediate
1419 
1420    Notes:
1421     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1422     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1423     use PetscSFBcastWithMemTypeBegin() instead.
1424 .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
1425 @*/
1426 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1427 {
1428   PetscErrorCode ierr;
1429   PetscMemType   rootmtype,leafmtype;
1430 
1431   PetscFunctionBegin;
1432   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1433   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1434   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1435   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1436   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1437   ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1438   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 /*@C
1443    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()
1444 
1445    Collective on PetscSF
1446 
1447    Input Parameters:
1448 +  sf - star forest on which to communicate
1449 .  unit - data type associated with each node
1450 .  rootmtype - memory type of rootdata
1451 .  rootdata - buffer to broadcast
1452 .  leafmtype - memory type of leafdata
1453 -  op - operation to use for reduction
1454 
1455    Output Parameter:
1456 .  leafdata - buffer to be reduced with values from each leaf's respective root
1457 
1458    Level: intermediate
1459 
1460 .seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
1461 @*/
1462 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1463 {
1464   PetscErrorCode ierr;
1465 
1466   PetscFunctionBegin;
1467   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1468   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1469   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1470   ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1471   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1472   PetscFunctionReturn(0);
1473 }
1474 
/*@C
   PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Parameter:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFReduceEnd()
@*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* NOTE(review): SF-level logging is skipped when sf->vscat.logging is set -- presumably the VecScatter layer logs its own event instead; confirm */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
1504 
1505 /*@C
1506    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1507 
1508    Collective
1509 
1510    Input Parameters:
1511 +  sf - star forest
1512 .  unit - data type
1513 .  leafdata - values to reduce
1514 -  op - reduction operation
1515 
1516    Output Parameter:
1517 .  rootdata - result of reduction of values from all leaves of each root
1518 
1519    Level: intermediate
1520 
1521    Notes:
1522     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1523     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1524     use PetscSFReduceWithMemTypeBegin() instead.
1525 
1526 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1527 @*/
1528 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1529 {
1530   PetscErrorCode ierr;
1531   PetscMemType   rootmtype,leafmtype;
1532 
1533   PetscFunctionBegin;
1534   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1535   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1536   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1537   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1538   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1539   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1540   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 /*@C
1545    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1546 
1547    Collective
1548 
1549    Input Parameters:
1550 +  sf - star forest
1551 .  unit - data type
1552 .  leafmtype - memory type of leafdata
1553 .  leafdata - values to reduce
1554 .  rootmtype - memory type of rootdata
1555 -  op - reduction operation
1556 
1557    Output Parameter:
1558 .  rootdata - result of reduction of values from all leaves of each root
1559 
1560    Level: intermediate
1561 
1562 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1563 @*/
1564 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1565 {
1566   PetscErrorCode ierr;
1567 
1568   PetscFunctionBegin;
1569   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1570   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1571   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1572   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1573   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1574   PetscFunctionReturn(0);
1575 }
1576 
/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* NOTE(review): SF-level logging is skipped when sf->vscat.logging is set -- presumably the VecScatter layer logs its own event instead; confirm */
  if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
1606 
/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Detect memory types at runtime; PetscSFFetchAndOpWithMemTypeBegin() lets callers supply them */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  /* Backends require leafdata and leafupdate to live in the same memory space */
  PetscCheckFalse(leafmtype != leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1649 
/*@C
   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootmtype - memory type of rootdata
.  leafmtype - memory type of leafdata
.  leafdata - leaf values to use in reduction
.  leafupdatemtype - memory type of leafupdate
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note: See PetscSFFetchAndOpBegin() for more details.

.seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Backends require leafdata and leafupdate to live in the same memory space */
  PetscCheckFalse(leafmtype != leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1687 
/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1718 
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

   The degree array is not valid until the matching PetscSFComputeDegreeEnd() returns; this routine sets *degree to NULL.

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree non-NULL here means a Begin without a matching End is already outstanding */
    PetscCheckFalse(sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Summing 1 from every leaf into its root yields the degree; degreetmp is addressed by leaf index shifted by minleaf */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  /* Result becomes available only in PetscSFComputeDegreeEnd() */
  *degree = NULL;
  PetscFunctionReturn(0);
}
1758 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscCheckFalse(!sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    /* Cache the result; subsequent Begin/End pairs return sf->degree without recomputation */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1794 
/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Parameters:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Parameters:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

.seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF             msf;
  PetscInt            i, j, k, nroots, nmroots;
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
  if (nroots) PetscValidIntPointer(degree,2);
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
  PetscValidPointer(multiRootsOrigNumbering,4);
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
  /* Each original root i with degree[i] > 0 expands to degree[i] consecutive multi-roots, all mapped back to i */
  for (i=0,j=0,k=0; i<nroots; i++) {
    if (!degree[i]) continue;
    for (j=0; j<degree[i]; j++,k++) {
      (*multiRootsOrigNumbering)[k] = i;
    }
  }
  /* The expanded count must match the multi-SF's root count */
  PetscCheckFalse(k != nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}
1841 
1842 /*@C
1843    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1844 
1845    Collective
1846 
1847    Input Parameters:
1848 +  sf - star forest
1849 .  unit - data type
1850 -  leafdata - leaf data to gather to roots
1851 
1852    Output Parameter:
1853 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1854 
1855    Level: intermediate
1856 
1857 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1858 @*/
1859 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1860 {
1861   PetscErrorCode ierr;
1862   PetscSF        multi = NULL;
1863 
1864   PetscFunctionBegin;
1865   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1866   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1867   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1868   ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr);
1869   PetscFunctionReturn(0);
1870 }
1871 
1872 /*@C
1873    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1874 
1875    Collective
1876 
1877    Input Parameters:
1878 +  sf - star forest
1879 .  unit - data type
1880 -  leafdata - leaf data to gather to roots
1881 
1882    Output Parameter:
1883 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1884 
1885    Level: intermediate
1886 
1887 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1888 @*/
1889 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1890 {
1891   PetscErrorCode ierr;
1892   PetscSF        multi = NULL;
1893 
1894   PetscFunctionBegin;
1895   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1896   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1897   ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr);
1898   PetscFunctionReturn(0);
1899 }
1900 
/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* A scatter is a broadcast with MPI_REPLACE over the multi-SF, whose roots are replicated per degree */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1930 
/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Complete the broadcast on the multi-SF issued by PetscSFScatterBegin() */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1959 
/* Verify (debug builds only) that no leaf index appears more than once in sf's graph.
   A NULL ilocal means contiguous leaves 0..nleaves-1, which are unique by construction. */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
    ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i;
      ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
    }
    /* Duplicates collapse in the hash set, so a smaller set size reveals repeated leaves */
    ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
    PetscCheckFalse(n != nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
    ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1981 
/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  /* Reindex sfA's remote roots by sfA's leaf number so that entry r holds the sfA-root above sfB's root r;
     sfB roots not covered by any sfA leaf are marked unconnected with rank = -1 */
  if (localPointsA) {
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaves outside sfB's root space cannot connect */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Broadcast the (rank,index) of each sfA-root down sfB so every sfB leaf learns its sfA-root, if any */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: sfB uses contiguous leaves and every one of them is connected, so leafdataB can be reused directly */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB passes to the new SF via PETSC_OWN_POINTER below */
  } else {
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue; /* skip leaves with no path to an sfA root */
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2071 
/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPI_REPLACE;
#endif

  /* Lay out sfA's remote roots over sfB's leaf range; slots with no matching sfA leaf stay rank = -1 */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* sfA leaf outside sfB's leaf range cannot connect */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* Contiguous sfA leaves: leaf number is the index itself */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce each sfA-root up sfB, so each sfB root learns the sfA-root above its (single) leaf */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact away unconnected sfB roots (rank still -1) in place */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2175 
/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt     *ilocal;
  const PetscSFNode  *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode        *liremote;
  PetscSF            lsf;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the SF implementation build the local SF if it knows a faster way */
  if (sf->ops->CreateLocalSF) {
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count edges whose root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the local edges, remapping the remote rank to 0 in PETSC_COMM_SELF */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}
2227 
/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode     ierr;
  PetscMemType       rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  /* NOTE(review): the SETERRQ path above returns before this log-event end, leaving the event open on error */
  ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2246 
2247