xref: /petsc/src/vec/is/sf/interface/sf.c (revision 780b99b1de388ece75e7a7b40a1d9cd0ed44a873)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petscctable.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <cuda_runtime.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 #endif
13 
#if defined(PETSC_CLANG_STATIC_ANALYZER)
/* Give the static analyzer a plain prototype so it does not have to reason about the macro below */
void PetscSFCheckGraphSet(PetscSF,int);
#else
#if defined(PETSC_USE_DEBUG)
/* Error out unless the graph of the SF passed as argument number 'arg' has been set
   with PetscSFSetGraph()/PetscSFSetGraphWithPattern(). Compiles to a no-op in
   optimized (non-debug) builds. */
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
#endif

/* String names for the PetscSFDuplicateOption enum values, used by the options/enum machinery */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
28 
/* Classify the memory a pointer refers to as host or device memory.
   Defaults to PETSC_MEMTYPE_HOST; only queries the CUDA/HIP runtime when the
   corresponding device backend has been initialized and the pointer is non-NULL. */
PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
{
  PetscFunctionBegin;
  PetscValidPointer(type,2);
  *type = PETSC_MEMTYPE_HOST;
#if defined(PETSC_HAVE_CUDA)
  if (PetscDeviceInitialized(PETSC_DEVICE_CUDA) && data) {
    cudaError_t                  cerr;
    struct cudaPointerAttributes attr;
    enum cudaMemoryType          mtype;
    cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
    cudaGetLastError(); /* Reset the last error */
    #if (CUDART_VERSION < 10000)
      mtype = attr.memoryType; /* this attribute field was renamed to 'type' in CUDA 10 */
    #else
      mtype = attr.type;
    #endif
    /* Only trust the attribute when the query itself succeeded */
    if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  if (PetscDeviceInitialized(PETSC_DEVICE_HIP) && data) {
    hipError_t                   cerr;
    struct hipPointerAttribute_t attr;
    enum hipMemoryType           mtype;
    cerr = hipPointerGetAttributes(&attr,data); /* error deliberately left unchecked, mirroring the CUDA branch above */
    hipGetLastError(); /* Reset the last error */
    mtype = attr.memoryType;
    if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif
  PetscFunctionReturn(0);
}
63 
/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Parameter:
.  comm - communicator on which the star forest will operate

   Output Parameter:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window    -Use MPI-3 one-sided window for communication
-  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* -1 roots/leaves/ranks means "graph not set yet"; the leaf range starts
     inverted (min=MAX, max=MIN) so the first real leaf narrows it correctly */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi; /* inherit the global PETSc setting */
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->unknown_input_stream= PETSC_FALSE;
  #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
    b->backend = PETSCSF_BACKEND_KOKKOS;
  #elif defined(PETSC_HAVE_CUDA)
    b->backend = PETSCSF_BACKEND_CUDA;
  #elif defined(PETSC_HAVE_HIP)
    b->backend = PETSCSF_BACKEND_HIP;
  #endif

  #if defined(PETSC_HAVE_NVSHMEM)
    b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
    b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
    ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);CHKERRQ(ierr);
  #endif
#endif
  /* VecScatter-compatibility fields; -1 sizes mean "not configured" */
  b->vscat.from_n = -1;
  b->vscat.to_n   = -1;
  b->vscat.unit   = MPIU_SCALAR;
 *sf = b;
  PetscFunctionReturn(0);
}
134 
/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Parameter:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the type-specific implementation release its own data first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Return the graph-describing fields to their PetscSFCreate() sentinels */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* Only the *_alloc pointers are owned; sf->mine/sf->remote may have aliased user memory */
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);}
  /* Break the back-pointer cycle between sf and its multi-SF before destroying it */
  if (sf->multi) sf->multi->multi = NULL;
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);

 #if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
 #endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
180 
181 /*@C
182    PetscSFSetType - Set the PetscSF communication implementation
183 
184    Collective on PetscSF
185 
186    Input Parameters:
187 +  sf - the PetscSF context
188 -  type - a known method
189 
190    Options Database Key:
191 .  -sf_type <type> - Sets the method; use -help for a list
192    of available methods (for instance, window, basic, neighbor)
193 
194    Notes:
195    See "include/petscsf.h" for available methods (for instance)
196 +    PETSCSFWINDOW - MPI-2/3 one-sided
197 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
198 
199   Level: intermediate
200 
201 .seealso: PetscSFType, PetscSFCreate()
202 @*/
203 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
204 {
205   PetscErrorCode ierr,(*r)(PetscSF);
206   PetscBool      match;
207 
208   PetscFunctionBegin;
209   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
210   PetscValidCharPointer(type,2);
211 
212   ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
213   if (match) PetscFunctionReturn(0);
214 
215   ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
216   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
217   /* Destroy the previous PetscSF implementation context */
218   if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
219   ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
220   ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
221   ierr = (*r)(sf);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
/*@C
  PetscSFGetType - Get the PetscSF communication implementation

  Not Collective

  Input Parameter:
. sf  - the PetscSF context

  Output Parameter:
. type - the PetscSF type name

  Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  /* Returns the name owned by the object (NULL if no type was set); caller must not free it */
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}
248 
249 /*@C
250    PetscSFDestroy - destroy star forest
251 
252    Collective
253 
254    Input Parameter:
255 .  sf - address of star forest
256 
257    Level: intermediate
258 
259 .seealso: PetscSFCreate(), PetscSFReset()
260 @*/
261 PetscErrorCode PetscSFDestroy(PetscSF *sf)
262 {
263   PetscErrorCode ierr;
264 
265   PetscFunctionBegin;
266   if (!*sf) PetscFunctionReturn(0);
267   PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
268   if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
269   ierr = PetscSFReset(*sf);CHKERRQ(ierr);
270   if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
271   ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr);
272   if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRMPI(ierr);}
273   ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
278 {
279   PetscInt           i, nleaves;
280   PetscMPIInt        size;
281   const PetscInt    *ilocal;
282   const PetscSFNode *iremote;
283   PetscErrorCode     ierr;
284 
285   PetscFunctionBegin;
286   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
287   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
288   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
289   for (i = 0; i < nleaves; i++) {
290     const PetscInt rank = iremote[i].rank;
291     const PetscInt remote = iremote[i].index;
292     const PetscInt leaf = ilocal ? ilocal[i] : i;
293     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size);
294     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i);
295     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i);
296   }
297   PetscFunctionReturn(0);
298 }
299 
300 /*@
301    PetscSFSetUp - set up communication structures
302 
303    Collective
304 
305    Input Parameter:
306 .  sf - star forest communication object
307 
308    Level: beginner
309 
310 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
311 @*/
312 PetscErrorCode PetscSFSetUp(PetscSF sf)
313 {
314   PetscErrorCode ierr;
315 
316   PetscFunctionBegin;
317   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
318   PetscSFCheckGraphSet(sf,1);
319   if (sf->setupcalled) PetscFunctionReturn(0);
320   ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
321   ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
322   if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */
323   if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
324 #if defined(PETSC_HAVE_CUDA)
325   if (sf->backend == PETSCSF_BACKEND_CUDA) {
326     sf->ops->Malloc = PetscSFMalloc_CUDA;
327     sf->ops->Free   = PetscSFFree_CUDA;
328   }
329 #endif
330 #if defined(PETSC_HAVE_HIP)
331   if (sf->backend == PETSCSF_BACKEND_HIP) {
332     sf->ops->Malloc = PetscSFMalloc_HIP;
333     sf->ops->Free   = PetscSFFree_HIP;
334   }
335 #endif
336 
337 #
338 #if defined(PETSC_HAVE_KOKKOS)
339   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
340     sf->ops->Malloc = PetscSFMalloc_Kokkos;
341     sf->ops->Free   = PetscSFFree_Kokkos;
342   }
343 #endif
344   ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
345   sf->setupcalled = PETSC_TRUE;
346   PetscFunctionReturn(0);
347 }
348 
349 /*@
350    PetscSFSetFromOptions - set PetscSF options using the options database
351 
352    Logically Collective
353 
354    Input Parameter:
355 .  sf - star forest
356 
357    Options Database Keys:
358 +  -sf_type               - implementation type, see PetscSFSetType()
359 .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
360 .  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
361                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
362                             If true, this option only works with -use_gpu_aware_mpi 1.
363 .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
364                                If true, this option only works with -use_gpu_aware_mpi 1.
365 
366 -  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
367                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
368                               the only available is kokkos.
369 
370    Level: intermediate
371 @*/
372 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
373 {
374   PetscSFType    deft;
375   char           type[256];
376   PetscErrorCode ierr;
377   PetscBool      flg;
378 
379   PetscFunctionBegin;
380   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
381   ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
382   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
383   ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
384   ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
385   ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
386  #if defined(PETSC_HAVE_DEVICE)
387   {
388     char        backendstr[32] = {0};
389     PetscBool   isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
390     /* Change the defaults set in PetscSFCreate() with command line options */
391     ierr = PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);CHKERRQ(ierr);
392     ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
393     ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr);
394     ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr);
395     ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr);
396     ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr);
397   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
398     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
399     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
400     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
401     else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
402   #elif defined(PETSC_HAVE_KOKKOS)
403     if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
404    #endif
405   }
406  #endif
407   if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
408   ierr = PetscOptionsEnd();CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Parameters:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF is built using the current ordering; once it exists the choice is frozen */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}
434 
/*@C
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Parameters:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    /* Scan the leaf locations: record their range and detect the identity
       permutation, which is equivalent to contiguous storage (ilocal == NULL) */
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* The identity map carries no information; drop it (and free it if we were given ownership) */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    /* Contiguous leaves: range is [0, nleaves-1] (empty range when nleaves == 0) */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    /* Take ownership of / copy / alias the leaf index array according to localmode;
       only mine_alloc is freed by PetscSFReset() */
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  /* Same ownership handling for the remote (rank,index) array */
  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
548 
/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2);
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (PetscUnlikely((pattern < PETSCSF_PATTERN_ALLGATHER) || (pattern > PETSCSF_PATTERN_ALLTOALL))) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* The user's map is ignored; build the canonical size-per-rank layout instead */
    type = PETSCSFALLTOALL;
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRMPI(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      /* Varying local sizes need the "v" (vector) variants */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves; other ranks have an empty leaf range */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
648 
/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Parameter:
.  sf - star forest to invert

   Output Parameter:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf advertises its own (rank, local index) ... */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* ... while roots start as (-1,-1) so untouched roots are recognizable afterwards */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's identity to its root; NOTE(review): this reduces PetscSFNode
     pairs as MPIU_2INT, which presumably matches the two-PetscInt layout of
     PetscSFNode -- confirm if PetscInt/MPIU_INT configuration changes */
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* dense: contiguous leaf storage in the inverse */
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* newilocal ownership passes to the new SF; roots is copied, so both scratch arrays are freed here */
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
718 
/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Parameters:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Parameter:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;
  MPI_Datatype   dtype=MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graphs are deep-copied; patterned graphs are rebuilt from their pattern */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRMPI(ierr);}
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  /* Carry over the device configuration chosen on the original SF */
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream= sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}
778 
779 /*@C
780    PetscSFGetGraph - Get the graph specifying a parallel star forest
781 
782    Not Collective
783 
784    Input Parameter:
785 .  sf - star forest
786 
787    Output Parameters:
788 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
789 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
790 .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
791 -  iremote - remote locations of root vertices for each leaf on the current process
792 
793    Notes:
794      We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
795 
796    Fortran Notes:
797      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
798      want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
799 
800      To check for a NULL ilocal use
801 $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
802 
803    Level: intermediate
804 
805 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
806 @*/
807 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
808 {
809   PetscErrorCode ierr;
810 
811   PetscFunctionBegin;
812   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
813   if (sf->ops->GetGraph) {
814     ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
815   } else {
816     if (nroots) *nroots = sf->nroots;
817     if (nleaves) *nleaves = sf->nleaves;
818     if (ilocal) *ilocal = sf->mine;
819     if (iremote) *iremote = sf->remote;
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 /*@
825    PetscSFGetLeafRange - Get the active leaf ranges
826 
827    Not Collective
828 
829    Input Parameter:
830 .  sf - star forest
831 
832    Output Parameters:
833 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
834 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
835 
836    Level: developer
837 
838 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
839 @*/
840 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
841 {
842   PetscFunctionBegin;
843   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
844   PetscSFCheckGraphSet(sf,1);
845   if (minleaf) *minleaf = sf->minleaf;
846   if (maxleaf) *maxleaf = sf->maxleaf;
847   PetscFunctionReturn(0);
848 }
849 
850 /*@C
851    PetscSFViewFromOptions - View from Options
852 
853    Collective on PetscSF
854 
855    Input Parameters:
856 +  A - the star forest
857 .  obj - Optional object
858 -  name - command line option
859 
860    Level: intermediate
861 .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
862 @*/
863 PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
864 {
865   PetscErrorCode ierr;
866 
867   PetscFunctionBegin;
868   PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
869   ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to the communicator's stdout viewer when none is supplied */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Setting up builds the rank tables (roffset/rmine/rremote) printed in INFO_DETAIL below;
     only possible once the graph has been set */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  /* MATLAB-format output is left entirely to the type-specific sf->ops->View at the bottom */
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        /* Nothing more to show; pop the tab pushed above before the early return */
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      /* Synchronized printing so each process's leaves appear in rank order */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      /* One line per leaf edge: local index <- (remote rank, remote root index).
         When sf->mine is NULL, leaves are contiguous and the local index is just i. */
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* Print the per-rank edge lists in increasing rank order; sf->ranks itself is only
           sorted within its distinguished/non-distinguished partitions, so sort a copy and
           keep a permutation to index the original offset arrays */
        PetscMPIInt *tmpranks,*perm;
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  /* Give the implementation a chance to add type-specific output (and handle MATLAB format) */
  if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
943 
944 /*@C
945    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
946 
947    Not Collective
948 
949    Input Parameter:
950 .  sf - star forest
951 
952    Output Parameters:
953 +  nranks - number of ranks referenced by local part
954 .  ranks - array of ranks
955 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
956 .  rmine - concatenated array holding local indices referencing each remote rank
957 -  rremote - concatenated array holding remote indices referenced for each remote rank
958 
959    Level: developer
960 
961 .seealso: PetscSFGetLeafRanks()
962 @*/
963 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
964 {
965   PetscErrorCode ierr;
966 
967   PetscFunctionBegin;
968   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
969   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
970   if (sf->ops->GetRootRanks) {
971     ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
972   } else {
973     /* The generic implementation */
974     if (nranks)  *nranks  = sf->nranks;
975     if (ranks)   *ranks   = sf->ranks;
976     if (roffset) *roffset = sf->roffset;
977     if (rmine)   *rmine   = sf->rmine;
978     if (rremote) *rremote = sf->rremote;
979   }
980   PetscFunctionReturn(0);
981 }
982 
983 /*@C
984    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
985 
986    Not Collective
987 
988    Input Parameter:
989 .  sf - star forest
990 
991    Output Parameters:
992 +  niranks - number of leaf ranks referencing roots on this process
993 .  iranks - array of ranks
994 .  ioffset - offset in irootloc for each rank (length niranks+1)
995 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
996 
997    Level: developer
998 
999 .seealso: PetscSFGetRootRanks()
1000 @*/
1001 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
1002 {
1003   PetscErrorCode ierr;
1004 
1005   PetscFunctionBegin;
1006   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1007   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
1008   if (sf->ops->GetLeafRanks) {
1009     ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
1010   } else {
1011     PetscSFType type;
1012     ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
1013     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
1014   }
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
1019   PetscInt i;
1020   for (i=0; i<n; i++) {
1021     if (needle == list[i]) return PETSC_TRUE;
1022   }
1023   return PETSC_FALSE;
1024 }
1025 
/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Parameters:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Notes:
   Fills sf->nranks, sf->ndranks, sf->ranks, sf->roffset, sf->rmine, and sf->rremote.
   The first sf->ndranks entries of sf->ranks are the referenced ranks that belong to dgroup;
   each of the two partitions is sorted by rank. sf->rmine/sf->rremote store the leaf/root
   indices grouped by destination rank, addressed CSR-style through sf->roffset.

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
  /* Count, per remote rank, how many of this process's leaves reference it (hash table keyed on rank) */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  /* Flatten the table into parallel arrays: ranks[] (0-based) and rcount[] (leaves per rank) */
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the members of dgroup into rank numbers of sf's communicator */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);}
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition independently so PetscFindMPIInt can search them below */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR row offsets from the counts; reset rcount[] to reuse as per-rank fill cursors */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Scatter each leaf's local and remote indices into the slot of its destination rank */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1129 
/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Notes:
   The groups are computed lazily on first call and cached on the sf
   (sf->ingroup / sf->outgroup); subsequent calls return the cached handles.

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  /* sf->nranks is only valid after PetscSFSetUpRanks(); -1 means it has not run */
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    /* Build an auxiliary SF with one root per process and one leaf per referenced rank,
       so the root degree equals the number of processes whose leaves reference me */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks */
    /* Each process gathers the rank numbers of everyone referencing its single root */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* The outgoing group is simply the ranks this process's leaves reference */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1192 
/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Parameter:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Parameter:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

   The multi-SF is built lazily on first call and cached in sf->multi; the returned SF is
   owned by sf and must not be destroyed by the caller.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    sf->multi->multi = sf->multi; /* The multi of a multi-SF is itself */
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    /* inoffset[i] is the prefix sum of root degrees: the first multi-root index owned by root i */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands each leaf a distinct offset within its root's degree range */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    /* Each leaf now points at its private multi-root (same rank, offset index) */
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    sf->multi->multi = sf->multi;
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      /* Learn which leaf rank landed in each multi-root slot, so slots can be permuted into rank order */
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Broadcast the permuted slot numbers back to the leaves and rebuild the graph */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1286 
1287 /*@C
1288    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1289 
1290    Collective
1291 
1292    Input Parameters:
1293 +  sf - original star forest
1294 .  nselected  - number of selected roots on this process
1295 -  selected   - indices of the selected roots on this process
1296 
1297    Output Parameter:
1298 .  esf - new star forest
1299 
1300    Level: advanced
1301 
1302    Note:
1303    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1304    be done by calling PetscSFGetGraph().
1305 
1306 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1307 @*/
1308 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1309 {
1310   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1311   const PetscInt    *ilocal;
1312   signed char       *rootdata,*leafdata,*leafmem;
1313   const PetscSFNode *iremote;
1314   PetscSFNode       *new_iremote;
1315   MPI_Comm          comm;
1316   PetscErrorCode    ierr;
1317 
1318   PetscFunctionBegin;
1319   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1320   PetscSFCheckGraphSet(sf,1);
1321   if (nselected) PetscValidPointer(selected,3);
1322   PetscValidPointer(esf,4);
1323 
1324   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1325   ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1326   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1327   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
1328 
1329   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1330     PetscBool dups;
1331     ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr);
1332     if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1333     for (i=0; i<nselected; i++)
1334       if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots);
1335   }
1336 
1337   if (sf->ops->CreateEmbeddedRootSF) {
1338     ierr = (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);CHKERRQ(ierr);
1339   } else {
1340     /* A generic version of creating embedded sf */
1341     ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
1342     maxlocal = maxleaf - minleaf + 1;
1343     ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
1344     leafdata = leafmem - minleaf;
1345     /* Tag selected roots and bcast to leaves */
1346     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1347     ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
1348     ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
1349 
1350     /* Build esf with leaves that are still connected */
1351     esf_nleaves = 0;
1352     for (i=0; i<nleaves; i++) {
1353       j = ilocal ? ilocal[i] : i;
1354       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1355          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1356       */
1357       esf_nleaves += (leafdata[j] ? 1 : 0);
1358     }
1359     ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
1360     ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
1361     for (i=n=0; i<nleaves; i++) {
1362       j = ilocal ? ilocal[i] : i;
1363       if (leafdata[j]) {
1364         new_ilocal[n]        = j;
1365         new_iremote[n].rank  = iremote[i].rank;
1366         new_iremote[n].index = iremote[i].index;
1367         ++n;
1368       }
1369     }
1370     ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr);
1371     ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr);
1372     ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1373     ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
1374   }
1375   ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 /*@C
1380   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1381 
1382   Collective
1383 
1384   Input Parameters:
1385 + sf - original star forest
1386 . nselected  - number of selected leaves on this process
1387 - selected   - indices of the selected leaves on this process
1388 
1389   Output Parameter:
1390 .  newsf - new star forest
1391 
1392   Level: advanced
1393 
1394 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
1395 @*/
1396 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1397 {
1398   const PetscSFNode *iremote;
1399   PetscSFNode       *new_iremote;
1400   const PetscInt    *ilocal;
1401   PetscInt          i,nroots,*leaves,*new_ilocal;
1402   MPI_Comm          comm;
1403   PetscErrorCode    ierr;
1404 
1405   PetscFunctionBegin;
1406   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1407   PetscSFCheckGraphSet(sf,1);
1408   if (nselected) PetscValidPointer(selected,3);
1409   PetscValidPointer(newsf,4);
1410 
1411   /* Uniq selected[] and put results in leaves[] */
1412   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1413   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1414   ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1415   ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1416   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves);
1417 
1418   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1419   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1420     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1421   } else {
1422     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1423     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1424     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1425     for (i=0; i<nselected; ++i) {
1426       const PetscInt l     = leaves[i];
1427       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1428       new_iremote[i].rank  = iremote[l].rank;
1429       new_iremote[i].index = iremote[l].index;
1430     }
1431     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
1432     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(leaves);CHKERRQ(ierr);
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 /*@C
1439    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1440 
1441    Collective on PetscSF
1442 
1443    Input Parameters:
1444 +  sf - star forest on which to communicate
1445 .  unit - data type associated with each node
1446 .  rootdata - buffer to broadcast
1447 -  op - operation to use for reduction
1448 
1449    Output Parameter:
1450 .  leafdata - buffer to be reduced with values from each leaf's respective root
1451 
1452    Level: intermediate
1453 
1454    Notes:
1455     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1456     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1457     use PetscSFBcastWithMemTypeBegin() instead.
1458 .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
1459 @*/
1460 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1461 {
1462   PetscErrorCode ierr;
1463   PetscMemType   rootmtype,leafmtype;
1464 
1465   PetscFunctionBegin;
1466   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1467   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1468   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1469   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1470   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1471   ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1472   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 /*@C
1477    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()
1478 
1479    Collective on PetscSF
1480 
1481    Input Parameters:
1482 +  sf - star forest on which to communicate
1483 .  unit - data type associated with each node
1484 .  rootmtype - memory type of rootdata
1485 .  rootdata - buffer to broadcast
1486 .  leafmtype - memory type of leafdata
1487 -  op - operation to use for reduction
1488 
1489    Output Parameter:
1490 .  leafdata - buffer to be reduced with values from each leaf's respective root
1491 
1492    Level: intermediate
1493 
1494 .seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
1495 @*/
1496 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1497 {
1498   PetscErrorCode ierr;
1499 
1500   PetscFunctionBegin;
1501   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1502   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1503   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1504   ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1505   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1506   PetscFunctionReturn(0);
1507 }
1508 
1509 /*@C
1510    PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()
1511 
1512    Collective
1513 
1514    Input Parameters:
1515 +  sf - star forest
1516 .  unit - data type
1517 .  rootdata - buffer to broadcast
1518 -  op - operation to use for reduction
1519 
1520    Output Parameter:
1521 .  leafdata - buffer to be reduced with values from each leaf's respective root
1522 
1523    Level: intermediate
1524 
1525 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1526 @*/
1527 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1528 {
1529   PetscErrorCode ierr;
1530 
1531   PetscFunctionBegin;
1532   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1533   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
1534   ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
1535   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 /*@C
1540    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1541 
1542    Collective
1543 
1544    Input Parameters:
1545 +  sf - star forest
1546 .  unit - data type
1547 .  leafdata - values to reduce
1548 -  op - reduction operation
1549 
1550    Output Parameter:
1551 .  rootdata - result of reduction of values from all leaves of each root
1552 
1553    Level: intermediate
1554 
1555    Notes:
1556     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1557     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1558     use PetscSFReduceWithMemTypeBegin() instead.
1559 
1560 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1561 @*/
1562 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1563 {
1564   PetscErrorCode ierr;
1565   PetscMemType   rootmtype,leafmtype;
1566 
1567   PetscFunctionBegin;
1568   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1569   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1570   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1571   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1572   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1573   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1574   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 /*@C
1579    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1580 
1581    Collective
1582 
1583    Input Parameters:
1584 +  sf - star forest
1585 .  unit - data type
1586 .  leafmtype - memory type of leafdata
1587 .  leafdata - values to reduce
1588 .  rootmtype - memory type of rootdata
1589 -  op - reduction operation
1590 
1591    Output Parameter:
1592 .  rootdata - result of reduction of values from all leaves of each root
1593 
1594    Level: intermediate
1595 
1596 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1597 @*/
1598 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1599 {
1600   PetscErrorCode ierr;
1601 
1602   PetscFunctionBegin;
1603   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1604   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1605   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1606   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1607   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 /*@C
1612    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1613 
1614    Collective
1615 
1616    Input Parameters:
1617 +  sf - star forest
1618 .  unit - data type
1619 .  leafdata - values to reduce
1620 -  op - reduction operation
1621 
1622    Output Parameter:
1623 .  rootdata - result of reduction of values from all leaves of each root
1624 
1625    Level: intermediate
1626 
1627 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1628 @*/
1629 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1630 {
1631   PetscErrorCode ierr;
1632 
1633   PetscFunctionBegin;
1634   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1635   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
1636   ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1637   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 /*@C
1642    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1643 
1644    Collective
1645 
1646    Input Parameters:
1647 +  sf - star forest
1648 .  unit - data type
1649 .  leafdata - leaf values to use in reduction
1650 -  op - operation to use for reduction
1651 
1652    Output Parameters:
1653 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1654 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1655 
1656    Level: advanced
1657 
1658    Note:
1659    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1660    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1661    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1662    integers.
1663 
1664 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1665 @*/
1666 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1667 {
1668   PetscErrorCode ierr;
1669   PetscMemType   rootmtype,leafmtype,leafupdatemtype;
1670 
1671   PetscFunctionBegin;
1672   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1673   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1674   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1675   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1676   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1677   ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
1678   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1679   ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
1680   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1681   PetscFunctionReturn(0);
1682 }
1683 
/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Delegate completion to the implementation; memory types were resolved in PetscSFFetchAndOpBegin() */
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1714 
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree non-NULL while degreeknown is false means a Begin without a matching End */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Summing a 1 from every leaf into its root yields each root's degree */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  *degree = NULL; /* the result only becomes available in PetscSFComputeDegreeEnd() */
  PetscFunctionReturn(0);
}
1754 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated (at least one entry) by PetscSFComputeDegreeBegin(); NULL means no matching Begin */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    /* Cache the result; subsequent Begin/End pairs return sf->degree without recomputation */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1790 
1791 /*@C
1792    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1793    Each multi-root is assigned index of the corresponding original root.
1794 
1795    Collective
1796 
1797    Input Parameters:
1798 +  sf - star forest
1799 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1800 
1801    Output Parameters:
1802 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1803 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1804 
1805    Level: developer
1806 
1807    Notes:
1808    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1809 
1810 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1811 @*/
1812 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1813 {
1814   PetscSF             msf;
1815   PetscInt            i, j, k, nroots, nmroots;
1816   PetscErrorCode      ierr;
1817 
1818   PetscFunctionBegin;
1819   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1820   ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
1821   if (nroots) PetscValidIntPointer(degree,2);
1822   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1823   PetscValidPointer(multiRootsOrigNumbering,4);
1824   ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
1825   ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
1826   ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
1827   for (i=0,j=0,k=0; i<nroots; i++) {
1828     if (!degree[i]) continue;
1829     for (j=0; j<degree[i]; j++,k++) {
1830       (*multiRootsOrigNumbering)[k] = i;
1831     }
1832   }
1833   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1834   if (nMultiRoots) *nMultiRoots = nmroots;
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 /*@C
1839    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1840 
1841    Collective
1842 
1843    Input Parameters:
1844 +  sf - star forest
1845 .  unit - data type
1846 -  leafdata - leaf data to gather to roots
1847 
1848    Output Parameter:
1849 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1850 
1851    Level: intermediate
1852 
1853 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1854 @*/
1855 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1856 {
1857   PetscErrorCode ierr;
1858   PetscSF        multi = NULL;
1859 
1860   PetscFunctionBegin;
1861   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1862   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1863   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1864   ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr);
1865   PetscFunctionReturn(0);
1866 }
1867 
1868 /*@C
1869    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1870 
1871    Collective
1872 
1873    Input Parameters:
1874 +  sf - star forest
1875 .  unit - data type
1876 -  leafdata - leaf data to gather to roots
1877 
1878    Output Parameter:
1879 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1880 
1881    Level: intermediate
1882 
1883 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1884 @*/
1885 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1886 {
1887   PetscErrorCode ierr;
1888   PetscSF        multi = NULL;
1889 
1890   PetscFunctionBegin;
1891   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1892   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1893   ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr);
1894   PetscFunctionReturn(0);
1895 }
1896 
1897 /*@C
1898    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1899 
1900    Collective
1901 
1902    Input Parameters:
1903 +  sf - star forest
1904 .  unit - data type
1905 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1906 
1907    Output Parameter:
1908 .  leafdata - leaf data to be update with personal data from each respective root
1909 
1910    Level: intermediate
1911 
1912 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1913 @*/
1914 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1915 {
1916   PetscErrorCode ierr;
1917   PetscSF        multi = NULL;
1918 
1919   PetscFunctionBegin;
1920   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1921   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1922   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1923   ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 /*@C
1928    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1929 
1930    Collective
1931 
1932    Input Parameters:
1933 +  sf - star forest
1934 .  unit - data type
1935 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1936 
1937    Output Parameter:
1938 .  leafdata - leaf data to be update with personal data from each respective root
1939 
1940    Level: intermediate
1941 
1942 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1943 @*/
1944 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1945 {
1946   PetscErrorCode ierr;
1947   PetscSF        multi = NULL;
1948 
1949   PetscFunctionBegin;
1950   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1951   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1952   ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1957 {
1958   PetscInt        i, n, nleaves;
1959   const PetscInt *ilocal = NULL;
1960   PetscHSetI      seen;
1961   PetscErrorCode  ierr;
1962 
1963   PetscFunctionBegin;
1964   if (PetscDefined(USE_DEBUG)) {
1965     ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
1966     ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
1967     for (i = 0; i < nleaves; i++) {
1968       const PetscInt leaf = ilocal ? ilocal[i] : i;
1969       ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
1970     }
1971     ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
1972     if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1973     ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
1974   }
1975   PetscFunctionReturn(0);
1976 }
1977 
/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Both inputs must be true star forests; these checks only run in debug builds */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* Re-lay sfA's remote roots in sfB root order; slots with no incoming sfA edge get rank=-1 as a "no path" marker */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf outside sfB's root space cannot continue downward */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Push the (reordered) sfA root identities through sfB so every sfB leaf learns its ultimate sfA root (or rank=-1) */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: sfB uses contiguous leaves and every one of them is connected, so sfBA can keep ilocal == NULL */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB passes to sfBA via PETSC_OWN_POINTER below */
  } else {
    /* Compact pass: keep only the connected leaves, recording their local indices explicitly */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2067 
/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Both inputs must be true star forests; these checks only run in debug builds */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not meet.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPI_REPLACE;
#endif

  /* Lay sfA's remote roots over sfB's leaf range; uncovered slots keep rank=-1 as a "no path" marker */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* sfA leaf outside sfB's leaf range has no path */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* ilocal==NULL: sfA's leaves are the contiguous range 0..numLeavesA-1 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Pull sfA's root identities back through sfB: each sfB root learns the sfA root attached to its leaf */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only sfB roots that received a path, recording their indices as sfBA's leaves */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2171 
/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF, living on PETSC_COMM_SELF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt     *ilocal;
  const PetscSFNode  *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode        *liremote;
  PetscSF            lsf;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Prefer the implementation's specialized routine when one is provided */
  if (sf->ops->CreateLocalSF) {
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count leaves whose root lives on this rank, to size the allocations */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the intra-process edges, remapping the owner rank to 0 on the self communicator */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}
2223 
2224 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2225 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2226 {
2227   PetscErrorCode     ierr;
2228   PetscMemType       rootmtype,leafmtype;
2229 
2230   PetscFunctionBegin;
2231   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2232   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
2233   ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
2234   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
2235   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
2236   if (sf->ops->BcastToZero) {
2237     ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
2238   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2239   ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
2240   PetscFunctionReturn(0);
2241 }
2242 
2243