xref: /petsc/src/vec/is/sf/interface/sf.c (revision 98921bda46e76d7aaed9e0138c5ff9d0ce93f355)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petscctable.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <cuda_runtime.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 #endif
13 
#if defined(PETSC_CLANG_STATIC_ANALYZER)
void PetscSFCheckGraphSet(PetscSF,int);
#else
#if defined(PETSC_USE_DEBUG)
/* Error out (debug mode only) if the graph of the SF passed as argument number 'arg' has not been set yet */
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
#endif

/* String forms of the PetscSFDuplicateOption enum, used for viewing and command-line parsing */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
28 
/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Parameter:
.  comm - communicator on which the star forest will operate

   Output Parameter:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window    -Use MPI-3 one-sided window for communication
-  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscErrorCode ierr;
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  ierr = PetscSFInitializePackage();CHKERRQ(ierr);

  ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);

  /* Negative counts and an empty (inverted) leaf range mark the graph as "not set yet";
     see PetscSFReset(), which restores the same state */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi; /* inherit the global setting */
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->unknown_input_stream= PETSC_FALSE;
  #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
    b->backend = PETSCSF_BACKEND_KOKKOS;
  #elif defined(PETSC_HAVE_CUDA)
    b->backend = PETSCSF_BACKEND_CUDA;
  #elif defined(PETSC_HAVE_HIP)
    b->backend = PETSCSF_BACKEND_HIP;
  #endif

  #if defined(PETSC_HAVE_NVSHMEM)
    b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
    b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
    ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);CHKERRQ(ierr);
  #endif
#endif
  /* VecScatter-related state; sizes unknown until the scatter is set up */
  b->vscat.from_n = -1;
  b->vscat.to_n   = -1;
  b->vscat.unit   = MPIU_SCALAR;
 *sf = b;
  PetscFunctionReturn(0);
}
99 
/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Parameter:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the type-specific implementation release its own data first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Return the graph description to the "not set" state established by PetscSFCreate() */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);}
  /* Break the multi-SF's back-reference to us before destroying it, so destruction does not recurse */
  if (sf->multi) sf->multi->multi = NULL;
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);

 #if defined(PETSC_HAVE_DEVICE)
  /* Release the device-side mirrors of rmine */
  for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
 #endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
145 
/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, basic, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

  Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* Nothing to do if the SF already has the requested type */
  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  /* Look up the registered constructor for the requested type */
  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  /* Clear the ops table so the new type only installs what it implements */
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
189 
/*@C
  PetscSFGetType - Get the PetscSF communication implementation

  Not Collective

  Input Parameter:
. sf  - the PetscSF context

  Output Parameter:
. type - the PetscSF type name

  Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  /* The returned string is owned by the object; the caller must not free it.
     It is NULL if no type has been set yet. */
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}
213 
/*@C
   PetscSFDestroy - destroy star forest

   Collective

   Input Parameter:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually destroys the object */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  /* Recursively destroy the local SF used by the vecscatter layer (NULL-safe) */
  ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr);
  /* A block size > 1 means vscat.unit was created with MPI_Type_dup/contiguous and must be freed */
  if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRMPI(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
241 
/* Validate the currently-set graph: every remote rank must be in [0,size), every remote index
   and every leaf location must be non-negative. Active only in debug builds; a no-op otherwise
   or when no graph has been set. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
  PetscInt           i, nleaves;
  PetscMPIInt        size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf = ilocal ? ilocal[i] : i; /* NULL ilocal means contiguous leaves */
    if (rank < 0 || rank >= size) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i);
    if (leaf < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
}
264 
265 /*@
266    PetscSFSetUp - set up communication structures
267 
268    Collective
269 
270    Input Parameter:
271 .  sf - star forest communication object
272 
273    Level: beginner
274 
275 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
276 @*/
277 PetscErrorCode PetscSFSetUp(PetscSF sf)
278 {
279   PetscErrorCode ierr;
280 
281   PetscFunctionBegin;
282   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
283   PetscSFCheckGraphSet(sf,1);
284   if (sf->setupcalled) PetscFunctionReturn(0);
285   ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
286   ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
287   if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */
288   if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
289 #if defined(PETSC_HAVE_CUDA)
290   if (sf->backend == PETSCSF_BACKEND_CUDA) {
291     sf->ops->Malloc = PetscSFMalloc_CUDA;
292     sf->ops->Free   = PetscSFFree_CUDA;
293   }
294 #endif
295 #if defined(PETSC_HAVE_HIP)
296   if (sf->backend == PETSCSF_BACKEND_HIP) {
297     sf->ops->Malloc = PetscSFMalloc_HIP;
298     sf->ops->Free   = PetscSFFree_HIP;
299   }
300 #endif
301 
302 #
303 #if defined(PETSC_HAVE_KOKKOS)
304   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
305     sf->ops->Malloc = PetscSFMalloc_Kokkos;
306     sf->ops->Free   = PetscSFFree_Kokkos;
307   }
308 #endif
309   ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
310   sf->setupcalled = PETSC_TRUE;
311   PetscFunctionReturn(0);
312 }
313 
314 /*@
315    PetscSFSetFromOptions - set PetscSF options using the options database
316 
317    Logically Collective
318 
319    Input Parameter:
320 .  sf - star forest
321 
322    Options Database Keys:
323 +  -sf_type               - implementation type, see PetscSFSetType()
324 .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
325 .  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
326                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
327                             If true, this option only works with -use_gpu_aware_mpi 1.
328 .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
329                                If true, this option only works with -use_gpu_aware_mpi 1.
330 
331 -  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
332                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
333                               the only available is kokkos.
334 
335    Level: intermediate
336 @*/
337 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
338 {
339   PetscSFType    deft;
340   char           type[256];
341   PetscErrorCode ierr;
342   PetscBool      flg;
343 
344   PetscFunctionBegin;
345   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
346   ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
347   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
348   ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
349   ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
350   ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
351  #if defined(PETSC_HAVE_DEVICE)
352   {
353     char        backendstr[32] = {0};
354     PetscBool   isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
355     /* Change the defaults set in PetscSFCreate() with command line options */
356     ierr = PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);CHKERRQ(ierr);
357     ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
358     ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr);
359     ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr);
360     ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr);
361     ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr);
362   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
363     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
364     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
365     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
366     else if (set) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
367   #elif defined(PETSC_HAVE_KOKKOS)
368     if (set && !isKokkos) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
369    #endif
370   }
371  #endif
372   if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
373   ierr = PetscOptionsEnd();CHKERRQ(ierr);
374   PetscFunctionReturn(0);
375 }
376 
/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Parameters:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF is built lazily using this flag, so the flag cannot change once it exists */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}
399 
/*@C
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Parameters:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots);
  if (nleaves < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    /* One pass to find the leaf range and detect the identity permutation */
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* ilocal is the identity [0,nleaves), equivalent to NULL (contiguous storage); drop it */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    /* NULL ilocal (or no leaves) means contiguous leaves [0,nleaves) */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  /* Take ownership of / copy / alias ilocal as requested */
  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Caller retains ownership; mine_alloc stays NULL so we never free it */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  /* Same three-way ownership handling for iremote */
  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
513 
/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2);
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (PetscUnlikely((pattern < PETSCSF_PATTERN_ALLGATHER) || (pattern > PETSCSF_PATTERN_ALLTOALL))) SETERRQ(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* Build an internal layout with 'size' roots per rank; the user's map is ignored */
    type = PETSCSFALLTOALL;
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRMPI(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      /* Uneven local sizes require the "v" (variable-count) variants */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
613 
/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Parameter:
.  sf - star forest to invert

   Output Parameter:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf advertises its own (rank,index) ... */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* ... and roots start out "unreached" so untouched ones can be detected below */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Reduce leaf identities onto the roots; with degree 1, MPI_REPLACE leaves each root
     holding the location of its unique leaf, i.e. the inverted edges */
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* every root was reached: contiguous leaves in the inverse */
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  /* newilocal ownership passes to isf (PETSC_OWN_POINTER); roots is copied and freed below */
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
683 
/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Parameters:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Parameter:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;
  MPI_Datatype   dtype=MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graphs are deep-copied */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graphs are reconstructed from the stored layout and pattern */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRMPI(ierr);}
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream= sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}
743 
744 /*@C
745    PetscSFGetGraph - Get the graph specifying a parallel star forest
746 
747    Not Collective
748 
749    Input Parameter:
750 .  sf - star forest
751 
752    Output Parameters:
753 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
754 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
755 .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
756 -  iremote - remote locations of root vertices for each leaf on the current process
757 
758    Notes:
759      We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
760 
761    Fortran Notes:
762      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
763      want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
764 
765      To check for a NULL ilocal use
766 $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
767 
768    Level: intermediate
769 
770 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
771 @*/
772 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
773 {
774   PetscErrorCode ierr;
775 
776   PetscFunctionBegin;
777   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
778   if (sf->ops->GetGraph) {
779     ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
780   } else {
781     if (nroots) *nroots = sf->nroots;
782     if (nleaves) *nleaves = sf->nleaves;
783     if (ilocal) *ilocal = sf->mine;
784     if (iremote) *iremote = sf->remote;
785   }
786   PetscFunctionReturn(0);
787 }
788 
789 /*@
790    PetscSFGetLeafRange - Get the active leaf ranges
791 
792    Not Collective
793 
794    Input Parameter:
795 .  sf - star forest
796 
797    Output Parameters:
798 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
799 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
800 
801    Level: developer
802 
803 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
804 @*/
805 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
806 {
807   PetscFunctionBegin;
808   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
809   PetscSFCheckGraphSet(sf,1);
810   if (minleaf) *minleaf = sf->minleaf;
811   if (maxleaf) *maxleaf = sf->maxleaf;
812   PetscFunctionReturn(0);
813 }
814 
815 /*@C
816    PetscSFViewFromOptions - View from Options
817 
818    Collective on PetscSF
819 
820    Input Parameters:
821 +  A - the star forest
822 .  obj - Optional object
823 -  name - command line option
824 
825    Level: intermediate
826 .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
827 @*/
828 PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
829 {
830   PetscErrorCode ierr;
831 
832   PetscFunctionBegin;
833   PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
834   ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
835   PetscFunctionReturn(0);
836 }
837 
838 /*@C
839    PetscSFView - view a star forest
840 
841    Collective
842 
843    Input Parameters:
844 +  sf - star forest
845 -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
846 
847    Level: beginner
848 
849 .seealso: PetscSFCreate(), PetscSFSetGraph()
850 @*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is given */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Viewing uses the per-rank tables built by setup, but setup needs a graph */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  /* MATLAB-format ASCII output is left entirely to the implementation's View callback below */
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      /* Synchronized printing keeps per-rank output ordered by rank */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      /* One line per leaf: local leaf index <- (root rank, root index) */
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* Detail mode: also print edges grouped by remote rank, with ranks listed in sorted order.
           sf->ranks is sorted within each of its two partitions (distinguished/other), so sort a
           copy together with a permutation to visit the roffset ranges in global rank order. */
        PetscMPIInt *tmpranks,*perm;
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  /* Give the implementation a chance to print type-specific information */
  if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
908 
909 /*@C
910    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
911 
912    Not Collective
913 
914    Input Parameter:
915 .  sf - star forest
916 
917    Output Parameters:
918 +  nranks - number of ranks referenced by local part
919 .  ranks - array of ranks
920 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
921 .  rmine - concatenated array holding local indices referencing each remote rank
922 -  rremote - concatenated array holding remote indices referenced for each remote rank
923 
924    Level: developer
925 
926 .seealso: PetscSFGetLeafRanks()
927 @*/
928 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
929 {
930   PetscErrorCode ierr;
931 
932   PetscFunctionBegin;
933   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
934   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
935   if (sf->ops->GetRootRanks) {
936     ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
937   } else {
938     /* The generic implementation */
939     if (nranks)  *nranks  = sf->nranks;
940     if (ranks)   *ranks   = sf->ranks;
941     if (roffset) *roffset = sf->roffset;
942     if (rmine)   *rmine   = sf->rmine;
943     if (rremote) *rremote = sf->rremote;
944   }
945   PetscFunctionReturn(0);
946 }
947 
948 /*@C
949    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
950 
951    Not Collective
952 
953    Input Parameter:
954 .  sf - star forest
955 
956    Output Parameters:
957 +  niranks - number of leaf ranks referencing roots on this process
958 .  iranks - array of ranks
959 .  ioffset - offset in irootloc for each rank (length niranks+1)
960 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
961 
962    Level: developer
963 
964 .seealso: PetscSFGetRootRanks()
965 @*/
966 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
967 {
968   PetscErrorCode ierr;
969 
970   PetscFunctionBegin;
971   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
972   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
973   if (sf->ops->GetLeafRanks) {
974     ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
975   } else {
976     PetscSFType type;
977     ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
978     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
979   }
980   PetscFunctionReturn(0);
981 }
982 
983 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
984   PetscInt i;
985   for (i=0; i<n; i++) {
986     if (needle == list[i]) return PETSC_TRUE;
987   }
988   return PETSC_FALSE;
989 }
990 
991 /*@C
992    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
993 
994    Collective
995 
996    Input Parameters:
997 +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
998 -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
999 
1000    Level: developer
1001 
1002 .seealso: PetscSFGetRootRanks()
1003 @*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
  /* Count how many of this process's leaves reference each remote root rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  /* rmine/rremote hold all leaves concatenated by owning rank; roffset partitions them */
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  /* Drain the table into parallel arrays: ranks[] (rank ids) and rcount[] (leaf counts) */
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    /* Translate the members of the distinguished group into rank numbers in the SF's communicator */
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);}
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  /* Two-pointer partition: sf->ndranks advances from the front past distinguished entries,
     i retreats from the back past non-distinguished ones; out-of-place pairs are swapped. */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition by rank id so that PetscFindMPIInt() below can binary-search it */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* roffset[] = prefix sum of per-rank leaf counts; rcount[] is reset to serve as fill cursors */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Scatter each leaf into its rank's segment of rmine[]/rremote[]; cache the last
     (orank -> irank) lookup since consecutive leaves often share the same root rank */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1094 
1095 /*@C
1096    PetscSFGetGroups - gets incoming and outgoing process groups
1097 
1098    Collective
1099 
1100    Input Parameter:
1101 .  sf - star forest
1102 
1103    Output Parameters:
1104 +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1105 -  outgoing - group of destination processes for outgoing edges (roots that I reference)
1106 
1107    Level: developer
1108 
1109 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1110 @*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  /* Both groups are computed lazily and cached on the SF (freed with the SF, not by the caller) */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    /* Auxiliary SF "bgcount": one root on every process, and one leaf per rank this
       process references; the root's degree is then the number of incoming ranks */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks */
    /* Each leaf sends its own rank id; the gather collects the rank ids of all referencing processes */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  /* The outgoing group is simply the already-computed list of root ranks this process references */
  if (sf->outgroup == MPI_GROUP_NULL) {
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRMPI(ierr);
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1157 
1158 /*@
1159    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1160 
1161    Collective
1162 
1163    Input Parameter:
1164 .  sf - star forest that may contain roots with 0 or with more than 1 vertex
1165 
1166    Output Parameter:
1167 .  multi - star forest with split roots, such that each root has degree exactly 1
1168 
1169    Level: developer
1170 
1171    Notes:
1172 
1173    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1174    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1175    edge, it is a candidate for future optimization that might involve its removal.
1176 
1177 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1178 @*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    sf->multi->multi = sf->multi; /* the multi-SF serves as its own multi-SF */
    PetscFunctionReturn(0);
  }
  /* The multi-SF is built lazily and cached on the SF */
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    /* indegree[r] = number of leaves attached to root r */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    /* inoffset[] = prefix sum of indegrees: each original root r owns multi-root
       slots [inoffset[r], inoffset[r+1]) */
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Each leaf fetches-and-increments its root's cursor, so every leaf receives a
       distinct slot number (in outoffset) within its root's range */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    /* Same leaves as sf, but each points at its private multi-root slot (degree exactly 1) */
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    sf->multi->multi = sf->multi;
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Reduce each leaf's rank id into its multi-root slot so the owner learns which rank filled each slot */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its new (rank-ordered) slot and rebuild the multi-SF graph with it */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1251 
1252 /*@C
1253    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1254 
1255    Collective
1256 
1257    Input Parameters:
1258 +  sf - original star forest
1259 .  nselected  - number of selected roots on this process
1260 -  selected   - indices of the selected roots on this process
1261 
1262    Output Parameter:
1263 .  esf - new star forest
1264 
1265    Level: advanced
1266 
1267    Note:
1268    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1269    be done by calling PetscSFGetGraph().
1270 
1271 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1272 @*/
1273 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1274 {
1275   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1276   const PetscInt    *ilocal;
1277   signed char       *rootdata,*leafdata,*leafmem;
1278   const PetscSFNode *iremote;
1279   PetscSFNode       *new_iremote;
1280   MPI_Comm          comm;
1281   PetscErrorCode    ierr;
1282 
1283   PetscFunctionBegin;
1284   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1285   PetscSFCheckGraphSet(sf,1);
1286   if (nselected) PetscValidPointer(selected,3);
1287   PetscValidPointer(esf,4);
1288 
1289   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1290   ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1291   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1292   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
1293 
1294   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1295     PetscBool dups;
1296     ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr);
1297     if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1298     for (i=0; i<nselected; i++)
1299       if (selected[i] < 0 || selected[i] >= nroots) SETERRQ(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots);
1300   }
1301 
1302   if (sf->ops->CreateEmbeddedRootSF) {
1303     ierr = (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);CHKERRQ(ierr);
1304   } else {
1305     /* A generic version of creating embedded sf */
1306     ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
1307     maxlocal = maxleaf - minleaf + 1;
1308     ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
1309     leafdata = leafmem - minleaf;
1310     /* Tag selected roots and bcast to leaves */
1311     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1312     ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
1313     ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
1314 
1315     /* Build esf with leaves that are still connected */
1316     esf_nleaves = 0;
1317     for (i=0; i<nleaves; i++) {
1318       j = ilocal ? ilocal[i] : i;
1319       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1320          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1321       */
1322       esf_nleaves += (leafdata[j] ? 1 : 0);
1323     }
1324     ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
1325     ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
1326     for (i=n=0; i<nleaves; i++) {
1327       j = ilocal ? ilocal[i] : i;
1328       if (leafdata[j]) {
1329         new_ilocal[n]        = j;
1330         new_iremote[n].rank  = iremote[i].rank;
1331         new_iremote[n].index = iremote[i].index;
1332         ++n;
1333       }
1334     }
1335     ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr);
1336     ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr);
1337     ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1338     ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
1339   }
1340   ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 /*@C
1345   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1346 
1347   Collective
1348 
1349   Input Parameters:
1350 + sf - original star forest
1351 . nselected  - number of selected leaves on this process
1352 - selected   - indices of the selected leaves on this process
1353 
1354   Output Parameter:
1355 .  newsf - new star forest
1356 
1357   Level: advanced
1358 
1359 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
1360 @*/
1361 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1362 {
1363   const PetscSFNode *iremote;
1364   PetscSFNode       *new_iremote;
1365   const PetscInt    *ilocal;
1366   PetscInt          i,nroots,*leaves,*new_ilocal;
1367   MPI_Comm          comm;
1368   PetscErrorCode    ierr;
1369 
1370   PetscFunctionBegin;
1371   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1372   PetscSFCheckGraphSet(sf,1);
1373   if (nselected) PetscValidPointer(selected,3);
1374   PetscValidPointer(newsf,4);
1375 
1376   /* Uniq selected[] and put results in leaves[] */
1377   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1378   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1379   ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1380   ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1381   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves);
1382 
1383   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1384   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1385     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1386   } else {
1387     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1388     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1389     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1390     for (i=0; i<nselected; ++i) {
1391       const PetscInt l     = leaves[i];
1392       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1393       new_iremote[i].rank  = iremote[l].rank;
1394       new_iremote[i].index = iremote[l].index;
1395     }
1396     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
1397     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1398   }
1399   ierr = PetscFree(leaves);CHKERRQ(ierr);
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 /*@C
1404    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1405 
1406    Collective on PetscSF
1407 
1408    Input Parameters:
1409 +  sf - star forest on which to communicate
1410 .  unit - data type associated with each node
1411 .  rootdata - buffer to broadcast
1412 -  op - operation to use for reduction
1413 
1414    Output Parameter:
1415 .  leafdata - buffer to be reduced with values from each leaf's respective root
1416 
1417    Level: intermediate
1418 
1419    Notes:
1420     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
    are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1422     use PetscSFBcastWithMemTypeBegin() instead.
1423 .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
1424 @*/
1425 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1426 {
1427   PetscErrorCode ierr;
1428   PetscMemType   rootmtype,leafmtype;
1429 
1430   PetscFunctionBegin;
1431   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1432   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1433   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1434   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1435   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1436   ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1437   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 /*@C
1442    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()
1443 
1444    Collective on PetscSF
1445 
1446    Input Parameters:
1447 +  sf - star forest on which to communicate
1448 .  unit - data type associated with each node
1449 .  rootmtype - memory type of rootdata
1450 .  rootdata - buffer to broadcast
1451 .  leafmtype - memory type of leafdata
1452 -  op - operation to use for reduction
1453 
1454    Output Parameter:
1455 .  leafdata - buffer to be reduced with values from each leaf's respective root
1456 
1457    Level: intermediate
1458 
1459 .seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
1460 @*/
1461 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1462 {
1463   PetscErrorCode ierr;
1464 
1465   PetscFunctionBegin;
1466   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1467   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1468   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1469   ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1470   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 /*@C
1475    PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()
1476 
1477    Collective
1478 
1479    Input Parameters:
1480 +  sf - star forest
1481 .  unit - data type
1482 .  rootdata - buffer to broadcast
1483 -  op - operation to use for reduction
1484 
1485    Output Parameter:
1486 .  leafdata - buffer to be reduced with values from each leaf's respective root
1487 
1488    Level: intermediate
1489 
1490 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1491 @*/
1492 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1493 {
1494   PetscErrorCode ierr;
1495 
1496   PetscFunctionBegin;
1497   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1498   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
1499   ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
1500   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 /*@C
1505    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1506 
1507    Collective
1508 
1509    Input Parameters:
1510 +  sf - star forest
1511 .  unit - data type
1512 .  leafdata - values to reduce
1513 -  op - reduction operation
1514 
1515    Output Parameter:
1516 .  rootdata - result of reduction of values from all leaves of each root
1517 
1518    Level: intermediate
1519 
1520    Notes:
1521     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1522     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1523     use PetscSFReduceWithMemTypeBegin() instead.
1524 
1525 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1526 @*/
1527 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1528 {
1529   PetscErrorCode ierr;
1530   PetscMemType   rootmtype,leafmtype;
1531 
1532   PetscFunctionBegin;
1533   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1534   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1535   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1536   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1537   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1538   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1539   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 /*@C
1544    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1545 
1546    Collective
1547 
1548    Input Parameters:
1549 +  sf - star forest
1550 .  unit - data type
1551 .  leafmtype - memory type of leafdata
1552 .  leafdata - values to reduce
1553 .  rootmtype - memory type of rootdata
1554 -  op - reduction operation
1555 
1556    Output Parameter:
1557 .  rootdata - result of reduction of values from all leaves of each root
1558 
1559    Level: intermediate
1560 
1561 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1562 @*/
1563 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1564 {
1565   PetscErrorCode ierr;
1566 
1567   PetscFunctionBegin;
1568   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1569   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1570   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1571   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1572   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 /*@C
1577    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1578 
1579    Collective
1580 
1581    Input Parameters:
1582 +  sf - star forest
1583 .  unit - data type
1584 .  leafdata - values to reduce
1585 -  op - reduction operation
1586 
1587    Output Parameter:
1588 .  rootdata - result of reduction of values from all leaves of each root
1589 
1590    Level: intermediate
1591 
1592 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1593 @*/
1594 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1595 {
1596   PetscErrorCode ierr;
1597 
1598   PetscFunctionBegin;
1599   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1600   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
1601   ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1602   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 /*@C
1607    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1608 
1609    Collective
1610 
1611    Input Parameters:
1612 +  sf - star forest
1613 .  unit - data type
1614 .  leafdata - leaf values to use in reduction
1615 -  op - operation to use for reduction
1616 
1617    Output Parameters:
1618 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1619 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1620 
1621    Level: advanced
1622 
1623    Note:
1624    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1625    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1626    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1627    integers.
1628 
1629 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1630 @*/
1631 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1632 {
1633   PetscErrorCode ierr;
1634   PetscMemType   rootmtype,leafmtype,leafupdatemtype;
1635 
1636   PetscFunctionBegin;
1637   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1638   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1639   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1640   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1641   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1642   ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
1643   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1644   ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
1645   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1646   PetscFunctionReturn(0);
1647 }
1648 
1649 /*@C
1650    PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1651 
1652    Collective
1653 
1654    Input Parameters:
1655 +  sf - star forest
1656 .  unit - data type
1657 .  rootmtype - memory type of rootdata
1658 .  leafmtype - memory type of leafdata
1659 .  leafdata - leaf values to use in reduction
1660 .  leafupdatemtype - memory type of leafupdate
1661 -  op - operation to use for reduction
1662 
1663    Output Parameters:
1664 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1665 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1666 
1667    Level: advanced
1668 
1669    Note: See PetscSFFetchAndOpBegin() for more details.
1670 
1671 .seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1672 @*/
1673 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
1674 {
1675   PetscErrorCode ierr;
1676 
1677   PetscFunctionBegin;
1678   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1679   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1680   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1681   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1682   ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
1683   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 /*@C
1688    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1689 
1690    Collective
1691 
1692    Input Parameters:
1693 +  sf - star forest
1694 .  unit - data type
1695 .  leafdata - leaf values to use in reduction
1696 -  op - operation to use for reduction
1697 
1698    Output Parameters:
1699 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1700 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1701 
1702    Level: advanced
1703 
1704 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1705 @*/
1706 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1707 {
1708   PetscErrorCode ierr;
1709 
1710   PetscFunctionBegin;
1711   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1712   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1713   ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
1714   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1715   PetscFunctionReturn(0);
1716 }
1717 
1718 /*@C
1719    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1720 
1721    Collective
1722 
1723    Input Parameter:
1724 .  sf - star forest
1725 
1726    Output Parameter:
1727 .  degree - degree of each root vertex
1728 
1729    Level: advanced
1730 
1731    Notes:
1732    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1733 
1734 .seealso: PetscSFGatherBegin()
1735 @*/
1736 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1737 {
1738   PetscErrorCode ierr;
1739 
1740   PetscFunctionBegin;
1741   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1742   PetscSFCheckGraphSet(sf,1);
1743   PetscValidPointer(degree,2);
1744   if (!sf->degreeknown) {
1745     PetscInt i, nroots = sf->nroots, maxlocal;
1746     if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1747     maxlocal = sf->maxleaf-sf->minleaf+1;
1748     ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
1749     ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1750     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1751     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1752     ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
1753   }
1754   *degree = NULL;
1755   PetscFunctionReturn(0);
1756 }
1757 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is only allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Finish the leaf-to-root MPI_SUM of ones started in PetscSFComputeDegreeBegin() */
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE; /* cache the result so subsequent calls return immediately */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1793 
1794 /*@C
1795    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1796    Each multi-root is assigned index of the corresponding original root.
1797 
1798    Collective
1799 
1800    Input Parameters:
1801 +  sf - star forest
1802 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1803 
1804    Output Parameters:
1805 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1806 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1807 
1808    Level: developer
1809 
1810    Notes:
1811    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1812 
1813 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1814 @*/
1815 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1816 {
1817   PetscSF             msf;
1818   PetscInt            i, j, k, nroots, nmroots;
1819   PetscErrorCode      ierr;
1820 
1821   PetscFunctionBegin;
1822   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1823   ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
1824   if (nroots) PetscValidIntPointer(degree,2);
1825   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1826   PetscValidPointer(multiRootsOrigNumbering,4);
1827   ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
1828   ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
1829   ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
1830   for (i=0,j=0,k=0; i<nroots; i++) {
1831     if (!degree[i]) continue;
1832     for (j=0; j<degree[i]; j++,k++) {
1833       (*multiRootsOrigNumbering)[k] = i;
1834     }
1835   }
1836   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1837   if (nMultiRoots) *nMultiRoots = nmroots;
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 /*@C
1842    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1843 
1844    Collective
1845 
1846    Input Parameters:
1847 +  sf - star forest
1848 .  unit - data type
1849 -  leafdata - leaf data to gather to roots
1850 
1851    Output Parameter:
1852 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1853 
1854    Level: intermediate
1855 
1856 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1857 @*/
1858 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1859 {
1860   PetscErrorCode ierr;
1861   PetscSF        multi = NULL;
1862 
1863   PetscFunctionBegin;
1864   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1865   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1866   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1867   ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr);
1868   PetscFunctionReturn(0);
1869 }
1870 
1871 /*@C
1872    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1873 
1874    Collective
1875 
1876    Input Parameters:
1877 +  sf - star forest
1878 .  unit - data type
1879 -  leafdata - leaf data to gather to roots
1880 
1881    Output Parameter:
1882 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1883 
1884    Level: intermediate
1885 
1886 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1887 @*/
1888 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1889 {
1890   PetscErrorCode ierr;
1891   PetscSF        multi = NULL;
1892 
1893   PetscFunctionBegin;
1894   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1895   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1896   ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr);
1897   PetscFunctionReturn(0);
1898 }
1899 
1900 /*@C
1901    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1902 
1903    Collective
1904 
1905    Input Parameters:
1906 +  sf - star forest
1907 .  unit - data type
1908 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1909 
1910    Output Parameter:
1911 .  leafdata - leaf data to be update with personal data from each respective root
1912 
1913    Level: intermediate
1914 
1915 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1916 @*/
1917 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1918 {
1919   PetscErrorCode ierr;
1920   PetscSF        multi = NULL;
1921 
1922   PetscFunctionBegin;
1923   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1924   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1925   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1926   ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 /*@C
1931    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1932 
1933    Collective
1934 
1935    Input Parameters:
1936 +  sf - star forest
1937 .  unit - data type
1938 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1939 
1940    Output Parameter:
1941 .  leafdata - leaf data to be update with personal data from each respective root
1942 
1943    Level: intermediate
1944 
1945 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1946 @*/
1947 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1948 {
1949   PetscErrorCode ierr;
1950   PetscSF        multi = NULL;
1951 
1952   PetscFunctionBegin;
1953   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1954   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1955   ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1960 {
1961   PetscInt        i, n, nleaves;
1962   const PetscInt *ilocal = NULL;
1963   PetscHSetI      seen;
1964   PetscErrorCode  ierr;
1965 
1966   PetscFunctionBegin;
1967   if (PetscDefined(USE_DEBUG)) {
1968     ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
1969     ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
1970     for (i = 0; i < nleaves; i++) {
1971       const PetscInt leaf = ilocal ? ilocal[i] : i;
1972       ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
1973     }
1974     ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
1975     if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1976     ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
1977   }
1978   PetscFunctionReturn(0);
1979 }
1980 
/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Composition below assumes each SF is a true star forest: no leaf connects to two roots */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* Re-index sfA's remote roots by sfB root number, so they can be used as rootdata of sfB below.
       Slots not covered by any sfA leaf keep rank/index -1, marking "no path through here". */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf outside sfB's root space: no path */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  /* Push (rank,index) of each sfA root down through sfB so every sfB leaf learns which sfA root
     it is transitively connected to; MPIU_2INT matches the two-PetscInt layout of PetscSFNode */
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: all of sfB's leaves are contiguous and all are connected to sfA roots,
     in which case leafdataB can be used directly as the composite's remote points */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB transfers to the new SF below */
  } else {
    /* Compact: keep only leaves that actually reached an sfA root */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue; /* unconnected leaf, drop it */
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2070 
/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Both SFs must be true star forests for the composition to be well defined */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not meet.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPI_REPLACE;
#endif

  /* Lay sfA's remote roots out by leaf index over sfB's leaf range, so they can be used as
     leafdata of sfB below; uncovered slots keep rank/index -1 meaning "no path" */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* sfA leaf outside sfB's leaf range */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* localPointsA == NULL means sfA's leaves are the contiguous range [0,numLeavesA) */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Push sfA root info from sfB's leaves up to sfB's roots; MPIU_2INT matches the
     two-PetscInt layout of PetscSFNode, and op disambiguates multi-leaf roots (see above) */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only sfB roots that received a connection to an sfA root */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2174 
/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF

  Notes:
  The returned SF lives on PETSC_COMM_SELF; roots keep their global-SF local numbering and every
  remote rank becomes 0. Ownership of *out passes to the caller.
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt     *ilocal;
  const PetscSFNode  *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode        *liremote;
  PetscSF            lsf;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation build the local SF if it knows a faster way */
  if (sf->ops->CreateLocalSF) {
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count leaves whose root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the intra-process edges, remapping the remote rank to 0 */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}
2226 
2227 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2228 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2229 {
2230   PetscErrorCode     ierr;
2231   PetscMemType       rootmtype,leafmtype;
2232 
2233   PetscFunctionBegin;
2234   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2235   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
2236   ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
2237   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
2238   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
2239   if (sf->ops->BcastToZero) {
2240     ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
2241   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2242   ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
2243   PetscFunctionReturn(0);
2244 }
2245 
2246