xref: /petsc/src/vec/is/sf/interface/sf.c (revision 6913077d92d45dc85e2cd558d8cf01c8c45125fc)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petscctable.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <cuda_runtime.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 #endif
13 
14 #if defined(PETSC_CLANG_STATIC_ANALYZER)
15 void PetscSFCheckGraphSet(PetscSF,int);
16 #else
17 #if defined(PETSC_USE_DEBUG)
18 #  define PetscSFCheckGraphSet(sf,arg) do {                          \
19     if (PetscUnlikely(!(sf)->graphset))                              \
20       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
21   } while (0)
22 #else
23 #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
24 #endif
25 #endif
26 
27 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
28 
29 /*@
30    PetscSFCreate - create a star forest communication context
31 
32    Collective
33 
34    Input Parameter:
35 .  comm - communicator on which the star forest will operate
36 
37    Output Parameter:
38 .  sf - new star forest context
39 
40    Options Database Keys:
41 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
42 .  -sf_type window    -Use MPI-3 one-sided window for communication
43 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
44 
45    Level: intermediate
46 
47    Notes:
48    When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
49    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
50    SFs are optimized and they have better performance than general SFs.
51 
52 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
53 @*/
54 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
55 {
56   PetscErrorCode ierr;
57   PetscSF        b;
58 
59   PetscFunctionBegin;
60   PetscValidPointer(sf,2);
61   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
62 
63   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
64 
65   b->nroots    = -1;
66   b->nleaves   = -1;
67   b->minleaf   = PETSC_MAX_INT;
68   b->maxleaf   = PETSC_MIN_INT;
69   b->nranks    = -1;
70   b->rankorder = PETSC_TRUE;
71   b->ingroup   = MPI_GROUP_NULL;
72   b->outgroup  = MPI_GROUP_NULL;
73   b->graphset  = PETSC_FALSE;
74 #if defined(PETSC_HAVE_DEVICE)
75   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
76   b->use_stream_aware_mpi = PETSC_FALSE;
77   b->unknown_input_stream= PETSC_FALSE;
78   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
79     b->backend = PETSCSF_BACKEND_KOKKOS;
80   #elif defined(PETSC_HAVE_CUDA)
81     b->backend = PETSCSF_BACKEND_CUDA;
82   #elif defined(PETSC_HAVE_HIP)
83     b->backend = PETSCSF_BACKEND_HIP;
84   #endif
85 
86   #if defined(PETSC_HAVE_NVSHMEM)
87     b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
88     b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
89     ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);CHKERRQ(ierr);
90     ierr = PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);CHKERRQ(ierr);
91   #endif
92 #endif
93   b->vscat.from_n = -1;
94   b->vscat.to_n   = -1;
95   b->vscat.unit   = MPIU_SCALAR;
96  *sf = b;
97   PetscFunctionReturn(0);
98 }
99 
100 /*@
101    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
102 
103    Collective
104 
105    Input Parameter:
106 .  sf - star forest
107 
108    Level: advanced
109 
110 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
111 @*/
112 PetscErrorCode PetscSFReset(PetscSF sf)
113 {
114   PetscErrorCode ierr;
115 
116   PetscFunctionBegin;
117   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
118   if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
119   sf->nroots   = -1;
120   sf->nleaves  = -1;
121   sf->minleaf  = PETSC_MAX_INT;
122   sf->maxleaf  = PETSC_MIN_INT;
123   sf->mine     = NULL;
124   sf->remote   = NULL;
125   sf->graphset = PETSC_FALSE;
126   ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
127   ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
128   sf->nranks = -1;
129   ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
130   sf->degreeknown = PETSC_FALSE;
131   ierr = PetscFree(sf->degree);CHKERRQ(ierr);
132   if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRMPI(ierr);}
133   if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRMPI(ierr);}
134   if (sf->multi) sf->multi->multi = NULL;
135   ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
136   ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
137 
138  #if defined(PETSC_HAVE_DEVICE)
139   for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
140  #endif
141 
142   sf->setupcalled = PETSC_FALSE;
143   PetscFunctionReturn(0);
144 }
145 
146 /*@C
147    PetscSFSetType - Set the PetscSF communication implementation
148 
149    Collective on PetscSF
150 
151    Input Parameters:
152 +  sf - the PetscSF context
153 -  type - a known method
154 
155    Options Database Key:
156 .  -sf_type <type> - Sets the method; use -help for a list
157    of available methods (for instance, window, basic, neighbor)
158 
159    Notes:
160    See "include/petscsf.h" for available methods (for instance)
161 +    PETSCSFWINDOW - MPI-2/3 one-sided
162 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
163 
164   Level: intermediate
165 
166 .seealso: PetscSFType, PetscSFCreate()
167 @*/
168 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
169 {
170   PetscErrorCode ierr,(*r)(PetscSF);
171   PetscBool      match;
172 
173   PetscFunctionBegin;
174   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
175   PetscValidCharPointer(type,2);
176 
177   ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
178   if (match) PetscFunctionReturn(0);
179 
180   ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
181   PetscCheckFalse(!r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
182   /* Destroy the previous PetscSF implementation context */
183   if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
184   ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
185   ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
186   ierr = (*r)(sf);CHKERRQ(ierr);
187   PetscFunctionReturn(0);
188 }
189 
190 /*@C
191   PetscSFGetType - Get the PetscSF communication implementation
192 
193   Not Collective
194 
195   Input Parameter:
196 . sf  - the PetscSF context
197 
198   Output Parameter:
199 . type - the PetscSF type name
200 
201   Level: intermediate
202 
203 .seealso: PetscSFSetType(), PetscSFCreate()
204 @*/
205 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
206 {
207   PetscFunctionBegin;
208   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
209   PetscValidPointer(type,2);
210   *type = ((PetscObject)sf)->type_name;
211   PetscFunctionReturn(0);
212 }
213 
/*@C
   PetscSFDestroy - destroy star forest

   Collective

   Input Parameter:
.  sf - address of star forest

   Level: intermediate

   Note:
   The pointer is set to NULL on return. If other references to the object
   remain, only this reference is dropped and the object itself survives.

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Someone else still holds a reference: just drop ours */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  /* Let the implementation free its type-specific context */
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr);
  /* For bs > 1 a derived MPI datatype was created (see PetscSFDuplicate()); free it */
  if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRMPI(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
241 
242 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
243 {
244   PetscInt           i, nleaves;
245   PetscMPIInt        size;
246   const PetscInt    *ilocal;
247   const PetscSFNode *iremote;
248   PetscErrorCode     ierr;
249 
250   PetscFunctionBegin;
251   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
252   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
253   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
254   for (i = 0; i < nleaves; i++) {
255     const PetscInt rank = iremote[i].rank;
256     const PetscInt remote = iremote[i].index;
257     const PetscInt leaf = ilocal ? ilocal[i] : i;
258     PetscCheckFalse(rank < 0 || rank >= size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size);
259     PetscCheckFalse(remote < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i);
260     PetscCheckFalse(leaf < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i);
261   }
262   PetscFunctionReturn(0);
263 }
264 
265 /*@
266    PetscSFSetUp - set up communication structures
267 
268    Collective
269 
270    Input Parameter:
271 .  sf - star forest communication object
272 
273    Level: beginner
274 
275 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
276 @*/
277 PetscErrorCode PetscSFSetUp(PetscSF sf)
278 {
279   PetscErrorCode ierr;
280 
281   PetscFunctionBegin;
282   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
283   PetscSFCheckGraphSet(sf,1);
284   if (sf->setupcalled) PetscFunctionReturn(0);
285   ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
286   ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
287   if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */
288   if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
289 #if defined(PETSC_HAVE_CUDA)
290   if (sf->backend == PETSCSF_BACKEND_CUDA) {
291     sf->ops->Malloc = PetscSFMalloc_CUDA;
292     sf->ops->Free   = PetscSFFree_CUDA;
293   }
294 #endif
295 #if defined(PETSC_HAVE_HIP)
296   if (sf->backend == PETSCSF_BACKEND_HIP) {
297     sf->ops->Malloc = PetscSFMalloc_HIP;
298     sf->ops->Free   = PetscSFFree_HIP;
299   }
300 #endif
301 
302 #
303 #if defined(PETSC_HAVE_KOKKOS)
304   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
305     sf->ops->Malloc = PetscSFMalloc_Kokkos;
306     sf->ops->Free   = PetscSFFree_Kokkos;
307   }
308 #endif
309   ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
310   sf->setupcalled = PETSC_TRUE;
311   PetscFunctionReturn(0);
312 }
313 
314 /*@
315    PetscSFSetFromOptions - set PetscSF options using the options database
316 
317    Logically Collective
318 
319    Input Parameter:
320 .  sf - star forest
321 
322    Options Database Keys:
323 +  -sf_type               - implementation type, see PetscSFSetType()
324 .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
325 .  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
326                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
327                             If true, this option only works with -use_gpu_aware_mpi 1.
328 .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
329                                If true, this option only works with -use_gpu_aware_mpi 1.
330 
331 -  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
332                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
333                               the only available is kokkos.
334 
335    Level: intermediate
336 @*/
337 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
338 {
339   PetscSFType    deft;
340   char           type[256];
341   PetscErrorCode ierr;
342   PetscBool      flg;
343 
344   PetscFunctionBegin;
345   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
346   ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
347   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
348   ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
349   ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
350   ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
351  #if defined(PETSC_HAVE_DEVICE)
352   {
353     char        backendstr[32] = {0};
354     PetscBool   isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
355     /* Change the defaults set in PetscSFCreate() with command line options */
356     ierr = PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);CHKERRQ(ierr);
357     ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
358     ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr);
359     ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr);
360     ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr);
361     ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr);
362   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
363     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
364     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
365     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
366     else PetscCheckFalse(set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
367   #elif defined(PETSC_HAVE_KOKKOS)
368     PetscCheckFalse(set && !isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
369    #endif
370   }
371  #endif
372   if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
373   ierr = PetscOptionsEnd();CHKERRQ(ierr);
374   PetscFunctionReturn(0);
375 }
376 
377 /*@
378    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
379 
380    Logically Collective
381 
382    Input Parameters:
383 +  sf - star forest
384 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
385 
386    Level: advanced
387 
388 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
389 @*/
390 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
391 {
392   PetscFunctionBegin;
393   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
394   PetscValidLogicalCollectiveBool(sf,flg,2);
395   PetscCheckFalse(sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
396   sf->rankorder = flg;
397   PetscFunctionReturn(0);
398 }
399 
/*@C
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Parameters:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   Leaf indices in ilocal must be unique, otherwise an error occurs.

   Input arrays ilocal and iremote follow the PetscCopyMode semantics.
   In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
   PETSc might modify the respective array;
   if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
   Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

   Fortran Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

   Developer Notes:
   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
   This also allows to compare leaf sets of two SFs easily.

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscBool       unique, contiguous;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  PetscCheckFalse(nroots  < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots);
  PetscCheckFalse(nleaves < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves);
  PetscCheck(localmode  >= PETSC_COPY_VALUES && localmode  <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong localmode %d",localmode);
  PetscCheck(remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong remotemode %d",remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES, work on private copies so the user's arrays stay intact */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    ierr = PetscMalloc1(nleaves,&tlocal);CHKERRQ(ierr);
    ierr = PetscArraycpy(tlocal,ilocal,nleaves);CHKERRQ(ierr);
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    ierr = PetscMalloc1(nleaves,&tremote);CHKERRQ(ierr);
    ierr = PetscArraycpy(tremote,iremote,nleaves);CHKERRQ(ierr);
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode   work;

    /* Sort leaves (carrying iremote along) to detect duplicates/contiguity and find min/max leaf */
    ierr = PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work);CHKERRQ(ierr);
    ierr = PetscSortedCheckDupsInt(nleaves, ilocal, &unique);CHKERRQ(ierr);
    unique = PetscNot(unique); /* PetscSortedCheckDupsInt() returns TRUE when dups were found */
    PetscCheck(sf->allow_multi_leaves || unique,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves-1];
    contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1);
  } else {
    /* NULL ilocal means leaves are 0..nleaves-1 */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* A contiguous leaf set is canonically represented by ilocal == NULL */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      ierr = PetscFree(ilocal);CHKERRQ(ierr);
    }
  }
  sf->mine            = ilocal;
  /* mine_alloc/remote_alloc record ownership: NULL when the user retains the array (PETSC_USE_POINTER) */
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc    = NULL;
  } else {
    sf->mine_alloc    = ilocal;
  }
  sf->remote          = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc  = NULL;
  } else {
    sf->remote_alloc  = iremote;
  }
  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
514 
515 /*@
516   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
517 
518   Collective
519 
520   Input Parameters:
521 + sf      - The PetscSF
522 . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
523 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
524 
525   Notes:
526   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
527   n and N are local and global sizes of x respectively.
528 
529   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
530   sequential vectors y on all ranks.
531 
532   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
533   sequential vector y on rank 0.
534 
535   In above cases, entries of x are roots and entries of y are leaves.
536 
537   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
538   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
539   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
540   not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
541   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
542 
543   In this case, roots and leaves are symmetric.
544 
545   Level: intermediate
546  @*/
547 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
548 {
549   MPI_Comm       comm;
550   PetscInt       n,N,res[2];
551   PetscMPIInt    rank,size;
552   PetscSFType    type;
553   PetscErrorCode ierr;
554 
555   PetscFunctionBegin;
556   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
557   if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2);
558   ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
559   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern);
560   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
561   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
562 
563   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
564     type = PETSCSFALLTOALL;
565     ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
566     ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
567     ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
568     ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
569   } else {
570     ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
571     ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
572     res[0] = n;
573     res[1] = -n;
574     /* Check if n are same over all ranks so that we can optimize it */
575     ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRMPI(ierr);
576     if (res[0] == -res[1]) { /* same n */
577       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
578     } else {
579       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
580     }
581     ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
582   }
583   ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);
584 
585   sf->pattern = pattern;
586   sf->mine    = NULL; /* Contiguous */
587 
588   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
589      Also set other easy stuff.
590    */
591   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
592     sf->nleaves      = N;
593     sf->nroots       = n;
594     sf->nranks       = size;
595     sf->minleaf      = 0;
596     sf->maxleaf      = N - 1;
597   } else if (pattern == PETSCSF_PATTERN_GATHER) {
598     sf->nleaves      = rank ? 0 : N;
599     sf->nroots       = n;
600     sf->nranks       = rank ? 0 : size;
601     sf->minleaf      = 0;
602     sf->maxleaf      = rank ? -1 : N - 1;
603   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
604     sf->nleaves      = size;
605     sf->nroots       = size;
606     sf->nranks       = size;
607     sf->minleaf      = 0;
608     sf->maxleaf      = size - 1;
609   }
610   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
611   sf->graphset = PETSC_TRUE;
612   PetscFunctionReturn(0);
613 }
614 
615 /*@
616    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
617 
618    Collective
619 
620    Input Parameter:
621 .  sf - star forest to invert
622 
623    Output Parameter:
624 .  isf - inverse of sf
625 
626    Level: advanced
627 
628    Notes:
629    All roots must have degree 1.
630 
631    The local space may be a permutation, but cannot be sparse.
632 
633 .seealso: PetscSFSetGraph()
634 @*/
635 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
636 {
637   PetscErrorCode ierr;
638   PetscMPIInt    rank;
639   PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
640   const PetscInt *ilocal;
641   PetscSFNode    *roots,*leaves;
642 
643   PetscFunctionBegin;
644   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
645   PetscSFCheckGraphSet(sf,1);
646   PetscValidPointer(isf,2);
647 
648   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
649   maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
650 
651   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
652   ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
653   for (i=0; i<maxlocal; i++) {
654     leaves[i].rank  = rank;
655     leaves[i].index = i;
656   }
657   for (i=0; i <nroots; i++) {
658     roots[i].rank  = -1;
659     roots[i].index = -1;
660   }
661   ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr);
662   ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);CHKERRQ(ierr);
663 
664   /* Check whether our leaves are sparse */
665   for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
666   if (count == nroots) newilocal = NULL;
667   else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
668     ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
669     for (i=0,count=0; i<nroots; i++) {
670       if (roots[i].rank >= 0) {
671         newilocal[count]   = i;
672         roots[count].rank  = roots[i].rank;
673         roots[count].index = roots[i].index;
674         count++;
675       }
676     }
677   }
678 
679   ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
680   ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
681   ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 /*@
686    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
687 
688    Collective
689 
690    Input Parameters:
691 +  sf - communication object to duplicate
692 -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
693 
694    Output Parameter:
695 .  newsf - new communication object
696 
697    Level: beginner
698 
699 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
700 @*/
701 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
702 {
703   PetscSFType    type;
704   PetscErrorCode ierr;
705   MPI_Datatype   dtype=MPIU_SCALAR;
706 
707   PetscFunctionBegin;
708   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
709   PetscValidLogicalCollectiveEnum(sf,opt,2);
710   PetscValidPointer(newsf,3);
711   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
712   ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
713   if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
714   if (opt == PETSCSF_DUPLICATE_GRAPH) {
715     PetscSFCheckGraphSet(sf,1);
716     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
717       PetscInt          nroots,nleaves;
718       const PetscInt    *ilocal;
719       const PetscSFNode *iremote;
720       ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
721       ierr = PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
722     } else {
723       ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
724     }
725   }
726   /* Since oldtype is committed, so is newtype, according to MPI */
727   if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRMPI(ierr);}
728   (*newsf)->vscat.bs     = sf->vscat.bs;
729   (*newsf)->vscat.unit   = dtype;
730   (*newsf)->vscat.to_n   = sf->vscat.to_n;
731   (*newsf)->vscat.from_n = sf->vscat.from_n;
732   /* Do not copy lsf. Build it on demand since it is rarely used */
733 
734 #if defined(PETSC_HAVE_DEVICE)
735   (*newsf)->backend              = sf->backend;
736   (*newsf)->unknown_input_stream= sf->unknown_input_stream;
737   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
738   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
739 #endif
740   if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
741   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
742   PetscFunctionReturn(0);
743 }
744 
745 /*@C
746    PetscSFGetGraph - Get the graph specifying a parallel star forest
747 
748    Not Collective
749 
750    Input Parameter:
751 .  sf - star forest
752 
753    Output Parameters:
754 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
755 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
756 .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
757 -  iremote - remote locations of root vertices for each leaf on the current process
758 
759    Notes:
760      We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
761 
762      The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
763      see its manpage for details.
764 
765    Fortran Notes:
766      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
767      want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
768 
769      To check for a NULL ilocal use
770 $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
771 
772    Level: intermediate
773 
774 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
775 @*/
776 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
777 {
778   PetscErrorCode ierr;
779 
780   PetscFunctionBegin;
781   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
782   if (sf->ops->GetGraph) {
783     ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
784   } else {
785     if (nroots) *nroots = sf->nroots;
786     if (nleaves) *nleaves = sf->nleaves;
787     if (ilocal) *ilocal = sf->mine;
788     if (iremote) *iremote = sf->remote;
789   }
790   PetscFunctionReturn(0);
791 }
792 
793 /*@
794    PetscSFGetLeafRange - Get the active leaf ranges
795 
796    Not Collective
797 
798    Input Parameter:
799 .  sf - star forest
800 
801    Output Parameters:
802 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
803 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
804 
805    Level: developer
806 
807 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
808 @*/
809 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
810 {
811   PetscFunctionBegin;
812   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
813   PetscSFCheckGraphSet(sf,1);
814   if (minleaf) *minleaf = sf->minleaf;
815   if (maxleaf) *maxleaf = sf->maxleaf;
816   PetscFunctionReturn(0);
817 }
818 
819 /*@C
820    PetscSFViewFromOptions - View from Options
821 
822    Collective on PetscSF
823 
824    Input Parameters:
825 +  A - the star forest
826 .  obj - Optional object
827 -  name - command line option
828 
829    Level: intermediate
830 .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
831 @*/
832 PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
833 {
834   PetscErrorCode ierr;
835 
836   PetscFunctionBegin;
837   PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
838   ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
839   PetscFunctionReturn(0);
840 }
841 
842 /*@C
843    PetscSFView - view a star forest
844 
845    Collective
846 
847    Input Parameters:
848 +  sf - star forest
849 -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
850 
851    Level: beginner
852 
853 .seealso: PetscSFCreate(), PetscSFSetGraph()
854 @*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is supplied */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Setting up builds the per-rank tables (sf->ranks, sf->roffset, ...) printed below */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  /* MATLAB-format ASCII output is left entirely to the type-specific viewer called at the end */
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      /* Synchronized printing keeps the per-rank output ordered by rank */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        /* NULL sf->mine means contiguous leaf storage, so leaf i lives at index i */
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list (carrying a permutation) so output is ordered by rank value */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          /* roffset[i]..roffset[i+1] delimits the edges going to rank sf->ranks[i] */
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  /* Let the implementation (basic/window/neighbor) print its own details */
  if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
912 
913 /*@C
914    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
915 
916    Not Collective
917 
918    Input Parameter:
919 .  sf - star forest
920 
921    Output Parameters:
922 +  nranks - number of ranks referenced by local part
923 .  ranks - array of ranks
924 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
925 .  rmine - concatenated array holding local indices referencing each remote rank
926 -  rremote - concatenated array holding remote indices referenced for each remote rank
927 
928    Level: developer
929 
930 .seealso: PetscSFGetLeafRanks()
931 @*/
932 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
933 {
934   PetscErrorCode ierr;
935 
936   PetscFunctionBegin;
937   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
938   PetscCheckFalse(!sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
939   if (sf->ops->GetRootRanks) {
940     ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
941   } else {
942     /* The generic implementation */
943     if (nranks)  *nranks  = sf->nranks;
944     if (ranks)   *ranks   = sf->ranks;
945     if (roffset) *roffset = sf->roffset;
946     if (rmine)   *rmine   = sf->rmine;
947     if (rremote) *rremote = sf->rremote;
948   }
949   PetscFunctionReturn(0);
950 }
951 
952 /*@C
953    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
954 
955    Not Collective
956 
957    Input Parameter:
958 .  sf - star forest
959 
960    Output Parameters:
961 +  niranks - number of leaf ranks referencing roots on this process
962 .  iranks - array of ranks
963 .  ioffset - offset in irootloc for each rank (length niranks+1)
964 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
965 
966    Level: developer
967 
968 .seealso: PetscSFGetRootRanks()
969 @*/
970 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
971 {
972   PetscErrorCode ierr;
973 
974   PetscFunctionBegin;
975   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
976   PetscCheckFalse(!sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
977   if (sf->ops->GetLeafRanks) {
978     ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
979   } else {
980     PetscSFType type;
981     ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
982     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
983   }
984   PetscFunctionReturn(0);
985 }
986 
987 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
988   PetscInt i;
989   for (i=0; i<n; i++) {
990     if (needle == list[i]) return PETSC_TRUE;
991   }
992   return PETSC_FALSE;
993 }
994 
995 /*@C
996    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
997 
998    Collective
999 
1000    Input Parameters:
1001 +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
1002 -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
1003 
1004    Level: developer
1005 
1006 .seealso: PetscSFGetRootRanks()
1007 @*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRMPI(ierr);
  /* Count how many leaves reference each remote rank, using a hash table keyed on rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  /* rmine/rremote hold leaf/root indices grouped by rank; roffset[i]..roffset[i+1] delimits rank i's segment */
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  /* Drain the table into (rank, count) pairs; table iteration order is unspecified, sorted below */
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate every member of the distinguished group into its rank in the SF's communicator */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRMPI(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRMPI(ierr);}
    ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  /* Two-pointer partition: sf->ndranks advances from the front, i retreats from the back */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition by rank so PetscFindMPIInt() below can search them */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Prefix-sum the per-rank counts into segment offsets, then reuse rcount[] as per-rank fill cursors */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Bucket every leaf into its owning rank's segment of rmine[]/rremote[];
     orank caches the previous leaf's rank to skip repeated lookups */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheckFalse(irank < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1098 
1099 /*@C
1100    PetscSFGetGroups - gets incoming and outgoing process groups
1101 
1102    Collective
1103 
1104    Input Parameter:
1105 .  sf - star forest
1106 
1107    Output Parameters:
1108 +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1109 -  outgoing - group of destination processes for outgoing edges (roots that I reference)
1110 
1111    Level: developer
1112 
1113 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1114 @*/
1115 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1116 {
1117   PetscErrorCode ierr;
1118   MPI_Group      group = MPI_GROUP_NULL;
1119 
1120   PetscFunctionBegin;
1121   PetscCheckFalse(sf->nranks < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
1122   if (sf->ingroup == MPI_GROUP_NULL) {
1123     PetscInt       i;
1124     const PetscInt *indegree;
1125     PetscMPIInt    rank,*outranks,*inranks;
1126     PetscSFNode    *remote;
1127     PetscSF        bgcount;
1128 
1129     /* Compute the number of incoming ranks */
1130     ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
1131     for (i=0; i<sf->nranks; i++) {
1132       remote[i].rank  = sf->ranks[i];
1133       remote[i].index = 0;
1134     }
1135     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
1136     ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1137     ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
1138     ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
1139     /* Enumerate the incoming ranks */
1140     ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
1141     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
1142     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1143     ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
1144     ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
1145     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
1146     ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRMPI(ierr);
1147     ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
1148     ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
1149     ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
1150   }
1151   *incoming = sf->ingroup;
1152 
1153   if (sf->outgroup == MPI_GROUP_NULL) {
1154     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRMPI(ierr);
1155     ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRMPI(ierr);
1156     ierr = MPI_Group_free(&group);CHKERRMPI(ierr);
1157   }
1158   *outgoing = sf->outgroup;
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 /*@
1163    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1164 
1165    Collective
1166 
1167    Input Parameter:
1168 .  sf - star forest that may contain roots with 0 or with more than 1 vertex
1169 
1170    Output Parameter:
1171 .  multi - star forest with split roots, such that each root has degree exactly 1
1172 
1173    Level: developer
1174 
1175    Notes:
1176 
1177    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1178    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1179    edge, it is a candidate for future optimization that might involve its removal.
1180 
1181 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1182 @*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    sf->multi->multi = sf->multi; /* the multi-SF is its own multi-SF */
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    /* indegree[r] = number of leaves attached to root r */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    /* inoffset[] is the prefix sum of indegree[]: the first split-root index for each original root */
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 per leaf hands each leaf a distinct split-root index (returned in outoffset[]) */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        PetscCheckFalse(inoffset[i] + indegree[i] != inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    /* Each leaf now points at its private split root */
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    sf->multi->multi = sf->multi; /* the multi-SF is its own multi-SF */
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Collect at every split root the rank of the leaf that owns it */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its rank-ordered split-root index, then rebuild the graph with it */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi; /* cached; owned by the SF */
  PetscFunctionReturn(0);
}
1255 
1256 /*@C
1257    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1258 
1259    Collective
1260 
1261    Input Parameters:
1262 +  sf - original star forest
1263 .  nselected  - number of selected roots on this process
1264 -  selected   - indices of the selected roots on this process
1265 
1266    Output Parameter:
1267 .  esf - new star forest
1268 
1269    Level: advanced
1270 
1271    Note:
1272    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1273    be done by calling PetscSFGetGraph().
1274 
1275 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1276 @*/
1277 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1278 {
1279   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1280   const PetscInt    *ilocal;
1281   signed char       *rootdata,*leafdata,*leafmem;
1282   const PetscSFNode *iremote;
1283   PetscSFNode       *new_iremote;
1284   MPI_Comm          comm;
1285   PetscErrorCode    ierr;
1286 
1287   PetscFunctionBegin;
1288   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1289   PetscSFCheckGraphSet(sf,1);
1290   if (nselected) PetscValidPointer(selected,3);
1291   PetscValidPointer(esf,4);
1292 
1293   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1294   ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1295   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1296   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
1297 
1298   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1299     PetscBool dups;
1300     ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr);
1301     PetscCheckFalse(dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1302     for (i=0; i<nselected; i++)
1303       PetscCheckFalse(selected[i] < 0 || selected[i] >= nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots);
1304   }
1305 
1306   if (sf->ops->CreateEmbeddedRootSF) {
1307     ierr = (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);CHKERRQ(ierr);
1308   } else {
1309     /* A generic version of creating embedded sf */
1310     ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
1311     maxlocal = maxleaf - minleaf + 1;
1312     ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
1313     leafdata = leafmem - minleaf;
1314     /* Tag selected roots and bcast to leaves */
1315     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1316     ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
1317     ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
1318 
1319     /* Build esf with leaves that are still connected */
1320     esf_nleaves = 0;
1321     for (i=0; i<nleaves; i++) {
1322       j = ilocal ? ilocal[i] : i;
1323       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1324          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1325       */
1326       esf_nleaves += (leafdata[j] ? 1 : 0);
1327     }
1328     ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
1329     ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
1330     for (i=n=0; i<nleaves; i++) {
1331       j = ilocal ? ilocal[i] : i;
1332       if (leafdata[j]) {
1333         new_ilocal[n]        = j;
1334         new_iremote[n].rank  = iremote[i].rank;
1335         new_iremote[n].index = iremote[i].index;
1336         ++n;
1337       }
1338     }
1339     ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr);
1340     ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr);
1341     ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1342     ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
1343   }
1344   ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 /*@C
1349   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1350 
1351   Collective
1352 
1353   Input Parameters:
1354 + sf - original star forest
1355 . nselected  - number of selected leaves on this process
1356 - selected   - indices of the selected leaves on this process
1357 
1358   Output Parameter:
1359 .  newsf - new star forest
1360 
1361   Level: advanced
1362 
1363 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
1364 @*/
1365 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1366 {
1367   const PetscSFNode *iremote;
1368   PetscSFNode       *new_iremote;
1369   const PetscInt    *ilocal;
1370   PetscInt          i,nroots,*leaves,*new_ilocal;
1371   MPI_Comm          comm;
1372   PetscErrorCode    ierr;
1373 
1374   PetscFunctionBegin;
1375   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1376   PetscSFCheckGraphSet(sf,1);
1377   if (nselected) PetscValidPointer(selected,3);
1378   PetscValidPointer(newsf,4);
1379 
1380   /* Uniq selected[] and put results in leaves[] */
1381   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1382   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1383   ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1384   ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1385   PetscCheckFalse(nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves);
1386 
1387   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1388   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1389     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1390   } else {
1391     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1392     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1393     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1394     for (i=0; i<nselected; ++i) {
1395       const PetscInt l     = leaves[i];
1396       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1397       new_iremote[i].rank  = iremote[l].rank;
1398       new_iremote[i].index = iremote[l].index;
1399     }
1400     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
1401     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1402   }
1403   ierr = PetscFree(leaves);CHKERRQ(ierr);
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 /*@C
1408    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1409 
1410    Collective on PetscSF
1411 
1412    Input Parameters:
1413 +  sf - star forest on which to communicate
1414 .  unit - data type associated with each node
1415 .  rootdata - buffer to broadcast
1416 -  op - operation to use for reduction
1417 
1418    Output Parameter:
1419 .  leafdata - buffer to be reduced with values from each leaf's respective root
1420 
1421    Level: intermediate
1422 
1423    Notes:
1424     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
    are host pointers or device pointers, which may incur a noticeable cost. If you already know the info, you should
1426     use PetscSFBcastWithMemTypeBegin() instead.
1427 .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
1428 @*/
1429 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1430 {
1431   PetscErrorCode ierr;
1432   PetscMemType   rootmtype,leafmtype;
1433 
1434   PetscFunctionBegin;
1435   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1436   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1437   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1438   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1439   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1440   ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1441   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 /*@C
1446    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()
1447 
1448    Collective on PetscSF
1449 
1450    Input Parameters:
1451 +  sf - star forest on which to communicate
1452 .  unit - data type associated with each node
1453 .  rootmtype - memory type of rootdata
1454 .  rootdata - buffer to broadcast
1455 .  leafmtype - memory type of leafdata
1456 -  op - operation to use for reduction
1457 
1458    Output Parameter:
1459 .  leafdata - buffer to be reduced with values from each leaf's respective root
1460 
1461    Level: intermediate
1462 
1463 .seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
1464 @*/
1465 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1466 {
1467   PetscErrorCode ierr;
1468 
1469   PetscFunctionBegin;
1470   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1471   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1472   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1473   ierr = (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1474   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);}
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 /*@C
1479    PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()
1480 
1481    Collective
1482 
1483    Input Parameters:
1484 +  sf - star forest
1485 .  unit - data type
1486 .  rootdata - buffer to broadcast
1487 -  op - operation to use for reduction
1488 
1489    Output Parameter:
1490 .  leafdata - buffer to be reduced with values from each leaf's respective root
1491 
1492    Level: intermediate
1493 
1494 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1495 @*/
1496 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1497 {
1498   PetscErrorCode ierr;
1499 
1500   PetscFunctionBegin;
1501   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1502   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
1503   ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
1504   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);}
1505   PetscFunctionReturn(0);
1506 }
1507 
1508 /*@C
1509    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1510 
1511    Collective
1512 
1513    Input Parameters:
1514 +  sf - star forest
1515 .  unit - data type
1516 .  leafdata - values to reduce
1517 -  op - reduction operation
1518 
1519    Output Parameter:
1520 .  rootdata - result of reduction of values from all leaves of each root
1521 
1522    Level: intermediate
1523 
1524    Notes:
1525     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1526     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1527     use PetscSFReduceWithMemTypeBegin() instead.
1528 
1529 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1530 @*/
1531 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1532 {
1533   PetscErrorCode ierr;
1534   PetscMemType   rootmtype,leafmtype;
1535 
1536   PetscFunctionBegin;
1537   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1538   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1539   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1540   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1541   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1542   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1543   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 /*@C
1548    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1549 
1550    Collective
1551 
1552    Input Parameters:
1553 +  sf - star forest
1554 .  unit - data type
1555 .  leafmtype - memory type of leafdata
1556 .  leafdata - values to reduce
1557 .  rootmtype - memory type of rootdata
1558 -  op - reduction operation
1559 
1560    Output Parameter:
1561 .  rootdata - result of reduction of values from all leaves of each root
1562 
1563    Level: intermediate
1564 
1565 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1566 @*/
1567 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1568 {
1569   PetscErrorCode ierr;
1570 
1571   PetscFunctionBegin;
1572   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1573   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1574   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1575   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1576   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 /*@C
1581    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1582 
1583    Collective
1584 
1585    Input Parameters:
1586 +  sf - star forest
1587 .  unit - data type
1588 .  leafdata - values to reduce
1589 -  op - reduction operation
1590 
1591    Output Parameter:
1592 .  rootdata - result of reduction of values from all leaves of each root
1593 
1594    Level: intermediate
1595 
1596 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1597 @*/
1598 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1599 {
1600   PetscErrorCode ierr;
1601 
1602   PetscFunctionBegin;
1603   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1604   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
1605   ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1606   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 /*@C
1611    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1612 
1613    Collective
1614 
1615    Input Parameters:
1616 +  sf - star forest
1617 .  unit - data type
1618 .  leafdata - leaf values to use in reduction
1619 -  op - operation to use for reduction
1620 
1621    Output Parameters:
1622 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1623 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1624 
1625    Level: advanced
1626 
1627    Note:
1628    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1629    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1630    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1631    integers.
1632 
1633 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1634 @*/
1635 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1636 {
1637   PetscErrorCode ierr;
1638   PetscMemType   rootmtype,leafmtype,leafupdatemtype;
1639 
1640   PetscFunctionBegin;
1641   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1642   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1643   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1644   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1645   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1646   ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
1647   PetscCheckFalse(leafmtype != leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1648   ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
1649   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1650   PetscFunctionReturn(0);
1651 }
1652 
1653 /*@C
1654    PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1655 
1656    Collective
1657 
1658    Input Parameters:
1659 +  sf - star forest
1660 .  unit - data type
1661 .  rootmtype - memory type of rootdata
1662 .  leafmtype - memory type of leafdata
1663 .  leafdata - leaf values to use in reduction
1664 .  leafupdatemtype - memory type of leafupdate
1665 -  op - operation to use for reduction
1666 
1667    Output Parameters:
1668 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1669 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1670 
1671    Level: advanced
1672 
1673    Note: See PetscSFFetchAndOpBegin() for more details.
1674 
1675 .seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1676 @*/
1677 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
1678 {
1679   PetscErrorCode ierr;
1680 
1681   PetscFunctionBegin;
1682   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1683   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1684   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1685   PetscCheckFalse(leafmtype != leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1686   ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
1687   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1688   PetscFunctionReturn(0);
1689 }
1690 
1691 /*@C
1692    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1693 
1694    Collective
1695 
1696    Input Parameters:
1697 +  sf - star forest
1698 .  unit - data type
1699 .  leafdata - leaf values to use in reduction
1700 -  op - operation to use for reduction
1701 
1702    Output Parameters:
1703 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1704 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1705 
1706    Level: advanced
1707 
1708 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1709 @*/
1710 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1711 {
1712   PetscErrorCode ierr;
1713 
1714   PetscFunctionBegin;
1715   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1716   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1717   ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
1718   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1719   PetscFunctionReturn(0);
1720 }
1721 
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* A non-NULL sf->degree means a Begin is already outstanding; nesting is not supported */
    PetscCheckFalse(sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1; /* size of the local leaf index range */
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Reducing the value 1 from every leaf into its root with MPI_SUM yields each root's degree.
       The degreetmp-minleaf offset shifts the buffer so it can be indexed by global leaf number. */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  /* The result is not available until PetscSFComputeDegreeEnd() */
  *degree = NULL;
  PetscFunctionReturn(0);
}
1761 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    PetscCheckFalse(!sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Finish the leaf-to-root MPI_SUM; the minleaf offset matches the one used in Begin */
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE; /* cache so subsequent Begin/End pairs return immediately */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1797 
1798 /*@C
1799    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1800    Each multi-root is assigned index of the corresponding original root.
1801 
1802    Collective
1803 
1804    Input Parameters:
1805 +  sf - star forest
1806 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1807 
1808    Output Parameters:
1809 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1810 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1811 
1812    Level: developer
1813 
1814    Notes:
1815    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1816 
1817 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1818 @*/
1819 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1820 {
1821   PetscSF             msf;
1822   PetscInt            i, j, k, nroots, nmroots;
1823   PetscErrorCode      ierr;
1824 
1825   PetscFunctionBegin;
1826   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1827   ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
1828   if (nroots) PetscValidIntPointer(degree,2);
1829   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1830   PetscValidPointer(multiRootsOrigNumbering,4);
1831   ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
1832   ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
1833   ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
1834   for (i=0,j=0,k=0; i<nroots; i++) {
1835     if (!degree[i]) continue;
1836     for (j=0; j<degree[i]; j++,k++) {
1837       (*multiRootsOrigNumbering)[k] = i;
1838     }
1839   }
1840   PetscCheckFalse(k != nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1841   if (nMultiRoots) *nMultiRoots = nmroots;
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 /*@C
1846    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1847 
1848    Collective
1849 
1850    Input Parameters:
1851 +  sf - star forest
1852 .  unit - data type
1853 -  leafdata - leaf data to gather to roots
1854 
1855    Output Parameter:
1856 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1857 
1858    Level: intermediate
1859 
1860 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1861 @*/
1862 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1863 {
1864   PetscErrorCode ierr;
1865   PetscSF        multi = NULL;
1866 
1867   PetscFunctionBegin;
1868   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1869   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1870   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1871   ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr);
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 /*@C
1876    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1877 
1878    Collective
1879 
1880    Input Parameters:
1881 +  sf - star forest
1882 .  unit - data type
1883 -  leafdata - leaf data to gather to roots
1884 
1885    Output Parameter:
1886 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1887 
1888    Level: intermediate
1889 
1890 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1891 @*/
1892 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1893 {
1894   PetscErrorCode ierr;
1895   PetscSF        multi = NULL;
1896 
1897   PetscFunctionBegin;
1898   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1899   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1900   ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);CHKERRQ(ierr);
1901   PetscFunctionReturn(0);
1902 }
1903 
/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Scatter is a broadcast with MPI_REPLACE over the multi-SF, which has one root slot per edge */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1933 
/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Complete the broadcast over the multi-SF started in PetscSFScatterBegin() */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1962 
1963 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1964 {
1965   PetscInt        i, n, nleaves;
1966   const PetscInt *ilocal = NULL;
1967   PetscHSetI      seen;
1968   PetscErrorCode  ierr;
1969 
1970   PetscFunctionBegin;
1971   if (PetscDefined(USE_DEBUG)) {
1972     ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
1973     ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
1974     for (i = 0; i < nleaves; i++) {
1975       const PetscInt leaf = ilocal ? ilocal[i] : i;
1976       ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
1977     }
1978     ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
1979     PetscCheckFalse(n != nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1980     ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
1981   }
1982   PetscFunctionReturn(0);
1983 }
1984 
/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  /* Build an array of sfA's remote roots indexed by sfA leaf number (= sfB root number),
     so it can serve as root data for a Bcast over sfB. Entries with rank=-1 mark sfB
     roots that no sfA leaf maps to. */
  if (localPointsA) {
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf outside sfB's root space: no path */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* NOTE(review): when localPointsA is NULL the original remotePointsA array is passed
     directly as sfB's root data; this appears to assume sfA's contiguous leaf space covers
     sfB's roots (numLeavesA >= numRootsB) -- TODO confirm */
  /* Broadcast sfA's root locations through sfB so each sfB leaf learns which sfA root it reaches.
     leafdataB-minleaf shifts the buffer so it can be indexed by global leaf number. */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* Count connected leaves; denseB stays true only if sfB's leaves are contiguous and all connected,
     in which case leafdataB can be used as the remote array directly with no ilocal */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership passes to the new SF below */
  } else {
    /* Compact: keep only leaves with a path back to an sfA root */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2074 
/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not meet.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPI_REPLACE;
#endif

  /* Build leaf data for sfB indexed by (leaf - minleaf): each slot holds the sfA root
     reached from the sfA leaf with the same global index, or rank=-1 for no path */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* sfA leaf not in sfB's leaf range */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* ilocal=NULL: sfA leaves are the contiguous range [0,numLeavesA) */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  /* rank=-1 sentinel: sfB roots receiving no contribution stay marked as unconnected
     (MAXLOC never picks -1 over a real rank) */
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Push each sfA root location from sfB's leaves up to sfB's roots */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only sfB roots that are connected to some sfA root */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2178 
/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt     *ilocal;
  const PetscSFNode  *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode        *liremote;
  PetscSF            lsf;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation build the local SF if it knows how to do it faster */
  if (sf->ops->CreateLocalSF) {
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRMPI(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count edges whose root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the intra-process edges, remapping the remote rank to 0 */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}
2230 
2231 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2232 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2233 {
2234   PetscErrorCode     ierr;
2235   PetscMemType       rootmtype,leafmtype;
2236 
2237   PetscFunctionBegin;
2238   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2239   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
2240   ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
2241   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
2242   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
2243   if (sf->ops->BcastToZero) {
2244     ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
2245   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2246   ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 /*@
2251   PetscSFConcatenate - concatenate multiple SFs into one
2252 
2253   Input Parameters:
2254 + comm - the communicator
2255 . nsfs - the number of input PetscSF
2256 . sfs  - the array of input PetscSF
. shareRoots - flag indicating whether the roots of the input PetscSFs are shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE)
2258 - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage
2259 
  Output Parameter:
2261 . newsf - The resulting PetscSF
2262 
2263   Level: developer
2264 
2265   Notes:
2266   The communicator of all SFs in sfs must be comm.
2267 
2268   The offsets in leafOffsets are added to the original leaf indices.
2269 
2270   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2271   In this case, NULL is also passed as ilocal to the resulting SF.
2272 
2273   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2274   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2275 
2276 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
2277 @*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt            i, s, nLeaves, nRoots;
  PetscInt           *leafArrayOffsets; /* prefix sums of per-SF leaf counts: where each SF's leaves land in the concatenated arrays */
  PetscInt           *ilocal_new;       /* concatenated (and offset) leaf indices of the result; NULL if all inputs are contiguous */
  PetscSFNode        *iremote_new;      /* concatenated remote (rank,index) root locations of the result */
  PetscInt           *rootOffsets;      /* prefix sums of per-SF root counts; stays all-zero when shareRoots */
  PetscBool           all_ilocal_null = PETSC_FALSE;
  PetscMPIInt         rank;
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    ierr = PetscSFCreate(comm,&dummy);CHKERRQ(ierr);
    PetscValidLogicalCollectiveInt(dummy,nsfs,2);
    PetscValidPointer(sfs,3);
    for (i=0; i<nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i],PETSCSF_CLASSID,3);
      PetscCheckSameComm(dummy,1,sfs[i],3);
    }
    PetscValidLogicalCollectiveBool(dummy,shareRoots,4);
    if (leafOffsets) PetscValidIntPointer(leafOffsets,5);
    PetscValidPointer(newsf,6);
    ierr = PetscSFDestroy(&dummy);CHKERRQ(ierr);
  }
  /* Degenerate case: concatenating zero SFs yields an empty SF */
  if (!nsfs) {
    ierr = PetscSFCreate(comm, newsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);

  /* Compute root offsets: zero for all inputs when roots are shared (PetscCalloc1 zeroes),
     otherwise the running sum of local root counts so each SF's roots get a disjoint range */
  ierr = PetscCalloc1(nsfs+1, &rootOffsets);CHKERRQ(ierr);
  if (shareRoots) {
    ierr = PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL);CHKERRQ(ierr);
    if (PetscDefined(USE_DEBUG)) {
      /* With shared roots, every input SF must have the same local root count */
      for (s=1; s<nsfs; s++) {
        PetscInt nr;

        ierr = PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);CHKERRQ(ierr);
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
      }
    }
  } else {
    for (s=0; s<nsfs; s++) {
      PetscInt nr;

      ierr = PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);CHKERRQ(ierr);
      rootOffsets[s+1] = rootOffsets[s] + nr;
    }
    nRoots = rootOffsets[nsfs];
  }

  /* Calculate leaf array offsets and automatic root offsets */
  ierr = PetscMalloc1(nsfs+1,&leafArrayOffsets);CHKERRQ(ierr);
  leafArrayOffsets[0] = 0;
  for (s=0; s<nsfs; s++) {
    PetscInt        nl;

    ierr = PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL);CHKERRQ(ierr);
    leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* leafOffsets == NULL is only legal when every input uses contiguous leaf storage (ilocal == NULL);
     otherwise the caller must supply offsets that keep the concatenated leaves unique */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s=0; s<nsfs; s++) {
      const PetscInt *ilocal;

      ierr = PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL);CHKERRQ(ierr);
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL;
  if (!all_ilocal_null) {
    ierr = PetscMalloc1(nLeaves, &ilocal_new);CHKERRQ(ierr);
    for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1; /* initialize to invalid so gaps are detectable */
    for (s = 0; s<nsfs; s++) {
      const PetscInt   *ilocal;
      PetscInt         *ilocal_l = &ilocal_new[leafArrayOffsets[s]]; /* this SF's slice of the concatenated array */
      PetscInt          i, nleaves_l;

      ierr = PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL);CHKERRQ(ierr);
      for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots */
  ierr = PetscMalloc1(nLeaves, &iremote_new);CHKERRQ(ierr);
  for (i = 0; i < nLeaves; i++) {
    iremote_new[i].rank   = -1;
    iremote_new[i].index  = -1;
  }
  for (s = 0; s<nsfs; s++) {
    PetscInt            i, nl, nr;
    PetscSF             tmp_sf;
    const PetscSFNode  *iremote;
    PetscSFNode        *tmp_rootdata;
    PetscSFNode        *tmp_leafdata = &iremote_new[leafArrayOffsets[s]]; /* broadcast lands directly in the result slice */

    ierr = PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote);CHKERRQ(ierr);
    ierr = PetscSFCreate(comm, &tmp_sf);CHKERRQ(ierr);
    /* create helper SF with contiguous leaves */
    ierr = PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES);CHKERRQ(ierr);
    ierr = PetscSFSetUp(tmp_sf);CHKERRQ(ierr);
    /* Each root advertises its new (offset) index and owning rank; a broadcast then
       delivers the renumbered root locations to all leaves of this input SF */
    ierr = PetscMalloc1(nr, &tmp_rootdata);CHKERRQ(ierr);
    for (i = 0; i < nr; i++) {
      tmp_rootdata[i].index = i + rootOffsets[s];
      tmp_rootdata[i].rank  = (PetscInt) rank;
    }
    ierr = PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(  tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&tmp_sf);CHKERRQ(ierr);
    ierr = PetscFree(tmp_rootdata);CHKERRQ(ierr);
  }

  /* Build the new SF */
  ierr = PetscSFCreate(comm, newsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER);CHKERRQ(ierr);
  ierr = PetscSFSetUp(*newsf);CHKERRQ(ierr);
  ierr = PetscFree(rootOffsets);CHKERRQ(ierr);
  ierr = PetscFree(leafArrayOffsets);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2410