xref: /petsc/src/vec/is/sf/interface/sf.c (revision 76d901e46dda72c1afe96306c7cb4731c47d4e87)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petscctable.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <cuda_runtime.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 #endif
13 
#if defined(PETSC_CLANG_STATIC_ANALYZER)
void PetscSFCheckGraphSet(PetscSF,int);
#else
#if defined(PETSC_USE_DEBUG)
/* Verify PetscSFSetGraph()/PetscSFSetGraphWithPattern() was called on argument 'arg' named 'sf'.
   Note: no trailing semicolon in the expansion -- call sites supply their own ';', and a trailing
   one here would produce a double statement and break use inside a braceless if/else. */
#  define PetscSFCheckGraphSet(sf,arg) PetscCheck((sf)->graphset,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
#endif
23 
24 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
25 
26 /*@
27    PetscSFCreate - create a star forest communication context
28 
29    Collective
30 
31    Input Parameter:
32 .  comm - communicator on which the star forest will operate
33 
34    Output Parameter:
35 .  sf - new star forest context
36 
37    Options Database Keys:
38 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
39 .  -sf_type window    -Use MPI-3 one-sided window for communication
40 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
41 
42    Level: intermediate
43 
44    Notes:
45    When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
46    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
47    SFs are optimized and they have better performance than general SFs.
48 
49 .seealso: `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
50 @*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  PetscCall(PetscSFInitializePackage());

  PetscCall(PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView));

  /* -1 marks "graph not set yet"; PetscSFGetGraph() may legally be called before PetscSFSetGraph() */
  b->nroots    = -1;
  b->nleaves   = -1;
  /* Extremal sentinels so the first graph set can establish the leaf range with simple min/max updates */
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi; /* inherit the library-wide default */
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->unknown_input_stream= PETSC_FALSE;
  #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
    b->backend = PETSCSF_BACKEND_KOKKOS;
  #elif defined(PETSC_HAVE_CUDA)
    b->backend = PETSCSF_BACKEND_CUDA;
  #elif defined(PETSC_HAVE_HIP)
    b->backend = PETSCSF_BACKEND_HIP;
  #endif

  #if defined(PETSC_HAVE_NVSHMEM)
    b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
    b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
    PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL));
    PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL));
  #endif
#endif
  /* Vecscatter-compatibility fields; -1 = sizes not yet known */
  b->vscat.from_n = -1;
  b->vscat.to_n   = -1;
  b->vscat.unit   = MPIU_SCALAR;
 *sf = b;
  PetscFunctionReturn(0);
}
95 
96 /*@
97    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
98 
99    Collective
100 
101    Input Parameter:
102 .  sf - star forest
103 
104    Level: advanced
105 
106 .seealso: `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
107 @*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the type-specific implementation release its own communication state first */
  if (sf->ops->Reset) PetscCall((*sf->ops->Reset)(sf));
  /* Return the graph-describing fields to their PetscSFCreate() defaults */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine/remote may alias mine_alloc/remote_alloc; only the _alloc pointers are owned and freed */
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup  != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
  /* Break the back-pointer before destroying the multi-SF to avoid it resetting us recursively */
  if (sf->multi) sf->multi->multi = NULL;
  PetscCall(PetscSFDestroy(&sf->multi));
  PetscCall(PetscLayoutDestroy(&sf->map));

 #if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i=0; i<2; i++) PetscCall(PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]));
 #endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
139 
140 /*@C
141    PetscSFSetType - Set the PetscSF communication implementation
142 
143    Collective on PetscSF
144 
145    Input Parameters:
146 +  sf - the PetscSF context
147 -  type - a known method
148 
149    Options Database Key:
150 .  -sf_type <type> - Sets the method; use -help for a list
151    of available methods (for instance, window, basic, neighbor)
152 
153    Notes:
154    See "include/petscsf.h" for available methods (for instance)
155 +    PETSCSFWINDOW - MPI-2/3 one-sided
156 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
157 
158   Level: intermediate
159 
160 .seealso: `PetscSFType`, `PetscSFCreate()`
161 @*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscBool      match;
  PetscErrorCode (*r)(PetscSF);

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* No-op when the SF already has the requested type */
  PetscCall(PetscObjectTypeCompare((PetscObject)sf,type,&match));
  if (match) PetscFunctionReturn(0);

  /* Look up the type's registered constructor */
  PetscCall(PetscFunctionListFind(PetscSFList,type,&r));
  PetscCheck(r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) PetscCall((*(sf)->ops->Destroy)(sf));
  /* Clear the ops table so the constructor starts from a clean slate */
  PetscCall(PetscMemzero(sf->ops,sizeof(*sf->ops)));
  PetscCall(PetscObjectChangeTypeName((PetscObject)sf,type));
  PetscCall((*r)(sf));
  PetscFunctionReturn(0);
}
183 
184 /*@C
185   PetscSFGetType - Get the PetscSF communication implementation
186 
187   Not Collective
188 
189   Input Parameter:
190 . sf  - the PetscSF context
191 
192   Output Parameter:
193 . type - the PetscSF type name
194 
195   Level: intermediate
196 
197 .seealso: `PetscSFSetType()`, `PetscSFCreate()`
198 @*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  /* Returns the object's stored type name; NULL if PetscSFSetType() has not been called */
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}
207 
208 /*@C
209    PetscSFDestroy - destroy star forest
210 
211    Collective
212 
213    Input Parameter:
214 .  sf - address of star forest
215 
216    Level: intermediate
217 
218 .seealso: `PetscSFCreate()`, `PetscSFReset()`
219 @*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually destroys the object */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  PetscCall(PetscSFReset(*sf));
  if ((*sf)->ops->Destroy) PetscCall((*(*sf)->ops->Destroy)(*sf));
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* vscat.unit is a duplicated datatype only when bs > 1 (see PetscSFDuplicate()), so free it only then */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(0);
}
233 
234 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
235 {
236   PetscInt           i, nleaves;
237   PetscMPIInt        size;
238   const PetscInt    *ilocal;
239   const PetscSFNode *iremote;
240 
241   PetscFunctionBegin;
242   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
243   PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote));
244   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size));
245   for (i = 0; i < nleaves; i++) {
246     const PetscInt rank = iremote[i].rank;
247     const PetscInt remote = iremote[i].index;
248     const PetscInt leaf = ilocal ? ilocal[i] : i;
249     PetscCheck(rank >= 0 && rank < size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size);
250     PetscCheck(remote >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i);
251     PetscCheck(leaf >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i);
252   }
253   PetscFunctionReturn(0);
254 }
255 
256 /*@
257    PetscSFSetUp - set up communication structures
258 
259    Collective
260 
261    Input Parameter:
262 .  sf - star forest communication object
263 
264    Level: beginner
265 
266 .seealso: `PetscSFSetFromOptions()`, `PetscSFSetType()`
267 @*/
268 PetscErrorCode PetscSFSetUp(PetscSF sf)
269 {
270   PetscFunctionBegin;
271   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
272   PetscSFCheckGraphSet(sf,1);
273   if (sf->setupcalled) PetscFunctionReturn(0);
274   PetscCall(PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0));
275   PetscCall(PetscSFCheckGraphValid_Private(sf));
276   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf,PETSCSFBASIC)); /* Zero all sf->ops */
277   if (sf->ops->SetUp) PetscCall((*sf->ops->SetUp)(sf));
278 #if defined(PETSC_HAVE_CUDA)
279   if (sf->backend == PETSCSF_BACKEND_CUDA) {
280     sf->ops->Malloc = PetscSFMalloc_CUDA;
281     sf->ops->Free   = PetscSFFree_CUDA;
282   }
283 #endif
284 #if defined(PETSC_HAVE_HIP)
285   if (sf->backend == PETSCSF_BACKEND_HIP) {
286     sf->ops->Malloc = PetscSFMalloc_HIP;
287     sf->ops->Free   = PetscSFFree_HIP;
288   }
289 #endif
290 
291 #
292 #if defined(PETSC_HAVE_KOKKOS)
293   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
294     sf->ops->Malloc = PetscSFMalloc_Kokkos;
295     sf->ops->Free   = PetscSFFree_Kokkos;
296   }
297 #endif
298   PetscCall(PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0));
299   sf->setupcalled = PETSC_TRUE;
300   PetscFunctionReturn(0);
301 }
302 
303 /*@
304    PetscSFSetFromOptions - set PetscSF options using the options database
305 
306    Logically Collective
307 
308    Input Parameter:
309 .  sf - star forest
310 
311    Options Database Keys:
312 +  -sf_type               - implementation type, see PetscSFSetType()
313 .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
314 .  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
315                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
316                             If true, this option only works with -use_gpu_aware_mpi 1.
317 .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
318                                If true, this option only works with -use_gpu_aware_mpi 1.
319 
320 -  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
321                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
322                               the only available is kokkos.
323 
324    Level: intermediate
325 @*/
326 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
327 {
328   PetscSFType    deft;
329   char           type[256];
330   PetscBool      flg;
331 
332   PetscFunctionBegin;
333   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
334   PetscObjectOptionsBegin((PetscObject)sf);
335   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
336   PetscCall(PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg));
337   PetscCall(PetscSFSetType(sf,flg ? type : deft));
338   PetscCall(PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL));
339  #if defined(PETSC_HAVE_DEVICE)
340   {
341     char        backendstr[32] = {0};
342     PetscBool   isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
343     /* Change the defaults set in PetscSFCreate() with command line options */
344     PetscCall(PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL));
345     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL));
346     PetscCall(PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set));
347     PetscCall(PetscStrcasecmp("cuda",backendstr,&isCuda));
348     PetscCall(PetscStrcasecmp("kokkos",backendstr,&isKokkos));
349     PetscCall(PetscStrcasecmp("hip",backendstr,&isHip));
350   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
351     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
352     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
353     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
354     else PetscCheck(!set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
355   #elif defined(PETSC_HAVE_KOKKOS)
356     PetscCheck(!set || isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
357    #endif
358   }
359  #endif
360   if (sf->ops->SetFromOptions) PetscCall((*sf->ops->SetFromOptions)(PetscOptionsObject,sf));
361   PetscOptionsEnd();
362   PetscFunctionReturn(0);
363 }
364 
365 /*@
366    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
367 
368    Logically Collective
369 
370    Input Parameters:
371 +  sf - star forest
372 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
373 
374    Level: advanced
375 
376 .seealso: `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
377 @*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF caches the ordering, so this flag must be fixed before it is built */
  PetscCheck(!sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}
387 
388 /*@C
389    PetscSFSetGraph - Set a parallel star forest
390 
391    Collective
392 
393    Input Parameters:
394 +  sf - star forest
395 .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
396 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
397 .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
398 during setup in debug mode)
399 .  localmode - copy mode for ilocal
400 .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
401 during setup in debug mode)
402 -  remotemode - copy mode for iremote
403 
404    Level: intermediate
405 
406    Notes:
407    Leaf indices in ilocal must be unique, otherwise an error occurs.
408 
409    Input arrays ilocal and iremote follow the PetscCopyMode semantics.
410    In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
411    PETSc might modify the respective array;
412    if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
413    Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).
414 
415    Fortran Notes:
416    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.
417 
418    Developer Notes:
419    We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
420    This also allows to compare leaf sets of two SFs easily.
421 
422 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
423 @*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscBool       unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  PetscCheck(nroots  >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots);
  PetscCheck(nleaves >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves);
  /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
   * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
  PetscCheck((int)localmode  >= PETSC_COPY_VALUES && localmode  <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong localmode %d",localmode);
  PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong remotemode %d",remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0));

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES we work on private copies, so the caller's arrays stay intact
     (see the manpage Notes: OWN/USE_POINTER arrays may be reordered in place below) */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves,&tlocal));
    PetscCall(PetscArraycpy(tlocal,ilocal,nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves,&tremote));
    PetscCall(PetscArraycpy(tremote,iremote,nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode   work;

    /* Sort leaves (dragging iremote along) so we can detect duplicates/contiguity and read min/max leaf
       from the endpoints */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    /* PetscSortedCheckDupsInt() reports TRUE when duplicates exist, hence the negation */
    unique = PetscNot(unique);
    PetscCheck(sf->allow_multi_leaves || unique,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves-1];
    contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1);
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* A contiguous 0..nleaves-1 leaf set is canonically represented by ilocal == NULL */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  /* mine/remote are the views used for communication; *_alloc are the owned allocations
     (NULL for USE_POINTER, where the user keeps ownership) */
  sf->mine            = ilocal;
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc    = NULL;
  } else {
    sf->mine_alloc    = ilocal;
  }
  sf->remote          = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc  = NULL;
  } else {
    sf->remote_alloc  = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
503 
504 /*@
505   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
506 
507   Collective
508 
509   Input Parameters:
510 + sf      - The PetscSF
511 . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
512 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
513 
514   Notes:
515   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
516   n and N are local and global sizes of x respectively.
517 
518   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
519   sequential vectors y on all ranks.
520 
521   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
522   sequential vector y on rank 0.
523 
524   In above cases, entries of x are roots and entries of y are leaves.
525 
526   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
527   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
528   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
529   not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
530   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
531 
532   In this case, roots and leaves are symmetric.
533 
534   Level: intermediate
535  @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern);
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* map is insignificant for ALLTOALL; build the canonical layout with 'size' roots per rank */
    PetscCall(PetscLayoutCreate(comm,&sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map,size));
    PetscCall(PetscLayoutSetSize(sf->map,((PetscInt)size)*size));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map,&n));
    PetscCall(PetscLayoutGetSize(map,&N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it:
       after MPI_MAX, res[0] = max(n) and res[1] = -min(n), so all n equal iff res[0] == -res[1] */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map,&sf->map));
  }
  PetscCall(PetscSFSetType(sf,type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves (it receives everything); other ranks have empty leaf ranges */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
602 
603 /*@
604    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
605 
606    Collective
607 
608    Input Parameter:
609 .  sf - star forest to invert
610 
611    Output Parameter:
612 .  isf - inverse of sf
613 
614    Level: advanced
615 
616    Notes:
617    All roots must have degree 1.
618 
619    The local space may be a permutation, but cannot be sparse.
620 
621 .seealso: `PetscSFSetGraph()`
622 @*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL));
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
  PetscCall(PetscMalloc2(nroots,&roots,maxlocal,&leaves));
  /* Tag every leaf slot with its own (rank, index) identity */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* rank == -1 marks roots that receive nothing, so sparsity can be detected after the reduce */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's identity to its root; roots have degree 1 (precondition), so MPI_REPLACE is
     unambiguous. NOTE(review): PetscSFNode is shipped as MPIU_2INT, i.e. a pair of PetscInts */
  PetscCall(PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* dense: the inverse's leaves are contiguous */
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    PetscCall(PetscMalloc1(count,&newilocal));
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* newilocal ownership transfers to the new SF (PETSC_OWN_POINTER); roots is copied and freed below */
  PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf));
  PetscCall(PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots,leaves));
  PetscFunctionReturn(0);
}
671 
672 /*@
673    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
674 
675    Collective
676 
677    Input Parameters:
678 +  sf - communication object to duplicate
679 -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
680 
681    Output Parameter:
682 .  newsf - new communication object
683 
684    Level: beginner
685 
686 .seealso: `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
687 @*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  MPI_Datatype   dtype=MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf));
  PetscCall(PetscSFGetType(sf,&type));
  if (type) PetscCall(PetscSFSetType(*newsf,type));
  (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graph: deep-copy ilocal/iremote so the two SFs are independent */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
      PetscCall(PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES));
    } else {
      /* Patterned graph: rebuild from the pattern and the (referenced) layout */
      PetscCall(PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit,&dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream= sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  if (sf->ops->Duplicate) PetscCall((*sf->ops->Duplicate)(sf,opt,*newsf));
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}
731 
732 /*@C
733    PetscSFGetGraph - Get the graph specifying a parallel star forest
734 
735    Not Collective
736 
737    Input Parameter:
738 .  sf - star forest
739 
740    Output Parameters:
741 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
742 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
743 .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
744 -  iremote - remote locations of root vertices for each leaf on the current process
745 
746    Notes:
747      We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
748 
749      The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
750      see its manpage for details.
751 
752    Fortran Notes:
753      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
754      want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
755 
756      To check for a NULL ilocal use
757 $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
758 
759    Level: intermediate
760 
761 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
762 @*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* An implementation may provide its own GetGraph (e.g. patterned SFs); otherwise return the cached fields */
  if (sf->ops->GetGraph) {
    PetscCall((sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote));
  } else {
    /* Each output is optional; nroots is -1 when no graph has been set (see manpage Notes) */
    if (nroots) *nroots = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal) *ilocal = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}
777 
778 /*@
779    PetscSFGetLeafRange - Get the active leaf ranges
780 
781    Not Collective
782 
783    Input Parameter:
784 .  sf - star forest
785 
786    Output Parameters:
787 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
788 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
789 
790    Level: developer
791 
792 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
793 @*/
794 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
795 {
796   PetscFunctionBegin;
797   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
798   PetscSFCheckGraphSet(sf,1);
799   if (minleaf) *minleaf = sf->minleaf;
800   if (maxleaf) *maxleaf = sf->maxleaf;
801   PetscFunctionReturn(0);
802 }
803 
804 /*@C
805    PetscSFViewFromOptions - View from Options
806 
807    Collective on PetscSF
808 
809    Input Parameters:
810 +  A - the star forest
811 .  obj - Optional object
812 -  name - command line option
813 
   Level: intermediate

.seealso: `PetscSF`, `PetscSFView()`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
816 @*/
817 PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
818 {
819   PetscFunctionBegin;
820   PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
821   PetscCall(PetscObjectViewFromOptions((PetscObject)A,obj,name));
822   PetscFunctionReturn(0);
823 }
824 
825 /*@C
826    PetscSFView - view a star forest
827 
828    Collective
829 
830    Input Parameters:
831 +  sf - star forest
832 -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
833 
834    Level: beginner
835 
836 .seealso: `PetscSFCreate()`, `PetscSFSetGraph()`
837 @*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is supplied */
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer));
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Set up first so the per-rank tables (ranks/roffset/rmine/rremote) exist for detailed output */
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    /* Only a general graph has explicit edge lists worth printing */
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        /* Early return: pop the tab pushed above before leaving */
        PetscCall(PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(0);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks));
      /* One line per leaf: local leaf index <- (remote rank, remote root index) */
      for (i=0; i<sf->nleaves; i++) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer,&format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list, tracking the permutation, so output is ordered
           by rank without disturbing sf->ranks itself */
        PetscCall(PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm));
        PetscCall(PetscArraycpy(tmpranks,sf->ranks,sf->nranks));
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank));
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]));
          /* roffset[i]..roffset[i+1] delimits this rank's slice of rmine/rremote */
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]));
          }
        }
        PetscCall(PetscFree2(tmpranks,perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  /* Give the implementation a chance to append type-specific output */
  if (sf->ops->View) PetscCall((*sf->ops->View)(sf,viewer));
  PetscFunctionReturn(0);
}
894 
895 /*@C
896    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
897 
898    Not Collective
899 
900    Input Parameter:
901 .  sf - star forest
902 
903    Output Parameters:
904 +  nranks - number of ranks referenced by local part
905 .  ranks - array of ranks
906 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
907 .  rmine - concatenated array holding local indices referencing each remote rank
908 -  rremote - concatenated array holding remote indices referenced for each remote rank
909 
910    Level: developer
911 
912 .seealso: `PetscSFGetLeafRanks()`
913 @*/
914 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
915 {
916   PetscFunctionBegin;
917   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
918   PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
919   if (sf->ops->GetRootRanks) {
920     PetscCall((sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote));
921   } else {
922     /* The generic implementation */
923     if (nranks)  *nranks  = sf->nranks;
924     if (ranks)   *ranks   = sf->ranks;
925     if (roffset) *roffset = sf->roffset;
926     if (rmine)   *rmine   = sf->rmine;
927     if (rremote) *rremote = sf->rremote;
928   }
929   PetscFunctionReturn(0);
930 }
931 
932 /*@C
933    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
934 
935    Not Collective
936 
937    Input Parameter:
938 .  sf - star forest
939 
940    Output Parameters:
941 +  niranks - number of leaf ranks referencing roots on this process
942 .  iranks - array of ranks
943 .  ioffset - offset in irootloc for each rank (length niranks+1)
944 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
945 
946    Level: developer
947 
948 .seealso: `PetscSFGetRootRanks()`
949 @*/
950 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
951 {
952   PetscFunctionBegin;
953   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
954   PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
955   if (sf->ops->GetLeafRanks) {
956     PetscCall((sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc));
957   } else {
958     PetscSFType type;
959     PetscCall(PetscSFGetType(sf,&type));
960     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
961   }
962   PetscFunctionReturn(0);
963 }
964 
965 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
966   PetscInt i;
967   for (i=0; i<n; i++) {
968     if (needle == list[i]) return PETSC_TRUE;
969   }
970   return PETSC_FALSE;
971 }
972 
973 /*@C
974    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
975 
976    Collective
977 
978    Input Parameters:
979 +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
980 -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
981 
982    Level: developer
983 
984 .seealso: `PetscSFGetRootRanks()`
985 @*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size));
  /* Count, per remote rank, how many of our leaves reference it: table maps rank+1 -> leaf count */
  PetscCall(PetscTableCreate(10,size,&table));
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    PetscCall(PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES));
  }
  PetscCall(PetscTableGetCount(table,&sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks));
  /* Drain the table into parallel arrays: ranks[] (referenced ranks) and rcount[] (leaves per rank) */
  PetscCall(PetscTableGetHeadPosition(table,&pos));
  for (i=0; i<sf->nranks; i++) {
    PetscCall(PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]));
    ranks[i]--;             /* Convert back to 0-based */
  }
  PetscCall(PetscTableDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate every member of dgroup into its rank in the SF's communicator; groupranks[]
       then holds the distinguished ranks in comm-rank numbering */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_size(dgroup,&groupsize));
    PetscCall(PetscMalloc1(groupsize,&dgroupranks));
    PetscCall(PetscMalloc1(groupsize,&groupranks));
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  /* Two-pointer partition (quicksort-style): i moves down from the top, sf->ndranks moves up;
     out-of-place entries are swapped (with their counts) until the pointers meet */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition separately so both halves of ranks[] are ascending */
  PetscCall(PetscSortIntWithArray(sf->ndranks,ranks,rcount));
  PetscCall(PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks));
  /* Prefix-sum the counts into roffset[]; rcount[] is then reused as a per-rank fill cursor */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i],sf->ranks+i));
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Scatter each leaf into its rank's slice of rmine[]/rremote[] */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: reuse irank while consecutive leaves reference the same rank (orank) */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks: first the distinguished half, then the rest */
      PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank));
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheck(irank >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount,ranks));
  PetscFunctionReturn(0);
}
1075 
1076 /*@C
1077    PetscSFGetGroups - gets incoming and outgoing process groups
1078 
1079    Collective
1080 
1081    Input Parameter:
1082 .  sf - star forest
1083 
1084    Output Parameters:
1085 +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1086 -  outgoing - group of destination processes for outgoing edges (roots that I reference)
1087 
1088    Level: developer
1089 
1090 .seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1091 @*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheck(sf->nranks >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  /* Both groups are computed lazily and cached on the SF (sf->ingroup/sf->outgroup);
     NOTE(review): the caller receives the cached handles, so presumably must not free them — confirm */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    /* Build a helper SF with a single root per process and one leaf per referenced rank,
       all pointing at that rank's root 0; indegree[0] then counts the processes that reference us */
    PetscCall(PetscMalloc1(sf->nranks,&remote));
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount));
    PetscCall(PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount,&indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount,&indegree));
    /* Enumerate the incoming ranks */
    /* Each process gathers, at its root, the rank id of every process whose leaves point at it */
    PetscCall(PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks));
    PetscCall(PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks,outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  /* The outgoing group is simply the ranks our leaves reference, already listed in sf->ranks */
  if (sf->outgroup == MPI_GROUP_NULL) {
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1137 
1138 /*@
1139    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1140 
1141    Collective
1142 
1143    Input Parameter:
1144 .  sf - star forest that may contain roots with 0 or with more than 1 vertex
1145 
1146    Output Parameter:
1147 .  multi - star forest with split roots, such that each root has degree exactly 1
1148 
1149    Level: developer
1150 
1151    Notes:
1152 
1153    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1154    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1155    edge, it is a candidate for future optimization that might involve its removal.
1156 
1157 .seealso: `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1158 @*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi));
    *multi = sf->multi;
    sf->multi->multi = sf->multi;  /* a multi-SF is its own multi-SF */
    PetscFunctionReturn(0);
  }
  /* Build lazily and cache on the SF */
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    PetscCall(PetscSFComputeDegreeBegin(sf,&indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf,&indegree));
    /* inoffset[] is the prefix sum of root degrees: root i's split copies occupy
       [inoffset[i], inoffset[i+1]) in the multi-SF's root space */
    PetscCall(PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset));
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands each leaf a distinct offset within its root's slot (in outoffset[]),
       while incrementing inoffset[] by the degree as a side effect */
    PetscCall(PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM));
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        PetscCheck(inoffset[i] + indegree[i] == inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    /* Each leaf keeps its rank but now targets its private copy of the root */
    PetscCall(PetscMalloc1(sf->nleaves,&remote));
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER));
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset));
      /* Collect, at each split root, the rank of the leaf that owns it */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset));
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Send each leaf its rank-ordered offset and rebuild the graph with it */
      PetscCall(PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves,&newremote));
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset));
    }
    PetscCall(PetscFree3(inoffset,outones,outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1229 
1230 /*@C
1231    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1232 
1233    Collective
1234 
1235    Input Parameters:
1236 +  sf - original star forest
1237 .  nselected  - number of selected roots on this process
1238 -  selected   - indices of the selected roots on this process
1239 
1240    Output Parameter:
1241 .  esf - new star forest
1242 
1243    Level: advanced
1244 
1245    Note:
1246    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1247    be done by calling PetscSFGetGraph().
1248 
1249 .seealso: `PetscSFSetGraph()`, `PetscSFGetGraph()`
1250 @*/
1251 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1252 {
1253   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1254   const PetscInt    *ilocal;
1255   signed char       *rootdata,*leafdata,*leafmem;
1256   const PetscSFNode *iremote;
1257   PetscSFNode       *new_iremote;
1258   MPI_Comm          comm;
1259 
1260   PetscFunctionBegin;
1261   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1262   PetscSFCheckGraphSet(sf,1);
1263   if (nselected) PetscValidIntPointer(selected,3);
1264   PetscValidPointer(esf,4);
1265 
1266   PetscCall(PetscSFSetUp(sf));
1267   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0));
1268   PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
1269   PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
1270 
1271   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1272     PetscBool dups;
1273     PetscCall(PetscCheckDupsInt(nselected,selected,&dups));
1274     PetscCheck(!dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1275     for (i=0; i<nselected; i++)
1276       PetscCheck(selected[i] >= 0 && selected[i] < nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots);
1277   }
1278 
1279   if (sf->ops->CreateEmbeddedRootSF) {
1280     PetscCall((*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf));
1281   } else {
1282     /* A generic version of creating embedded sf */
1283     PetscCall(PetscSFGetLeafRange(sf,&minleaf,&maxleaf));
1284     maxlocal = maxleaf - minleaf + 1;
1285     PetscCall(PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem));
1286     leafdata = leafmem - minleaf;
1287     /* Tag selected roots and bcast to leaves */
1288     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1289     PetscCall(PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE));
1290     PetscCall(PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE));
1291 
1292     /* Build esf with leaves that are still connected */
1293     esf_nleaves = 0;
1294     for (i=0; i<nleaves; i++) {
1295       j = ilocal ? ilocal[i] : i;
1296       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1297          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1298       */
1299       esf_nleaves += (leafdata[j] ? 1 : 0);
1300     }
1301     PetscCall(PetscMalloc1(esf_nleaves,&new_ilocal));
1302     PetscCall(PetscMalloc1(esf_nleaves,&new_iremote));
1303     for (i=n=0; i<nleaves; i++) {
1304       j = ilocal ? ilocal[i] : i;
1305       if (leafdata[j]) {
1306         new_ilocal[n]        = j;
1307         new_iremote[n].rank  = iremote[i].rank;
1308         new_iremote[n].index = iremote[i].index;
1309         ++n;
1310       }
1311     }
1312     PetscCall(PetscSFCreate(comm,esf));
1313     PetscCall(PetscSFSetFromOptions(*esf));
1314     PetscCall(PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER));
1315     PetscCall(PetscFree2(rootdata,leafmem));
1316   }
1317   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0));
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 /*@C
1322   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1323 
1324   Collective
1325 
1326   Input Parameters:
1327 + sf - original star forest
1328 . nselected  - number of selected leaves on this process
1329 - selected   - indices of the selected leaves on this process
1330 
1331   Output Parameter:
1332 .  newsf - new star forest
1333 
1334   Level: advanced
1335 
1336 .seealso: `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1337 @*/
1338 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1339 {
1340   const PetscSFNode *iremote;
1341   PetscSFNode       *new_iremote;
1342   const PetscInt    *ilocal;
1343   PetscInt          i,nroots,*leaves,*new_ilocal;
1344   MPI_Comm          comm;
1345 
1346   PetscFunctionBegin;
1347   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1348   PetscSFCheckGraphSet(sf,1);
1349   if (nselected) PetscValidIntPointer(selected,3);
1350   PetscValidPointer(newsf,4);
1351 
1352   /* Uniq selected[] and put results in leaves[] */
1353   PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
1354   PetscCall(PetscMalloc1(nselected,&leaves));
1355   PetscCall(PetscArraycpy(leaves,selected,nselected));
1356   PetscCall(PetscSortedRemoveDupsInt(&nselected,leaves));
1357   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves);
1358 
1359   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1360   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1361     PetscCall((*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf));
1362   } else {
1363     PetscCall(PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote));
1364     PetscCall(PetscMalloc1(nselected,&new_ilocal));
1365     PetscCall(PetscMalloc1(nselected,&new_iremote));
1366     for (i=0; i<nselected; ++i) {
1367       const PetscInt l     = leaves[i];
1368       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1369       new_iremote[i].rank  = iremote[l].rank;
1370       new_iremote[i].index = iremote[l].index;
1371     }
1372     PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf));
1373     PetscCall(PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER));
1374   }
1375   PetscCall(PetscFree(leaves));
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 /*@C
1380    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1381 
1382    Collective on PetscSF
1383 
1384    Input Parameters:
1385 +  sf - star forest on which to communicate
1386 .  unit - data type associated with each node
1387 .  rootdata - buffer to broadcast
1388 -  op - operation to use for reduction
1389 
1390    Output Parameter:
1391 .  leafdata - buffer to be reduced with values from each leaf's respective root
1392 
1393    Level: intermediate
1394 
   Notes:
    When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
    are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
    use PetscSFBcastWithMemTypeBegin() instead.
1399 .seealso: `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1400 @*/
1401 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1402 {
1403   PetscMemType   rootmtype,leafmtype;
1404 
1405   PetscFunctionBegin;
1406   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1407   PetscCall(PetscSFSetUp(sf));
1408   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
1409   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1410   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1411   PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op));
1412   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 /*@C
1417    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()
1418 
1419    Collective on PetscSF
1420 
1421    Input Parameters:
1422 +  sf - star forest on which to communicate
1423 .  unit - data type associated with each node
1424 .  rootmtype - memory type of rootdata
1425 .  rootdata - buffer to broadcast
1426 .  leafmtype - memory type of leafdata
1427 -  op - operation to use for reduction
1428 
1429    Output Parameter:
1430 .  leafdata - buffer to be reduced with values from each leaf's respective root
1431 
1432    Level: intermediate
1433 
1434 .seealso: `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1435 @*/
1436 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1437 {
1438   PetscFunctionBegin;
1439   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1440   PetscCall(PetscSFSetUp(sf));
1441   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
1442   PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op));
1443   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 /*@C
1448    PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()
1449 
1450    Collective
1451 
1452    Input Parameters:
1453 +  sf - star forest
1454 .  unit - data type
1455 .  rootdata - buffer to broadcast
1456 -  op - operation to use for reduction
1457 
1458    Output Parameter:
1459 .  leafdata - buffer to be reduced with values from each leaf's respective root
1460 
1461    Level: intermediate
1462 
1463 .seealso: `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1464 @*/
1465 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1466 {
1467   PetscFunctionBegin;
1468   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1469   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0));
1470   PetscCall((*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op));
1471   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0));
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 /*@C
1476    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1477 
1478    Collective
1479 
1480    Input Parameters:
1481 +  sf - star forest
1482 .  unit - data type
1483 .  leafdata - values to reduce
1484 -  op - reduction operation
1485 
1486    Output Parameter:
1487 .  rootdata - result of reduction of values from all leaves of each root
1488 
1489    Level: intermediate
1490 
   Notes:
    When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
    are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
    use PetscSFReduceWithMemTypeBegin() instead.
1495 
1496 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`
1497 @*/
1498 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1499 {
1500   PetscMemType   rootmtype,leafmtype;
1501 
1502   PetscFunctionBegin;
1503   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1504   PetscCall(PetscSFSetUp(sf));
1505   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0));
1506   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1507   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1508   PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op));
1509   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0));
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 /*@C
1514    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1515 
1516    Collective
1517 
1518    Input Parameters:
1519 +  sf - star forest
1520 .  unit - data type
1521 .  leafmtype - memory type of leafdata
1522 .  leafdata - values to reduce
1523 .  rootmtype - memory type of rootdata
1524 -  op - reduction operation
1525 
1526    Output Parameter:
1527 .  rootdata - result of reduction of values from all leaves of each root
1528 
1529    Level: intermediate
1530 
1531 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceBegin()`
1532 @*/
1533 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1534 {
1535   PetscFunctionBegin;
1536   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1537   PetscCall(PetscSFSetUp(sf));
1538   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0));
1539   PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op));
1540   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0));
1541   PetscFunctionReturn(0);
1542 }
1543 
/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: `PetscSFSetGraph()`, `PetscSFBcastEnd()`
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Skip SF-level logging when driven from VecScatter, which logs its own events */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0));
  /* Complete the communication started by the matching PetscSFReduceBegin()/PetscSFReduceWithMemTypeBegin() */
  PetscCall((*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op));
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0));
  PetscFunctionReturn(0);
}
1571 
1572 /*@C
1573    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1574 
1575    Collective
1576 
1577    Input Parameters:
1578 +  sf - star forest
1579 .  unit - data type
1580 .  leafdata - leaf values to use in reduction
1581 -  op - operation to use for reduction
1582 
1583    Output Parameters:
1584 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1585 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1586 
1587    Level: advanced
1588 
1589    Note:
1590    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1591    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1592    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1593    integers.
1594 
1595 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1596 @*/
1597 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1598 {
1599   PetscMemType   rootmtype,leafmtype,leafupdatemtype;
1600 
1601   PetscFunctionBegin;
1602   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1603   PetscCall(PetscSFSetUp(sf));
1604   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0));
1605   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1606   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1607   PetscCall(PetscGetMemType(leafupdate,&leafupdatemtype));
1608   PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1609   PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op));
1610   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0));
1611   PetscFunctionReturn(0);
1612 }
1613 
/*@C
   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootmtype - memory type of rootdata
.  leafmtype - memory type of leafdata
.  leafdata - leaf values to use in reduction
.  leafupdatemtype - memory type of leafupdate
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note: See PetscSFFetchAndOpBegin() for more details.

.seealso: `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  /* Memory types are caller-supplied here (no PetscGetMemType queries); only validate the leaf buffers match */
  PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  /* Hand off to the type-specific implementation */
  PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op));
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}
1649 
/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  /* Complete the communication started by PetscSFFetchAndOpBegin()/PetscSFFetchAndOpWithMemTypeBegin() */
  PetscCall((*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op));
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  PetscFunctionReturn(0);
}
1678 
1679 /*@C
1680    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1681 
1682    Collective
1683 
1684    Input Parameter:
1685 .  sf - star forest
1686 
1687    Output Parameter:
1688 .  degree - degree of each root vertex
1689 
1690    Level: advanced
1691 
1692    Notes:
1693    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1694 
1695 .seealso: `PetscSFGatherBegin()`
1696 @*/
1697 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1698 {
1699   PetscFunctionBegin;
1700   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1701   PetscSFCheckGraphSet(sf,1);
1702   PetscValidPointer(degree,2);
1703   if (!sf->degreeknown) {
1704     PetscInt i, nroots = sf->nroots, maxlocal;
1705     PetscCheck(!sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1706     maxlocal = sf->maxleaf-sf->minleaf+1;
1707     PetscCall(PetscMalloc1(nroots,&sf->degree));
1708     PetscCall(PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1709     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1710     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1711     PetscCall(PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
1712   }
1713   *degree = NULL;
1714   PetscFunctionReturn(0);
1715 }
1716 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: `PetscSFComputeDegreeBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscCheck(sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Finish summing the per-leaf contributions of 1 into sf->degree */
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    /* Cache the result so subsequent Begin/End pairs return immediately */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1750 
1751 /*@C
1752    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1753    Each multi-root is assigned index of the corresponding original root.
1754 
1755    Collective
1756 
1757    Input Parameters:
1758 +  sf - star forest
1759 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1760 
1761    Output Parameters:
1762 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1763 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1764 
1765    Level: developer
1766 
1767    Notes:
1768    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1769 
1770 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1771 @*/
1772 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1773 {
1774   PetscSF             msf;
1775   PetscInt            i, j, k, nroots, nmroots;
1776 
1777   PetscFunctionBegin;
1778   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1779   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1780   if (nroots) PetscValidIntPointer(degree,2);
1781   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1782   PetscValidPointer(multiRootsOrigNumbering,4);
1783   PetscCall(PetscSFGetMultiSF(sf,&msf));
1784   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1785   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1786   for (i=0,j=0,k=0; i<nroots; i++) {
1787     if (!degree[i]) continue;
1788     for (j=0; j<degree[i]; j++,k++) {
1789       (*multiRootsOrigNumbering)[k] = i;
1790     }
1791   }
1792   PetscCheck(k == nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1793   if (nMultiRoots) *nMultiRoots = nmroots;
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 /*@C
1798    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1799 
1800    Collective
1801 
1802    Input Parameters:
1803 +  sf - star forest
1804 .  unit - data type
1805 -  leafdata - leaf data to gather to roots
1806 
1807    Output Parameter:
1808 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1809 
1810    Level: intermediate
1811 
1812 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1813 @*/
1814 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1815 {
1816   PetscSF        multi = NULL;
1817 
1818   PetscFunctionBegin;
1819   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1820   PetscCall(PetscSFSetUp(sf));
1821   PetscCall(PetscSFGetMultiSF(sf,&multi));
1822   PetscCall(PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE));
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 /*@C
1827    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1828 
1829    Collective
1830 
1831    Input Parameters:
1832 +  sf - star forest
1833 .  unit - data type
1834 -  leafdata - leaf data to gather to roots
1835 
1836    Output Parameter:
1837 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1838 
1839    Level: intermediate
1840 
1841 .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1842 @*/
1843 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1844 {
1845   PetscSF        multi = NULL;
1846 
1847   PetscFunctionBegin;
1848   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1849   PetscCall(PetscSFGetMultiSF(sf,&multi));
1850   PetscCall(PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE));
1851   PetscFunctionReturn(0);
1852 }
1853 
1854 /*@C
1855    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1856 
1857    Collective
1858 
1859    Input Parameters:
1860 +  sf - star forest
1861 .  unit - data type
1862 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1863 
1864    Output Parameter:
1865 .  leafdata - leaf data to be update with personal data from each respective root
1866 
1867    Level: intermediate
1868 
1869 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1870 @*/
1871 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1872 {
1873   PetscSF        multi = NULL;
1874 
1875   PetscFunctionBegin;
1876   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1877   PetscCall(PetscSFSetUp(sf));
1878   PetscCall(PetscSFGetMultiSF(sf,&multi));
1879   PetscCall(PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE));
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 /*@C
1884    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1885 
1886    Collective
1887 
1888    Input Parameters:
1889 +  sf - star forest
1890 .  unit - data type
1891 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1892 
1893    Output Parameter:
1894 .  leafdata - leaf data to be update with personal data from each respective root
1895 
1896    Level: intermediate
1897 
1898 .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1899 @*/
1900 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1901 {
1902   PetscSF        multi = NULL;
1903 
1904   PetscFunctionBegin;
1905   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1906   PetscCall(PetscSFGetMultiSF(sf,&multi));
1907   PetscCall(PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE));
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1912 {
1913   PetscInt        i, n, nleaves;
1914   const PetscInt *ilocal = NULL;
1915   PetscHSetI      seen;
1916 
1917   PetscFunctionBegin;
1918   if (PetscDefined(USE_DEBUG)) {
1919     PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL));
1920     PetscCall(PetscHSetICreate(&seen));
1921     for (i = 0; i < nleaves; i++) {
1922       const PetscInt leaf = ilocal ? ilocal[i] : i;
1923       PetscCall(PetscHSetIAdd(seen,leaf));
1924     }
1925     PetscCall(PetscHSetIGetSize(seen,&n));
1926     PetscCheck(n == nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1927     PetscCall(PetscHSetIDestroy(&seen));
1928   }
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 /*@
1933   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1934 
1935   Input Parameters:
1936 + sfA - The first PetscSF
1937 - sfB - The second PetscSF
1938 
1939   Output Parameters:
1940 . sfBA - The composite SF
1941 
1942   Level: developer
1943 
1944   Notes:
1945   Currently, the two SFs must be defined on congruent communicators and they must be true star
1946   forests, i.e. the same leaf is not connected with different roots.
1947 
1948   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
1949   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1950   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1951   Bcast on sfA, then a Bcast on sfB, on connected nodes.
1952 
1953 .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
1954 @*/
1955 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1956 {
1957   const PetscSFNode *remotePointsA,*remotePointsB;
1958   PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1959   const PetscInt    *localPointsA,*localPointsB;
1960   PetscInt          *localPointsBA;
1961   PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1962   PetscBool         denseB;
1963 
1964   PetscFunctionBegin;
1965   PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
1966   PetscSFCheckGraphSet(sfA,1);
1967   PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
1968   PetscSFCheckGraphSet(sfB,2);
1969   PetscCheckSameComm(sfA,1,sfB,2);
1970   PetscValidPointer(sfBA,3);
1971   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
1972   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
1973 
1974   PetscCall(PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA));
1975   PetscCall(PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB));
1976   if (localPointsA) {
1977     PetscCall(PetscMalloc1(numRootsB,&reorderedRemotePointsA));
1978     for (i=0; i<numRootsB; i++) {
1979       reorderedRemotePointsA[i].rank = -1;
1980       reorderedRemotePointsA[i].index = -1;
1981     }
1982     for (i=0; i<numLeavesA; i++) {
1983       if (localPointsA[i] >= numRootsB) continue;
1984       reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1985     }
1986     remotePointsA = reorderedRemotePointsA;
1987   }
1988   PetscCall(PetscSFGetLeafRange(sfB,&minleaf,&maxleaf));
1989   PetscCall(PetscMalloc1(maxleaf-minleaf+1,&leafdataB));
1990   PetscCall(PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
1991   PetscCall(PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
1992   PetscCall(PetscFree(reorderedRemotePointsA));
1993 
1994   denseB = (PetscBool)!localPointsB;
1995   for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1996     if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
1997     else numLeavesBA++;
1998   }
1999   if (denseB) {
2000     localPointsBA  = NULL;
2001     remotePointsBA = leafdataB;
2002   } else {
2003     PetscCall(PetscMalloc1(numLeavesBA,&localPointsBA));
2004     PetscCall(PetscMalloc1(numLeavesBA,&remotePointsBA));
2005     for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
2006       const PetscInt l = localPointsB ? localPointsB[i] : i;
2007 
2008       if (leafdataB[l-minleaf].rank == -1) continue;
2009       remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
2010       localPointsBA[numLeavesBA] = l;
2011       numLeavesBA++;
2012     }
2013     PetscCall(PetscFree(leafdataB));
2014   }
2015   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
2016   PetscCall(PetscSFSetFromOptions(*sfBA));
2017   PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 /*@
2022   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
2023 
2024   Input Parameters:
2025 + sfA - The first PetscSF
2026 - sfB - The second PetscSF
2027 
2028   Output Parameters:
2029 . sfBA - The composite SF.
2030 
2031   Level: developer
2032 
2033   Notes:
2034   Currently, the two SFs must be defined on congruent communicators and they must be true star
2035   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
2036   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.
2037 
2038   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
2039   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2040   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2041   a Reduce on sfB, on connected roots.
2042 
2043 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2044 @*/
2045 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
2046 {
2047   const PetscSFNode *remotePointsA,*remotePointsB;
2048   PetscSFNode       *remotePointsBA;
2049   const PetscInt    *localPointsA,*localPointsB;
2050   PetscSFNode       *reorderedRemotePointsA = NULL;
2051   PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
2052   MPI_Op            op;
2053 #if defined(PETSC_USE_64BIT_INDICES)
2054   PetscBool         iswin;
2055 #endif
2056 
2057   PetscFunctionBegin;
2058   PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
2059   PetscSFCheckGraphSet(sfA,1);
2060   PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
2061   PetscSFCheckGraphSet(sfB,2);
2062   PetscCheckSameComm(sfA,1,sfB,2);
2063   PetscValidPointer(sfBA,3);
2064   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2065   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2066 
2067   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2068   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2069 
2070   /* TODO: Check roots of sfB have degree of 1 */
2071   /* Once we implement it, we can replace the MPI_MAXLOC
2072      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2073      We use MPI_MAXLOC only to have a deterministic output from this routine if
2074      the root condition is not meet.
2075    */
2076   op = MPI_MAXLOC;
2077 #if defined(PETSC_USE_64BIT_INDICES)
2078   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2079   PetscCall(PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin));
2080   if (iswin) op = MPI_REPLACE;
2081 #endif
2082 
2083   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2084   PetscCall(PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA));
2085   for (i=0; i<maxleaf - minleaf + 1; i++) {
2086     reorderedRemotePointsA[i].rank = -1;
2087     reorderedRemotePointsA[i].index = -1;
2088   }
2089   if (localPointsA) {
2090     for (i=0; i<numLeavesA; i++) {
2091       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2092       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2093     }
2094   } else {
2095     for (i=0; i<numLeavesA; i++) {
2096       if (i > maxleaf || i < minleaf) continue;
2097       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2098     }
2099   }
2100 
2101   PetscCall(PetscMalloc1(numRootsB,&localPointsBA));
2102   PetscCall(PetscMalloc1(numRootsB,&remotePointsBA));
2103   for (i=0; i<numRootsB; i++) {
2104     remotePointsBA[i].rank = -1;
2105     remotePointsBA[i].index = -1;
2106   }
2107 
2108   PetscCall(PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
2109   PetscCall(PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
2110   PetscCall(PetscFree(reorderedRemotePointsA));
2111   for (i=0,numLeavesBA=0; i<numRootsB; i++) {
2112     if (remotePointsBA[i].rank == -1) continue;
2113     remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2114     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2115     localPointsBA[numLeavesBA] = i;
2116     numLeavesBA++;
2117   }
2118   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
2119   PetscCall(PetscSFSetFromOptions(*sfBA));
2120   PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 /*
2125   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
2126 
2127   Input Parameters:
2128 . sf - The global PetscSF
2129 
2130   Output Parameters:
2131 . out - The local PetscSF
2132  */
2133 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2134 {
2135   MPI_Comm           comm;
2136   PetscMPIInt        myrank;
2137   const PetscInt     *ilocal;
2138   const PetscSFNode  *iremote;
2139   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
2140   PetscSFNode        *liremote;
2141   PetscSF            lsf;
2142 
2143   PetscFunctionBegin;
2144   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2145   if (sf->ops->CreateLocalSF) {
2146     PetscCall((*sf->ops->CreateLocalSF)(sf,out));
2147   } else {
2148     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2149     PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
2150     PetscCallMPI(MPI_Comm_rank(comm,&myrank));
2151 
2152     /* Find out local edges and build a local SF */
2153     PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
2154     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2155     PetscCall(PetscMalloc1(lnleaves,&lilocal));
2156     PetscCall(PetscMalloc1(lnleaves,&liremote));
2157 
2158     for (i=j=0; i<nleaves; i++) {
2159       if (iremote[i].rank == (PetscInt)myrank) {
2160         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2161         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2162         liremote[j].index = iremote[i].index;
2163         j++;
2164       }
2165     }
2166     PetscCall(PetscSFCreate(PETSC_COMM_SELF,&lsf));
2167     PetscCall(PetscSFSetFromOptions(lsf));
2168     PetscCall(PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER));
2169     PetscCall(PetscSFSetUp(lsf));
2170     *out = lsf;
2171   }
2172   PetscFunctionReturn(0);
2173 }
2174 
/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0

   Input Parameters:
+  sf - star forest
.  unit - data type
-  rootdata - buffer whose root values are broadcast

   Output Parameter:
.  leafdata - leaf buffer; only filled on rank 0

   Errors with PETSC_ERR_SUP if the SF type does not implement BcastToZero.
*/
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscMemType       rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  /* Logged under the generic BcastBegin event; there is no dedicated event for this private helper */
  PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
  /* Determine whether the user buffers live in host or device memory */
  PetscCall(PetscGetMemType(rootdata,&rootmtype));
  PetscCall(PetscGetMemType(leafdata,&leafmtype));
  if (sf->ops->BcastToZero) {
    PetscCall((*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}
2192 
2193 /*@
2194   PetscSFConcatenate - concatenate multiple SFs into one
2195 
2196   Input Parameters:
2197 + comm - the communicator
2198 . nsfs - the number of input PetscSF
2199 . sfs  - the array of input PetscSF
2200 . shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE)
2201 - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage
2202 
2203   Output Parameters:
2204 . newsf - The resulting PetscSF
2205 
2206   Level: developer
2207 
2208   Notes:
2209   The communicator of all SFs in sfs must be comm.
2210 
2211   The offsets in leafOffsets are added to the original leaf indices.
2212 
2213   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2214   In this case, NULL is also passed as ilocal to the resulting SF.
2215 
2216   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2217   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2218 
2219 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2220 @*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt            i, s, nLeaves, nRoots;
  PetscInt           *leafArrayOffsets;  /* prefix sums of the input SFs' leaf counts; slot [s] is where sfs[s]'s leaves start in the concatenated arrays */
  PetscInt           *ilocal_new;        /* concatenated leaf indices of the result; stays NULL when all inputs use contiguous leaf storage */
  PetscSFNode        *iremote_new;       /* concatenated remote (rank, index) pairs of the result */
  PetscInt           *rootOffsets;       /* prefix sums of the input SFs' root counts; calloc'd, so all zero in the shareRoots case */
  PetscBool           all_ilocal_null = PETSC_FALSE;
  PetscMPIInt         rank;

  PetscFunctionBegin;
  {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    PetscCall(PetscSFCreate(comm,&dummy));
    PetscValidLogicalCollectiveInt(dummy,nsfs,2);
    PetscValidPointer(sfs,3);
    for (i=0; i<nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i],PETSCSF_CLASSID,3);
      PetscCheckSameComm(dummy,1,sfs[i],3);
    }
    PetscValidLogicalCollectiveBool(dummy,shareRoots,4);
    if (leafOffsets) PetscValidIntPointer(leafOffsets,5);
    PetscValidPointer(newsf,6);
    PetscCall(PetscSFDestroy(&dummy));
  }
  /* Trivial case: concatenating zero SFs yields an empty SF */
  if (!nsfs) {
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscFunctionReturn(0);
  }
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* Compute root offsets. With shareRoots, all inputs address the same root space,
     so the offsets remain 0 (from PetscCalloc1) and the root count is taken from sfs[0];
     otherwise root spaces are stacked and offsets are the running sums of root counts. */
  PetscCall(PetscCalloc1(nsfs+1, &rootOffsets));
  if (shareRoots) {
    PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
    if (PetscDefined(USE_DEBUG)) {
      /* Debug-only consistency check: every input must agree on the shared root count */
      for (s=1; s<nsfs; s++) {
        PetscInt nr;

        PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
      }
    }
  } else {
    for (s=0; s<nsfs; s++) {
      PetscInt nr;

      PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
      rootOffsets[s+1] = rootOffsets[s] + nr;
    }
    nRoots = rootOffsets[nsfs];
  }

  /* Calculate leaf array offsets and automatic root offsets */
  PetscCall(PetscMalloc1(nsfs+1,&leafArrayOffsets));
  leafArrayOffsets[0] = 0;
  for (s=0; s<nsfs; s++) {
    PetscInt        nl;

    PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
    leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* leafOffsets may be omitted only when every input SF stores leaves contiguously
     (ilocal == NULL); otherwise the offsets are required to disambiguate leaves,
     so error out before leafOffsets[s] would be dereferenced below. */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s=0; s<nsfs; s++) {
      const PetscInt *ilocal;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL;
  if (!all_ilocal_null) {
    /* Note: this branch is reached only when leafOffsets was provided (see check above) */
    PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
    for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1; /* sentinel; every slot is overwritten below */
    for (s = 0; s<nsfs; s++) {
      const PetscInt   *ilocal;
      PetscInt         *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
      PetscInt          i, nleaves_l;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
      /* Contiguous storage (ilocal == NULL) is treated as the identity map 0..nleaves_l-1 */
      for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots */
  PetscCall(PetscMalloc1(nLeaves, &iremote_new));
  for (i = 0; i < nLeaves; i++) {
    iremote_new[i].rank   = -1;
    iremote_new[i].index  = -1;
  }
  /* For each input SF, broadcast each root's new global (rank, offset index) pair to its
     leaves via a helper SF, landing directly in this SF's slice of iremote_new. */
  for (s = 0; s<nsfs; s++) {
    PetscInt            i, nl, nr;
    PetscSF             tmp_sf;
    const PetscSFNode  *iremote;
    PetscSFNode        *tmp_rootdata;
    PetscSFNode        *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];

    PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
    PetscCall(PetscSFCreate(comm, &tmp_sf));
    /* create helper SF with contiguous leaves */
    /* const cast is safe here: PETSC_COPY_VALUES means iremote is only read */
    PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES));
    PetscCall(PetscSFSetUp(tmp_sf));
    PetscCall(PetscMalloc1(nr, &tmp_rootdata));
    for (i = 0; i < nr; i++) {
      tmp_rootdata[i].index = i + rootOffsets[s]; /* root's index in the concatenated root space */
      tmp_rootdata[i].rank  = (PetscInt) rank;
    }
    /* MPIU_2INT matches the two-PetscInt layout of PetscSFNode */
    PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(  tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFDestroy(&tmp_sf));
    PetscCall(PetscFree(tmp_rootdata));
  }

  /* Build the new SF; ownership of ilocal_new and iremote_new transfers to it */
  PetscCall(PetscSFCreate(comm, newsf));
  PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*newsf));
  PetscCall(PetscFree(rootOffsets));
  PetscCall(PetscFree(leafArrayOffsets));
  PetscFunctionReturn(0);
}
2352