xref: /petsc/src/vec/is/sf/interface/sf.c (revision 7c441f3aff93c611491d4ea0564d57010b1fd4e9)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petscctable.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <cuda_runtime.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 #endif
13 
#if defined(PETSC_CLANG_STATIC_ANALYZER)
void PetscSFCheckGraphSet(PetscSF, int);
#else
  #if defined(PETSC_USE_DEBUG)
    /* No trailing semicolon: call sites write PetscSFCheckGraphSet(sf, 1); themselves.
       A semicolon baked into the macro would make every use expand to two statements,
       which breaks single-statement contexts such as
         if (cond) PetscSFCheckGraphSet(sf, 1); else ...
       and is inconsistent with the statement-safe do/while(0) release variant below. */
    #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
  #else
    #define PetscSFCheckGraphSet(sf, arg) \
      do { \
      } while (0)
  #endif
#endif
25 
26 const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
27 
/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Parameter:
.  comm - communicator on which the star forest will operate

   Output Parameter:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window    -Use MPI-3 one-sided window for communication
-  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
{
  PetscSF b;

  PetscFunctionBegin;
  PetscValidPointer(sf, 2);
  PetscCall(PetscSFInitializePackage());

  PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));

  /* Negative counts and an inverted [minleaf, maxleaf] range are the sentinels for "graph not set yet";
     PetscSFReset() restores the same values */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi; /* inherit the library-wide setting */
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->unknown_input_stream = PETSC_FALSE;
  #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
  b->backend = PETSCSF_BACKEND_KOKKOS;
  #elif defined(PETSC_HAVE_CUDA)
  b->backend = PETSCSF_BACKEND_CUDA;
  #elif defined(PETSC_HAVE_HIP)
  b->backend = PETSCSF_BACKEND_HIP;
  #endif

  #if defined(PETSC_HAVE_NVSHMEM)
  b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
  b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
  #endif
#endif
  b->vscat.from_n = -1;
  b->vscat.to_n   = -1;
  b->vscat.unit   = MPIU_SCALAR;
  *sf             = b;
  PetscFunctionReturn(0);
}
97 
/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Parameter:
.  sf - star forest

   Level: advanced

.seealso: `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscTryTypeMethod(sf, Reset); /* let the implementation release its own data first */
  /* Restore the "graph not set" sentinel values used by PetscSFCreate() */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine/remote above may alias mine_alloc/remote_alloc; only the _alloc pointers own memory */
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
  /* Break the multi-SF's back-reference before destroying it so the destroy does not recurse into us */
  if (sf->multi) sf->multi->multi = NULL;
  PetscCall(PetscSFDestroy(&sf->multi));
  PetscCall(PetscLayoutDestroy(&sf->map));

#if defined(PETSC_HAVE_DEVICE)
  for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
#endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
141 
142 /*@C
143    PetscSFSetType - Set the PetscSF communication implementation
144 
145    Collective on PetscSF
146 
147    Input Parameters:
148 +  sf - the PetscSF context
149 -  type - a known method
150 
151    Options Database Key:
152 .  -sf_type <type> - Sets the method; use -help for a list
153    of available methods (for instance, window, basic, neighbor)
154 
155    Notes:
156    See "include/petscsf.h" for available methods (for instance)
157 +    PETSCSFWINDOW - MPI-2/3 one-sided
158 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
159 
160   Level: intermediate
161 
162 .seealso: `PetscSFType`, `PetscSFCreate()`
163 @*/
164 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
165 {
166   PetscBool match;
167   PetscErrorCode (*r)(PetscSF);
168 
169   PetscFunctionBegin;
170   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
171   PetscValidCharPointer(type, 2);
172 
173   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
174   if (match) PetscFunctionReturn(0);
175 
176   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
177   PetscCheck(r, PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
178   /* Destroy the previous PetscSF implementation context */
179   PetscTryTypeMethod(sf, Destroy);
180   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
181   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
182   PetscCall((*r)(sf));
183   PetscFunctionReturn(0);
184 }
185 
/*@C
  PetscSFGetType - Get the PetscSF communication implementation

  Not Collective

  Input Parameter:
. sf  - the PetscSF context

  Output Parameter:
. type - the PetscSF type name

  Level: intermediate

.seealso: `PetscSFSetType()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(type, 2);
  /* Returns a borrowed pointer to the object's type name; NULL if no type has been set yet */
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}
209 
/*@C
   PetscSFDestroy - destroy star forest

   Collective

   Input Parameter:
.  sf - address of star forest

   Level: intermediate

.seealso: `PetscSFCreate()`, `PetscSFReset()`
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0); /* destroying a NULL handle is a no-op */
  PetscValidHeaderSpecific((*sf), PETSCSF_CLASSID, 1);
  /* Only the last reference actually frees the object; earlier ones just drop the handle */
  if (--((PetscObject)(*sf))->refct > 0) {
    *sf = NULL;
    PetscFunctionReturn(0);
  }
  PetscCall(PetscSFReset(*sf));
  PetscTryTypeMethod((*sf), Destroy);
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* vscat.unit was MPI_Type_dup'ed only when bs > 1 (see PetscSFDuplicate); free it in that case */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(0);
}
238 
239 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
240 {
241   PetscInt           i, nleaves;
242   PetscMPIInt        size;
243   const PetscInt    *ilocal;
244   const PetscSFNode *iremote;
245 
246   PetscFunctionBegin;
247   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
248   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
249   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
250   for (i = 0; i < nleaves; i++) {
251     const PetscInt rank   = iremote[i].rank;
252     const PetscInt remote = iremote[i].index;
253     const PetscInt leaf   = ilocal ? ilocal[i] : i;
254     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
255     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
256     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
257   }
258   PetscFunctionReturn(0);
259 }
260 
261 /*@
262    PetscSFSetUp - set up communication structures
263 
264    Collective
265 
266    Input Parameter:
267 .  sf - star forest communication object
268 
269    Level: beginner
270 
271 .seealso: `PetscSFSetFromOptions()`, `PetscSFSetType()`
272 @*/
273 PetscErrorCode PetscSFSetUp(PetscSF sf)
274 {
275   PetscFunctionBegin;
276   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
277   PetscSFCheckGraphSet(sf, 1);
278   if (sf->setupcalled) PetscFunctionReturn(0);
279   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
280   PetscCall(PetscSFCheckGraphValid_Private(sf));
281   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
282   PetscTryTypeMethod(sf, SetUp);
283 #if defined(PETSC_HAVE_CUDA)
284   if (sf->backend == PETSCSF_BACKEND_CUDA) {
285     sf->ops->Malloc = PetscSFMalloc_CUDA;
286     sf->ops->Free   = PetscSFFree_CUDA;
287   }
288 #endif
289 #if defined(PETSC_HAVE_HIP)
290   if (sf->backend == PETSCSF_BACKEND_HIP) {
291     sf->ops->Malloc = PetscSFMalloc_HIP;
292     sf->ops->Free   = PetscSFFree_HIP;
293   }
294 #endif
295 
296 #
297 #if defined(PETSC_HAVE_KOKKOS)
298   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
299     sf->ops->Malloc = PetscSFMalloc_Kokkos;
300     sf->ops->Free   = PetscSFFree_Kokkos;
301   }
302 #endif
303   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
304   sf->setupcalled = PETSC_TRUE;
305   PetscFunctionReturn(0);
306 }
307 
/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Parameter:
.  sf - star forest

   Options Database Keys:
+  -sf_type               - implementation type, see PetscSFSetType()
.  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
.  -sf_unknown_input_stream - Assume the callers of SF computed the input root/leafdata on arbitrary streams unknown to SF,
                              so SF must synchronize before processing the data (default: false).
.  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                               If true, this option only works with -use_gpu_aware_mpi 1.

-  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
                              On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                              the only available is kokkos.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType deft;
  char        type[256];
  PetscBool   flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscObjectOptionsBegin((PetscObject)sf);
  /* Use the current type (if any) as the default shown by -sf_type */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
  PetscCall(PetscSFSetType(sf, flg ? type : deft));
  PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
#if defined(PETSC_HAVE_DEVICE)
  {
    char      backendstr[32] = {0};
    PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
    PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
    PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
    /* Case-insensitive match of the requested backend name */
    PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
    PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
    PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
  #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
  #elif defined(PETSC_HAVE_KOKKOS)
    /* Kokkos-only build: any explicit -sf_backend other than kokkos is an error */
    PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
  #endif
  }
#endif
  PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}
369 
/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Parameters:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveBool(sf, flg, 2);
  /* The multi-SF is built lazily using the current rankorder; once it exists the ordering is baked in */
  PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}
392 
/*@C
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Parameters:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   Leaf indices in ilocal must be unique, otherwise an error occurs.

   Input arrays ilocal and iremote follow the PetscCopyMode semantics.
   In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
   PETSc might modify the respective array;
   if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
   Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

   Fortran Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

   Developer Notes:
   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
   This also allows to compare leaf sets of two SFs easily.

.seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
{
  PetscBool unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal, 4);
  if (nleaves > 0) PetscValidPointer(iremote, 6);
  PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
  PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
  /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
   * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
  PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
  PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* Under PETSC_COPY_VALUES, work on private copies from here on so the caller's arrays stay intact */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves, &tlocal));
    PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves, &tremote));
    PetscCall(PetscArraycpy(tremote, iremote, nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode work;

    /* Sort ilocal (permuting iremote alongside) to detect duplicates/contiguity and get min/max leaf.
       Note this may modify caller-owned arrays under PETSC_OWN_POINTER/PETSC_USE_POINTER, as documented. */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    unique = PetscNot(unique); /* PetscSortedCheckDupsInt() returns "has duplicates"; flip to "is unique" */
    PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves - 1];
    contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
  } else {
    /* NULL ilocal (or no leaves) means contiguous storage [0, nleaves) by convention */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* A contiguous ilocal is equivalent to NULL; drop the array (but never free a PETSC_USE_POINTER array) */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  /* mine/remote are what the SF reads; mine_alloc/remote_alloc record ownership for PetscSFReset() */
  sf->mine = ilocal;
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc = NULL;
  } else {
    sf->mine_alloc = ilocal;
  }
  sf->remote = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc = NULL;
  } else {
    sf->remote_alloc = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
508 
/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
{
  MPI_Comm    comm;
  PetscInt    n, N, res[2];
  PetscMPIInt rank, size;
  PetscSFType type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map, 2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* Alltoall ignores the caller's map; build an internal layout with `size` roots per rank */
    type = PETSCSFALLTOALL;
    PetscCall(PetscLayoutCreate(comm, &sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map, size));
    PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map, &n));
    PetscCall(PetscLayoutGetSize(map, &N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
    if (res[0] == -res[1]) { /* same n: use the fixed-count (non-"v") variants */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map, &sf->map));
  }
  PetscCall(PetscSFSetType(sf, type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves for GATHER; other ranks contribute roots only */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
607 
/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Parameter:
.  sf - star forest to invert

   Output Parameter:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
{
  PetscMPIInt     rank;
  PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots, *leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscValidPointer(isf, 2);

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
  maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
  PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
  /* Each leaf slot advertises its own (rank, index) ... */
  for (i = 0; i < maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* ... while roots start at (-1,-1) so untouched roots can be recognized after the reduce */
  for (i = 0; i < nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Reduce leaf identities onto roots; with all degrees 1, MPI_REPLACE writes each root's unique leaf */
  PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i = 0, count = 0; i < nroots; i++)
    if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* every root was hit: inverse leaves are contiguous */
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
    for (i = 0, count = 0; i < nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* newilocal ownership transfers to the new SF (PETSC_OWN_POINTER); roots is copied and freed below */
  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
  PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots, leaves));
  PetscFunctionReturn(0);
}
676 
/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Parameters:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Parameter:
.  newsf - new communication object

   Level: beginner

.seealso: `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
{
  PetscSFType  type;
  MPI_Datatype dtype = MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, opt, 2);
  PetscValidPointer(newsf, 3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
  PetscCall(PetscSFGetType(sf, &type));
  if (type) PetscCall(PetscSFSetType(*newsf, type));
  (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf, 1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt           nroots, nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      /* PETSC_COPY_VALUES so the duplicate owns independent copies of the graph arrays */
      PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
      PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
    } else {
      PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream = sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}
736 
737 /*@C
738    PetscSFGetGraph - Get the graph specifying a parallel star forest
739 
740    Not Collective
741 
742    Input Parameter:
743 .  sf - star forest
744 
745    Output Parameters:
746 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
747 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
748 .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
749 -  iremote - remote locations of root vertices for each leaf on the current process
750 
751    Notes:
752      We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
753 
754      The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
755      see its manpage for details.
756 
757    Fortran Notes:
758      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
759      want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
760 
761      To check for a NULL ilocal use
762 $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
763 
764    Level: intermediate
765 
766 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
767 @*/
768 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
769 {
770   PetscFunctionBegin;
771   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
772   if (sf->ops->GetGraph) {
773     PetscCall((sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote));
774   } else {
775     if (nroots) *nroots = sf->nroots;
776     if (nleaves) *nleaves = sf->nleaves;
777     if (ilocal) *ilocal = sf->mine;
778     if (iremote) *iremote = sf->remote;
779   }
780   PetscFunctionReturn(0);
781 }
782 
783 /*@
784    PetscSFGetLeafRange - Get the active leaf ranges
785 
786    Not Collective
787 
788    Input Parameter:
789 .  sf - star forest
790 
791    Output Parameters:
792 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
793 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
794 
795    Level: developer
796 
797 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
798 @*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1); /* the cached min/max are only meaningful once the graph is set */
  /* Either output may be NULL if the caller does not need it */
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}
808 
809 /*@C
   PetscSFViewFromOptions - View a PetscSF based on values in the options database
811 
812    Collective on PetscSF
813 
814    Input Parameters:
815 +  A - the star forest
816 .  obj - Optional object
817 -  name - command line option
818 
819    Level: intermediate
820 .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
821 @*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
  /* Thin wrapper: defer entirely to the generic PetscObject options-driven viewing */
  PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
  PetscFunctionReturn(0);
}
829 
830 /*@C
831    PetscSFView - view a star forest
832 
833    Collective
834 
835    Input Parameters:
836 +  sf - star forest
837 -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
838 
839    Level: beginner
840 
841 .seealso: `PetscSFCreate()`, `PetscSFSetGraph()`
842 @*/
843 PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
844 {
845   PetscBool         iascii;
846   PetscViewerFormat format;
847 
848   PetscFunctionBegin;
849   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
850   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
851   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
852   PetscCheckSameComm(sf, 1, viewer, 2);
853   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
854   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
855   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
856     PetscMPIInt rank;
857     PetscInt    ii, i, j;
858 
859     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
860     PetscCall(PetscViewerASCIIPushTab(viewer));
861     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
862       if (!sf->graphset) {
863         PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
864         PetscCall(PetscViewerASCIIPopTab(viewer));
865         PetscFunctionReturn(0);
866       }
867       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
868       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
869       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
870       for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
871       PetscCall(PetscViewerFlush(viewer));
872       PetscCall(PetscViewerGetFormat(viewer, &format));
873       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
874         PetscMPIInt *tmpranks, *perm;
875         PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
876         PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
877         for (i = 0; i < sf->nranks; i++) perm[i] = i;
878         PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
879         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
880         for (ii = 0; ii < sf->nranks; ii++) {
881           i = perm[ii];
882           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
883           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
884         }
885         PetscCall(PetscFree2(tmpranks, perm));
886       }
887       PetscCall(PetscViewerFlush(viewer));
888       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
889     }
890     PetscCall(PetscViewerASCIIPopTab(viewer));
891   }
892   PetscTryTypeMethod(sf, View, viewer);
893   PetscFunctionReturn(0);
894 }
895 
896 /*@C
897    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
898 
899    Not Collective
900 
901    Input Parameter:
902 .  sf - star forest
903 
904    Output Parameters:
905 +  nranks - number of ranks referenced by local part
906 .  ranks - array of ranks
907 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
908 .  rmine - concatenated array holding local indices referencing each remote rank
909 -  rremote - concatenated array holding remote indices referenced for each remote rank
910 
911    Level: developer
912 
913 .seealso: `PetscSFGetLeafRanks()`
914 @*/
915 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
916 {
917   PetscFunctionBegin;
918   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
919   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
920   if (sf->ops->GetRootRanks) {
921     PetscCall((sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote));
922   } else {
923     /* The generic implementation */
924     if (nranks) *nranks = sf->nranks;
925     if (ranks) *ranks = sf->ranks;
926     if (roffset) *roffset = sf->roffset;
927     if (rmine) *rmine = sf->rmine;
928     if (rremote) *rremote = sf->rremote;
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 /*@C
934    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
935 
936    Not Collective
937 
938    Input Parameter:
939 .  sf - star forest
940 
941    Output Parameters:
942 +  niranks - number of leaf ranks referencing roots on this process
943 .  iranks - array of ranks
944 .  ioffset - offset in irootloc for each rank (length niranks+1)
945 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
946 
947    Level: developer
948 
949 .seealso: `PetscSFGetRootRanks()`
950 @*/
951 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
952 {
953   PetscFunctionBegin;
954   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
955   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
956   if (sf->ops->GetLeafRanks) {
957     PetscCall((sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc));
958   } else {
959     PetscSFType type;
960     PetscCall(PetscSFGetType(sf, &type));
961     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
962   }
963   PetscFunctionReturn(0);
964 }
965 
966 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
967 {
968   PetscInt i;
969   for (i = 0; i < n; i++) {
970     if (needle == list[i]) return PETSC_TRUE;
971   }
972   return PETSC_FALSE;
973 }
974 
975 /*@C
976    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
977 
978    Collective
979 
980    Input Parameters:
981 +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
982 -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
983 
984    Level: developer
985 
986 .seealso: `PetscSFGetRootRanks()`
987 @*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
{
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size, groupsize, *groupranks;
  PetscInt          *rcount, *ranks;
  PetscInt           i, irank = -1, orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  /* Count, per remote rank, how many of our leaves reference a root there */
  PetscCall(PetscTableCreate(10, size, &table));
  for (i = 0; i < sf->nleaves; i++) {
    /* Log 1-based rank */
    PetscCall(PetscTableAdd(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
  }
  PetscCall(PetscTableGetCount(table, &sf->nranks));
  /* rmine/rremote are sized by nleaves: each leaf falls into exactly one rank's segment */
  PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
  PetscCall(PetscTableGetHeadPosition(table, &pos));
  /* Drain the table into parallel arrays: ranks[] (who) and rcount[] (how many leaves) */
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscTableGetNext(table, &pos, &ranks[i], &rcount[i]));
    ranks[i]--; /* Convert back to 0-based */
  }
  PetscCall(PetscTableDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group    group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's members into this communicator's rank numbering */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
    PetscCall(PetscMalloc1(groupsize, &dgroupranks));
    PetscCall(PetscMalloc1(groupsize, &groupranks));
    for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
    for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i], groupsize, groupranks)) break;
    }
    for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank, tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition ascending by rank so PetscFindMPIInt() below can search them */
  PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
  PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
  /* Build CSR-style offsets: leaves referencing ranks[i] occupy rmine/rremote[roffset[i]..roffset[i+1]) */
  sf->roffset[0] = 0;
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
    sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
    rcount[i]          = 0; /* reset: reused below as a per-rank fill cursor */
  }
  for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount, ranks));
  PetscFunctionReturn(0);
}
1077 
1078 /*@C
1079    PetscSFGetGroups - gets incoming and outgoing process groups
1080 
1081    Collective
1082 
1083    Input Parameter:
1084 .  sf - star forest
1085 
1086    Output Parameters:
1087 +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1088 -  outgoing - group of destination processes for outgoing edges (roots that I reference)
1089 
1090    Level: developer
1091 
1092 .seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1093 @*/
PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
{
  MPI_Group group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
  /* Both groups are built lazily and cached on the SF; later calls return the cache */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt        i;
    const PetscInt *indegree;
    PetscMPIInt     rank, *outranks, *inranks;
    PetscSFNode    *remote;
    PetscSF         bgcount;

    /* Compute the number of incoming ranks */
    /* Auxiliary SF: one root on every process, one leaf per remote rank we reference */
    PetscCall(PetscMalloc1(sf->nranks, &remote));
    for (i = 0; i < sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
    PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    /* indegree[0] = number of processes whose leaves reference our root */
    PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
    /* Enumerate the incoming ranks */
    PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    /* Each leaf sends our own rank; the gather collects the rank of every referencing process */
    for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
    PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks, outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  /* The outgoing group is simply the already-computed list of root ranks we reference */
  if (sf->outgroup == MPI_GROUP_NULL) {
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1139 
1140 /*@
1141    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1142 
1143    Collective
1144 
1145    Input Parameter:
1146 .  sf - star forest that may contain roots with 0 or with more than 1 vertex
1147 
1148    Output Parameter:
1149 .  multi - star forest with split roots, such that each root has degree exactly 1
1150 
1151    Level: developer
1152 
1153    Notes:
1154 
1155    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1156    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1157    edge, it is a candidate for future optimization that might involve its removal.
1158 
1159 .seealso: `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1160 @*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(multi, 2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    *multi           = sf->multi;
    sf->multi->multi = sf->multi; /* a multi-SF is its own multi-SF */
    PetscFunctionReturn(0);
  }
  /* Build lazily and cache: split each root into indegree copies so every multi-root has degree 1 */
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
    PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
    /* inoffset[] is the prefix sum of indegrees: the first multi-root slot owned by each original root */
    PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
    inoffset[0] = 0;
    for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
    for (i = 0; i < maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1: each leaf receives (in outoffset) a distinct slot within its root's segment */
    PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
      for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
    }
    /* Same leaves as sf, but each now targets its private multi-root slot */
    PetscCall(PetscMalloc1(sf->nleaves, &remote));
    for (i = 0; i < sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt  rank;
      PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
      /* Deposit, at each multi-root, the rank of the process whose leaf maps there */
      for (i = 0; i < maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i = 0; i < sf->nroots; i++) {
        PetscInt j;
        for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset));
        for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell every leaf its new (rank-sorted) multi-root slot, then rebuild the graph with it */
      PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves, &newremote));
      for (i = 0; i < sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
    }
    PetscCall(PetscFree3(inoffset, outones, outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1229 
1230 /*@C
1231    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1232 
1233    Collective
1234 
1235    Input Parameters:
1236 +  sf - original star forest
1237 .  nselected  - number of selected roots on this process
1238 -  selected   - indices of the selected roots on this process
1239 
1240    Output Parameter:
1241 .  esf - new star forest
1242 
1243    Level: advanced
1244 
1245    Note:
1246    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1247    be done by calling PetscSFGetGraph().
1248 
1249 .seealso: `PetscSFSetGraph()`, `PetscSFGetGraph()`
1250 @*/
1251 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1252 {
1253   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1254   const PetscInt    *ilocal;
1255   signed char       *rootdata, *leafdata, *leafmem;
1256   const PetscSFNode *iremote;
1257   PetscSFNode       *new_iremote;
1258   MPI_Comm           comm;
1259 
1260   PetscFunctionBegin;
1261   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1262   PetscSFCheckGraphSet(sf, 1);
1263   if (nselected) PetscValidIntPointer(selected, 3);
1264   PetscValidPointer(esf, 4);
1265 
1266   PetscCall(PetscSFSetUp(sf));
1267   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
1268   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1269   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1270 
1271   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or  out of range indices */
1272     PetscBool dups;
1273     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
1274     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1275     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1276   }
1277 
1278   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1279   else {
1280     /* A generic version of creating embedded sf */
1281     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1282     maxlocal = maxleaf - minleaf + 1;
1283     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
1284     leafdata = leafmem - minleaf;
1285     /* Tag selected roots and bcast to leaves */
1286     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1287     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1288     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1289 
1290     /* Build esf with leaves that are still connected */
1291     esf_nleaves = 0;
1292     for (i = 0; i < nleaves; i++) {
1293       j = ilocal ? ilocal[i] : i;
1294       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1295          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1296       */
1297       esf_nleaves += (leafdata[j] ? 1 : 0);
1298     }
1299     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
1300     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1301     for (i = n = 0; i < nleaves; i++) {
1302       j = ilocal ? ilocal[i] : i;
1303       if (leafdata[j]) {
1304         new_ilocal[n]        = j;
1305         new_iremote[n].rank  = iremote[i].rank;
1306         new_iremote[n].index = iremote[i].index;
1307         ++n;
1308       }
1309     }
1310     PetscCall(PetscSFCreate(comm, esf));
1311     PetscCall(PetscSFSetFromOptions(*esf));
1312     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1313     PetscCall(PetscFree2(rootdata, leafmem));
1314   }
1315   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 /*@C
1320   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1321 
1322   Collective
1323 
1324   Input Parameters:
1325 + sf - original star forest
1326 . nselected  - number of selected leaves on this process
1327 - selected   - indices of the selected leaves on this process
1328 
1329   Output Parameter:
1330 .  newsf - new star forest
1331 
1332   Level: advanced
1333 
1334 .seealso: `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1335 @*/
1336 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1337 {
1338   const PetscSFNode *iremote;
1339   PetscSFNode       *new_iremote;
1340   const PetscInt    *ilocal;
1341   PetscInt           i, nroots, *leaves, *new_ilocal;
1342   MPI_Comm           comm;
1343 
1344   PetscFunctionBegin;
1345   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1346   PetscSFCheckGraphSet(sf, 1);
1347   if (nselected) PetscValidIntPointer(selected, 3);
1348   PetscValidPointer(newsf, 4);
1349 
1350   /* Uniq selected[] and put results in leaves[] */
1351   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1352   PetscCall(PetscMalloc1(nselected, &leaves));
1353   PetscCall(PetscArraycpy(leaves, selected, nselected));
1354   PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
1355   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1356 
1357   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1358   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1359   else {
1360     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1361     PetscCall(PetscMalloc1(nselected, &new_ilocal));
1362     PetscCall(PetscMalloc1(nselected, &new_iremote));
1363     for (i = 0; i < nselected; ++i) {
1364       const PetscInt l     = leaves[i];
1365       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1366       new_iremote[i].rank  = iremote[l].rank;
1367       new_iremote[i].index = iremote[l].index;
1368     }
1369     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1370     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1371   }
1372   PetscCall(PetscFree(leaves));
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 /*@C
1377    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1378 
1379    Collective on PetscSF
1380 
1381    Input Parameters:
1382 +  sf - star forest on which to communicate
1383 .  unit - data type associated with each node
1384 .  rootdata - buffer to broadcast
1385 -  op - operation to use for reduction
1386 
1387    Output Parameter:
1388 .  leafdata - buffer to be reduced with values from each leaf's respective root
1389 
1390    Level: intermediate
1391 
1392    Notes:
1393     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
    are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1395     use PetscSFBcastWithMemTypeBegin() instead.
1396 .seealso: `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1397 @*/
1398 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1399 {
1400   PetscMemType rootmtype, leafmtype;
1401 
1402   PetscFunctionBegin;
1403   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1404   PetscCall(PetscSFSetUp(sf));
1405   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1406   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1407   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1408   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1409   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 /*@C
1414    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()
1415 
1416    Collective on PetscSF
1417 
1418    Input Parameters:
1419 +  sf - star forest on which to communicate
1420 .  unit - data type associated with each node
1421 .  rootmtype - memory type of rootdata
1422 .  rootdata - buffer to broadcast
1423 .  leafmtype - memory type of leafdata
1424 -  op - operation to use for reduction
1425 
1426    Output Parameter:
1427 .  leafdata - buffer to be reduced with values from each leaf's respective root
1428 
1429    Level: intermediate
1430 
1431 .seealso: `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1432 @*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  /* Caller-supplied memory types let us skip the PetscGetMemType() lookups done by PetscSFBcastBegin() */
  PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}
1443 
1444 /*@C
1445    PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()
1446 
1447    Collective
1448 
1449    Input Parameters:
1450 +  sf - star forest
1451 .  unit - data type
1452 .  rootdata - buffer to broadcast
1453 -  op - operation to use for reduction
1454 
1455    Output Parameter:
1456 .  leafdata - buffer to be reduced with values from each leaf's respective root
1457 
1458    Level: intermediate
1459 
1460 .seealso: `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1461 @*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* No PetscSFSetUp() here: a matching PetscSFBcastBegin() must already have run */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
  PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}
1471 
1472 /*@C
1473    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1474 
1475    Collective
1476 
1477    Input Parameters:
1478 +  sf - star forest
1479 .  unit - data type
1480 .  leafdata - values to reduce
1481 -  op - reduction operation
1482 
1483    Output Parameter:
1484 .  rootdata - result of reduction of values from all leaves of each root
1485 
1486    Level: intermediate
1487 
1488    Notes:
1489     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1490     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1491     use PetscSFReduceWithMemTypeBegin() instead.
1492 
1493 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`
1494 @*/
1495 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1496 {
1497   PetscMemType rootmtype, leafmtype;
1498 
1499   PetscFunctionBegin;
1500   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1501   PetscCall(PetscSFSetUp(sf));
1502   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1503   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1504   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1505   PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1506   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1507   PetscFunctionReturn(0);
1508 }
1509 
1510 /*@C
1511    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1512 
1513    Collective
1514 
1515    Input Parameters:
1516 +  sf - star forest
1517 .  unit - data type
1518 .  leafmtype - memory type of leafdata
1519 .  leafdata - values to reduce
1520 .  rootmtype - memory type of rootdata
1521 -  op - reduction operation
1522 
1523    Output Parameter:
1524 .  rootdata - result of reduction of values from all leaves of each root
1525 
1526    Level: intermediate
1527 
1528 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceBegin()`
1529 @*/
1530 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1531 {
1532   PetscFunctionBegin;
1533   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1534   PetscCall(PetscSFSetUp(sf));
1535   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1536   PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1537   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1538   PetscFunctionReturn(0);
1539 }
1540 
/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: `PetscSFSetGraph()`, `PetscSFBcastEnd()`
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Suppress logging when this call completes a reduce issued by the VecScatter layer, which logs itself */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
  PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}
1568 
/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscMemType rootmtype, leafmtype, leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  /* Auto-detect host vs device residency of all three user buffers */
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
  /* Implementations only support leafdata and leafupdate residing in the same memory space */
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}
1610 
/*@C
   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootmtype - memory type of rootdata
.  leafmtype - memory type of leafdata
.  leafdata - leaf values to use in reduction
.  leafupdatemtype - memory type of leafupdate
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note: See PetscSFFetchAndOpBegin() for more details.

.seealso: `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  /* Caller supplies the memory types here, so no PetscGetMemType() probing is needed;
     implementations still require leafdata and leafupdate in the same memory space */
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}
1646 
/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}
1675 
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

   The degree is not available until the matching PetscSFComputeDegreeEnd(); this routine sets *degree to NULL.

.seealso: `PetscSFGatherBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscValidPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree is non-NULL only between a Begin and its matching End, so its presence here means nesting */
    PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf - sf->minleaf + 1;
    PetscCall(PetscMalloc1(nroots, &sf->degree));
    PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i = 0; i < nroots; i++) sf->degree[i] = 0;
    for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
    /* Summing 1 from every leaf into its root yields the root's degree; degreetmp is shifted by
       minleaf so it can be indexed by absolute leaf numbers */
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
  }
  /* The result is only valid after PetscSFComputeDegreeEnd() */
  *degree = NULL;
  PetscFunctionReturn(0);
}
1713 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: `PetscSFComputeDegreeBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscValidPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Complete the MPI_SUM reduce of 1's started in PetscSFComputeDegreeBegin() */
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    /* Cache the result; subsequent Begin/End pairs become no-ops and reuse sf->degree */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1747 
/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Parameters:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Parameters:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

.seealso: `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF  msf;
  PetscInt i, j, k, nroots, nmroots;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  if (nroots) PetscValidIntPointer(degree, 2);
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots, 3);
  PetscValidPointer(multiRootsOrigNumbering, 4);
  PetscCall(PetscSFGetMultiSF(sf, &msf));
  PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
  PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
  /* Each original root i with degree d contributes d consecutive multi-roots, all mapped back to i;
     k walks the multi-root numbering while i walks the original roots */
  for (i = 0, j = 0, k = 0; i < nroots; i++) {
    if (!degree[i]) continue; /* roots with no leaves produce no multi-roots */
    for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
  }
  /* Total number of entries written must match the multi-SF's root count */
  PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}
1791 
/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Parameter:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* Gather is implemented as a reduce with MPI_REPLACE on the multi-SF, where every
     (root, leaf) edge has its own distinct multi-root, so no values are combined */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}
1820 
/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Parameter:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Complete the MPI_REPLACE reduce on the multi-SF started in PetscSFGatherBegin() */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}
1848 
/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.  leafdata - leaf data to be update with personal data from each respective root

   Level: intermediate

.seealso: `PetscSFComputeDegreeBegin()`, `PetscSFGatherBegin()`
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* Scatter is implemented as a broadcast over the multi-SF: each leaf has its own
     distinct multi-root, so every leaf receives its personal value */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}
1877 
/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.  leafdata - leaf data to be update with personal data from each respective root

   Level: intermediate

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFGatherEnd()`
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Complete the MPI_REPLACE broadcast on the multi-SF started in PetscSFScatterBegin() */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(0);
}
1905 
/* Verify (in debug builds only) that the SF's leaves are all distinct, i.e. it is a true
   star forest in which no leaf is connected to more than one root. In optimized builds this
   is a no-op. */
static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
{
  PetscInt        i, n, nleaves;
  const PetscInt *ilocal = NULL;
  PetscHSetI      seen;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
    /* Insert every leaf index into a hash set; duplicates collapse, so a size mismatch
       with nleaves reveals repeated leaves */
    PetscCall(PetscHSetICreate(&seen));
    for (i = 0; i < nleaves; i++) {
      const PetscInt leaf = ilocal ? ilocal[i] : i; /* ilocal==NULL means contiguous leaves 0..nleaves-1 */
      PetscCall(PetscHSetIAdd(seen, leaf));
    }
    PetscCall(PetscHSetIGetSize(seen, &n));
    PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
    PetscCall(PetscHSetIDestroy(&seen));
  }
  PetscFunctionReturn(0);
}
1926 
/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
  const PetscInt    *localPointsA, *localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscValidPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
  /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size       */
  /* numRootsB; otherwise, garbage will be broadcasted.                                   */
  /* Example (comm size = 1):                                                             */
  /* sfA: 0 <- (0, 0)                                                                     */
  /* sfB: 100 <- (0, 0)                                                                   */
  /*      101 <- (0, 1)                                                                   */
  /* Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target   */
  /* of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would   */
  /* receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on  */
  /* remotePointsA; if not recast, point 101 would receive a garbage value.               */
  PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
  /* rank = -1 marks roots of sfB that no leaf of sfA reaches */
  for (i = 0; i < numRootsB; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  /* Scatter sfA's remote points into root-of-sfB numbering */
  for (i = 0; i < numLeavesA; i++) {
    PetscInt localp = localPointsA ? localPointsA[i] : i;

    if (localp >= numRootsB) continue; /* leaf of sfA outside sfB's root space cannot be composed */
    reorderedRemotePointsA[localp] = remotePointsA[i];
  }
  remotePointsA = reorderedRemotePointsA;
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
  /* Pre-fill with the "unconnected" sentinel so untouched leaves are recognizable afterwards */
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    leafdataB[i].rank  = -1;
    leafdataB[i].index = -1;
  }
  /* Push sfA's root addresses down through sfB; each leaf of sfB now knows its root in sfA (if any) */
  PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* denseB: every leaf of sfB is connected and leaves are contiguous, so leafdataB can be used directly */
  denseB = (PetscBool)!localPointsB;
  for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership passes to the new SF below */
  } else {
    /* Compact: keep only connected leaves */
    PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
    for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l - minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
      localPointsBA[numLeavesBA]  = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}
2029 
/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA, *localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
  MPI_Op             op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscValidPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  /* Lay out sfA's remote points in sfB's leaf numbering; rank = -1 marks leaves of sfB
     that are not leaves of sfA (no path through them) */
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i = 0; i < numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* outside sfB's leaf range */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* localPointsA == NULL means contiguous leaves 0..numLeavesA-1 */
    for (i = 0; i < numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
  PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
  /* Sentinel (-1, -1): MPI_MAXLOC over these values lets any real contribution win,
     and untouched roots remain recognizable afterwards */
  for (i = 0; i < numRootsB; i++) {
    remotePointsBA[i].rank  = -1;
    remotePointsBA[i].index = -1;
  }

  /* Pull sfA's root addresses up through sfB (leaf -> root), inverting sfB */
  PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
  PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* Compact in place: keep only roots of sfB that received a contribution */
  for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA]        = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}
2132 
/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF

  Note: the caller owns the returned SF and is responsible for destroying it.
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Let the implementation provide an optimized version if it has one */
  if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
  else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
    PetscCallMPI(MPI_Comm_rank(comm, &myrank));

    /* Find out local edges and build a local SF */
    PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
    /* First pass: count edges whose root lives on this rank */
    for (i = lnleaves = 0; i < nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
    }
    PetscCall(PetscMalloc1(lnleaves, &lilocal));
    PetscCall(PetscMalloc1(lnleaves, &liremote));

    /* Second pass: copy the local edges, remapping root ranks to 0 (the only rank in PETSC_COMM_SELF) */
    for (i = j = 0; i < nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
    PetscCall(PetscSFSetFromOptions(lsf));
    PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(lsf));
    *out = lsf;
  }
  PetscFunctionReturn(0);
}
2184 
/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0.
   Combines Begin and End in one blocking call; logged under the PETSCSF_BcastBegin event. */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
{
  PetscMemType rootmtype, leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  /* Auto-detect host vs device residency of the user buffers */
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
  PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}
2200 
2201 /*@
2202   PetscSFConcatenate - concatenate multiple SFs into one
2203 
2204   Input Parameters:
2205 + comm - the communicator
2206 . nsfs - the number of input PetscSF
2207 . sfs  - the array of input PetscSF
2208 . shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE)
2209 - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage
2210 
2211   Output Parameters:
2212 . newsf - The resulting PetscSF
2213 
2214   Level: developer
2215 
2216   Notes:
2217   The communicator of all SFs in sfs must be comm.
2218 
2219   The offsets in leafOffsets are added to the original leaf indices.
2220 
2221   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2222   In this case, NULL is also passed as ilocal to the resulting SF.
2223 
2224   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
  In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2226 
2227 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2228 @*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt     i, s, nLeaves, nRoots;
  PetscInt    *leafArrayOffsets; /* leafArrayOffsets[s] = position of sfs[s]'s first leaf in the concatenated leaf arrays */
  PetscInt    *ilocal_new;       /* concatenated leaf indices; stays NULL when all inputs use contiguous leaf storage */
  PetscSFNode *iremote_new;      /* concatenated remote (rank, index) root locations, one entry per leaf */
  PetscInt    *rootOffsets;      /* rootOffsets[s] = shift applied to sfs[s]'s root indices (all zero when shareRoots) */
  PetscBool    all_ilocal_null = PETSC_FALSE;
  PetscMPIInt  rank;

  PetscFunctionBegin;
  {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    PetscCall(PetscSFCreate(comm, &dummy));
    PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
    PetscValidPointer(sfs, 3);
    for (i = 0; i < nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
      PetscCheckSameComm(dummy, 1, sfs[i], 3);
    }
    PetscValidLogicalCollectiveBool(dummy, shareRoots, 4);
    if (leafOffsets) PetscValidIntPointer(leafOffsets, 5);
    PetscValidPointer(newsf, 6);
    PetscCall(PetscSFDestroy(&dummy));
  }
  /* Trivial case: concatenating zero SFs yields an empty SF */
  if (!nsfs) {
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscFunctionReturn(0);
  }
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* Compute root offsets and total root count.
     shareRoots: all inputs address the same root space, which is reused as-is
     (rootOffsets stays all-zero courtesy of PetscCalloc1); in debug mode verify
     the inputs agree on the number of roots.
     Otherwise: root spaces are stacked, each SF's root indices shifted by the
     prefix sum of the preceding root counts. */
  PetscCall(PetscCalloc1(nsfs + 1, &rootOffsets));
  if (shareRoots) {
    PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
    if (PetscDefined(USE_DEBUG)) {
      for (s = 1; s < nsfs; s++) {
        PetscInt nr;

        PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
      }
    }
  } else {
    for (s = 0; s < nsfs; s++) {
      PetscInt nr;

      PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
      rootOffsets[s + 1] = rootOffsets[s] + nr;
    }
    nRoots = rootOffsets[nsfs];
  }

  /* Calculate leaf array offsets (prefix sums of the inputs' leaf counts) */
  PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
  leafArrayOffsets[0] = 0;
  for (s = 0; s < nsfs; s++) {
    PetscInt nl;

    PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
    leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* leafOffsets == NULL is only legal when every input SF already uses
     contiguous leaf storage (ilocal == NULL); then the result does too */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s = 0; s < nsfs; s++) {
      const PetscInt *ilocal;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL;
  if (!all_ilocal_null) {
    /* leafOffsets is guaranteed non-NULL here by the check above */
    PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
    for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; /* -1 marks entries not yet filled in */
    for (s = 0; s < nsfs; s++) {
      const PetscInt *ilocal;
      PetscInt       *ilocal_l = &ilocal_new[leafArrayOffsets[s]]; /* this SF's slice of the concatenated array */
      PetscInt        i, nleaves_l;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
      /* ilocal == NULL means contiguous storage, i.e. leaf i has local index i */
      for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots */
  PetscCall(PetscMalloc1(nLeaves, &iremote_new));
  for (i = 0; i < nLeaves; i++) {
    iremote_new[i].rank  = -1;
    iremote_new[i].index = -1;
  }
  for (s = 0; s < nsfs; s++) {
    PetscInt           i, nl, nr;
    PetscSF            tmp_sf;
    const PetscSFNode *iremote;
    PetscSFNode       *tmp_rootdata;
    PetscSFNode       *tmp_leafdata = &iremote_new[leafArrayOffsets[s]]; /* broadcast lands directly in this SF's slice of the result */

    PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
    PetscCall(PetscSFCreate(comm, &tmp_sf));
    /* create helper SF with contiguous leaves */
    PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
    PetscCall(PetscSFSetUp(tmp_sf));
    /* each root broadcasts its new location (owning rank, shifted root index) to its leaves */
    PetscCall(PetscMalloc1(nr, &tmp_rootdata));
    for (i = 0; i < nr; i++) {
      tmp_rootdata[i].index = i + rootOffsets[s];
      tmp_rootdata[i].rank  = (PetscInt)rank;
    }
    PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFDestroy(&tmp_sf));
    PetscCall(PetscFree(tmp_rootdata));
  }

  /* Build the new SF; it takes ownership of ilocal_new and iremote_new (PETSC_OWN_POINTER) */
  PetscCall(PetscSFCreate(comm, newsf));
  PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*newsf));
  PetscCall(PetscFree(rootOffsets));
  PetscCall(PetscFree(leafArrayOffsets));
  PetscFunctionReturn(0);
}
2360