xref: /petsc/src/vec/is/sf/interface/sf.c (revision e91c04dfc8a52dee1965211bb1cc8e5bf775178f)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petsc/private/hashmapi.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <cuda_runtime.h>
8   #include <petscdevice_cuda.h>
9 #endif
10 
11 #if defined(PETSC_HAVE_HIP)
12   #include <hip/hip_runtime.h>
13 #endif
14 
15 #if defined(PETSC_CLANG_STATIC_ANALYZER)
16 extern void PetscSFCheckGraphSet(PetscSF, int);
17 #else
18   #if defined(PETSC_USE_DEBUG)
19     #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
20   #else
21     #define PetscSFCheckGraphSet(sf, arg) \
22       do { \
23       } while (0)
24   #endif
25 #endif
26 
27 const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
28 const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
29 
30 /*@
31   PetscSFCreate - create a star forest communication context
32 
33   Collective
34 
35   Input Parameter:
36 . comm - communicator on which the star forest will operate
37 
38   Output Parameter:
39 . sf - new star forest context
40 
41   Options Database Keys:
42 + -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
43 . -sf_type window                - Use MPI-3 one-sided window for communication
44 . -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
45 - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)
46 
47   Level: intermediate
48 
49   Note:
50   When one knows the communication graph matches one of the predefined patterns, such as those of `MPI_Alltoall()`,
51   `MPI_Allgatherv()`, or `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`.
52   These special `PetscSF`s are optimized and have better performance than the general ones.
53 
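  Example Usage:
  A minimal sketch (illustrative; assumes `nroots` is defined by the caller and every process attaches one leaf to root 0 on rank 0):
.vb
  PetscSF     sf;
  PetscSFNode remote;

  remote.rank  = 0;
  remote.index = 0;
  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetGraph(sf, nroots, 1, NULL, PETSC_COPY_VALUES, &remote, PETSC_COPY_VALUES));
  PetscCall(PetscSFSetUp(sf));
  // ... communicate with PetscSFBcastBegin()/PetscSFBcastEnd() etc. ...
  PetscCall(PetscSFDestroy(&sf));
.ve
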
54 .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
55 @*/
56 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
57 {
58   PetscSF b;
59 
60   PetscFunctionBegin;
61   PetscAssertPointer(sf, 2);
62   PetscCall(PetscSFInitializePackage());
63 
64   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
65   b->nroots    = -1;
66   b->nleaves   = -1;
67   b->minleaf   = PETSC_INT_MAX;
68   b->maxleaf   = PETSC_INT_MIN;
69   b->nranks    = -1;
70   b->rankorder = PETSC_TRUE;
71   b->ingroup   = MPI_GROUP_NULL;
72   b->outgroup  = MPI_GROUP_NULL;
73   b->graphset  = PETSC_FALSE;
74 #if defined(PETSC_HAVE_DEVICE)
75   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
76   b->use_stream_aware_mpi = PETSC_FALSE;
77   b->unknown_input_stream = PETSC_FALSE;
78   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda */
79   b->backend = PETSCSF_BACKEND_KOKKOS;
80   #elif defined(PETSC_HAVE_CUDA)
81   b->backend = PETSCSF_BACKEND_CUDA;
82   #elif defined(PETSC_HAVE_HIP)
83   b->backend = PETSCSF_BACKEND_HIP;
84   #endif
85 
86   #if defined(PETSC_HAVE_NVSHMEM)
87   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
88   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
89   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
90   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
91   #endif
92 #endif
93   b->vscat.from_n = -1;
94   b->vscat.to_n   = -1;
95   b->vscat.unit   = MPIU_SCALAR;
96   *sf             = b;
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 /*@
101   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
102 
103   Collective
104 
105   Input Parameter:
106 . sf - star forest
107 
108   Level: advanced
109 
110 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
111 @*/
112 PetscErrorCode PetscSFReset(PetscSF sf)
113 {
114   PetscFunctionBegin;
115   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
116   PetscTryTypeMethod(sf, Reset);
117   PetscCall(PetscSFDestroy(&sf->rankssf));
118 
119   sf->nroots   = -1;
120   sf->nleaves  = -1;
121   sf->minleaf  = PETSC_INT_MAX;
122   sf->maxleaf  = PETSC_INT_MIN;
123   sf->mine     = NULL;
124   sf->remote   = NULL;
125   sf->graphset = PETSC_FALSE;
126   PetscCall(PetscFree(sf->mine_alloc));
127   PetscCall(PetscFree(sf->remote_alloc));
128   sf->nranks = -1;
129   PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
130   sf->degreeknown = PETSC_FALSE;
131   PetscCall(PetscFree(sf->degree));
132   if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
133   if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
134 
135   if (sf->multi) sf->multi->multi = NULL;
136   PetscCall(PetscSFDestroy(&sf->multi));
137 
138   PetscCall(PetscLayoutDestroy(&sf->map));
139 
140 #if defined(PETSC_HAVE_DEVICE)
141   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
142 #endif
143 
144   sf->setupcalled = PETSC_FALSE;
145   PetscFunctionReturn(PETSC_SUCCESS);
146 }
147 
148 /*@
149   PetscSFSetType - Set the `PetscSF` communication implementation
150 
151   Collective
152 
153   Input Parameters:
154 + sf   - the `PetscSF` context
155 - type - a known method
156 .vb
157     PETSCSFWINDOW - MPI-2/3 one-sided
158     PETSCSFBASIC - basic implementation using MPI-1 two-sided
159 .ve
160 
161   Options Database Key:
162 . -sf_type <type> - Sets the method; for example `basic` or `window`. Use -help for a list of available methods
163 
164   Level: intermediate
165 
166   Note:
167   See `PetscSFType` for possible values.
168 
169 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
170 @*/
171 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
172 {
173   PetscBool match;
174   PetscErrorCode (*r)(PetscSF);
175 
176   PetscFunctionBegin;
177   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
178   PetscAssertPointer(type, 2);
179 
180   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
181   if (match) PetscFunctionReturn(PETSC_SUCCESS);
182 
183   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
184   PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
185   /* Destroy the previous PetscSF implementation context */
186   PetscTryTypeMethod(sf, Destroy);
187   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
188   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
189   PetscCall((*r)(sf));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
193 /*@
194   PetscSFGetType - Get the `PetscSF` communication implementation
195 
196   Not Collective
197 
198   Input Parameter:
199 . sf - the `PetscSF` context
200 
201   Output Parameter:
202 . type - the `PetscSF` type name
203 
204   Level: intermediate
205 
206 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
207 @*/
208 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
209 {
210   PetscFunctionBegin;
211   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
212   PetscAssertPointer(type, 2);
213   *type = ((PetscObject)sf)->type_name;
214   PetscFunctionReturn(PETSC_SUCCESS);
215 }
216 
217 /*@
218   PetscSFDestroy - destroy a star forest
219 
220   Collective
221 
222   Input Parameter:
223 . sf - address of star forest
224 
225   Level: intermediate
226 
227 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
228 @*/
229 PetscErrorCode PetscSFDestroy(PetscSF *sf)
230 {
231   PetscFunctionBegin;
232   if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
233   PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1);
234   if (--((PetscObject)*sf)->refct > 0) {
235     *sf = NULL;
236     PetscFunctionReturn(PETSC_SUCCESS);
237   }
238   PetscCall(PetscSFReset(*sf));
239   PetscTryTypeMethod(*sf, Destroy);
240   PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
241   if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
242 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
243   if ((*sf)->use_stream_aware_mpi) {
244     PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
245     PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
246   }
247 #endif
248   PetscCall(PetscHeaderDestroy(sf));
249   PetscFunctionReturn(PETSC_SUCCESS);
250 }
251 
252 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
253 {
254   PetscInt           i, nleaves;
255   PetscMPIInt        size;
256   const PetscInt    *ilocal;
257   const PetscSFNode *iremote;
258 
259   PetscFunctionBegin;
260   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
261   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
262   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
263   for (i = 0; i < nleaves; i++) {
264     const PetscInt rank   = iremote[i].rank;
265     const PetscInt remote = iremote[i].index;
266     const PetscInt leaf   = ilocal ? ilocal[i] : i;
267     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
268     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
269     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
270   }
271   PetscFunctionReturn(PETSC_SUCCESS);
272 }
273 
274 /*@
275   PetscSFSetUp - set up communication structures for a `PetscSF`, after which it may be used to perform communication
276 
277   Collective
278 
279   Input Parameter:
280 . sf - star forest communication object
281 
282   Level: beginner
283 
284 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
285 @*/
286 PetscErrorCode PetscSFSetUp(PetscSF sf)
287 {
288   PetscFunctionBegin;
289   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
290   PetscSFCheckGraphSet(sf, 1);
291   if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
292   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
293   PetscCall(PetscSFCheckGraphValid_Private(sf));
294   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
295   PetscTryTypeMethod(sf, SetUp);
296 #if defined(PETSC_HAVE_CUDA)
297   if (sf->backend == PETSCSF_BACKEND_CUDA) {
298     sf->ops->Malloc = PetscSFMalloc_CUDA;
299     sf->ops->Free   = PetscSFFree_CUDA;
300   }
301 #endif
302 #if defined(PETSC_HAVE_HIP)
303   if (sf->backend == PETSCSF_BACKEND_HIP) {
304     sf->ops->Malloc = PetscSFMalloc_HIP;
305     sf->ops->Free   = PetscSFFree_HIP;
306   }
307 #endif
308 
309 #if defined(PETSC_HAVE_KOKKOS)
310   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
311     sf->ops->Malloc = PetscSFMalloc_Kokkos;
312     sf->ops->Free   = PetscSFFree_Kokkos;
313   }
314 #endif
315   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
316   sf->setupcalled = PETSC_TRUE;
317   PetscFunctionReturn(PETSC_SUCCESS);
318 }
319 
320 /*@
321   PetscSFSetFromOptions - set `PetscSF` options using the options database
322 
323   Logically Collective
324 
325   Input Parameter:
326 . sf - star forest
327 
328   Options Database Keys:
329 + -sf_type                      - implementation type, see `PetscSFSetType()`
330 . -sf_rank_order                - sort composite points for gathers and scatters in rank order; gathers are non-deterministic otherwise
331 . -sf_unknown_input_stream      - Assume the caller computed the input root/leafdata on streams unknown to `PetscSF`, so `PetscSF` synchronizes
332                                   the device before accessing the data (default: false). This option only matters with `-use_gpu_aware_mpi 1`.
333 . -sf_use_stream_aware_mpi      - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
334                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
335 - -sf_backend <cuda,hip,kokkos> - Select the device backend `PetscSF` uses. Currently `PetscSF` has three backends: cuda, hip and kokkos.
336                                   On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
337                                   kokkos is the only available backend.
340 
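  Example Usage:
  For instance, one might select the neighbor implementation with the kokkos backend at runtime (illustrative executable name):
.vb
  ./app -sf_type neighbor -sf_backend kokkos
.ve
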
341   Level: intermediate
342 
343 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
344 @*/
345 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
346 {
347   PetscSFType deft;
348   char        type[256];
349   PetscBool   flg;
350 
351   PetscFunctionBegin;
352   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
353   PetscObjectOptionsBegin((PetscObject)sf);
354   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
355   PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
356   PetscCall(PetscSFSetType(sf, flg ? type : deft));
357   PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
358   PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL));
359 #if defined(PETSC_HAVE_DEVICE)
360   {
361     char      backendstr[32] = {0};
362     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
363     /* Change the defaults set in PetscSFCreate() with command line options */
364     PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
365     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
366     PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
367     PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
368     PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
369     PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
370   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
371     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
372     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
373     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
374     else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
375   #elif defined(PETSC_HAVE_KOKKOS)
376     PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
377   #endif
378 
379   #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
380     if (sf->use_stream_aware_mpi) {
381       MPI_Info info;
382 
383       PetscCallMPI(MPI_Info_create(&info));
384       PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
385       PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
386       PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
387       PetscCallMPI(MPI_Info_free(&info));
388       PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
389     }
390   #endif
391   }
392 #endif
393   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
394   PetscOptionsEnd();
395   PetscFunctionReturn(PETSC_SUCCESS);
396 }
397 
398 /*@
399   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
400 
401   Logically Collective
402 
403   Input Parameters:
404 + sf  - star forest
405 - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)
406 
407   Level: advanced
408 
409 .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
410 @*/
411 PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
412 {
413   PetscFunctionBegin;
414   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
415   PetscValidLogicalCollectiveBool(sf, flg, 2);
416   PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
417   sf->rankorder = flg;
418   PetscFunctionReturn(PETSC_SUCCESS);
419 }
420 
421 /*@
422   PetscSFSetGraph - Set a parallel star forest
423 
424   Collective
425 
426   Input Parameters:
427 + sf         - star forest
428 . nroots     - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
429 . nleaves    - number of leaf vertices on the current process, each of these references a root on any process
430 . ilocal     - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
431 during setup in debug mode)
432 . localmode  - copy mode for `ilocal`
433 . iremote    - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
434 during setup in debug mode)
435 - remotemode - copy mode for `iremote`
436 
437   Level: intermediate
438 
439   Notes:
440   Leaf indices in `ilocal` must be unique, otherwise an error occurs.
441 
442   Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
443   In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
444   PETSc might modify the respective array;
445   if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
446   Only if `PETSC_COPY_VALUES` is used is the respective array guaranteed to stay intact, and a const array can be passed (but a cast to non-const is needed).
447 
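  Example Usage:
  A sketch (illustrative; assumes `sf` has been created) in which each process owns one root and attaches one leaf to root 0 of the next process:
.vb
  PetscMPIInt rank, size;
  PetscSFNode remote;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  remote.rank  = (rank + 1) % size;
  remote.index = 0;
  // one root and one leaf per process; NULL ilocal means contiguous leaf storage
  PetscCall(PetscSFSetGraph(sf, 1, 1, NULL, PETSC_COPY_VALUES, &remote, PETSC_COPY_VALUES));
.ve
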
448   Fortran Notes:
449   In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
450 
451   Developer Notes:
452   We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
453   This also makes it easy to compare the leaf sets of two `PetscSF`s.
454 
455 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
456 @*/
457 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
458 {
459   PetscBool unique, contiguous;
460 
461   PetscFunctionBegin;
462   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
463   if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
464   if (nleaves > 0) PetscAssertPointer(iremote, 6);
465   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
466   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
467   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
468    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
469   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
470   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
471 
472   if (sf->nroots >= 0) { /* Reset only if graph already set */
473     PetscCall(PetscSFReset(sf));
474   }
475 
476   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
477   if (PetscDefined(USE_DEBUG)) {
478     PetscMPIInt size;
479 
480     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
481     for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); }
482   }
483 
484   sf->nroots  = nroots;
485   sf->nleaves = nleaves;
486 
487   if (localmode == PETSC_COPY_VALUES && ilocal) {
488     PetscInt *tlocal = NULL;
489 
490     PetscCall(PetscMalloc1(nleaves, &tlocal));
491     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
492     ilocal = tlocal;
493   }
494   if (remotemode == PETSC_COPY_VALUES) {
495     PetscSFNode *tremote = NULL;
496 
497     PetscCall(PetscMalloc1(nleaves, &tremote));
498     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
499     iremote = tremote;
500   }
501 
502   if (nleaves && ilocal) {
503     PetscSFNode work;
504 
505     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
506     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
507     unique = PetscNot(unique);
508     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
509     sf->minleaf = ilocal[0];
510     sf->maxleaf = ilocal[nleaves - 1];
511     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
512   } else {
513     sf->minleaf = 0;
514     sf->maxleaf = nleaves - 1;
515     unique      = PETSC_TRUE;
516     contiguous  = PETSC_TRUE;
517   }
518 
519   if (contiguous) {
520     if (localmode == PETSC_USE_POINTER) {
521       ilocal = NULL;
522     } else {
523       PetscCall(PetscFree(ilocal));
524     }
525   }
526   sf->mine = ilocal;
527   if (localmode == PETSC_USE_POINTER) {
528     sf->mine_alloc = NULL;
529   } else {
530     sf->mine_alloc = ilocal;
531   }
538   sf->remote = iremote;
539   if (remotemode == PETSC_USE_POINTER) {
540     sf->remote_alloc = NULL;
541   } else {
542     sf->remote_alloc = iremote;
543   }
544   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
545   sf->graphset = PETSC_TRUE;
546   PetscFunctionReturn(PETSC_SUCCESS);
547 }
548 
549 /*@
550   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
551 
552   Collective
553 
554   Input Parameters:
555 + sf      - The `PetscSF`
556 . map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
557 - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
558 
559   Level: intermediate
560 
561   Notes:
562   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
563   `n` and `N` are the local and global sizes of `x` respectively.
564 
565   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that doing `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it copies `x` to
566   sequential vectors `y` on all MPI processes.
567 
568   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that doing `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it copies `x` to a
569   sequential vector `y` on rank 0.
570 
571   In the above cases, entries of `x` are roots and entries of `y` are leaves.
572 
573   With `PETSCSF_PATTERN_ALLTOALL`, `map` is ignored. Suppose NP is the size of `sf`'s communicator. The routine
574   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
575   of rank j, where 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount equal to 1. This does not
576   mean one cannot send multiple items; one just needs to create a new MPI datatype for the multiple data
577   items with `MPI_Type_contiguous()` and use that as the `unit` argument in SF routines.
578 
579   In this case, roots and leaves are symmetric.
580 
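  Example Usage:
  A sketch (illustrative; assumes `sf` has been created) mimicking `MPI_Allgather()` with one root per process:
.vb
  PetscLayout map;

  PetscCall(PetscLayoutCreate(PetscObjectComm((PetscObject)sf), &map));
  PetscCall(PetscLayoutSetLocalSize(map, 1));
  PetscCall(PetscLayoutSetUp(map));
  PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER));
  PetscCall(PetscLayoutDestroy(&map)); // sf keeps its own reference to the layout
.ve
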
581 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
582  @*/
583 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
584 {
585   MPI_Comm    comm;
586   PetscInt    n, N, res[2];
587   PetscMPIInt rank, size;
588   PetscSFType type;
589 
590   PetscFunctionBegin;
591   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
592   if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
593   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
594   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
595   PetscCallMPI(MPI_Comm_rank(comm, &rank));
596   PetscCallMPI(MPI_Comm_size(comm, &size));
597 
598   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
599     PetscInt sizei = size;
600 
601     type = PETSCSFALLTOALL;
602     PetscCall(PetscLayoutCreate(comm, &sf->map));
603     PetscCall(PetscLayoutSetLocalSize(sf->map, size));
604     PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei)));
605     PetscCall(PetscLayoutSetUp(sf->map));
606   } else {
607     PetscCall(PetscLayoutGetLocalSize(map, &n));
608     PetscCall(PetscLayoutGetSize(map, &N));
609     res[0] = n;
610     res[1] = -n;
611     /* Check if n is the same on all ranks so that we can optimize it */
612     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
613     if (res[0] == -res[1]) { /* same n */
614       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
615     } else {
616       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
617     }
618     PetscCall(PetscLayoutReference(map, &sf->map));
619   }
620   PetscCall(PetscSFSetType(sf, type));
621 
622   sf->pattern = pattern;
623   sf->mine    = NULL; /* Contiguous */
624 
625   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
626      Also set other easy stuff.
627    */
628   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
629     sf->nleaves = N;
630     sf->nroots  = n;
631     sf->nranks  = size;
632     sf->minleaf = 0;
633     sf->maxleaf = N - 1;
634   } else if (pattern == PETSCSF_PATTERN_GATHER) {
635     sf->nleaves = rank ? 0 : N;
636     sf->nroots  = n;
637     sf->nranks  = rank ? 0 : size;
638     sf->minleaf = 0;
639     sf->maxleaf = rank ? -1 : N - 1;
640   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
641     sf->nleaves = size;
642     sf->nroots  = size;
643     sf->nranks  = size;
644     sf->minleaf = 0;
645     sf->maxleaf = size - 1;
646   }
647   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
648   sf->graphset = PETSC_TRUE;
649   PetscFunctionReturn(PETSC_SUCCESS);
650 }
651 
652 /*@
653   PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map
654 
655   Collective
656 
657   Input Parameter:
658 . sf - star forest to invert
659 
660   Output Parameter:
661 . isf - inverse of `sf`
662 
663   Level: advanced
664 
665   Notes:
666   All roots must have degree 1.
667 
668   The local space may be a permutation, but cannot be sparse.
669 
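  Example Usage:
  A sketch (illustrative; `leafdata` and `rootdata` are hypothetical arrays sized for `sf`); broadcasting on the inverse moves values from the leaves of `sf` to its roots:
.vb
  PetscSF isf;

  PetscCall(PetscSFCreateInverseSF(sf, &isf));
  // the roots of isf are the leaves of sf, and vice versa
  PetscCall(PetscSFBcastBegin(isf, MPIU_SCALAR, leafdata, rootdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(isf, MPIU_SCALAR, leafdata, rootdata, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&isf));
.ve
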
670 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
671 @*/
672 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
673 {
674   PetscMPIInt     rank;
675   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
676   const PetscInt *ilocal;
677   PetscSFNode    *roots, *leaves;
678 
679   PetscFunctionBegin;
680   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
681   PetscSFCheckGraphSet(sf, 1);
682   PetscAssertPointer(isf, 2);
683 
684   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
685   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
686 
687   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
688   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
689   for (i = 0; i < maxlocal; i++) {
690     leaves[i].rank  = rank;
691     leaves[i].index = i;
692   }
693   for (i = 0; i < nroots; i++) {
694     roots[i].rank  = -1;
695     roots[i].index = -1;
696   }
697   PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
698   PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
699 
700   /* Check whether our leaves are sparse */
701   for (i = 0, count = 0; i < nroots; i++)
702     if (roots[i].rank >= 0) count++;
703   if (count == nroots) newilocal = NULL;
704   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
705     for (i = 0, count = 0; i < nroots; i++) {
706       if (roots[i].rank >= 0) {
707         newilocal[count]   = i;
708         roots[count].rank  = roots[i].rank;
709         roots[count].index = roots[i].index;
710         count++;
711       }
712     }
713   }
714 
715   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
716   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
717   PetscCall(PetscFree2(roots, leaves));
718   PetscFunctionReturn(PETSC_SUCCESS);
719 }
720 
721 /*@
722   PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
723 
724   Collective
725 
726   Input Parameters:
727 + sf  - communication object to duplicate
728 - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
729 
730   Output Parameter:
731 . newsf - new communication object
732 
733   Level: beginner
734 
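  Example Usage:
  A sketch (illustrative) duplicating both the configuration and the graph:
.vb
  PetscSF newsf;

  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_GRAPH, &newsf));
  // newsf communicates over the same graph as sf; no PetscSFSetGraph() call is needed
  PetscCall(PetscSFDestroy(&newsf));
.ve
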
735 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
736 @*/
737 PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
738 {
739   PetscSFType  type;
740   MPI_Datatype dtype = MPIU_SCALAR;
741 
742   PetscFunctionBegin;
743   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
744   PetscValidLogicalCollectiveEnum(sf, opt, 2);
745   PetscAssertPointer(newsf, 3);
746   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
747   PetscCall(PetscSFGetType(sf, &type));
748   if (type) PetscCall(PetscSFSetType(*newsf, type));
749   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
750   if (opt == PETSCSF_DUPLICATE_GRAPH) {
751     PetscSFCheckGraphSet(sf, 1);
752     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
753       PetscInt           nroots, nleaves;
754       const PetscInt    *ilocal;
755       const PetscSFNode *iremote;
756       PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
757       PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
758     } else {
759       PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
760     }
761   }
762   /* Since oldtype is committed, so is newtype, according to MPI */
763   if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
764   (*newsf)->vscat.bs     = sf->vscat.bs;
765   (*newsf)->vscat.unit   = dtype;
766   (*newsf)->vscat.to_n   = sf->vscat.to_n;
767   (*newsf)->vscat.from_n = sf->vscat.from_n;
768   /* Do not copy lsf. Build it on demand since it is rarely used */
769 
770 #if defined(PETSC_HAVE_DEVICE)
771   (*newsf)->backend              = sf->backend;
772   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
773   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
774   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
775 #endif
776   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
777   /* Don't do PetscSFSetUp() since the new sf's graph might not have been set. */
778   PetscFunctionReturn(PETSC_SUCCESS);
779 }
780 
781 /*@C
782   PetscSFGetGraph - Get the graph specifying a parallel star forest
783 
784   Not Collective
785 
786   Input Parameter:
787 . sf - star forest
788 
789   Output Parameters:
790 + nroots  - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
791 . nleaves - number of leaf vertices on the current process, each of these references a root on any process
792 . ilocal  - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
793 - iremote - remote locations of root vertices for each leaf on the current process
794 
795   Level: intermediate
796 
797   Notes:
798   The graph is not currently required to be set; `nroots` is returned as -1 if it has not been set yet
799 
800   The returned `ilocal` and `iremote` might contain values in a different order than the input ones in `PetscSFSetGraph()`
801 
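  Example Usage:
  A sketch (illustrative) visiting every leaf and the root it references:
.vb
  PetscInt           nroots, nleaves;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
  for (PetscInt i = 0; i < nleaves; i++) {
    const PetscInt leaf = ilocal ? ilocal[i] : i; // NULL ilocal means leaves are contiguous
    // leaf references root iremote[i].index on rank iremote[i].rank
  }
.ve
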
802   Fortran Notes:
803   The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you
804   want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array.
805 
806   To check for a `NULL` `ilocal` use
807 $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
808 
809 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
810 @*/
811 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
812 {
813   PetscFunctionBegin;
814   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
815   if (sf->ops->GetGraph) {
816     PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
817   } else {
818     if (nroots) *nroots = sf->nroots;
819     if (nleaves) *nleaves = sf->nleaves;
820     if (ilocal) *ilocal = sf->mine;
821     if (iremote) *iremote = sf->remote;
822   }
823   PetscFunctionReturn(PETSC_SUCCESS);
824 }
825 
826 /*@
827   PetscSFGetLeafRange - Get the active leaf ranges
828 
829   Not Collective
830 
831   Input Parameter:
832 . sf - star forest
833 
834   Output Parameters:
835 + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
836 - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
837 
838   Level: developer
839 
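  Example Usage:
  A sketch (illustrative) sizing a work array that spans all active leaves, mirroring what this file does internally:
.vb
  PetscInt     minleaf, maxleaf, nspan;
  PetscScalar *work;

  PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
  nspan = maxleaf - minleaf + 1; // 0 when there are no leaves
  PetscCall(PetscMalloc1(nspan, &work));
.ve
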
840 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
841 @*/
842 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
843 {
844   PetscFunctionBegin;
845   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
846   PetscSFCheckGraphSet(sf, 1);
847   if (minleaf) *minleaf = sf->minleaf;
848   if (maxleaf) *maxleaf = sf->maxleaf;
849   PetscFunctionReturn(PETSC_SUCCESS);
850 }
851 
852 /*@
853   PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
854 
855   Collective
856 
857   Input Parameters:
858 + A    - the star forest
859 . obj  - Optional object that provides the prefix for the option names
860 - name - command line option
861 
862   Level: intermediate
863 
864   Note:
865   See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
866 
867 .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
868 @*/
869 PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
870 {
871   PetscFunctionBegin;
872   PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
873   PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
874   PetscFunctionReturn(PETSC_SUCCESS);
875 }
876 
877 /*@
878   PetscSFView - view a star forest
879 
880   Collective
881 
882   Input Parameters:
883 + sf     - star forest
884 - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
885 
886   Level: beginner
887 
888 .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
889 @*/
890 PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
891 {
892   PetscBool         iascii;
893   PetscViewerFormat format;
894 
895   PetscFunctionBegin;
896   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
897   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
898   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
899   PetscCheckSameComm(sf, 1, viewer, 2);
900   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
901   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
902   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
903     PetscMPIInt rank;
904     PetscInt    j;
905 
906     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
907     PetscCall(PetscViewerASCIIPushTab(viewer));
908     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
909       if (!sf->graphset) {
910         PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
911         PetscCall(PetscViewerASCIIPopTab(viewer));
912         PetscFunctionReturn(PETSC_SUCCESS);
913       }
914       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
915       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
916       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
917       for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
918       PetscCall(PetscViewerFlush(viewer));
919       PetscCall(PetscViewerGetFormat(viewer, &format));
920       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
921         PetscMPIInt *tmpranks, *perm;
922 
923         PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
924         PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
925         for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
926         PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
927         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
928         for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
929           PetscMPIInt i = perm[ii];
930 
931           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
932           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
933         }
934         PetscCall(PetscFree2(tmpranks, perm));
935       }
936       PetscCall(PetscViewerFlush(viewer));
937       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
938     }
939     PetscCall(PetscViewerASCIIPopTab(viewer));
940   }
941   PetscTryTypeMethod(sf, View, viewer);
942   PetscFunctionReturn(PETSC_SUCCESS);
943 }
944 
945 /*@C
946   PetscSFGetRootRanks - Get the root ranks and the number of vertices referenced by leaves on this process
947 
948   Not Collective
949 
950   Input Parameter:
951 . sf - star forest
952 
953   Output Parameters:
954 + nranks  - number of ranks referenced by local part
955 . ranks   - [`nranks`] array of ranks
956 . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
957 . rmine   - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank, or `NULL`
958 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank, or `NULL`
959 
960   Level: developer
961 
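  Example Usage:
  A sketch (illustrative) of traversing the returned arrays, which form a CSR-like structure grouped by root rank:
.vb
  PetscMPIInt        nranks;
  const PetscMPIInt *ranks;
  const PetscInt    *roffset, *rmine, *rremote;

  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
  for (PetscMPIInt r = 0; r < nranks; r++) {
    for (PetscInt j = roffset[r]; j < roffset[r + 1]; j++) {
      // local leaf rmine[j] references root rremote[j] on rank ranks[r]
    }
  }
.ve
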
962 .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
963 @*/
964 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
965 {
966   PetscFunctionBegin;
967   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
968   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
969   if (sf->ops->GetRootRanks) {
970     PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
971   } else {
972     /* The generic implementation */
973     if (nranks) *nranks = sf->nranks;
974     if (ranks) *ranks = sf->ranks;
975     if (roffset) *roffset = sf->roffset;
976     if (rmine) *rmine = sf->rmine;
977     if (rremote) *rremote = sf->rremote;
978   }
979   PetscFunctionReturn(PETSC_SUCCESS);
980 }
981 
982 /*@C
983   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
984 
985   Not Collective
986 
987   Input Parameter:
988 . sf - star forest
989 
990   Output Parameters:
991 + niranks  - number of leaf ranks referencing roots on this process
992 . iranks   - [`niranks`] array of ranks
993 . ioffset  - [`niranks`+1] offset in `irootloc` for each rank
994 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank
995 
996   Level: developer
997 
998 .seealso: `PetscSF`, `PetscSFGetRootRanks()`
999 @*/
1000 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
1001 {
1002   PetscFunctionBegin;
1003   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1004   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
1005   if (sf->ops->GetLeafRanks) {
1006     PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
1007   } else {
1008     PetscSFType type;
1009     PetscCall(PetscSFGetType(sf, &type));
1010     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
1011   }
1012   PetscFunctionReturn(PETSC_SUCCESS);
1013 }
1014 
1015 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1016 {
1017   PetscInt i;
1018   for (i = 0; i < n; i++) {
1019     if (needle == list[i]) return PETSC_TRUE;
1020   }
1021   return PETSC_FALSE;
1022 }
1023 
1024 /*@C
1025   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.
1026 
1027   Collective
1028 
1029   Input Parameters:
1030 + sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1031 - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
1032 
1033   Level: developer
1034 
1035 .seealso: `PetscSF`, `PetscSFGetRootRanks()`
1036 @*/
1037 PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1038 {
1039   PetscHMapI    table;
1040   PetscHashIter pos;
1041   PetscMPIInt   size, groupsize, *groupranks, *ranks;
1042   PetscInt     *rcount;
1043   PetscInt      irank, sfnrank, ranksi;
1044   PetscMPIInt   i, orank = -1;
1045 
1046   PetscFunctionBegin;
1047   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1048   PetscSFCheckGraphSet(sf, 1);
1049   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1050   PetscCall(PetscHMapICreateWithSize(10, &table));
1051   for (i = 0; i < sf->nleaves; i++) {
1052     /* Log 1-based rank */
1053     PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
1054   }
1055   PetscCall(PetscHMapIGetSize(table, &sfnrank));
1056   PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
1057   PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
1058   PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1059   PetscHashIterBegin(table, pos);
1060   for (i = 0; i < sf->nranks; i++) {
1061     PetscHashIterGetKey(table, pos, ranksi);
1062     PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
1063     PetscHashIterGetVal(table, pos, rcount[i]);
1064     PetscHashIterNext(table, pos);
1065     ranks[i]--; /* Convert back to 0-based */
1066   }
1067   PetscCall(PetscHMapIDestroy(&table));
1068 
1069   /* We expect that dgroup is reliably "small" while nranks could be large */
1070   {
1071     MPI_Group    group = MPI_GROUP_NULL;
1072     PetscMPIInt *dgroupranks;
1073 
1074     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1075     PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
1076     PetscCall(PetscMalloc1(groupsize, &dgroupranks));
1077     PetscCall(PetscMalloc1(groupsize, &groupranks));
1078     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1079     if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
1080     PetscCallMPI(MPI_Group_free(&group));
1081     PetscCall(PetscFree(dgroupranks));
1082   }
1083 
1084   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1085   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1086     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1087       if (InList(ranks[i], groupsize, groupranks)) break;
1088     }
1089     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1090       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1091     }
1092     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1093       PetscMPIInt tmprank;
1094       PetscInt    tmpcount;
1095 
1096       tmprank             = ranks[i];
1097       tmpcount            = rcount[i];
1098       ranks[i]            = ranks[sf->ndranks];
1099       rcount[i]           = rcount[sf->ndranks];
1100       ranks[sf->ndranks]  = tmprank;
1101       rcount[sf->ndranks] = tmpcount;
1102       sf->ndranks++;
1103     }
1104   }
1105   PetscCall(PetscFree(groupranks));
1106   PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
1107   if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
1108   sf->roffset[0] = 0;
1109   for (i = 0; i < sf->nranks; i++) {
1110     PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
1111     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1112     rcount[i]          = 0;
1113   }
1114   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1115     /* short circuit */
1116     if (orank != sf->remote[i].rank) {
1117       /* Search for index of iremote[i].rank in sf->ranks */
1118       PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank));
1119       PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank));
1120       if (irank < 0) {
1121         PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1122         if (irank >= 0) irank += sf->ndranks;
1123       }
1124     }
1125     PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank);
1126     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1127     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1128     rcount[irank]++;
1129   }
1130   PetscCall(PetscFree2(rcount, ranks));
1131   PetscFunctionReturn(PETSC_SUCCESS);
1132 }
1133 
1134 /*@C
1135   PetscSFGetGroups - gets incoming and outgoing process groups
1136 
1137   Collective
1138 
1139   Input Parameter:
1140 . sf - star forest
1141 
1142   Output Parameters:
1143 + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1144 - outgoing - group of destination processes for outgoing edges (roots that I reference)
1145 
1146   Level: developer
1147 
1148 .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1149 @*/
1150 PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1151 {
1152   MPI_Group group = MPI_GROUP_NULL;
1153 
1154   PetscFunctionBegin;
1155   PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
1156   if (sf->ingroup == MPI_GROUP_NULL) {
1157     PetscInt        i;
1158     const PetscInt *indegree;
1159     PetscMPIInt     rank, *outranks, *inranks, indegree0;
1160     PetscSFNode    *remote;
1161     PetscSF         bgcount;
1162 
1163     /* Compute the number of incoming ranks */
1164     PetscCall(PetscMalloc1(sf->nranks, &remote));
1165     for (i = 0; i < sf->nranks; i++) {
1166       remote[i].rank  = sf->ranks[i];
1167       remote[i].index = 0;
1168     }
1169     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
1170     PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1171     PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
1172     PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
1173     /* Enumerate the incoming ranks */
1174     PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
1175     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1176     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1177     PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
1178     PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
1179     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1180     PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
1181     PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
1182     PetscCallMPI(MPI_Group_free(&group));
1183     PetscCall(PetscFree2(inranks, outranks));
1184     PetscCall(PetscSFDestroy(&bgcount));
1185   }
1186   *incoming = sf->ingroup;
1187 
1188   if (sf->outgroup == MPI_GROUP_NULL) {
1189     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1190     PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
1191     PetscCallMPI(MPI_Group_free(&group));
1192   }
1193   *outgoing = sf->outgroup;
1194   PetscFunctionReturn(PETSC_SUCCESS);
1195 }
1196 
1197 /*@
1198   PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
1199 
1200   Collective
1201 
1202   Input Parameter:
1203 . sf - star forest
1204 
1205   Output Parameter:
1206 . rsf - the star forest with a single root per process to perform communications
1207 
1208   Level: developer
1209 
1210 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
1211 @*/
1212 PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
1213 {
1214   PetscFunctionBegin;
1215   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1216   PetscAssertPointer(rsf, 2);
1217   if (!sf->rankssf) {
1218     PetscSFNode       *rremotes;
1219     const PetscMPIInt *ranks;
1220     PetscMPIInt        nranks;
1221 
1222     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
1223     PetscCall(PetscMalloc1(nranks, &rremotes));
1224     for (PetscInt i = 0; i < nranks; i++) {
1225       rremotes[i].rank  = ranks[i];
1226       rremotes[i].index = 0;
1227     }
1228     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
1229     PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
1230   }
1231   *rsf = sf->rankssf;
1232   PetscFunctionReturn(PETSC_SUCCESS);
1233 }
1234 
1235 /*@
1236   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
1237 
1238   Collective
1239 
1240   Input Parameter:
1241 . sf - star forest that may contain roots with 0 or with more than 1 vertex
1242 
1243   Output Parameter:
1244 . multi - star forest with split roots, such that each root has degree exactly 1
1245 
1246   Level: developer
1247 
1248   Note:
1249   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating `multi`
1250   directly. Since `multi` satisfies the stronger condition that each entry in the global space has exactly one incoming
1251   edge, it is a candidate for future optimization that might involve its removal.
1252 
1253 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1254 @*/
1255 PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1256 {
1257   PetscFunctionBegin;
1258   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1259   PetscAssertPointer(multi, 2);
1260   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1261     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1262     *multi           = sf->multi;
1263     sf->multi->multi = sf->multi;
1264     PetscFunctionReturn(PETSC_SUCCESS);
1265   }
1266   if (!sf->multi) {
1267     const PetscInt *indegree;
1268     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
1269     PetscSFNode    *remote;
1270     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1271     PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
1272     PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
1273     PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
1274     inoffset[0] = 0;
1275     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1276     for (i = 0; i < maxlocal; i++) outones[i] = 1;
1277     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1278     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1279     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1280     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1281       for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
1282     }
1283     PetscCall(PetscMalloc1(sf->nleaves, &remote));
1284     for (i = 0; i < sf->nleaves; i++) {
1285       remote[i].rank  = sf->remote[i].rank;
1286       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1287     }
1288     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1289     sf->multi->multi = sf->multi;
1290     PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1291     if (sf->rankorder) { /* Sort the ranks */
1292       PetscMPIInt  rank;
1293       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1294       PetscSFNode *newremote;
1295       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1296       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1297       PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
1298       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1299       PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1300       PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1301       /* Sort the incoming ranks at each vertex, build the inverse map */
1302       for (i = 0; i < sf->nroots; i++) {
1303         PetscInt j;
1304         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1305         PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
1306         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1307       }
1308       PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1309       PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1310       PetscCall(PetscMalloc1(sf->nleaves, &newremote));
1311       for (i = 0; i < sf->nleaves; i++) {
1312         newremote[i].rank  = sf->remote[i].rank;
1313         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1314       }
1315       PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
1316       PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
1317     }
1318     PetscCall(PetscFree3(inoffset, outones, outoffset));
1319   }
1320   *multi = sf->multi;
1321   PetscFunctionReturn(PETSC_SUCCESS);
1322 }
1323 
1324 /*@C
1325   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
1326 
1327   Collective
1328 
1329   Input Parameters:
1330 + sf        - original star forest
1331 . nselected - number of selected roots on this process
1332 - selected  - indices of the selected roots on this process
1333 
1334   Output Parameter:
1335 . esf - new star forest
1336 
1337   Level: advanced
1338 
1339   Note:
1340   To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
1341   be done by calling `PetscSFGetGraph()`.
1342 
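  Example:
  A minimal sketch (names are illustrative) that keeps only the edges rooted at the even-numbered roots on this process:
.vb
  PetscInt nroots, nsel = 0, *sel;
  PetscSF  esf;

  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  PetscCall(PetscMalloc1(nroots, &sel));
  for (PetscInt r = 0; r < nroots; r++)
    if (r % 2 == 0) sel[nsel++] = r;
  PetscCall(PetscSFCreateEmbeddedRootSF(sf, nsel, sel, &esf));
  PetscCall(PetscFree(sel));
.ve
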
1343 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1344 @*/
1345 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1346 {
1347   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1348   const PetscInt    *ilocal;
1349   signed char       *rootdata, *leafdata, *leafmem;
1350   const PetscSFNode *iremote;
1351   PetscSFNode       *new_iremote;
1352   MPI_Comm           comm;
1353 
1354   PetscFunctionBegin;
1355   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1356   PetscSFCheckGraphSet(sf, 1);
1357   if (nselected) PetscAssertPointer(selected, 3);
1358   PetscAssertPointer(esf, 4);
1359 
1360   PetscCall(PetscSFSetUp(sf));
1361   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
1362   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1363   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1364 
1365   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1366     PetscBool dups;
1367     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
1368     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1369     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1370   }
1371 
1372   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1373   else {
1374     /* A generic version of creating embedded sf */
1375     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1376     maxlocal = maxleaf - minleaf + 1;
1377     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
1378     leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
1379     /* Tag selected roots and bcast to leaves */
1380     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1381     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1382     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1383 
1384     /* Build esf with leaves that are still connected */
1385     esf_nleaves = 0;
1386     for (i = 0; i < nleaves; i++) {
1387       j = ilocal ? ilocal[i] : i;
1388       /* In theory, esf_nleaves += leafdata[j] would suffice, but it failed due to SFWindow bugs
1389          in PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1390       */
1391       esf_nleaves += (leafdata[j] ? 1 : 0);
1392     }
1393     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
1394     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1395     for (i = n = 0; i < nleaves; i++) {
1396       j = ilocal ? ilocal[i] : i;
1397       if (leafdata[j]) {
1398         new_ilocal[n]        = j;
1399         new_iremote[n].rank  = iremote[i].rank;
1400         new_iremote[n].index = iremote[i].index;
1401         ++n;
1402       }
1403     }
1404     PetscCall(PetscSFCreate(comm, esf));
1405     PetscCall(PetscSFSetFromOptions(*esf));
1406     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1407     PetscCall(PetscFree2(rootdata, leafmem));
1408   }
1409   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
1410   PetscFunctionReturn(PETSC_SUCCESS);
1411 }
1412 
1413 /*@C
1414   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
1415 
1416   Collective
1417 
1418   Input Parameters:
1419 + sf        - original star forest
1420 . nselected - number of selected leaves on this process
1421 - selected  - indices of the selected leaves on this process
1422 
1423   Output Parameter:
1424 . newsf - new star forest
1425 
1426   Level: advanced
1427 
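  Example:
  A minimal sketch (names are illustrative; assumes this process has at least two leaves) that keeps only the first two
  entries of the leaf array on this process:
.vb
  PetscInt sel[] = {0, 1}; /* indices into the leaf array, not leaf values */
  PetscSF  lsf;

  PetscCall(PetscSFCreateEmbeddedLeafSF(sf, 2, sel, &lsf));
  /* ... communicate with lsf ... */
  PetscCall(PetscSFDestroy(&lsf));
.ve
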
1428 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1429 @*/
1430 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1431 {
1432   const PetscSFNode *iremote;
1433   PetscSFNode       *new_iremote;
1434   const PetscInt    *ilocal;
1435   PetscInt           i, nroots, *leaves, *new_ilocal;
1436   MPI_Comm           comm;
1437 
1438   PetscFunctionBegin;
1439   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1440   PetscSFCheckGraphSet(sf, 1);
1441   if (nselected) PetscAssertPointer(selected, 3);
1442   PetscAssertPointer(newsf, 4);
1443 
1444   /* Sort selected[] and remove duplicates, placing the result in leaves[] */
1445   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1446   PetscCall(PetscMalloc1(nselected, &leaves));
1447   PetscCall(PetscArraycpy(leaves, selected, nselected));
1448   PetscCall(PetscSortRemoveDupsInt(&nselected, leaves)); /* selected[] need not be sorted; the min/max check below requires sorted leaves[] */
1449   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1450 
1451   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1452   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1453   else {
1454     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1455     PetscCall(PetscMalloc1(nselected, &new_ilocal));
1456     PetscCall(PetscMalloc1(nselected, &new_iremote));
1457     for (i = 0; i < nselected; ++i) {
1458       const PetscInt l     = leaves[i];
1459       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1460       new_iremote[i].rank  = iremote[l].rank;
1461       new_iremote[i].index = iremote[l].index;
1462     }
1463     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1464     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1465   }
1466   PetscCall(PetscFree(leaves));
1467   PetscFunctionReturn(PETSC_SUCCESS);
1468 }
1469 
1470 /*@C
1471   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
1472 
1473   Collective
1474 
1475   Input Parameters:
1476 + sf       - star forest on which to communicate
1477 . unit     - data type associated with each node
1478 . rootdata - buffer to broadcast
1479 - op       - operation to use for reduction
1480 
1481   Output Parameter:
1482 . leafdata - buffer to be reduced with values from each leaf's respective root
1483 
1484   Level: intermediate
1485 
1486   Note:
1487   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1488   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, use
1489   `PetscSFBcastWithMemTypeBegin()` instead.
1490 
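  Example:
  A minimal sketch (buffer names are illustrative) that overwrites each leaf with the value of its root:
.vb
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
  /* unrelated work may overlap with the communication here */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
.ve
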
1491 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1492 @*/
1493 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1494 {
1495   PetscMemType rootmtype, leafmtype;
1496 
1497   PetscFunctionBegin;
1498   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1499   PetscCall(PetscSFSetUp(sf));
1500   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1501   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1502   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1503   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1504   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1505   PetscFunctionReturn(PETSC_SUCCESS);
1506 }
1507 
1508 /*@C
1509   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
1510   to `PetscSFBcastEnd()`
1511 
1512   Collective
1513 
1514   Input Parameters:
1515 + sf        - star forest on which to communicate
1516 . unit      - data type associated with each node
1517 . rootmtype - memory type of rootdata
1518 . rootdata  - buffer to broadcast
1519 . leafmtype - memory type of leafdata
1520 - op        - operation to use for reduction
1521 
1522   Output Parameter:
1523 . leafdata - buffer to be reduced with values from each leaf's respective root
1524 
1525   Level: intermediate
1526 
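  Example:
  A minimal sketch (buffer names are illustrative) where both buffers are known to reside in host memory, so the runtime
  memory-type detection of `PetscSFBcastBegin()` is skipped:
.vb
  PetscCall(PetscSFBcastWithMemTypeBegin(sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, rootdata, PETSC_MEMTYPE_HOST, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
.ve
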
1527 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1528 @*/
1529 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1530 {
1531   PetscFunctionBegin;
1532   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1533   PetscCall(PetscSFSetUp(sf));
1534   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1535   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1536   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1537   PetscFunctionReturn(PETSC_SUCCESS);
1538 }
1539 
1540 /*@C
1541   PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
1542 
1543   Collective
1544 
1545   Input Parameters:
1546 + sf       - star forest
1547 . unit     - data type
1548 . rootdata - buffer to broadcast
1549 - op       - operation to use for reduction
1550 
1551   Output Parameter:
1552 . leafdata - buffer to be reduced with values from each leaf's respective root
1553 
1554   Level: intermediate
1555 
1556 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1557 @*/
1558 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1559 {
1560   PetscFunctionBegin;
1561   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1562   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1563   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1564   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
1565   PetscFunctionReturn(PETSC_SUCCESS);
1566 }
1567 
1568 /*@C
1569   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`
1570 
1571   Collective
1572 
1573   Input Parameters:
1574 + sf       - star forest
1575 . unit     - data type
1576 . leafdata - values to reduce
1577 - op       - reduction operation
1578 
1579   Output Parameter:
1580 . rootdata - result of reduction of values from all leaves of each root
1581 
1582   Level: intermediate
1583 
1584   Note:
1585   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1586   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, use
1587   `PetscSFReduceWithMemTypeBegin()` instead.
1588 
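  Example:
  A minimal sketch (buffer names are illustrative) that sums the contributions of all leaves into their respective roots:
.vb
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
.ve
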
1589 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
1590 @*/
1591 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1592 {
1593   PetscMemType rootmtype, leafmtype;
1594 
1595   PetscFunctionBegin;
1596   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1597   PetscCall(PetscSFSetUp(sf));
1598   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1599   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1600   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1601   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1602   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1603   PetscFunctionReturn(PETSC_SUCCESS);
1604 }
1605 
1606 /*@C
1607   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1608 
1609   Collective
1610 
1611   Input Parameters:
1612 + sf        - star forest
1613 . unit      - data type
1614 . leafmtype - memory type of leafdata
1615 . leafdata  - values to reduce
1616 . rootmtype - memory type of rootdata
1617 - op        - reduction operation
1618 
1619   Output Parameter:
1620 . rootdata - result of reduction of values from all leaves of each root
1621 
1622   Level: intermediate
1623 
1624 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1625 @*/
1626 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1627 {
1628   PetscFunctionBegin;
1629   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1630   PetscCall(PetscSFSetUp(sf));
1631   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1632   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1633   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1634   PetscFunctionReturn(PETSC_SUCCESS);
1635 }
1636 
1637 /*@C
1638   PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
1639 
1640   Collective
1641 
1642   Input Parameters:
1643 + sf       - star forest
1644 . unit     - data type
1645 . leafdata - values to reduce
1646 - op       - reduction operation
1647 
1648   Output Parameter:
1649 . rootdata - result of reduction of values from all leaves of each root
1650 
1651   Level: intermediate
1652 
1653 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
1654 @*/
1655 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1656 {
1657   PetscFunctionBegin;
1658   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1659   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1660   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1661   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1662   PetscFunctionReturn(PETSC_SUCCESS);
1663 }
1664 
1665 /*@C
1666   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1667   to be completed with `PetscSFFetchAndOpEnd()`
1668 
1669   Collective
1670 
1671   Input Parameters:
1672 + sf       - star forest
1673 . unit     - data type
1674 . leafdata - leaf values to use in reduction
1675 - op       - operation to use for reduction
1676 
1677   Output Parameters:
1678 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1679 - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1680 
1681   Level: advanced
1682 
1683   Note:
1684   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1685   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1686   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1687   integers.
1688 
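  Example:
  A minimal sketch (array names are illustrative; assumes contiguous leaf storage) in which each leaf atomically increments
  a counter at its root and receives the pre-increment value, yielding a unique slot per (root, leaf) edge; this is the
  pattern used internally by `PetscSFGetMultiSF()`:
.vb
  /* rootcount[] has nroots entries initialized to 0; ones[] holds a 1 for every leaf */
  PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootcount, ones, leafslot, MPI_SUM));
  PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootcount, ones, leafslot, MPI_SUM));
.ve
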
1689 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1690 @*/
1691 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1692 {
1693   PetscMemType rootmtype, leafmtype, leafupdatemtype;
1694 
1695   PetscFunctionBegin;
1696   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1697   PetscCall(PetscSFSetUp(sf));
1698   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1699   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1700   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1701   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
1702   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1703   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1704   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1705   PetscFunctionReturn(PETSC_SUCCESS);
1706 }
1707 
1708 /*@C
1709   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1710   applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`
1711 
1712   Collective
1713 
1714   Input Parameters:
1715 + sf              - star forest
1716 . unit            - data type
1717 . rootmtype       - memory type of rootdata
1718 . leafmtype       - memory type of leafdata
1719 . leafdata        - leaf values to use in reduction
1720 . leafupdatemtype - memory type of leafupdate
1721 - op              - operation to use for reduction
1722 
1723   Output Parameters:
1724 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1725 - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1726 
1727   Level: advanced
1728 
1729   Note:
1730   See `PetscSFFetchAndOpBegin()` for more details.
1731 
1732 .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1733 @*/
1734 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1735 {
1736   PetscFunctionBegin;
1737   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1738   PetscCall(PetscSFSetUp(sf));
1739   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1740   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1741   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1742   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1743   PetscFunctionReturn(PETSC_SUCCESS);
1744 }
1745 
1746 /*@C
1747   PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
1748   to fetch values from roots and update atomically by applying operation using my leaf value
1749 
1750   Collective
1751 
1752   Input Parameters:
1753 + sf       - star forest
1754 . unit     - data type
1755 . leafdata - leaf values to use in reduction
1756 - op       - operation to use for reduction
1757 
1758   Output Parameters:
1759 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1760 - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1761 
1762   Level: advanced
1763 
1764 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1765 @*/
1766 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1767 {
1768   PetscFunctionBegin;
1769   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1770   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1771   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1772   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1773   PetscFunctionReturn(PETSC_SUCCESS);
1774 }
1775 
1776 /*@C
1777   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
1778 
1779   Collective
1780 
1781   Input Parameter:
1782 . sf - star forest
1783 
1784   Output Parameter:
1785 . degree - degree of each root vertex
1786 
1787   Level: advanced
1788 
1789   Note:
1790   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1791 
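  Example:
  A minimal sketch that counts the total number of edges rooted on this process:
.vb
  const PetscInt *degree;
  PetscInt        nroots, nedges = 0;

  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  for (PetscInt i = 0; i < nroots; i++) nedges += degree[i];
.ve
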
1792 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1793 @*/
1794 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
1795 {
1796   PetscFunctionBegin;
1797   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1798   PetscSFCheckGraphSet(sf, 1);
1799   PetscAssertPointer(degree, 2);
1800   if (!sf->degreeknown) {
1801     PetscInt i, nroots = sf->nroots, maxlocal;
1802     PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1803     maxlocal = sf->maxleaf - sf->minleaf + 1;
1804     PetscCall(PetscMalloc1(nroots, &sf->degree));
1805     PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1806     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1807     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1808     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1809   }
1810   *degree = NULL;
1811   PetscFunctionReturn(PETSC_SUCCESS);
1812 }
1813 
1814 /*@C
1815   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
1816 
1817   Collective
1818 
1819   Input Parameter:
1820 . sf - star forest
1821 
1822   Output Parameter:
1823 . degree - degree of each root vertex
1824 
1825   Level: developer
1826 
1827   Note:
1828   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1829 
1830 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1831 @*/
1832 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
1833 {
1834   PetscFunctionBegin;
1835   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1836   PetscSFCheckGraphSet(sf, 1);
1837   PetscAssertPointer(degree, 2);
1838   if (!sf->degreeknown) {
1839     PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1840     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1841     PetscCall(PetscFree(sf->degreetmp));
1842     sf->degreeknown = PETSC_TRUE;
1843   }
1844   *degree = sf->degree;
1845   PetscFunctionReturn(PETSC_SUCCESS);
1846 }
1847 
1848 /*@C
1849   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
1850   Each multi-root is assigned the index of its corresponding original root.
1851 
1852   Collective
1853 
1854   Input Parameters:
1855 + sf     - star forest
1856 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`
1857 
1858   Output Parameters:
1859 + nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
1860 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1861 
1862   Level: developer
1863 
1864   Note:
1865   The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
1866 
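  Example:
  A minimal sketch (assuming `degree` was obtained from `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`) that maps
  each multi-root back to the original root it came from:
.vb
  PetscInt nmr, *orig;

  PetscCall(PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmr, &orig));
  for (PetscInt m = 0; m < nmr; m++) {
    /* multi-root m corresponds to original root orig[m] */
  }
  PetscCall(PetscFree(orig));
.ve
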
1867 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1868 @*/
1869 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1870 {
1871   PetscSF  msf;
1872   PetscInt k = 0, nroots, nmroots;
1873 
1874   PetscFunctionBegin;
1875   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1876   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1877   if (nroots) PetscAssertPointer(degree, 2);
1878   if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
1879   PetscAssertPointer(multiRootsOrigNumbering, 4);
1880   PetscCall(PetscSFGetMultiSF(sf, &msf));
1881   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1882   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1883   for (PetscInt i = 0; i < nroots; i++) {
1884     if (!degree[i]) continue;
1885     for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1886   }
1887   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Sanity check failed: assigned %" PetscInt_FMT " multi-root indices but the multi-SF has %" PetscInt_FMT " roots", k, nmroots);
1888   if (nMultiRoots) *nMultiRoots = nmroots;
1889   PetscFunctionReturn(PETSC_SUCCESS);
1890 }
1891 
1892 /*@C
1893   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
1894 
1895   Collective
1896 
1897   Input Parameters:
1898 + sf       - star forest
1899 . unit     - data type
1900 - leafdata - leaf data to gather to roots
1901 
1902   Output Parameter:
1903 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1904 
1905   Level: intermediate
1906 
1907 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1908 @*/
1909 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1910 {
1911   PetscSF multi = NULL;
1912 
1913   PetscFunctionBegin;
1914   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1915   PetscCall(PetscSFSetUp(sf));
1916   PetscCall(PetscSFGetMultiSF(sf, &multi));
1917   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1918   PetscFunctionReturn(PETSC_SUCCESS);
1919 }
1920 
1921 /*@C
1922   PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
1923 
1924   Collective
1925 
1926   Input Parameters:
1927 + sf       - star forest
1928 . unit     - data type
1929 - leafdata - leaf data to gather to roots
1930 
1931   Output Parameter:
1932 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1933 
1934   Level: intermediate
1935 
1936 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1937 @*/
1938 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1939 {
1940   PetscSF multi = NULL;
1941 
1942   PetscFunctionBegin;
1943   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1944   PetscCall(PetscSFGetMultiSF(sf, &multi));
1945   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1946   PetscFunctionReturn(PETSC_SUCCESS);
1947 }
1948 
1949 /*@C
1950   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
1951 
1952   Collective
1953 
1954   Input Parameters:
1955 + sf            - star forest
1956 . unit          - data type
1957 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1958 
1959   Output Parameter:
1960 . leafdata - leaf data to be updated with personal data from each respective root
1961 
1962   Level: intermediate
1963 
1964 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1965 @*/
1966 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1967 {
1968   PetscSF multi = NULL;
1969 
1970   PetscFunctionBegin;
1971   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1972   PetscCall(PetscSFSetUp(sf));
1973   PetscCall(PetscSFGetMultiSF(sf, &multi));
1974   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1975   PetscFunctionReturn(PETSC_SUCCESS);
1976 }
1977 
1978 /*@C
1979   PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
1980 
1981   Collective
1982 
1983   Input Parameters:
1984 + sf            - star forest
1985 . unit          - data type
1986 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1987 
1988   Output Parameter:
1989 . leafdata - leaf data to be updated with personal data from each respective root
1990 
1991   Level: intermediate
1992 
1993 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1994 @*/
1995 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1996 {
1997   PetscSF multi = NULL;
1998 
1999   PetscFunctionBegin;
2000   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2001   PetscCall(PetscSFGetMultiSF(sf, &multi));
2002   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
2003   PetscFunctionReturn(PETSC_SUCCESS);
2004 }
2005 
2006 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
2007 {
2008   PetscInt        i, n, nleaves;
2009   const PetscInt *ilocal = NULL;
2010   PetscHSetI      seen;
2011 
2012   PetscFunctionBegin;
2013   if (PetscDefined(USE_DEBUG)) {
2014     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
2015     PetscCall(PetscHSetICreate(&seen));
2016     for (i = 0; i < nleaves; i++) {
2017       const PetscInt leaf = ilocal ? ilocal[i] : i;
2018       PetscCall(PetscHSetIAdd(seen, leaf));
2019     }
2020     PetscCall(PetscHSetIGetSize(seen, &n));
2021     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
2022     PetscCall(PetscHSetIDestroy(&seen));
2023   }
2024   PetscFunctionReturn(PETSC_SUCCESS);
2025 }
2026 
2027 /*@
2028   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
2029 
2030   Input Parameters:
2031 + sfA - The first `PetscSF`
2032 - sfB - The second `PetscSF`
2033 
2034   Output Parameter:
2035 . sfBA - The composite `PetscSF`
2036 
2037   Level: developer
2038 
2039   Notes:
2040   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2041   forests, i.e. the same leaf is not connected with different roots.
2042 
2043   `sfA`'s leaf space and `sfB`'s root space might partially overlap. The composition builds
2044   a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
2045   nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
2046   `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
2047 
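  Example:
  A minimal sketch (buffer names are illustrative): a single broadcast on the composite is equivalent, on connected nodes,
  to broadcasting over `sfA` and then over `sfB`:
.vb
  PetscSF sfBA;

  PetscCall(PetscSFCompose(sfA, sfB, &sfBA));
  PetscCall(PetscSFBcastBegin(sfBA, MPIU_SCALAR, rootdataA, leafdataB, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfBA, MPIU_SCALAR, rootdataA, leafdataB, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sfBA));
.ve
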
2048 .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2049 @*/
2050 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2051 {
2052   const PetscSFNode *remotePointsA, *remotePointsB;
2053   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
2054   const PetscInt    *localPointsA, *localPointsB;
2055   PetscInt          *localPointsBA;
2056   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
2057   PetscBool          denseB;
2058 
2059   PetscFunctionBegin;
2060   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
2061   PetscSFCheckGraphSet(sfA, 1);
2062   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
2063   PetscSFCheckGraphSet(sfB, 2);
2064   PetscCheckSameComm(sfA, 1, sfB, 2);
2065   PetscAssertPointer(sfBA, 3);
2066   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2067   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2068 
2069   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2070   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2071   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
2072      numRootsB; otherwise, garbage will be broadcasted.
2073      Example (comm size = 1):
2074      sfA: 0 <- (0, 0)
2075      sfB: 100 <- (0, 0)
2076           101 <- (0, 1)
2077      Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
2078      of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
2079      receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
2080      remotePointsA; if not recast, point 101 would receive a garbage value.             */
2081   PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
2082   for (i = 0; i < numRootsB; i++) {
2083     reorderedRemotePointsA[i].rank  = -1;
2084     reorderedRemotePointsA[i].index = -1;
2085   }
2086   for (i = 0; i < numLeavesA; i++) {
2087     PetscInt localp = localPointsA ? localPointsA[i] : i;
2088 
2089     if (localp >= numRootsB) continue;
2090     reorderedRemotePointsA[localp] = remotePointsA[i];
2091   }
2092   remotePointsA = reorderedRemotePointsA;
2093   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2094   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
2095   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2096     leafdataB[i].rank  = -1;
2097     leafdataB[i].index = -1;
2098   }
2099   PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2100   PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2101   PetscCall(PetscFree(reorderedRemotePointsA));
2102 
2103   denseB = (PetscBool)!localPointsB;
2104   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2105     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
2106     else numLeavesBA++;
2107   }
2108   if (denseB) {
2109     localPointsBA  = NULL;
2110     remotePointsBA = leafdataB;
2111   } else {
2112     PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
2113     PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
2114     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2115       const PetscInt l = localPointsB ? localPointsB[i] : i;
2116 
2117       if (leafdataB[l - minleaf].rank == -1) continue;
2118       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
2119       localPointsBA[numLeavesBA]  = l;
2120       numLeavesBA++;
2121     }
2122     PetscCall(PetscFree(leafdataB));
2123   }
2124   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2125   PetscCall(PetscSFSetFromOptions(*sfBA));
2126   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2127   PetscFunctionReturn(PETSC_SUCCESS);
2128 }
2129 
2130 /*@
2131   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
2132 
2133   Input Parameters:
2134 + sfA - The first `PetscSF`
2135 - sfB - The second `PetscSF`
2136 
2137   Output Parameter:
2138 . sfBA - The composite `PetscSF`.
2139 
2140   Level: developer
2141 
2142   Notes:
2143   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2144   forests, i.e., the same leaf is not connected with different roots. Moreover, all roots of the
2145   second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.
2146 
2147   `sfA`'s leaf space and `sfB`'s leaf space might partially overlap. The composition builds
2148   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2149   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
2150   `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`,
2151   on connected roots.
2152 
2153 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2154 @*/
2155 PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2156 {
2157   const PetscSFNode *remotePointsA, *remotePointsB;
2158   PetscSFNode       *remotePointsBA;
2159   const PetscInt    *localPointsA, *localPointsB;
2160   PetscSFNode       *reorderedRemotePointsA = NULL;
2161   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2162   MPI_Op             op;
2163 #if defined(PETSC_USE_64BIT_INDICES)
2164   PetscBool iswin;
2165 #endif
2166 
2167   PetscFunctionBegin;
2168   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
2169   PetscSFCheckGraphSet(sfA, 1);
2170   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
2171   PetscSFCheckGraphSet(sfB, 2);
2172   PetscCheckSameComm(sfA, 1, sfB, 2);
2173   PetscAssertPointer(sfBA, 3);
2174   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2175   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2176 
2177   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2178   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2179 
2180   /* TODO: Check roots of sfB have degree of 1 */
2181   /* Once that check is implemented, we can replace the MPI_MAXLOC
2182      with MPI_REPLACE; when the root condition holds, the two have the same effect.
2183      We use MPI_MAXLOC only to have a deterministic output from this routine if
2184      the root condition is not met.
2185    */
2186   op = MPI_MAXLOC;
2187 #if defined(PETSC_USE_64BIT_INDICES)
2188   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2189   PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
2190   if (iswin) op = MPI_REPLACE;
2191 #endif
2192 
2193   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2194   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
2195   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2196     reorderedRemotePointsA[i].rank  = -1;
2197     reorderedRemotePointsA[i].index = -1;
2198   }
2199   if (localPointsA) {
2200     for (i = 0; i < numLeavesA; i++) {
2201       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2202       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2203     }
2204   } else {
2205     for (i = 0; i < numLeavesA; i++) {
2206       if (i > maxleaf || i < minleaf) continue;
2207       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2208     }
2209   }
2210 
2211   PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
2212   PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
2213   for (i = 0; i < numRootsB; i++) {
2214     remotePointsBA[i].rank  = -1;
2215     remotePointsBA[i].index = -1;
2216   }
2217 
2218   PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2219   PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2220   PetscCall(PetscFree(reorderedRemotePointsA));
2221   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2222     if (remotePointsBA[i].rank == -1) continue;
2223     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
2224     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2225     localPointsBA[numLeavesBA]        = i;
2226     numLeavesBA++;
2227   }
2228   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2229   PetscCall(PetscSFSetFromOptions(*sfBA));
2230   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2231   PetscFunctionReturn(PETSC_SUCCESS);
2232 }
2233 
2234 /*
2235   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
2236 
2237   Input Parameter:
2238 . sf - The global `PetscSF`
2239 
2240   Output Parameter:
2241 . out - The local `PetscSF`
2242 
2243 .seealso: `PetscSF`, `PetscSFCreate()`
2244  */
2245 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2246 {
2247   MPI_Comm           comm;
2248   PetscMPIInt        myrank;
2249   const PetscInt    *ilocal;
2250   const PetscSFNode *iremote;
2251   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
2252   PetscSFNode       *liremote;
2253   PetscSF            lsf;
2254 
2255   PetscFunctionBegin;
2256   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2257   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2258   else {
2259     PetscMPIInt irank;
2260 
2261     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2262     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
2263     PetscCallMPI(MPI_Comm_rank(comm, &myrank));
2264 
2265     /* Find out local edges and build a local SF */
2266     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
2267     for (i = lnleaves = 0; i < nleaves; i++) {
2268       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2269       if (irank == myrank) lnleaves++;
2270     }
2271     PetscCall(PetscMalloc1(lnleaves, &lilocal));
2272     PetscCall(PetscMalloc1(lnleaves, &liremote));
2273 
2274     for (i = j = 0; i < nleaves; i++) {
2275       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2276       if (irank == myrank) {
2277         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2278         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
2279         liremote[j].index = iremote[i].index;
2280         j++;
2281       }
2282     }
2283     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
2284     PetscCall(PetscSFSetFromOptions(lsf));
2285     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
2286     PetscCall(PetscSFSetUp(lsf));
2287     *out = lsf;
2288   }
2289   PetscFunctionReturn(PETSC_SUCCESS);
2290 }
2291 
2292 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2293 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2294 {
2295   PetscMemType rootmtype, leafmtype;
2296 
2297   PetscFunctionBegin;
2298   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2299   PetscCall(PetscSFSetUp(sf));
2300   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2301   PetscCall(PetscGetMemType(rootdata, &rootmtype));
2302   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2303   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2304   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2305   PetscFunctionReturn(PETSC_SUCCESS);
2306 }
2307 
2308 /*@
2309   PetscSFConcatenate - concatenate multiple `PetscSF` into one
2310 
2311   Input Parameters:
2312 + comm        - the communicator
2313 . nsfs        - the number of input `PetscSF`
2314 . sfs         - the array of input `PetscSF`
2315 . rootMode    - the root mode specifying how roots are handled
2316 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2317 
2318   Output Parameter:
2319 . newsf - The resulting `PetscSF`
2320 
2321   Level: advanced
2322 
2323   Notes:
2324   The communicator of all `PetscSF`s in `sfs` must be `comm`.
2325 
2326   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
2327 
2328   The offsets in `leafOffsets` are added to the original leaf indices.
2329 
2330   If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
2331   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
2332 
2333   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2334   In this case, the user is responsible for providing offsets that make the resulting leaves unique (otherwise an error occurs).
2335 
2336   All root modes retain the essential connectivity condition.
2337   If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2338   Parameter `rootMode` controls how the input root spaces are combined.
2339   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2340   and is also the same in the output `PetscSF`.
2341   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2342   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2343   roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
2344   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2345   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2346   the original root ranks are ignored.
2347   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2348   the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank
2349   to keep the load balancing.
2350   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.
2351 
2352   Example:
2353   We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
2354 .vb
2355   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
2356   for m in {local,global,shared}; do
2357     mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
2358   done
2359 .ve
2360   we generate two identical `PetscSF`s sf_0 and sf_1,
2361 .vb
2362   PetscSF Object: sf_0 2 MPI processes
2363     type: basic
2364     rank #leaves #roots
2365     [ 0]       4      2
2366     [ 1]       4      2
2367     leaves      roots       roots in global numbering
2368     ( 0,  0) <- ( 0,  0)  =   0
2369     ( 0,  1) <- ( 0,  1)  =   1
2370     ( 0,  2) <- ( 1,  0)  =   2
2371     ( 0,  3) <- ( 1,  1)  =   3
2372     ( 1,  0) <- ( 0,  0)  =   0
2373     ( 1,  1) <- ( 0,  1)  =   1
2374     ( 1,  2) <- ( 1,  0)  =   2
2375     ( 1,  3) <- ( 1,  1)  =   3
2376 .ve
2377   and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
2378 .vb
2379   rootMode = local:
2380   PetscSF Object: result_sf 2 MPI processes
2381     type: basic
2382     rank #leaves #roots
2383     [ 0]       8      4
2384     [ 1]       8      4
2385     leaves      roots       roots in global numbering
2386     ( 0,  0) <- ( 0,  0)  =   0
2387     ( 0,  1) <- ( 0,  1)  =   1
2388     ( 0,  2) <- ( 1,  0)  =   4
2389     ( 0,  3) <- ( 1,  1)  =   5
2390     ( 0,  4) <- ( 0,  2)  =   2
2391     ( 0,  5) <- ( 0,  3)  =   3
2392     ( 0,  6) <- ( 1,  2)  =   6
2393     ( 0,  7) <- ( 1,  3)  =   7
2394     ( 1,  0) <- ( 0,  0)  =   0
2395     ( 1,  1) <- ( 0,  1)  =   1
2396     ( 1,  2) <- ( 1,  0)  =   4
2397     ( 1,  3) <- ( 1,  1)  =   5
2398     ( 1,  4) <- ( 0,  2)  =   2
2399     ( 1,  5) <- ( 0,  3)  =   3
2400     ( 1,  6) <- ( 1,  2)  =   6
2401     ( 1,  7) <- ( 1,  3)  =   7
2402 
2403   rootMode = global:
2404   PetscSF Object: result_sf 2 MPI processes
2405     type: basic
2406     rank #leaves #roots
2407     [ 0]       8      4
2408     [ 1]       8      4
2409     leaves      roots       roots in global numbering
2410     ( 0,  0) <- ( 0,  0)  =   0
2411     ( 0,  1) <- ( 0,  1)  =   1
2412     ( 0,  2) <- ( 0,  2)  =   2
2413     ( 0,  3) <- ( 0,  3)  =   3
2414     ( 0,  4) <- ( 1,  0)  =   4
2415     ( 0,  5) <- ( 1,  1)  =   5
2416     ( 0,  6) <- ( 1,  2)  =   6
2417     ( 0,  7) <- ( 1,  3)  =   7
2418     ( 1,  0) <- ( 0,  0)  =   0
2419     ( 1,  1) <- ( 0,  1)  =   1
2420     ( 1,  2) <- ( 0,  2)  =   2
2421     ( 1,  3) <- ( 0,  3)  =   3
2422     ( 1,  4) <- ( 1,  0)  =   4
2423     ( 1,  5) <- ( 1,  1)  =   5
2424     ( 1,  6) <- ( 1,  2)  =   6
2425     ( 1,  7) <- ( 1,  3)  =   7
2426 
2427   rootMode = shared:
2428   PetscSF Object: result_sf 2 MPI processes
2429     type: basic
2430     rank #leaves #roots
2431     [ 0]       8      2
2432     [ 1]       8      2
2433     leaves      roots       roots in global numbering
2434     ( 0,  0) <- ( 0,  0)  =   0
2435     ( 0,  1) <- ( 0,  1)  =   1
2436     ( 0,  2) <- ( 1,  0)  =   2
2437     ( 0,  3) <- ( 1,  1)  =   3
2438     ( 0,  4) <- ( 0,  0)  =   0
2439     ( 0,  5) <- ( 0,  1)  =   1
2440     ( 0,  6) <- ( 1,  0)  =   2
2441     ( 0,  7) <- ( 1,  1)  =   3
2442     ( 1,  0) <- ( 0,  0)  =   0
2443     ( 1,  1) <- ( 0,  1)  =   1
2444     ( 1,  2) <- ( 1,  0)  =   2
2445     ( 1,  3) <- ( 1,  1)  =   3
2446     ( 1,  4) <- ( 0,  0)  =   0
2447     ( 1,  5) <- ( 0,  1)  =   1
2448     ( 1,  6) <- ( 1,  0)  =   2
2449     ( 1,  7) <- ( 1,  1)  =   3
2450 .ve
2451 
2452 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2453 @*/
2454 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2455 {
2456   PetscInt     i, s, nLeaves, nRoots;
2457   PetscInt    *leafArrayOffsets;
2458   PetscInt    *ilocal_new;
2459   PetscSFNode *iremote_new;
2460   PetscBool    all_ilocal_null = PETSC_FALSE;
2461   PetscLayout  glayout         = NULL;
2462   PetscInt    *gremote         = NULL;
2463   PetscMPIInt  rank, size;
2464 
2465   PetscFunctionBegin;
2466   if (PetscDefined(USE_DEBUG)) {
2467     PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2468 
2469     PetscCall(PetscSFCreate(comm, &dummy));
2470     PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
2471     PetscAssertPointer(sfs, 3);
2472     for (i = 0; i < nsfs; i++) {
2473       PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
2474       PetscCheckSameComm(dummy, 1, sfs[i], 3);
2475     }
2476     PetscValidLogicalCollectiveEnum(dummy, rootMode, 4);
2477     if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
2478     PetscAssertPointer(newsf, 6);
2479     PetscCall(PetscSFDestroy(&dummy));
2480   }
2481   if (!nsfs) {
2482     PetscCall(PetscSFCreate(comm, newsf));
2483     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2484     PetscFunctionReturn(PETSC_SUCCESS);
2485   }
2486   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2487   PetscCallMPI(MPI_Comm_size(comm, &size));
2488 
2489   /* Calculate leaf array offsets */
2490   PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2491   leafArrayOffsets[0] = 0;
2492   for (s = 0; s < nsfs; s++) {
2493     PetscInt nl;
2494 
2495     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2496     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2497   }
2498   nLeaves = leafArrayOffsets[nsfs];
2499 
2500   /* Calculate number of roots */
2501   switch (rootMode) {
2502   case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
2503     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2504     if (PetscDefined(USE_DEBUG)) {
2505       for (s = 1; s < nsfs; s++) {
2506         PetscInt nr;
2507 
2508         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2509         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
2510       }
2511     }
2512   } break;
2513   case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
2514     /* Calculate also global layout in this case */
2515     PetscInt    *nls;
2516     PetscLayout *lts;
2517     PetscInt   **inds;
2518     PetscInt     j;
2519     PetscInt     rootOffset = 0;
2520 
2521     PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
2522     PetscCall(PetscLayoutCreate(comm, &glayout));
2523     glayout->bs = 1;
2524     glayout->n  = 0;
2525     glayout->N  = 0;
2526     for (s = 0; s < nsfs; s++) {
2527       PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
2528       glayout->n += lts[s]->n;
2529       glayout->N += lts[s]->N;
2530     }
2531     PetscCall(PetscLayoutSetUp(glayout));
2532     PetscCall(PetscMalloc1(nLeaves, &gremote));
2533     for (s = 0, j = 0; s < nsfs; s++) {
2534       for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
2535       rootOffset += lts[s]->N;
2536       PetscCall(PetscLayoutDestroy(&lts[s]));
2537       PetscCall(PetscFree(inds[s]));
2538     }
2539     PetscCall(PetscFree3(lts, nls, inds));
2540     nRoots = glayout->N;
2541   } break;
2542   case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
2543     /* nRoots calculated later in this case */
2544     break;
2545   default:
2546     SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
2547   }
2548 
2549   if (!leafOffsets) {
2550     all_ilocal_null = PETSC_TRUE;
2551     for (s = 0; s < nsfs; s++) {
2552       const PetscInt *ilocal;
2553 
2554       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2555       if (ilocal) {
2556         all_ilocal_null = PETSC_FALSE;
2557         break;
2558       }
2559     }
2560     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2561   }
2562 
2563   /* Renumber and concatenate local leaves */
2564   ilocal_new = NULL;
2565   if (!all_ilocal_null) {
2566     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2567     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2568     for (s = 0; s < nsfs; s++) {
2569       const PetscInt *ilocal;
2570       PetscInt       *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
2571       PetscInt        i, nleaves_l;
2572 
2573       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2574       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2575     }
2576   }
2577 
2578   /* Renumber and concatenate remote roots */
2579   if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
2580     PetscInt rootOffset = 0;
2581 
2582     PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2583     for (i = 0; i < nLeaves; i++) {
2584       iremote_new[i].rank  = -1;
2585       iremote_new[i].index = -1;
2586     }
2587     for (s = 0; s < nsfs; s++) {
2588       PetscInt           i, nl, nr;
2589       PetscSF            tmp_sf;
2590       const PetscSFNode *iremote;
2591       PetscSFNode       *tmp_rootdata;
2592       PetscSFNode       *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);
2593 
2594       PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
2595       /* create a helper SF with the same graph but contiguous leaves */
2596       PetscCall(PetscSFCreate(comm, &tmp_sf));
2597       PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
2598       PetscCall(PetscSFSetUp(tmp_sf));
2599       PetscCall(PetscMalloc1(nr, &tmp_rootdata));
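      /* LOCAL: the roots of the input SFs are stacked on each rank, so shift each root index
         by the running per-rank root count; SHARED: all SFs address one shared root buffer,
         so root indices are unchanged */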
2600       if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2601         for (i = 0; i < nr; i++) {
2602           tmp_rootdata[i].index = i + rootOffset;
2603           tmp_rootdata[i].rank  = rank;
2604         }
2605         rootOffset += nr;
2606       } else {
2607         for (i = 0; i < nr; i++) {
2608           tmp_rootdata[i].index = i;
2609           tmp_rootdata[i].rank  = rank;
2610         }
2611       }
2612       PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2613       PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2614       PetscCall(PetscSFDestroy(&tmp_sf));
2615       PetscCall(PetscFree(tmp_rootdata));
2616     }
2617     if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above
2618 
2619     /* Build the new SF */
2620     PetscCall(PetscSFCreate(comm, newsf));
2621     PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
2622   } else {
2623     /* Build the new SF */
2624     PetscCall(PetscSFCreate(comm, newsf));
2625     PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
2626   }
2627   PetscCall(PetscSFSetUp(*newsf));
2628   PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
2629   PetscCall(PetscLayoutDestroy(&glayout));
2630   PetscCall(PetscFree(gremote));
2631   PetscCall(PetscFree(leafArrayOffsets));
2632   PetscFunctionReturn(PETSC_SUCCESS);
2633 }
2634 
2635 /*@
2636   PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications.
2637 
2638   Collective
2639 
2640   Input Parameters:
2641 + sf       - star forest
2642 . unit     - the data type contained within the root and leaf data
2643 . rootdata - root data that will be used for multiple PetscSF communications
2644 - leafdata - leaf data that will be used for multiple PetscSF communications
2645 
2646   Level: advanced
2647 
2648   Notes:
2649   Implementations of `PetscSF` can make optimizations
2650   for repeated communication using the same memory regions, but these optimizations
2651   can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed.
2652   The intended pattern is
2653 
2654 .vb
2655   PetscMalloc2(nroots, &rootdata, nleaves, &leafdata);
2656 
2657   PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
2658   // repeated use of rootdata and leafdata will now be optimized
2659 
2660   PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE);
2661   PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE);
2662   // ...
2663   PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM);
2664   PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM);
2665   // ... (other communications)
2666 
2667   // rootdata and leafdata must be deregistered before freeing
2668   // skipping this can lead to undefined behavior including
2669   // deadlocks
2670   PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
2671 
2672   // it is now safe to free rootdata and leafdata
2673   PetscFree2(rootdata, leafdata);
2674 .ve
2675 
2676   Not registering `rootdata` and `leafdata` does not cause an error, but it
2677   prevents optimizations that would reduce the setup time of each communication.
2678   Currently, the only implementation of `PetscSF` that benefits from
2679   `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`.  For the default
2680   `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`.
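
  For example, one might select the window implementation before setup so that
  registration pays off (a minimal sketch; the type can equally be chosen at
  runtime with the option -sf_type window):
.vb
  PetscSFSetType(sf, PETSCSFWINDOW);
  // ... set the graph, then ...
  PetscSFSetUp(sf);
  PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
.ve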
2681 
2682 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
2683 @*/
2684 PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
2685 {
2686   PetscFunctionBegin;
2687   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2688   PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
2689   PetscFunctionReturn(PETSC_SUCCESS);
2690 }
2691 
2692 /*@
2693   PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded.
2694 
2695   Collective
2696 
2697   Input Parameters:
2698 + sf       - star forest
2699 . unit     - the data type contained within the root and leaf data
2700 . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
2701 - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`
2702 
2703   Level: advanced
2704 
2705   Notes:
2706   See `PetscSFRegisterPersistent()` for when/how to use this function.
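
  For example, teardown pairs deregistration with freeing (a sketch assuming
  `rootdata` and `leafdata` were allocated with `PetscMalloc2()`):
.vb
  // all communications using rootdata and leafdata have completed
  PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
  PetscFree2(rootdata, leafdata);
.ve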
2707 
2708 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
2709 @*/
2710 PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
2711 {
2712   PetscFunctionBegin;
2713   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2714   PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
2715   PetscFunctionReturn(PETSC_SUCCESS);
2716 }
2717 
2718 PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
2719 {
2720   MPI_Aint lb, lb_true, bytes, bytes_true;
2721 
2722   PetscFunctionBegin;
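  /* the extent (including any artificial resizing) is the stride between consecutive units in a
     buffer; we require a zero lower bound so this stride can serve directly as the unit size */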
2723   PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
2724   PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
2725   PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit datatypes with a nonzero lower bound; contact petsc-maint@mcs.anl.gov if you need this feature");
2726   *size = bytes;
2727   PetscFunctionReturn(PETSC_SUCCESS);
2728 }
2729