xref: /petsc/src/vec/is/sf/interface/sf.c (revision bfe80ac4a46d58cb7760074b25f5e81b2f541d8a)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petsc/private/hashmapi.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <cuda_runtime.h>
8   #include <petscdevice_cuda.h>
9 #endif
10 
11 #if defined(PETSC_HAVE_HIP)
12   #include <hip/hip_runtime.h>
13 #endif
14 
15 #if defined(PETSC_CLANG_STATIC_ANALYZER)
16 extern void PetscSFCheckGraphSet(PetscSF, int);
17 #else
18   #if defined(PETSC_USE_DEBUG)
19     #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
20   #else
21     #define PetscSFCheckGraphSet(sf, arg) \
22       do { \
23       } while (0)
24   #endif
25 #endif
26 
27 const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
28 const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
29 
30 /*@
31   PetscSFCreate - create a star forest communication context
32 
33   Collective
34 
35   Input Parameter:
36 . comm - communicator on which the star forest will operate
37 
38   Output Parameter:
39 . sf - new star forest context
40 
41   Options Database Keys:
42 + -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
43 . -sf_type window                - Use MPI-3 one-sided window for communication
44 . -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
45 - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)
46 
47   Level: intermediate
48 
49   Note:
50   When one knows the communication graph is one of the predefined graphs, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
51   `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
52   `SF`s are optimized and have better performance than the general `SF`s.
53 
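  Example Usage:
  A minimal sketch (illustrative only) of creating an SF, letting the options database pick the implementation, and cleaning up:
.vb
  PetscSF sf;

  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetFromOptions(sf)); /* honors -sf_type and related options */
  /* ... describe the graph with PetscSFSetGraph() or PetscSFSetGraphWithPattern(), then communicate ... */
  PetscCall(PetscSFDestroy(&sf));
.ve
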
54 .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
55 @*/
56 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
57 {
58   PetscSF b;
59 
60   PetscFunctionBegin;
61   PetscAssertPointer(sf, 2);
62   PetscCall(PetscSFInitializePackage());
63 
64   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
65   b->nroots    = -1;
66   b->nleaves   = -1;
67   b->minleaf   = PETSC_INT_MAX;
68   b->maxleaf   = PETSC_INT_MIN;
69   b->nranks    = -1;
70   b->rankorder = PETSC_TRUE;
71   b->ingroup   = MPI_GROUP_NULL;
72   b->outgroup  = MPI_GROUP_NULL;
73   b->graphset  = PETSC_FALSE;
74 #if defined(PETSC_HAVE_DEVICE)
75   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
76   b->use_stream_aware_mpi = PETSC_FALSE;
77   b->unknown_input_stream = PETSC_FALSE;
78   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda */
79   b->backend = PETSCSF_BACKEND_KOKKOS;
80   #elif defined(PETSC_HAVE_CUDA)
81   b->backend = PETSCSF_BACKEND_CUDA;
82   #elif defined(PETSC_HAVE_HIP)
83   b->backend = PETSCSF_BACKEND_HIP;
84   #endif
85 
86   #if defined(PETSC_HAVE_NVSHMEM)
87   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
88   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
89   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
90   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
91   #endif
92 #endif
93   b->vscat.from_n = -1;
94   b->vscat.to_n   = -1;
95   b->vscat.unit   = MPIU_SCALAR;
96   *sf             = b;
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 /*@
101   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
102 
103   Collective
104 
105   Input Parameter:
106 . sf - star forest
107 
108   Level: advanced
109 
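  Example Usage:
  A sketch of reusing one SF object for a new graph; `sf` is assumed to already exist, and `nroots`, `nleaves`, `iremote` are assumed to describe the new graph:
.vb
  PetscCall(PetscSFReset(sf));
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_COPY_VALUES));
  PetscCall(PetscSFSetUp(sf));
.ve
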
110 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
111 @*/
112 PetscErrorCode PetscSFReset(PetscSF sf)
113 {
114   PetscFunctionBegin;
115   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
116   PetscTryTypeMethod(sf, Reset);
117   PetscCall(PetscSFDestroy(&sf->rankssf));
118 
119   sf->nroots   = -1;
120   sf->nleaves  = -1;
121   sf->minleaf  = PETSC_INT_MAX;
122   sf->maxleaf  = PETSC_INT_MIN;
123   sf->mine     = NULL;
124   sf->remote   = NULL;
125   sf->graphset = PETSC_FALSE;
126   PetscCall(PetscFree(sf->mine_alloc));
127   PetscCall(PetscFree(sf->remote_alloc));
128   sf->nranks = -1;
129   PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
130   sf->degreeknown = PETSC_FALSE;
131   PetscCall(PetscFree(sf->degree));
132   if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
133   if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
134 
135   if (sf->multi) sf->multi->multi = NULL;
136   PetscCall(PetscSFDestroy(&sf->multi));
137 
138   PetscCall(PetscLayoutDestroy(&sf->map));
139 
140 #if defined(PETSC_HAVE_DEVICE)
141   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
142 #endif
143 
144   sf->setupcalled = PETSC_FALSE;
145   PetscFunctionReturn(PETSC_SUCCESS);
146 }
147 
148 /*@
149   PetscSFSetType - Set the `PetscSF` communication implementation
150 
151   Collective
152 
153   Input Parameters:
154 + sf   - the `PetscSF` context
155 - type - a known method
156 .vb
157     PETSCSFWINDOW - MPI-2/3 one-sided
158     PETSCSFBASIC - basic implementation using MPI-1 two-sided
159 .ve
160 
161   Options Database Key:
162 . -sf_type <type> - Sets the method; for example `basic` or `window`. Use -help for a list of available methods
163 
164   Level: intermediate
165 
166   Notes:
167   See `PetscSFType` for possible values
168 
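  Example Usage:
  A sketch that selects the one-sided window implementation; a later `PetscSFSetFromOptions()` call may still override the choice through -sf_type:
.vb
  PetscCall(PetscSFSetType(sf, PETSCSFWINDOW));
.ve
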
169 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
170 @*/
171 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
172 {
173   PetscBool match;
174   PetscErrorCode (*r)(PetscSF);
175 
176   PetscFunctionBegin;
177   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
178   PetscAssertPointer(type, 2);
179 
180   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
181   if (match) PetscFunctionReturn(PETSC_SUCCESS);
182 
183   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
184   PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
185   /* Destroy the previous PetscSF implementation context */
186   PetscTryTypeMethod(sf, Destroy);
187   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
188   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
189   PetscCall((*r)(sf));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
193 /*@
194   PetscSFGetType - Get the `PetscSF` communication implementation
195 
196   Not Collective
197 
198   Input Parameter:
199 . sf - the `PetscSF` context
200 
201   Output Parameter:
202 . type - the `PetscSF` type name
203 
204   Level: intermediate
205 
206 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
207 @*/
208 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
209 {
210   PetscFunctionBegin;
211   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
212   PetscAssertPointer(type, 2);
213   *type = ((PetscObject)sf)->type_name;
214   PetscFunctionReturn(PETSC_SUCCESS);
215 }
216 
217 /*@
218   PetscSFDestroy - destroy a star forest
219 
220   Collective
221 
222   Input Parameter:
223 . sf - address of star forest
224 
225   Level: intermediate
226 
227 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
228 @*/
229 PetscErrorCode PetscSFDestroy(PetscSF *sf)
230 {
231   PetscFunctionBegin;
232   if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
233   PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1);
234   if (--((PetscObject)*sf)->refct > 0) {
235     *sf = NULL;
236     PetscFunctionReturn(PETSC_SUCCESS);
237   }
238   PetscCall(PetscSFReset(*sf));
239   PetscTryTypeMethod(*sf, Destroy);
240   PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
241   if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
242 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
243   if ((*sf)->use_stream_aware_mpi) {
244     PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
245     PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
246   }
247 #endif
248   PetscCall(PetscHeaderDestroy(sf));
249   PetscFunctionReturn(PETSC_SUCCESS);
250 }
251 
252 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
253 {
254   PetscInt           i, nleaves;
255   PetscMPIInt        size;
256   const PetscInt    *ilocal;
257   const PetscSFNode *iremote;
258 
259   PetscFunctionBegin;
260   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
261   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
262   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
263   for (i = 0; i < nleaves; i++) {
264     const PetscInt rank   = iremote[i].rank;
265     const PetscInt remote = iremote[i].index;
266     const PetscInt leaf   = ilocal ? ilocal[i] : i;
267     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
268     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
269     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
270   }
271   PetscFunctionReturn(PETSC_SUCCESS);
272 }
273 
274 /*@
275   PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication
276 
277   Collective
278 
279   Input Parameter:
280 . sf - star forest communication object
281 
282   Level: beginner
283 
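  Example Usage:
  A sketch (with `rootdata` and `leafdata` standing in for caller-provided buffers) showing that once set up, the SF is ready for communication:
.vb
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
.ve
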
284 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
285 @*/
286 PetscErrorCode PetscSFSetUp(PetscSF sf)
287 {
288   PetscFunctionBegin;
289   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
290   PetscSFCheckGraphSet(sf, 1);
291   if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
292   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
293   PetscCall(PetscSFCheckGraphValid_Private(sf));
294   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
295   PetscTryTypeMethod(sf, SetUp);
296 #if defined(PETSC_HAVE_CUDA)
297   if (sf->backend == PETSCSF_BACKEND_CUDA) {
298     sf->ops->Malloc = PetscSFMalloc_CUDA;
299     sf->ops->Free   = PetscSFFree_CUDA;
300   }
301 #endif
302 #if defined(PETSC_HAVE_HIP)
303   if (sf->backend == PETSCSF_BACKEND_HIP) {
304     sf->ops->Malloc = PetscSFMalloc_HIP;
305     sf->ops->Free   = PetscSFFree_HIP;
306   }
307 #endif
308 
309 #if defined(PETSC_HAVE_KOKKOS)
310   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
311     sf->ops->Malloc = PetscSFMalloc_Kokkos;
312     sf->ops->Free   = PetscSFFree_Kokkos;
313   }
314 #endif
315   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
316   sf->setupcalled = PETSC_TRUE;
317   PetscFunctionReturn(PETSC_SUCCESS);
318 }
319 
320 /*@
321   PetscSFSetFromOptions - set `PetscSF` options using the options database
322 
323   Logically Collective
324 
325   Input Parameter:
326 . sf - star forest
327 
328   Options Database Keys:
329 + -sf_type                      - implementation type, see `PetscSFSetType()`
330 . -sf_rank_order                - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
331 . -sf_use_default_stream        - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
332                                   use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
333                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
334 . -sf_use_stream_aware_mpi      - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
335                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
336 
337 - -sf_backend <cuda,hip,kokkos> - Select the device backend `PetscSF` uses. Currently `PetscSF` has these backends: cuda, hip and Kokkos.
338                                   On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
339                                   the only available backend is kokkos.
340 
341   Level: intermediate
342 
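  Example Usage:
  A sketch of the usual call order, with `comm`, `nroots`, `nleaves`, and `iremote` assumed to be provided by the caller; options such as -sf_type window then take effect at setup:
.vb
  PetscCall(PetscSFCreate(comm, &sf));
  PetscCall(PetscSFSetGraph(sf, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_COPY_VALUES));
  PetscCall(PetscSFSetFromOptions(sf)); /* call before PetscSFSetUp() so the options are honored */
  PetscCall(PetscSFSetUp(sf));
.ve
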
343 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
344 @*/
345 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
346 {
347   PetscSFType deft;
348   char        type[256];
349   PetscBool   flg;
350 
351   PetscFunctionBegin;
352   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
353   PetscObjectOptionsBegin((PetscObject)sf);
354   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
355   PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
356   PetscCall(PetscSFSetType(sf, flg ? type : deft));
357   PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
358   PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL));
359 #if defined(PETSC_HAVE_DEVICE)
360   {
361     char      backendstr[32] = {0};
362     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
363     /* Change the defaults set in PetscSFCreate() with command line options */
364     PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
365     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
366     PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
367     PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
368     PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
369     PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
370   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
371     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
372     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
373     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
374     else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
375   #elif defined(PETSC_HAVE_KOKKOS)
376     PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
377   #endif
378 
379   #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
380     if (sf->use_stream_aware_mpi) {
381       MPI_Info info;
382 
383       PetscCallMPI(MPI_Info_create(&info));
384       PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
385       PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
386       PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
387       PetscCallMPI(MPI_Info_free(&info));
388       PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
389     }
390   #endif
391   }
392 #endif
393   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
394   PetscOptionsEnd();
395   PetscFunctionReturn(PETSC_SUCCESS);
396 }
397 
398 /*@
399   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
400 
401   Logically Collective
402 
403   Input Parameters:
404 + sf  - star forest
405 - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)
406 
407   Level: advanced
408 
409 .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
410 @*/
411 PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
412 {
413   PetscFunctionBegin;
414   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
415   PetscValidLogicalCollectiveBool(sf, flg, 2);
416   PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
417   sf->rankorder = flg;
418   PetscFunctionReturn(PETSC_SUCCESS);
419 }
420 
421 /*@
422   PetscSFSetGraph - Set a parallel star forest
423 
424   Collective
425 
426   Input Parameters:
427 + sf         - star forest
428 . nroots     - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
429 . nleaves    - number of leaf vertices on the current process, each of these references a root on any process
430 . ilocal     - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
431                during setup in debug mode)
432 . localmode  - copy mode for `ilocal`
433 . iremote    - remote locations of root vertices for each leaf on the current process, length is `nleaves`; each entry is a (rank, offset) pair
434                (locations must be >= 0, enforced during setup in debug mode)
435 - remotemode - copy mode for `iremote`
436 
437   Level: intermediate
438 
439   Notes:
440   Leaf indices in `ilocal` must be unique, otherwise an error occurs.
441 
442   Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
443   In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
444   PETSc might modify the respective array;
445   with `PETSC_USE_POINTER`, the user must free the array themselves, but only after `PetscSFDestroy()` has been called.
446   Only with `PETSC_COPY_VALUES` is the respective array guaranteed to stay intact, and a const array can then be passed (though a cast to non-const is needed).
447 
448   Fortran Notes:
449   In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
450 
451   Developer Notes:
452   We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
453   This also makes it easy to compare the leaf sets of two `PetscSF`s.
454 
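  Example Usage:
  A sketch assuming exactly two MPI ranks: each rank owns 3 roots and attaches 2 contiguous leaves to roots 0 and 1 of the other rank:
.vb
  PetscMPIInt rank;
  PetscSFNode iremote[2];

  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  for (PetscInt i = 0; i < 2; i++) {
    iremote[i].rank  = 1 - rank; /* the root lives on the other rank */
    iremote[i].index = i;        /* root i on that rank */
  }
  PetscCall(PetscSFSetGraph(sf, 3, 2, NULL, PETSC_OWN_POINTER, iremote, PETSC_COPY_VALUES));
.ve
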
455 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
456 @*/
457 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode)
458 {
459   PetscBool unique, contiguous;
460 
461   PetscFunctionBegin;
462   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
463   if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
464   if (nleaves > 0) PetscAssertPointer(iremote, 6);
465   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
466   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
467   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
468    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
469   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
470   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
471 
472   if (sf->nroots >= 0) { /* Reset only if graph already set */
473     PetscCall(PetscSFReset(sf));
474   }
475 
476   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
477   if (PetscDefined(USE_DEBUG)) {
478     PetscMPIInt size;
479 
480     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
481     for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); }
482   }
483 
484   sf->nroots  = nroots;
485   sf->nleaves = nleaves;
486 
487   if (localmode == PETSC_COPY_VALUES && ilocal) {
488     PetscInt *tlocal = NULL;
489 
490     PetscCall(PetscMalloc1(nleaves, &tlocal));
491     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
492     ilocal = tlocal;
493   }
494   if (remotemode == PETSC_COPY_VALUES) {
495     PetscSFNode *tremote = NULL;
496 
497     PetscCall(PetscMalloc1(nleaves, &tremote));
498     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
499     iremote = tremote;
500   }
501 
502   if (nleaves && ilocal) {
503     PetscSFNode work;
504 
505     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
506     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
507     unique = PetscNot(unique);
508     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
509     sf->minleaf = ilocal[0];
510     sf->maxleaf = ilocal[nleaves - 1];
511     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
512   } else {
513     sf->minleaf = 0;
514     sf->maxleaf = nleaves - 1;
515     unique      = PETSC_TRUE;
516     contiguous  = PETSC_TRUE;
517   }
518 
519   if (contiguous) {
520     if (localmode == PETSC_USE_POINTER) {
521       ilocal = NULL;
522     } else {
523       PetscCall(PetscFree(ilocal));
524     }
525   }
526   sf->mine = ilocal;
527   if (localmode == PETSC_USE_POINTER) {
528     sf->mine_alloc = NULL;
529   } else {
530     sf->mine_alloc = ilocal;
531   }
532   if (PetscDefined(USE_DEBUG)) {
533     PetscMPIInt size;
534 
535     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
536     for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); }
537   }
538   sf->remote = iremote;
539   if (remotemode == PETSC_USE_POINTER) {
540     sf->remote_alloc = NULL;
541   } else {
542     sf->remote_alloc = iremote;
543   }
544   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
545   sf->graphset = PETSC_TRUE;
546   PetscFunctionReturn(PETSC_SUCCESS);
547 }
548 
549 /*@
550   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
551 
552   Collective
553 
554   Input Parameters:
555 + sf      - The `PetscSF`
556 . map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
557 - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
558 
559   Level: intermediate
560 
561   Notes:
562   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
563   `n` and `N` are the local and global sizes of `x` respectively.
564 
565   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it copies `x` to
566   sequential vectors `y` on all MPI processes.
567 
568   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it copies `x` to a
569   sequential vector `y` on rank 0.
570 
571   In the above cases, entries of `x` are roots and entries of `y` are leaves.
572 
573   With `PETSCSF_PATTERN_ALLTOALL`, `map` is ignored. Suppose NP is the size of `sf`'s communicator. The routine
574   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
575   of rank j, where 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount equal to 1. This does
576   not mean one cannot send multiple items; one just needs to create a new MPI datatype for the multiple data
577   items with `MPI_Type_contiguous()` and use that as the <unit> argument in SF routines.
578 
579   In this case, roots and leaves are symmetric.
580 
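  Example Usage:
  A sketch (with `x` standing in for an existing MPI `Vec`) that builds an SF whose broadcast copies `x` to a sequential vector on every rank:
.vb
  PetscLayout map;
  PetscSF     sf;

  PetscCall(VecGetLayout(x, &map));
  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER));
  /* roots are the entries of x, leaves the entries of the local y; move data with PetscSFBcastBegin()/PetscSFBcastEnd() */
  PetscCall(PetscSFDestroy(&sf));
.ve
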
581 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
582  @*/
583 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
584 {
585   MPI_Comm    comm;
586   PetscInt    n, N, res[2];
587   PetscMPIInt rank, size;
588   PetscSFType type;
589 
590   PetscFunctionBegin;
591   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
592   if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
593   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
594   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
595   PetscCallMPI(MPI_Comm_rank(comm, &rank));
596   PetscCallMPI(MPI_Comm_size(comm, &size));
597 
598   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
599     PetscInt sizei = size;
600 
601     type = PETSCSFALLTOALL;
602     PetscCall(PetscLayoutCreate(comm, &sf->map));
603     PetscCall(PetscLayoutSetLocalSize(sf->map, size));
604     PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei)));
605     PetscCall(PetscLayoutSetUp(sf->map));
606   } else {
607     PetscCall(PetscLayoutGetLocalSize(map, &n));
608     PetscCall(PetscLayoutGetSize(map, &N));
609     res[0] = n;
610     res[1] = -n;
611     /* Check whether n is the same on all ranks so that we can optimize for that case */
612     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
613     if (res[0] == -res[1]) { /* same n */
614       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
615     } else {
616       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
617     }
618     PetscCall(PetscLayoutReference(map, &sf->map));
619   }
620   PetscCall(PetscSFSetType(sf, type));
621 
622   sf->pattern = pattern;
623   sf->mine    = NULL; /* Contiguous */
624 
625   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
626      Also set other easy stuff.
627    */
628   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
629     sf->nleaves = N;
630     sf->nroots  = n;
631     sf->nranks  = size;
632     sf->minleaf = 0;
633     sf->maxleaf = N - 1;
634   } else if (pattern == PETSCSF_PATTERN_GATHER) {
635     sf->nleaves = rank ? 0 : N;
636     sf->nroots  = n;
637     sf->nranks  = rank ? 0 : size;
638     sf->minleaf = 0;
639     sf->maxleaf = rank ? -1 : N - 1;
640   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
641     sf->nleaves = size;
642     sf->nroots  = size;
643     sf->nranks  = size;
644     sf->minleaf = 0;
645     sf->maxleaf = size - 1;
646   }
647   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
648   sf->graphset = PETSC_TRUE;
649   PetscFunctionReturn(PETSC_SUCCESS);
650 }
651 
652 /*@
653   PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map
654 
655   Collective
656 
657   Input Parameter:
658 . sf - star forest to invert
659 
660   Output Parameter:
661 . isf - inverse of `sf`
662 
663   Level: advanced
664 
665   Notes:
666   All roots must have degree 1.
667 
668   The local space may be a permutation, but cannot be sparse.
669 
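  Example Usage:
  A sketch (assuming every root of `sf` has exactly one leaf attached) that reverses the communication direction:
.vb
  PetscSF isf;

  PetscCall(PetscSFCreateInverseSF(sf, &isf));
  /* a PetscSFBcastBegin()/End() on isf moves data the way a PetscSFReduceBegin()/End() does on sf */
  PetscCall(PetscSFDestroy(&isf));
.ve
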
670 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
671 @*/
672 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
673 {
674   PetscMPIInt     rank;
675   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
676   const PetscInt *ilocal;
677   PetscSFNode    *roots, *leaves;
678 
679   PetscFunctionBegin;
680   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
681   PetscSFCheckGraphSet(sf, 1);
682   PetscAssertPointer(isf, 2);
683 
684   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
685   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
686 
687   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
688   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
689   for (i = 0; i < maxlocal; i++) {
690     leaves[i].rank  = rank;
691     leaves[i].index = i;
692   }
693   for (i = 0; i < nroots; i++) {
694     roots[i].rank  = -1;
695     roots[i].index = -1;
696   }
697   PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
698   PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
699 
700   /* Check whether our leaves are sparse */
701   for (i = 0, count = 0; i < nroots; i++)
702     if (roots[i].rank >= 0) count++;
703   if (count == nroots) newilocal = NULL;
704   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
705     for (i = 0, count = 0; i < nroots; i++) {
706       if (roots[i].rank >= 0) {
707         newilocal[count]   = i;
708         roots[count].rank  = roots[i].rank;
709         roots[count].index = roots[i].index;
710         count++;
711       }
712     }
713   }
714 
715   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
716   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
717   PetscCall(PetscFree2(roots, leaves));
718   PetscFunctionReturn(PETSC_SUCCESS);
719 }
720 
721 /*@
722   PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
723 
724   Collective
725 
726   Input Parameters:
727 + sf  - communication object to duplicate
728 - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
729 
730   Output Parameter:
731 . newsf - new communication object
732 
733   Level: beginner
734 
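  Example Usage:
  A sketch that clones both the configuration and the graph of an existing SF whose graph has already been set:
.vb
  PetscSF newsf;

  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_GRAPH, &newsf));
  PetscCall(PetscSFSetUp(newsf));
  /* ... use newsf ... */
  PetscCall(PetscSFDestroy(&newsf));
.ve
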
735 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
736 @*/
737 PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
738 {
739   PetscSFType  type;
740   MPI_Datatype dtype = MPIU_SCALAR;
741 
742   PetscFunctionBegin;
743   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
744   PetscValidLogicalCollectiveEnum(sf, opt, 2);
745   PetscAssertPointer(newsf, 3);
746   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
747   PetscCall(PetscSFGetType(sf, &type));
748   if (type) PetscCall(PetscSFSetType(*newsf, type));
749   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
750   if (opt == PETSCSF_DUPLICATE_GRAPH) {
751     PetscSFCheckGraphSet(sf, 1);
752     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
753       PetscInt           nroots, nleaves;
754       const PetscInt    *ilocal;
755       const PetscSFNode *iremote;
756       PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
757       PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
758     } else {
759       PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
760     }
761   }
762   /* Since oldtype is committed, so is newtype, according to MPI */
763   if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
764   (*newsf)->vscat.bs     = sf->vscat.bs;
765   (*newsf)->vscat.unit   = dtype;
766   (*newsf)->vscat.to_n   = sf->vscat.to_n;
767   (*newsf)->vscat.from_n = sf->vscat.from_n;
768   /* Do not copy lsf. Build it on demand since it is rarely used */
769 
770 #if defined(PETSC_HAVE_DEVICE)
771   (*newsf)->backend              = sf->backend;
772   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
773   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
774   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
775 #endif
776   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
777   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
778   PetscFunctionReturn(PETSC_SUCCESS);
779 }
780 
781 /*@C
782   PetscSFGetGraph - Get the graph specifying a parallel star forest
783 
784   Not Collective
785 
786   Input Parameter:
787 . sf - star forest
788 
789   Output Parameters:
790 + nroots  - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
791 . nleaves - number of leaf vertices on the current process, each of these references a root on any process
792 . ilocal  - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
793 - iremote - remote locations of root vertices for each leaf on the current process
794 
795   Level: intermediate
796 
797   Notes:
798   We do not currently require that the graph be set before this call; `nroots` is returned as -1 if it has not been set yet
799 
800   The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()`
801 
802   Fortran Note:
803   Use `PetscSFRestoreGraph()` when access to the arrays is no longer needed
804 
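  Example Usage:
  A sketch that loops over the leaves of an SF whose graph has been set, handling both contiguous (`ilocal` == `NULL`) and sparse leaf storage:
.vb
  PetscInt           nroots, nleaves;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
  for (PetscInt i = 0; i < nleaves; i++) {
    PetscInt leaf = ilocal ? ilocal[i] : i; /* location in the leafdata buffer */
    PetscCall(PetscPrintf(PETSC_COMM_SELF, "leaf %" PetscInt_FMT " <- (%" PetscInt_FMT ", %" PetscInt_FMT ")\n", leaf, iremote[i].rank, iremote[i].index));
  }
.ve
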
805 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
806 @*/
807 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt *ilocal[], const PetscSFNode *iremote[])
808 {
809   PetscFunctionBegin;
810   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
811   if (sf->ops->GetGraph) {
812     PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
813   } else {
814     if (nroots) *nroots = sf->nroots;
815     if (nleaves) *nleaves = sf->nleaves;
816     if (ilocal) *ilocal = sf->mine;
817     if (iremote) *iremote = sf->remote;
818   }
819   PetscFunctionReturn(PETSC_SUCCESS);
820 }
821 
822 /*@
823   PetscSFGetLeafRange - Get the active leaf ranges
824 
825   Not Collective
826 
827   Input Parameter:
828 . sf - star forest
829 
830   Output Parameters:
831 + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
832 - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
833 
834   Level: developer
835 
836 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
837 @*/
838 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
839 {
840   PetscFunctionBegin;
841   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
842   PetscSFCheckGraphSet(sf, 1);
843   if (minleaf) *minleaf = sf->minleaf;
844   if (maxleaf) *maxleaf = sf->maxleaf;
845   PetscFunctionReturn(PETSC_SUCCESS);
846 }
847 
848 /*@
849   PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
850 
851   Collective
852 
853   Input Parameters:
854 + A    - the star forest
855 . obj  - Optional object that provides the prefix for the option names
856 - name - command line option
857 
858   Level: intermediate
859 
860   Note:
861   See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
862 
863 .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
864 @*/
865 PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
866 {
867   PetscFunctionBegin;
868   PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
869   PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
870   PetscFunctionReturn(PETSC_SUCCESS);
871 }
872 
873 /*@
874   PetscSFView - view a star forest
875 
876   Collective
877 
878   Input Parameters:
879 + sf     - star forest
880 - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
881 
882   Level: beginner
883 
884 .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
885 @*/
886 PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
887 {
888   PetscBool         iascii;
889   PetscViewerFormat format;
890 
891   PetscFunctionBegin;
892   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
893   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
894   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
895   PetscCheckSameComm(sf, 1, viewer, 2);
896   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
897   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
898   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
899     PetscMPIInt rank;
900     PetscInt    j;
901 
902     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
903     PetscCall(PetscViewerASCIIPushTab(viewer));
904     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
905       if (!sf->graphset) {
906         PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
907         PetscCall(PetscViewerASCIIPopTab(viewer));
908         PetscFunctionReturn(PETSC_SUCCESS);
909       }
910       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
911       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
912       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
913       for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
914       PetscCall(PetscViewerFlush(viewer));
915       PetscCall(PetscViewerGetFormat(viewer, &format));
916       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
917         PetscMPIInt *tmpranks, *perm;
918 
919         PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
920         PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
921         for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
922         PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
923         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
924         for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
925           PetscMPIInt i = perm[ii];
926 
927           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
928           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
929         }
930         PetscCall(PetscFree2(tmpranks, perm));
931       }
932       PetscCall(PetscViewerFlush(viewer));
933       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
934     }
935     PetscCall(PetscViewerASCIIPopTab(viewer));
936   }
937   PetscTryTypeMethod(sf, View, viewer);
938   PetscFunctionReturn(PETSC_SUCCESS);
939 }
940 
941 /*@C
942   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
943 
944   Not Collective
945 
946   Input Parameter:
947 . sf - star forest
948 
949   Output Parameters:
950 + nranks  - number of ranks referenced by local part
951 . ranks   - [`nranks`] array of ranks
952 . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
953 . rmine   - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank, or `NULL`
954 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank, or `NULL`
955 
956   Level: developer
957 
958 .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
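  Example Usage:
  A sketch that walks the rank-grouped (CSR-like) arrays of an already set up SF and prints every leaf -> root edge:
.vb
  PetscMPIInt        nranks;
  const PetscMPIInt *ranks;
  const PetscInt    *roffset, *rmine, *rremote;

  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
  for (PetscMPIInt r = 0; r < nranks; r++) {
    for (PetscInt j = roffset[r]; j < roffset[r + 1]; j++) {
      PetscCall(PetscPrintf(PETSC_COMM_SELF, "local leaf %" PetscInt_FMT " references root %" PetscInt_FMT " on rank %d\n", rmine[j], rremote[j], (int)ranks[r]));
    }
  }
.ve
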
959 @*/
960 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[])
961 {
962   PetscFunctionBegin;
963   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
964   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
965   if (sf->ops->GetRootRanks) {
966     PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
967   } else {
968     /* The generic implementation */
969     if (nranks) *nranks = sf->nranks;
970     if (ranks) *ranks = sf->ranks;
971     if (roffset) *roffset = sf->roffset;
972     if (rmine) *rmine = sf->rmine;
973     if (rremote) *rremote = sf->rremote;
974   }
975   PetscFunctionReturn(PETSC_SUCCESS);
976 }
977 
978 /*@C
979   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
980 
981   Not Collective
982 
983   Input Parameter:
984 . sf - star forest
985 
986   Output Parameters:
987 + niranks  - number of leaf ranks referencing roots on this process
988 . iranks   - [`niranks`] array of ranks
989 . ioffset  - [`niranks`+1] offset in `irootloc` for each rank
990 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank
991 
992   Level: developer
993 
994 .seealso: `PetscSF`, `PetscSFGetRootRanks()`
995 @*/
996 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[])
997 {
998   PetscFunctionBegin;
999   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1000   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
1001   if (sf->ops->GetLeafRanks) {
1002     PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
1003   } else {
1004     PetscSFType type;
1005     PetscCall(PetscSFGetType(sf, &type));
1006     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
1007   }
1008   PetscFunctionReturn(PETSC_SUCCESS);
1009 }
1010 
1011 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1012 {
1013   PetscInt i;
1014   for (i = 0; i < n; i++) {
1015     if (needle == list[i]) return PETSC_TRUE;
1016   }
1017   return PETSC_FALSE;
1018 }
1019 
1020 /*@C
1021   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.
1022 
1023   Collective
1024 
1025   Input Parameters:
1026 + sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1027 - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
1028 
1029   Level: developer
1030 
1031 .seealso: `PetscSF`, `PetscSFGetRootRanks()`
1032 @*/
1033 PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1034 {
1035   PetscHMapI    table;
1036   PetscHashIter pos;
1037   PetscMPIInt   size, groupsize, *groupranks, *ranks;
1038   PetscInt     *rcount;
1039   PetscInt      irank, sfnrank, ranksi;
1040   PetscMPIInt   i, orank = -1;
1041 
1042   PetscFunctionBegin;
1043   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1044   PetscSFCheckGraphSet(sf, 1);
1045   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1046   PetscCall(PetscHMapICreateWithSize(10, &table));
1047   for (i = 0; i < sf->nleaves; i++) {
1048     /* Log 1-based rank */
1049     PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
1050   }
1051   PetscCall(PetscHMapIGetSize(table, &sfnrank));
1052   PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
1053   PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
1054   PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1055   PetscHashIterBegin(table, pos);
1056   for (i = 0; i < sf->nranks; i++) {
1057     PetscHashIterGetKey(table, pos, ranksi);
1058     PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
1059     PetscHashIterGetVal(table, pos, rcount[i]);
1060     PetscHashIterNext(table, pos);
1061     ranks[i]--; /* Convert back to 0-based */
1062   }
1063   PetscCall(PetscHMapIDestroy(&table));
1064 
1065   /* We expect that dgroup is reliably "small" while nranks could be large */
1066   {
1067     MPI_Group    group = MPI_GROUP_NULL;
1068     PetscMPIInt *dgroupranks;
1069 
1070     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1071     PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
1072     PetscCall(PetscMalloc1(groupsize, &dgroupranks));
1073     PetscCall(PetscMalloc1(groupsize, &groupranks));
1074     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1075     if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
1076     PetscCallMPI(MPI_Group_free(&group));
1077     PetscCall(PetscFree(dgroupranks));
1078   }
1079 
1080   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1081   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1082     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1083       if (InList(ranks[i], groupsize, groupranks)) break;
1084     }
1085     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1086       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1087     }
1088     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1089       PetscMPIInt tmprank;
1090       PetscInt    tmpcount;
1091 
1092       tmprank             = ranks[i];
1093       tmpcount            = rcount[i];
1094       ranks[i]            = ranks[sf->ndranks];
1095       rcount[i]           = rcount[sf->ndranks];
1096       ranks[sf->ndranks]  = tmprank;
1097       rcount[sf->ndranks] = tmpcount;
1098       sf->ndranks++;
1099     }
1100   }
1101   PetscCall(PetscFree(groupranks));
1102   PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
1103   if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
1104   sf->roffset[0] = 0;
1105   for (i = 0; i < sf->nranks; i++) {
1106     PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
1107     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1108     rcount[i]          = 0;
1109   }
1110   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1111     /* short circuit */
1112     if (orank != sf->remote[i].rank) {
1113       /* Search for index of iremote[i].rank in sf->ranks */
1114       PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank));
1115       PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank));
1116       if (irank < 0) {
1117         PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1118         if (irank >= 0) irank += sf->ndranks;
1119       }
1120     }
1121     PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank);
1122     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1123     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1124     rcount[irank]++;
1125   }
1126   PetscCall(PetscFree2(rcount, ranks));
1127   PetscFunctionReturn(PETSC_SUCCESS);
1128 }
1129 
1130 /*@C
1131   PetscSFGetGroups - gets incoming and outgoing process groups
1132 
1133   Collective
1134 
1135   Input Parameter:
1136 . sf - star forest
1137 
1138   Output Parameters:
1139 + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1140 - outgoing - group of destination processes for outgoing edges (roots that I reference)
1141 
1142   Level: developer
1143 
1144 .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1145 @*/
1146 PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1147 {
1148   MPI_Group group = MPI_GROUP_NULL;
1149 
1150   PetscFunctionBegin;
1151   PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
1152   if (sf->ingroup == MPI_GROUP_NULL) {
1153     PetscInt        i;
1154     const PetscInt *indegree;
1155     PetscMPIInt     rank, *outranks, *inranks, indegree0;
1156     PetscSFNode    *remote;
1157     PetscSF         bgcount;
1158 
1159     /* Compute the number of incoming ranks */
1160     PetscCall(PetscMalloc1(sf->nranks, &remote));
1161     for (i = 0; i < sf->nranks; i++) {
1162       remote[i].rank  = sf->ranks[i];
1163       remote[i].index = 0;
1164     }
1165     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
1166     PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1167     PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
1168     PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
1169     /* Enumerate the incoming ranks */
1170     PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
1171     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1172     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1173     PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
1174     PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
1175     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1176     PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
1177     PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
1178     PetscCallMPI(MPI_Group_free(&group));
1179     PetscCall(PetscFree2(inranks, outranks));
1180     PetscCall(PetscSFDestroy(&bgcount));
1181   }
1182   *incoming = sf->ingroup;
1183 
1184   if (sf->outgroup == MPI_GROUP_NULL) {
1185     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1186     PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
1187     PetscCallMPI(MPI_Group_free(&group));
1188   }
1189   *outgoing = sf->outgroup;
1190   PetscFunctionReturn(PETSC_SUCCESS);
1191 }
1192 
1193 /*@
1194   PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
1195 
1196   Collective
1197 
1198   Input Parameter:
1199 . sf - star forest
1200 
1201   Output Parameter:
1202 . rsf - the star forest with a single root per process to perform communications
1203 
1204   Level: developer
1205 
1206 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
1207 @*/
1208 PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
1209 {
1210   PetscFunctionBegin;
1211   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1212   PetscAssertPointer(rsf, 2);
1213   if (!sf->rankssf) {
1214     PetscSFNode       *rremotes;
1215     const PetscMPIInt *ranks;
1216     PetscMPIInt        nranks;
1217 
1218     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
1219     PetscCall(PetscMalloc1(nranks, &rremotes));
1220     for (PetscInt i = 0; i < nranks; i++) {
1221       rremotes[i].rank  = ranks[i];
1222       rremotes[i].index = 0;
1223     }
1224     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
1225     PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
1226   }
1227   *rsf = sf->rankssf;
1228   PetscFunctionReturn(PETSC_SUCCESS);
1229 }
1230 
1231 /*@
1232   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
1233 
1234   Collective
1235 
1236   Input Parameter:
1237 . sf - star forest that may contain roots with 0 or with more than 1 vertex
1238 
1239   Output Parameter:
1240 . multi - star forest with split roots, such that each root has degree exactly 1
1241 
1242   Level: developer
1243 
1244   Note:
1245   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
1246   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1247   edge, it is a candidate for future optimization that might involve its removal.
1248 
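  Example Usage:
  A sketch (with `leafdata` standing in for a caller-provided leaf buffer) of gathering one value per incoming edge into multi-root storage, which is what the multi-SF provides underneath:
.vb
  const PetscInt *degree;
  PetscInt        nroots, nmultiroots = 0;
  PetscScalar    *multirootdata;

  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  for (PetscInt i = 0; i < nroots; i++) nmultiroots += degree[i];
  PetscCall(PetscMalloc1(nmultiroots, &multirootdata));
  PetscCall(PetscSFGatherBegin(sf, MPIU_SCALAR, leafdata, multirootdata));
  PetscCall(PetscSFGatherEnd(sf, MPIU_SCALAR, leafdata, multirootdata));
  PetscCall(PetscFree(multirootdata));
.ve
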
1249 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1250 @*/
1251 PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1252 {
1253   PetscFunctionBegin;
1254   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1255   PetscAssertPointer(multi, 2);
1256   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1257     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1258     *multi           = sf->multi;
1259     sf->multi->multi = sf->multi;
1260     PetscFunctionReturn(PETSC_SUCCESS);
1261   }
1262   if (!sf->multi) {
1263     const PetscInt *indegree;
1264     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
1265     PetscSFNode    *remote;
1266     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1267     PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
1268     PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
1269     PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
1270     inoffset[0] = 0;
1271     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1272     for (i = 0; i < maxlocal; i++) outones[i] = 1;
1273     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1274     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1275     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1276     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1277       for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
1278     }
1279     PetscCall(PetscMalloc1(sf->nleaves, &remote));
1280     for (i = 0; i < sf->nleaves; i++) {
1281       remote[i].rank  = sf->remote[i].rank;
1282       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1283     }
1284     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1285     sf->multi->multi = sf->multi;
1286     PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1287     if (sf->rankorder) { /* Sort the ranks */
1288       PetscMPIInt  rank;
1289       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1290       PetscSFNode *newremote;
1291       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1292       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1293       PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
1294       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1295       PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1296       PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1297       /* Sort the incoming ranks at each vertex, build the inverse map */
1298       for (i = 0; i < sf->nroots; i++) {
1299         PetscInt j;
1300         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1301         PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
1302         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1303       }
1304       PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1305       PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1306       PetscCall(PetscMalloc1(sf->nleaves, &newremote));
1307       for (i = 0; i < sf->nleaves; i++) {
1308         newremote[i].rank  = sf->remote[i].rank;
1309         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1310       }
1311       PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
1312       PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
1313     }
1314     PetscCall(PetscFree3(inoffset, outones, outoffset));
1315   }
1316   *multi = sf->multi;
1317   PetscFunctionReturn(PETSC_SUCCESS);
1318 }
1319 
1320 /*@C
1321   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
1322 
1323   Collective
1324 
1325   Input Parameters:
1326 + sf        - original star forest
1327 . nselected - number of selected roots on this process
1328 - selected  - indices of the selected roots on this process
1329 
1330   Output Parameter:
1331 . esf - new star forest
1332 
1333   Level: advanced
1334 
1335   Note:
1336   To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
1337   be done by calling `PetscSFGetGraph()`.
1338 
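  Example:
  A minimal sketch; the selected root indices are illustrative only\:
.vb
  PetscInt selected[] = {0, 2}; // keep only the edges of roots 0 and 2 on this process
  PetscSF  esf;

  PetscCall(PetscSFCreateEmbeddedRootSF(sf, 2, selected, &esf));
  { // find out which leaves still participate
    PetscInt        nleaves;
    const PetscInt *ilocal;

    PetscCall(PetscSFGetGraph(esf, NULL, &nleaves, &ilocal, NULL));
  }
  PetscCall(PetscSFDestroy(&esf));
.ve
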
1339 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1340 @*/
1341 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1342 {
1343   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1344   const PetscInt    *ilocal;
1345   signed char       *rootdata, *leafdata, *leafmem;
1346   const PetscSFNode *iremote;
1347   PetscSFNode       *new_iremote;
1348   MPI_Comm           comm;
1349 
1350   PetscFunctionBegin;
1351   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1352   PetscSFCheckGraphSet(sf, 1);
1353   if (nselected) PetscAssertPointer(selected, 3);
1354   PetscAssertPointer(esf, 4);
1355 
1356   PetscCall(PetscSFSetUp(sf));
1357   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
1358   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1359   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1360 
1361   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1362     PetscBool dups;
1363     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
1364     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1365     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1366   }
1367 
1368   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1369   else {
1370     /* A generic version of creating embedded sf */
1371     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1372     maxlocal = maxleaf - minleaf + 1;
1373     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
1374     leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
1375     /* Tag selected roots and bcast to leaves */
1376     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1377     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1378     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1379 
1380     /* Build esf with leaves that are still connected */
1381     esf_nleaves = 0;
1382     for (i = 0; i < nleaves; i++) {
1383       j = ilocal ? ilocal[i] : i;
1384       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1385          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1386       */
1387       esf_nleaves += (leafdata[j] ? 1 : 0);
1388     }
1389     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
1390     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1391     for (i = n = 0; i < nleaves; i++) {
1392       j = ilocal ? ilocal[i] : i;
1393       if (leafdata[j]) {
1394         new_ilocal[n]        = j;
1395         new_iremote[n].rank  = iremote[i].rank;
1396         new_iremote[n].index = iremote[i].index;
1397         ++n;
1398       }
1399     }
1400     PetscCall(PetscSFCreate(comm, esf));
1401     PetscCall(PetscSFSetFromOptions(*esf));
1402     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1403     PetscCall(PetscFree2(rootdata, leafmem));
1404   }
1405   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
1406   PetscFunctionReturn(PETSC_SUCCESS);
1407 }
1408 
1409 /*@C
1410   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
1411 
1412   Collective
1413 
1414   Input Parameters:
1415 + sf        - original star forest
1416 . nselected - number of selected leaves on this process
1417 - selected  - indices of the selected leaves on this process
1418 
1419   Output Parameter:
1420 . newsf - new star forest
1421 
1422   Level: advanced
1423 
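  Example:
  A minimal sketch; the selected leaf indices are illustrative only\:
.vb
  PetscInt selected[] = {1, 3}; // keep only the edges of local leaves 1 and 3
  PetscSF  lsf;

  PetscCall(PetscSFCreateEmbeddedLeafSF(sf, 2, selected, &lsf));
  // ... use lsf ...
  PetscCall(PetscSFDestroy(&lsf));
.ve
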
1424 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1425 @*/
1426 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1427 {
1428   const PetscSFNode *iremote;
1429   PetscSFNode       *new_iremote;
1430   const PetscInt    *ilocal;
1431   PetscInt           i, nroots, *leaves, *new_ilocal;
1432   MPI_Comm           comm;
1433 
1434   PetscFunctionBegin;
1435   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1436   PetscSFCheckGraphSet(sf, 1);
1437   if (nselected) PetscAssertPointer(selected, 3);
1438   PetscAssertPointer(newsf, 4);
1439 
1440   /* Uniq selected[] and put results in leaves[] */
1441   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1442   PetscCall(PetscMalloc1(nselected, &leaves));
1443   PetscCall(PetscArraycpy(leaves, selected, nselected));
1444   PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
1445   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1446 
1447   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1448   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1449   else {
1450     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1451     PetscCall(PetscMalloc1(nselected, &new_ilocal));
1452     PetscCall(PetscMalloc1(nselected, &new_iremote));
1453     for (i = 0; i < nselected; ++i) {
1454       const PetscInt l     = leaves[i];
1455       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1456       new_iremote[i].rank  = iremote[l].rank;
1457       new_iremote[i].index = iremote[l].index;
1458     }
1459     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1460     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1461   }
1462   PetscCall(PetscFree(leaves));
1463   PetscFunctionReturn(PETSC_SUCCESS);
1464 }
1465 
1466 /*@C
1467   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
1468 
1469   Collective
1470 
1471   Input Parameters:
1472 + sf       - star forest on which to communicate
1473 . unit     - data type associated with each node
1474 . rootdata - buffer to broadcast
1475 - op       - operation to use for reduction
1476 
1477   Output Parameter:
1478 . leafdata - buffer to be reduced with values from each leaf's respective root
1479 
1480   Level: intermediate
1481 
1482   Note:
1483   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1484   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, you should
1485   use `PetscSFBcastWithMemTypeBegin()` instead.
1486 
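  Example:
  A typical begin/end pair broadcasting one `PetscInt` per node, assuming contiguous leaf storage (`ilocal` = `NULL`) so `leafdata` can be sized by the number of leaves\:
.vb
  PetscInt  nroots, nleaves;
  PetscInt *rootdata, *leafdata;

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, NULL, NULL));
  PetscCall(PetscMalloc2(nroots, &rootdata, nleaves, &leafdata));
  // ... fill rootdata ...
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
  // leafdata[i] now holds the value of leaf i's root
  PetscCall(PetscFree2(rootdata, leafdata));
.ve
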
1487 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1488 @*/
1489 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1490 {
1491   PetscMemType rootmtype, leafmtype;
1492 
1493   PetscFunctionBegin;
1494   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1495   PetscCall(PetscSFSetUp(sf));
1496   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1497   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1498   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1499   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1500   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1501   PetscFunctionReturn(PETSC_SUCCESS);
1502 }
1503 
1504 /*@C
1505   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
1506   to `PetscSFBcastEnd()`
1507 
1508   Collective
1509 
1510   Input Parameters:
1511 + sf        - star forest on which to communicate
1512 . unit      - data type associated with each node
1513 . rootmtype - memory type of rootdata
1514 . rootdata  - buffer to broadcast
1515 . leafmtype - memory type of leafdata
1516 - op        - operation to use for reduction
1517 
1518   Output Parameter:
1519 . leafdata - buffer to be reduced with values from each leaf's respective root
1520 
1521   Level: intermediate
1522 
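  Example:
  A sketch assuming `rootdata` and `leafdata` are already-allocated host buffers, so both memory types are known to be `PETSC_MEMTYPE_HOST`\:
.vb
  PetscCall(PetscSFBcastWithMemTypeBegin(sf, MPIU_INT, PETSC_MEMTYPE_HOST, rootdata, PETSC_MEMTYPE_HOST, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
.ve
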
1523 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1524 @*/
1525 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1526 {
1527   PetscFunctionBegin;
1528   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1529   PetscCall(PetscSFSetUp(sf));
1530   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1531   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1532   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1533   PetscFunctionReturn(PETSC_SUCCESS);
1534 }
1535 
1536 /*@C
1537   PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
1538 
1539   Collective
1540 
1541   Input Parameters:
1542 + sf       - star forest
1543 . unit     - data type
1544 . rootdata - buffer to broadcast
1545 - op       - operation to use for reduction
1546 
1547   Output Parameter:
1548 . leafdata - buffer to be reduced with values from each leaf's respective root
1549 
1550   Level: intermediate
1551 
1552 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1553 @*/
1554 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1555 {
1556   PetscFunctionBegin;
1557   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1558   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1559   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1560   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
1561   PetscFunctionReturn(PETSC_SUCCESS);
1562 }
1563 
1564 /*@C
1565   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`
1566 
1567   Collective
1568 
1569   Input Parameters:
1570 + sf       - star forest
1571 . unit     - data type
1572 . leafdata - values to reduce
1573 - op       - reduction operation
1574 
1575   Output Parameter:
1576 . rootdata - result of reduction of values from all leaves of each root
1577 
1578   Level: intermediate
1579 
1580   Note:
1581   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1582   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, you should
1583   use `PetscSFReduceWithMemTypeBegin()` instead.
1584 
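  Example:
  A sketch summing one `PetscInt` contribution from each leaf into its root, assuming contiguous leaf storage; the reduction accumulates into the existing `rootdata` values\:
.vb
  PetscInt  nroots, nleaves;
  PetscInt *rootdata, *leafdata;

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, NULL, NULL));
  PetscCall(PetscMalloc2(nroots, &rootdata, nleaves, &leafdata));
  for (PetscInt i = 0; i < nroots; i++) rootdata[i] = 0;
  for (PetscInt i = 0; i < nleaves; i++) leafdata[i] = 1;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
  // rootdata[i] now holds the number of leaves connected to root i
  PetscCall(PetscFree2(rootdata, leafdata));
.ve
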
1585 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
1586 @*/
1587 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1588 {
1589   PetscMemType rootmtype, leafmtype;
1590 
1591   PetscFunctionBegin;
1592   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1593   PetscCall(PetscSFSetUp(sf));
1594   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1595   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1596   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1597   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1598   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1599   PetscFunctionReturn(PETSC_SUCCESS);
1600 }
1601 
1602 /*@C
1603   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1604 
1605   Collective
1606 
1607   Input Parameters:
1608 + sf        - star forest
1609 . unit      - data type
1610 . leafmtype - memory type of leafdata
1611 . leafdata  - values to reduce
1612 . rootmtype - memory type of rootdata
1613 - op        - reduction operation
1614 
1615   Output Parameter:
1616 . rootdata - result of reduction of values from all leaves of each root
1617 
1618   Level: intermediate
1619 
1620 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1621 @*/
1622 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1623 {
1624   PetscFunctionBegin;
1625   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1626   PetscCall(PetscSFSetUp(sf));
1627   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1628   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1629   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1630   PetscFunctionReturn(PETSC_SUCCESS);
1631 }
1632 
1633 /*@C
1634   PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
1635 
1636   Collective
1637 
1638   Input Parameters:
1639 + sf       - star forest
1640 . unit     - data type
1641 . leafdata - values to reduce
1642 - op       - reduction operation
1643 
1644   Output Parameter:
1645 . rootdata - result of reduction of values from all leaves of each root
1646 
1647   Level: intermediate
1648 
1649 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
1650 @*/
1651 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1652 {
1653   PetscFunctionBegin;
1654   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1655   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1656   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1657   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1658   PetscFunctionReturn(PETSC_SUCCESS);
1659 }
1660 
1661 /*@C
1662   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1663   to be completed with `PetscSFFetchAndOpEnd()`
1664 
1665   Collective
1666 
1667   Input Parameters:
1668 + sf       - star forest
1669 . unit     - data type
1670 . leafdata - leaf values to use in reduction
1671 - op       - operation to use for reduction
1672 
1673   Output Parameters:
1674 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1675 - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1676 
1677   Level: advanced
1678 
1679   Note:
1680   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1681   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1682   not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1683   integers.
1684 
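  Example:
  A sketch in the spirit of the offset computation in `PetscSFGetMultiSF()`: each leaf atomically adds 1 to a counter at its root and receives the counter value just before its own update, i.e. a unique slot among the leaves of that root; contiguous leaf storage is assumed\:
.vb
  PetscInt  nroots, nleaves;
  PetscInt *rootcnt, *ones, *slot;

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, NULL, NULL));
  PetscCall(PetscMalloc3(nroots, &rootcnt, nleaves, &ones, nleaves, &slot));
  for (PetscInt i = 0; i < nroots; i++) rootcnt[i] = 0;
  for (PetscInt i = 0; i < nleaves; i++) ones[i] = 1;
  PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootcnt, ones, slot, MPI_SUM));
  PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootcnt, ones, slot, MPI_SUM));
  // rootcnt[r] is now the degree of root r; slot[l] is leaf l's 0-based slot at its root
  PetscCall(PetscFree3(rootcnt, ones, slot));
.ve
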
1685 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1686 @*/
1687 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1688 {
1689   PetscMemType rootmtype, leafmtype, leafupdatemtype;
1690 
1691   PetscFunctionBegin;
1692   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1693   PetscCall(PetscSFSetUp(sf));
1694   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1695   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1696   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1697   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
1698   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1699   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1700   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1701   PetscFunctionReturn(PETSC_SUCCESS);
1702 }
1703 
1704 /*@C
1705   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1706   applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`
1707 
1708   Collective
1709 
1710   Input Parameters:
1711 + sf              - star forest
1712 . unit            - data type
1713 . rootmtype       - memory type of rootdata
1714 . leafmtype       - memory type of leafdata
1715 . leafdata        - leaf values to use in reduction
1716 . leafupdatemtype - memory type of leafupdate
1717 - op              - operation to use for reduction
1718 
1719   Output Parameters:
1720 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1721 - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1722 
1723   Level: advanced
1724 
1725   Note:
1726   See `PetscSFFetchAndOpBegin()` for more details.
1727 
1728 .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1729 @*/
1730 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1731 {
1732   PetscFunctionBegin;
1733   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1734   PetscCall(PetscSFSetUp(sf));
1735   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1736   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1737   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1738   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1739   PetscFunctionReturn(PETSC_SUCCESS);
1740 }
1741 
1742 /*@C
1743   PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
1744   to fetch values from roots and update atomically by applying operation using my leaf value
1745 
1746   Collective
1747 
1748   Input Parameters:
1749 + sf       - star forest
1750 . unit     - data type
1751 . leafdata - leaf values to use in reduction
1752 - op       - operation to use for reduction
1753 
1754   Output Parameters:
1755 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1756 - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1757 
1758   Level: advanced
1759 
1760 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1761 @*/
1762 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1763 {
1764   PetscFunctionBegin;
1765   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1766   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1767   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1768   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1769   PetscFunctionReturn(PETSC_SUCCESS);
1770 }
1771 
1772 /*@C
1773   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
1774 
1775   Collective
1776 
1777   Input Parameter:
1778 . sf - star forest
1779 
1780   Output Parameter:
1781 . degree - degree of each root vertex
1782 
1783   Level: advanced
1784 
1785   Note:
1786   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1787 
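  Example:
  A minimal begin/end pair; the array becomes available only after the end call\:
.vb
  const PetscInt *degree;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  // degree[i] is the number of leaves connected to root i; do not PetscFree() it
.ve
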
1788 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1789 @*/
1790 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
1791 {
1792   PetscFunctionBegin;
1793   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1794   PetscSFCheckGraphSet(sf, 1);
1795   PetscAssertPointer(degree, 2);
1796   if (!sf->degreeknown) {
1797     PetscInt i, nroots = sf->nroots, maxlocal;
1798     PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1799     maxlocal = sf->maxleaf - sf->minleaf + 1;
1800     PetscCall(PetscMalloc1(nroots, &sf->degree));
1801     PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1802     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1803     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1804     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1805   }
1806   *degree = NULL;
1807   PetscFunctionReturn(PETSC_SUCCESS);
1808 }
1809 
1810 /*@C
1811   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
1812 
1813   Collective
1814 
1815   Input Parameter:
1816 . sf - star forest
1817 
1818   Output Parameter:
1819 . degree - degree of each root vertex
1820 
1821   Level: developer
1822 
1823   Note:
1824   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1825 
1826 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1827 @*/
1828 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[])
1829 {
1830   PetscFunctionBegin;
1831   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1832   PetscSFCheckGraphSet(sf, 1);
1833   PetscAssertPointer(degree, 2);
1834   if (!sf->degreeknown) {
1835     PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1836     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1837     PetscCall(PetscFree(sf->degreetmp));
1838     sf->degreeknown = PETSC_TRUE;
1839   }
1840   *degree = sf->degree;
1841   PetscFunctionReturn(PETSC_SUCCESS);
1842 }
1843 
1844 /*@C
1845   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
1846   Each multi-root is assigned the index of the corresponding original root.
1847 
1848   Collective
1849 
1850   Input Parameters:
1851 + sf     - star forest
1852 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`
1853 
1854   Output Parameters:
1855 + nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
1856 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1857 
1858   Level: developer
1859 
1860   Note:
1861   The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
1862 
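  Example:
  A minimal sketch; the degree array is obtained first and the returned numbering is freed by the caller\:
.vb
  const PetscInt *degree;
  PetscInt        nmroots, *morig;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  PetscCall(PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmroots, &morig));
  // morig[k] is the original root index corresponding to multi-root k
  PetscCall(PetscFree(morig));
.ve
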
1863 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1864 @*/
1865 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1866 {
1867   PetscSF  msf;
1868   PetscInt k = 0, nroots, nmroots;
1869 
1870   PetscFunctionBegin;
1871   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1872   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1873   if (nroots) PetscAssertPointer(degree, 2);
1874   if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
1875   PetscAssertPointer(multiRootsOrigNumbering, 4);
1876   PetscCall(PetscSFGetMultiSF(sf, &msf));
1877   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1878   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1879   for (PetscInt i = 0; i < nroots; i++) {
1880     if (!degree[i]) continue;
1881     for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1882   }
1883   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
1884   if (nMultiRoots) *nMultiRoots = nmroots;
1885   PetscFunctionReturn(PETSC_SUCCESS);
1886 }
1887 
1888 /*@C
1889   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
1890 
1891   Collective
1892 
1893   Input Parameters:
1894 + sf       - star forest
1895 . unit     - data type
1896 - leafdata - leaf data to gather to roots
1897 
1898   Output Parameter:
1899 . multirootdata - root buffer to gather into; the amount of space per root is equal to its degree
1900 
1901   Level: intermediate
1902 
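  Example:
  A sketch gathering one `PetscInt` per leaf; the multi-root buffer is sized by the total root degree and leaf storage is assumed contiguous\:
.vb
  const PetscInt *degree;
  PetscInt        nroots, nleaves, nmulti = 0;
  PetscInt       *leafdata, *multirootdata;

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, NULL, NULL));
  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  for (PetscInt i = 0; i < nroots; i++) nmulti += degree[i];
  PetscCall(PetscMalloc2(nleaves, &leafdata, nmulti, &multirootdata));
  // ... fill leafdata ...
  PetscCall(PetscSFGatherBegin(sf, MPIU_INT, leafdata, multirootdata));
  PetscCall(PetscSFGatherEnd(sf, MPIU_INT, leafdata, multirootdata));
  PetscCall(PetscFree2(leafdata, multirootdata));
.ve
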
1903 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1904 @*/
1905 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1906 {
1907   PetscSF multi = NULL;
1908 
1909   PetscFunctionBegin;
1910   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1911   PetscCall(PetscSFSetUp(sf));
1912   PetscCall(PetscSFGetMultiSF(sf, &multi));
1913   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1914   PetscFunctionReturn(PETSC_SUCCESS);
1915 }
1916 
1917 /*@C
1918   PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
1919 
1920   Collective
1921 
1922   Input Parameters:
1923 + sf       - star forest
1924 . unit     - data type
1925 - leafdata - leaf data to gather to roots
1926 
1927   Output Parameter:
1928 . multirootdata - root buffer to gather into; the amount of space per root is equal to its degree
1929 
1930   Level: intermediate
1931 
1932 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1933 @*/
1934 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1935 {
1936   PetscSF multi = NULL;
1937 
1938   PetscFunctionBegin;
1939   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1940   PetscCall(PetscSFGetMultiSF(sf, &multi));
1941   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1942   PetscFunctionReturn(PETSC_SUCCESS);
1943 }
1944 
1945 /*@C
1946   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
1947 
1948   Collective
1949 
1950   Input Parameters:
1951 + sf            - star forest
1952 . unit          - data type
1953 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1954 
1955   Output Parameter:
1956 . leafdata - leaf data to be updated with personal data from each respective root
1957 
1958   Level: intermediate
1959 
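  Example:
  A sketch scattering one personal `PetscInt` from each multi-root slot back to its leaf; `multirootdata` and `leafdata` are assumed to be sized as in the `PetscSFGatherBegin()` example\:
.vb
  PetscCall(PetscSFScatterBegin(sf, MPIU_INT, multirootdata, leafdata));
  PetscCall(PetscSFScatterEnd(sf, MPIU_INT, multirootdata, leafdata));
.ve
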
1960 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1961 @*/
1962 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1963 {
1964   PetscSF multi = NULL;
1965 
1966   PetscFunctionBegin;
1967   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1968   PetscCall(PetscSFSetUp(sf));
1969   PetscCall(PetscSFGetMultiSF(sf, &multi));
1970   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1971   PetscFunctionReturn(PETSC_SUCCESS);
1972 }
1973 
1974 /*@C
1975   PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
1976 
1977   Collective
1978 
1979   Input Parameters:
1980 + sf            - star forest
1981 . unit          - data type
1982 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1983 
1984   Output Parameter:
1985 . leafdata - leaf data to be updated with personal data from each respective root
1986 
1987   Level: intermediate
1988 
1989 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1990 @*/
1991 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1992 {
1993   PetscSF multi = NULL;
1994 
1995   PetscFunctionBegin;
1996   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1997   PetscCall(PetscSFGetMultiSF(sf, &multi));
1998   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1999   PetscFunctionReturn(PETSC_SUCCESS);
2000 }
2001 
2002 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
2003 {
2004   PetscInt        i, n, nleaves;
2005   const PetscInt *ilocal = NULL;
2006   PetscHSetI      seen;
2007 
2008   PetscFunctionBegin;
2009   if (PetscDefined(USE_DEBUG)) {
2010     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
2011     PetscCall(PetscHSetICreate(&seen));
2012     for (i = 0; i < nleaves; i++) {
2013       const PetscInt leaf = ilocal ? ilocal[i] : i;
2014       PetscCall(PetscHSetIAdd(seen, leaf));
2015     }
2016     PetscCall(PetscHSetIGetSize(seen, &n));
2017     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
2018     PetscCall(PetscHSetIDestroy(&seen));
2019   }
2020   PetscFunctionReturn(PETSC_SUCCESS);
2021 }
2022 
2023 /*@
2024   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
2025 
2026   Input Parameters:
2027 + sfA - The first `PetscSF`
2028 - sfB - The second `PetscSF`
2029 
2030   Output Parameter:
2031 . sfBA - The composite `PetscSF`
2032 
2033   Level: developer
2034 
2035   Notes:
2036   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2037   forests, i.e. the same leaf is not connected with different roots.
2038 
2039   `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
2040   a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
2041   nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
2042   `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
2043 
2044 .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
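  Example:
  A minimal sketch; `sfA` and `sfB` are assumed to satisfy the conditions above\:
.vb
  PetscSF sfBA;

  PetscCall(PetscSFCompose(sfA, sfB, &sfBA));
  // a broadcast on sfBA is equivalent to a broadcast on sfA followed by one on sfB, on connected nodes
  PetscCall(PetscSFDestroy(&sfBA));
.ve
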
2045 @*/
2046 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2047 {
2048   const PetscSFNode *remotePointsA, *remotePointsB;
2049   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
2050   const PetscInt    *localPointsA, *localPointsB;
2051   PetscInt          *localPointsBA;
2052   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
2053   PetscBool          denseB;
2054 
2055   PetscFunctionBegin;
2056   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
2057   PetscSFCheckGraphSet(sfA, 1);
2058   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
2059   PetscSFCheckGraphSet(sfB, 2);
2060   PetscCheckSameComm(sfA, 1, sfB, 2);
2061   PetscAssertPointer(sfBA, 3);
2062   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2063   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2064 
2065   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2066   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2067   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
2068      numRootsB; otherwise, garbage will be broadcasted.
2069      Example (comm size = 1):
2070      sfA: 0 <- (0, 0)
2071      sfB: 100 <- (0, 0)
2072           101 <- (0, 1)
2073      Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
2074      of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
2075      receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
2076      remotePointsA; if not recast, point 101 would receive a garbage value.             */
2077   PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
2078   for (i = 0; i < numRootsB; i++) {
2079     reorderedRemotePointsA[i].rank  = -1;
2080     reorderedRemotePointsA[i].index = -1;
2081   }
2082   for (i = 0; i < numLeavesA; i++) {
2083     PetscInt localp = localPointsA ? localPointsA[i] : i;
2084 
2085     if (localp >= numRootsB) continue;
2086     reorderedRemotePointsA[localp] = remotePointsA[i];
2087   }
2088   remotePointsA = reorderedRemotePointsA;
2089   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2090   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
2091   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2092     leafdataB[i].rank  = -1;
2093     leafdataB[i].index = -1;
2094   }
2095   PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2096   PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2097   PetscCall(PetscFree(reorderedRemotePointsA));
2098 
2099   denseB = (PetscBool)!localPointsB;
2100   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2101     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
2102     else numLeavesBA++;
2103   }
2104   if (denseB) {
2105     localPointsBA  = NULL;
2106     remotePointsBA = leafdataB;
2107   } else {
2108     PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
2109     PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
2110     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2111       const PetscInt l = localPointsB ? localPointsB[i] : i;
2112 
2113       if (leafdataB[l - minleaf].rank == -1) continue;
2114       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
2115       localPointsBA[numLeavesBA]  = l;
2116       numLeavesBA++;
2117     }
2118     PetscCall(PetscFree(leafdataB));
2119   }
2120   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2121   PetscCall(PetscSFSetFromOptions(*sfBA));
2122   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2123   PetscFunctionReturn(PETSC_SUCCESS);
2124 }
2125 
2126 /*@
2127   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
2128 
2129   Input Parameters:
2130 + sfA - The first `PetscSF`
2131 - sfB - The second `PetscSF`
2132 
2133   Output Parameter:
2134 . sfBA - The composite `PetscSF`.
2135 
2136   Level: developer
2137 
2138   Notes:
2139   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2140   forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
2141   second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.
2142 
2143   `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
2144   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2145   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
2146   on `sfA`, then
2147   a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.
2148 
2149 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2150 @*/
2151 PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2152 {
2153   const PetscSFNode *remotePointsA, *remotePointsB;
2154   PetscSFNode       *remotePointsBA;
2155   const PetscInt    *localPointsA, *localPointsB;
2156   PetscSFNode       *reorderedRemotePointsA = NULL;
2157   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2158   MPI_Op             op;
2159 #if defined(PETSC_USE_64BIT_INDICES)
2160   PetscBool iswin;
2161 #endif
2162 
2163   PetscFunctionBegin;
2164   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
2165   PetscSFCheckGraphSet(sfA, 1);
2166   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
2167   PetscSFCheckGraphSet(sfB, 2);
2168   PetscCheckSameComm(sfA, 1, sfB, 2);
2169   PetscAssertPointer(sfBA, 3);
2170   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2171   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2172 
2173   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2174   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2175 
2176   /* TODO: Check roots of sfB have degree of 1 */
2177   /* Once we implement it, we can replace the MPI_MAXLOC
2178      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2179      We use MPI_MAXLOC only to have a deterministic output from this routine if
2180      the root condition is not met.
2181    */
2182   op = MPI_MAXLOC;
2183 #if defined(PETSC_USE_64BIT_INDICES)
2184   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2185   PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
2186   if (iswin) op = MPI_REPLACE;
2187 #endif
2188 
2189   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2190   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
2191   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2192     reorderedRemotePointsA[i].rank  = -1;
2193     reorderedRemotePointsA[i].index = -1;
2194   }
2195   if (localPointsA) {
2196     for (i = 0; i < numLeavesA; i++) {
2197       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2198       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2199     }
2200   } else {
2201     for (i = 0; i < numLeavesA; i++) {
2202       if (i > maxleaf || i < minleaf) continue;
2203       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2204     }
2205   }
2206 
2207   PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
2208   PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
2209   for (i = 0; i < numRootsB; i++) {
2210     remotePointsBA[i].rank  = -1;
2211     remotePointsBA[i].index = -1;
2212   }
2213 
2214   PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2215   PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2216   PetscCall(PetscFree(reorderedRemotePointsA));
2217   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2218     if (remotePointsBA[i].rank == -1) continue;
2219     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
2220     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2221     localPointsBA[numLeavesBA]        = i;
2222     numLeavesBA++;
2223   }
2224   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2225   PetscCall(PetscSFSetFromOptions(*sfBA));
2226   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2227   PetscFunctionReturn(PETSC_SUCCESS);
2228 }
2229 
2230 /*
2231   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
2232 
2233   Input Parameter:
2234 . sf - The global `PetscSF`
2235 
2236   Output Parameter:
2237 . out - The local `PetscSF`
2238 
2239 .seealso: `PetscSF`, `PetscSFCreate()`
2240  */
2241 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2242 {
2243   MPI_Comm           comm;
2244   PetscMPIInt        myrank;
2245   const PetscInt    *ilocal;
2246   const PetscSFNode *iremote;
2247   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
2248   PetscSFNode       *liremote;
2249   PetscSF            lsf;
2250 
2251   PetscFunctionBegin;
2252   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2253   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2254   else {
2255     PetscMPIInt irank;
2256 
2257     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2258     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
2259     PetscCallMPI(MPI_Comm_rank(comm, &myrank));
2260 
2261     /* Find out local edges and build a local SF */
2262     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
2263     for (i = lnleaves = 0; i < nleaves; i++) {
2264       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2265       if (irank == myrank) lnleaves++;
2266     }
2267     PetscCall(PetscMalloc1(lnleaves, &lilocal));
2268     PetscCall(PetscMalloc1(lnleaves, &liremote));
2269 
2270     for (i = j = 0; i < nleaves; i++) {
2271       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2272       if (irank == myrank) {
2273         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2274         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
2275         liremote[j].index = iremote[i].index;
2276         j++;
2277       }
2278     }
2279     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
2280     PetscCall(PetscSFSetFromOptions(lsf));
2281     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
2282     PetscCall(PetscSFSetUp(lsf));
2283     *out = lsf;
2284   }
2285   PetscFunctionReturn(PETSC_SUCCESS);
2286 }
2287 
2288 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2289 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2290 {
2291   PetscMemType rootmtype, leafmtype;
2292 
2293   PetscFunctionBegin;
2294   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2295   PetscCall(PetscSFSetUp(sf));
2296   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2297   PetscCall(PetscGetMemType(rootdata, &rootmtype));
2298   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2299   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2300   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2301   PetscFunctionReturn(PETSC_SUCCESS);
2302 }
2303 
2304 /*@
2305   PetscSFConcatenate - concatenate multiple `PetscSF` into one
2306 
2307   Input Parameters:
2308 + comm        - the communicator
2309 . nsfs        - the number of input `PetscSF`
2310 . sfs         - the array of input `PetscSF`
2311 . rootMode    - the root mode specifying how roots are handled
2312 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2313 
2314   Output Parameter:
2315 . newsf - The resulting `PetscSF`
2316 
2317   Level: advanced
2318 
2319   Notes:
2320   The communicator of all `PetscSF`s in `sfs` must be `comm`.
2321 
2322   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
2323 
2324   The offsets in `leafOffsets` are added to the original leaf indices.
2325 
2326   If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
2327   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
2328 
2329   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2330   In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2331 
2332   All root modes retain the essential connectivity condition.
2333   If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2334   Parameter `rootMode` controls how the input root spaces are combined.
2335   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2336   and is also the same in the output `PetscSF`.
2337   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2338   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2339   roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
2340   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2341   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2342   the original root ranks are ignored.
2343   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2344   the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank
2345   to keep the load balancing.
2346   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.
2347 
2348   Example:
2349   We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
2350 .vb
2351   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
2352   for m in {local,global,shared}; do
2353     mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
2354   done
2355 .ve
2356   we generate two identical `PetscSF`s sf_0 and sf_1,
2357 .vb
2358   PetscSF Object: sf_0 2 MPI processes
2359     type: basic
2360     rank #leaves #roots
2361     [ 0]       4      2
2362     [ 1]       4      2
2363     leaves      roots       roots in global numbering
2364     ( 0,  0) <- ( 0,  0)  =   0
2365     ( 0,  1) <- ( 0,  1)  =   1
2366     ( 0,  2) <- ( 1,  0)  =   2
2367     ( 0,  3) <- ( 1,  1)  =   3
2368     ( 1,  0) <- ( 0,  0)  =   0
2369     ( 1,  1) <- ( 0,  1)  =   1
2370     ( 1,  2) <- ( 1,  0)  =   2
2371     ( 1,  3) <- ( 1,  1)  =   3
2372 .ve
2373   and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
2374 .vb
2375   rootMode = local:
2376   PetscSF Object: result_sf 2 MPI processes
2377     type: basic
2378     rank #leaves #roots
2379     [ 0]       8      4
2380     [ 1]       8      4
2381     leaves      roots       roots in global numbering
2382     ( 0,  0) <- ( 0,  0)  =   0
2383     ( 0,  1) <- ( 0,  1)  =   1
2384     ( 0,  2) <- ( 1,  0)  =   4
2385     ( 0,  3) <- ( 1,  1)  =   5
2386     ( 0,  4) <- ( 0,  2)  =   2
2387     ( 0,  5) <- ( 0,  3)  =   3
2388     ( 0,  6) <- ( 1,  2)  =   6
2389     ( 0,  7) <- ( 1,  3)  =   7
2390     ( 1,  0) <- ( 0,  0)  =   0
2391     ( 1,  1) <- ( 0,  1)  =   1
2392     ( 1,  2) <- ( 1,  0)  =   4
2393     ( 1,  3) <- ( 1,  1)  =   5
2394     ( 1,  4) <- ( 0,  2)  =   2
2395     ( 1,  5) <- ( 0,  3)  =   3
2396     ( 1,  6) <- ( 1,  2)  =   6
2397     ( 1,  7) <- ( 1,  3)  =   7
2398 
2399   rootMode = global:
2400   PetscSF Object: result_sf 2 MPI processes
2401     type: basic
2402     rank #leaves #roots
2403     [ 0]       8      4
2404     [ 1]       8      4
2405     leaves      roots       roots in global numbering
2406     ( 0,  0) <- ( 0,  0)  =   0
2407     ( 0,  1) <- ( 0,  1)  =   1
2408     ( 0,  2) <- ( 0,  2)  =   2
2409     ( 0,  3) <- ( 0,  3)  =   3
2410     ( 0,  4) <- ( 1,  0)  =   4
2411     ( 0,  5) <- ( 1,  1)  =   5
2412     ( 0,  6) <- ( 1,  2)  =   6
2413     ( 0,  7) <- ( 1,  3)  =   7
2414     ( 1,  0) <- ( 0,  0)  =   0
2415     ( 1,  1) <- ( 0,  1)  =   1
2416     ( 1,  2) <- ( 0,  2)  =   2
2417     ( 1,  3) <- ( 0,  3)  =   3
2418     ( 1,  4) <- ( 1,  0)  =   4
2419     ( 1,  5) <- ( 1,  1)  =   5
2420     ( 1,  6) <- ( 1,  2)  =   6
2421     ( 1,  7) <- ( 1,  3)  =   7
2422 
2423   rootMode = shared:
2424   PetscSF Object: result_sf 2 MPI processes
2425     type: basic
2426     rank #leaves #roots
2427     [ 0]       8      2
2428     [ 1]       8      2
2429     leaves      roots       roots in global numbering
2430     ( 0,  0) <- ( 0,  0)  =   0
2431     ( 0,  1) <- ( 0,  1)  =   1
2432     ( 0,  2) <- ( 1,  0)  =   2
2433     ( 0,  3) <- ( 1,  1)  =   3
2434     ( 0,  4) <- ( 0,  0)  =   0
2435     ( 0,  5) <- ( 0,  1)  =   1
2436     ( 0,  6) <- ( 1,  0)  =   2
2437     ( 0,  7) <- ( 1,  1)  =   3
2438     ( 1,  0) <- ( 0,  0)  =   0
2439     ( 1,  1) <- ( 0,  1)  =   1
2440     ( 1,  2) <- ( 1,  0)  =   2
2441     ( 1,  3) <- ( 1,  1)  =   3
2442     ( 1,  4) <- ( 0,  0)  =   0
2443     ( 1,  5) <- ( 0,  1)  =   1
2444     ( 1,  6) <- ( 1,  0)  =   2
2445     ( 1,  7) <- ( 1,  1)  =   3
2446 .ve
2447 
2448 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2449 @*/
2450 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2451 {
2452   PetscInt     i, s, nLeaves, nRoots;
2453   PetscInt    *leafArrayOffsets;
2454   PetscInt    *ilocal_new;
2455   PetscSFNode *iremote_new;
2456   PetscBool    all_ilocal_null = PETSC_FALSE;
2457   PetscLayout  glayout         = NULL;
2458   PetscInt    *gremote         = NULL;
2459   PetscMPIInt  rank, size;
2460 
2461   PetscFunctionBegin;
2462   if (PetscDefined(USE_DEBUG)) {
2463     PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2464 
2465     PetscCall(PetscSFCreate(comm, &dummy));
2466     PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
2467     PetscAssertPointer(sfs, 3);
2468     for (i = 0; i < nsfs; i++) {
2469       PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
2470       PetscCheckSameComm(dummy, 1, sfs[i], 3);
2471     }
2472     PetscValidLogicalCollectiveEnum(dummy, rootMode, 4);
2473     if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
2474     PetscAssertPointer(newsf, 6);
2475     PetscCall(PetscSFDestroy(&dummy));
2476   }
2477   if (!nsfs) {
2478     PetscCall(PetscSFCreate(comm, newsf));
2479     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2480     PetscFunctionReturn(PETSC_SUCCESS);
2481   }
2482   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2483   PetscCallMPI(MPI_Comm_size(comm, &size));
2484 
2485   /* Calculate leaf array offsets */
2486   PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2487   leafArrayOffsets[0] = 0;
2488   for (s = 0; s < nsfs; s++) {
2489     PetscInt nl;
2490 
2491     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2492     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2493   }
2494   nLeaves = leafArrayOffsets[nsfs];
2495 
2496   /* Calculate number of roots */
2497   switch (rootMode) {
2498   case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
2499     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2500     if (PetscDefined(USE_DEBUG)) {
2501       for (s = 1; s < nsfs; s++) {
2502         PetscInt nr;
2503 
2504         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2505         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
2506       }
2507     }
2508   } break;
2509   case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
2510     /* Calculate also global layout in this case */
2511     PetscInt    *nls;
2512     PetscLayout *lts;
2513     PetscInt   **inds;
2514     PetscInt     j;
2515     PetscInt     rootOffset = 0;
2516 
2517     PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
2518     PetscCall(PetscLayoutCreate(comm, &glayout));
2519     glayout->bs = 1;
2520     glayout->n  = 0;
2521     glayout->N  = 0;
2522     for (s = 0; s < nsfs; s++) {
2523       PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
2524       glayout->n += lts[s]->n;
2525       glayout->N += lts[s]->N;
2526     }
2527     PetscCall(PetscLayoutSetUp(glayout));
2528     PetscCall(PetscMalloc1(nLeaves, &gremote));
2529     for (s = 0, j = 0; s < nsfs; s++) {
2530       for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
2531       rootOffset += lts[s]->N;
2532       PetscCall(PetscLayoutDestroy(&lts[s]));
2533       PetscCall(PetscFree(inds[s]));
2534     }
2535     PetscCall(PetscFree3(lts, nls, inds));
2536     nRoots = glayout->N;
2537   } break;
2538   case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
2539     /* nRoots calculated later in this case */
2540     break;
2541   default:
2542     SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
2543   }
2544 
2545   if (!leafOffsets) {
2546     all_ilocal_null = PETSC_TRUE;
2547     for (s = 0; s < nsfs; s++) {
2548       const PetscInt *ilocal;
2549 
2550       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2551       if (ilocal) {
2552         all_ilocal_null = PETSC_FALSE;
2553         break;
2554       }
2555     }
2556     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2557   }
2558 
2559   /* Renumber and concatenate local leaves */
2560   ilocal_new = NULL;
2561   if (!all_ilocal_null) {
2562     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2563     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2564     for (s = 0; s < nsfs; s++) {
2565       const PetscInt *ilocal;
2566       PetscInt       *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
2567       PetscInt        i, nleaves_l;
2568 
2569       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2570       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2571     }
2572   }
2573 
2574   /* Renumber and concatenate remote roots */
2575   if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
2576     PetscInt rootOffset = 0;
2577 
2578     PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2579     for (i = 0; i < nLeaves; i++) {
2580       iremote_new[i].rank  = -1;
2581       iremote_new[i].index = -1;
2582     }
2583     for (s = 0; s < nsfs; s++) {
2584       PetscInt           i, nl, nr;
2585       PetscSF            tmp_sf;
2586       const PetscSFNode *iremote;
2587       PetscSFNode       *tmp_rootdata;
2588       PetscSFNode       *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);
2589 
2590       PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
2591       PetscCall(PetscSFCreate(comm, &tmp_sf));
2592       /* create helper SF with contiguous leaves */
2593       PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
2594       PetscCall(PetscSFSetUp(tmp_sf));
2595       PetscCall(PetscMalloc1(nr, &tmp_rootdata));
2596       if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2597         for (i = 0; i < nr; i++) {
2598           tmp_rootdata[i].index = i + rootOffset;
2599           tmp_rootdata[i].rank  = rank;
2600         }
2601         rootOffset += nr;
2602       } else {
2603         for (i = 0; i < nr; i++) {
2604           tmp_rootdata[i].index = i;
2605           tmp_rootdata[i].rank  = rank;
2606         }
2607       }
2608       PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2609       PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2610       PetscCall(PetscSFDestroy(&tmp_sf));
2611       PetscCall(PetscFree(tmp_rootdata));
2612     }
2613     if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above
2614 
2615     /* Build the new SF */
2616     PetscCall(PetscSFCreate(comm, newsf));
2617     PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
2618   } else {
2619     /* Build the new SF */
2620     PetscCall(PetscSFCreate(comm, newsf));
2621     PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
2622   }
2623   PetscCall(PetscSFSetUp(*newsf));
2624   PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
2625   PetscCall(PetscLayoutDestroy(&glayout));
2626   PetscCall(PetscFree(gremote));
2627   PetscCall(PetscFree(leafArrayOffsets));
2628   PetscFunctionReturn(PETSC_SUCCESS);
2629 }
2630 
2631 /*@
2632   PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications.
2633 
2634   Collective
2635 
2636   Input Parameters:
2637 + sf       - star forest
2638 . unit     - the data type contained within the root and leaf data
2639 . rootdata - root data that will be used for multiple PetscSF communications
2640 - leafdata - leaf data that will be used for multiple PetscSF communications
2641 
2642   Level: advanced
2643 
2644   Notes:
2645   Implementations of `PetscSF` can make optimizations
2646   for repeated communication using the same memory regions, but these optimizations
2647   can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed.
2648   The intended pattern is
2649 
2650 .vb
2651   PetscMalloc2(nroots, &rootdata, nleaves, &leafdata);
2652 
2653   PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
2654   // repeated use of rootdata and leafdata will now be optimized
2655 
2656   PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE);
2657   PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE);
2658   // ...
2659   PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM);
2660   PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM);
2661   // ... (other communications)
2662 
2663   // rootdata and leafdata must be deregistered before freeing
2664   // skipping this can lead to undefined behavior including
2665   // deadlocks
2666   PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
2667 
2668   // it is now safe to free rootdata and leafdata
2669   PetscFree2(rootdata, leafdata);
2670 .ve
2671 
2672   Not registering `rootdata` and `leafdata` does not cause an error, but it prevents
2673   optimizations that reduce the setup time of each communication.  Currently, the only
2674   implementation of `PetscSF` that benefits from `PetscSFRegisterPersistent()` is
2675   `PETSCSFWINDOW`.  For the default `PETSCSFBASIC` there is no benefit to using
2676   `PetscSFRegisterPersistent()`.
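
  For example, a minimal sketch of a case where registration can pay off (assuming the window
  implementation has been selected, e.g. with `PetscSFSetType()` or the `-sf_type window` option):

.vb
  PetscSFSetType(sf, PETSCSFWINDOW);    // or run with -sf_type window
  // ... set the graph, allocate rootdata and leafdata ...
  PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
  // ... repeated PetscSFBcastBegin()/End() and PetscSFReduceBegin()/End() calls ...
  PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
.ve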
2677 
2678 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
2679 @*/
2680 PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
2681 {
2682   PetscFunctionBegin;
2683   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2684   PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
2685   PetscFunctionReturn(PETSC_SUCCESS);
2686 }
2687 
2688 /*@
2689   PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded.
2690 
2691   Collective
2692 
2693   Input Parameters:
2694 + sf       - star forest
2695 . unit     - the data type contained within the root and leaf data
2696 . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
2697 - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`
2698 
2699   Level: advanced
2700 
2701   Note:
2702   See `PetscSFRegisterPersistent()` for when/how to use this function.
2703 
2704 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
2705 @*/
2706 PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
2707 {
2708   PetscFunctionBegin;
2709   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2710   PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
2711   PetscFunctionReturn(PETSC_SUCCESS);
2712 }
2713 
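/* Return in 'size' the number of bytes spanned by one element of the MPI datatype 'unit' (its extent).
   Datatypes with a nonzero lower bound are not supported and trigger an error. */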
2714 PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
2715 {
2716   MPI_Aint lb, lb_true, bytes, bytes_true;
2717 
2718   PetscFunctionBegin;
2719   PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
2720   PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
2721   PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
2722   *size = bytes;
2723   PetscFunctionReturn(PETSC_SUCCESS);
2724 }
2725