xref: /petsc/src/vec/is/sf/interface/sf.c (revision ccfb0f9f40a0131988d7995ed9679700dae2a75a)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petsc/private/hashmapi.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <petscdevice_cuda.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 #endif
13 
14 #if defined(PETSC_CLANG_STATIC_ANALYZER)
15 extern void PetscSFCheckGraphSet(PetscSF, int);
16 #else
17   #if defined(PETSC_USE_DEBUG)
18     #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
19   #else
20     #define PetscSFCheckGraphSet(sf, arg) \
21       do { \
22       } while (0)
23   #endif
24 #endif
25 
26 const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
27 const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
28 
29 /*@
30   PetscSFCreate - create a star forest communication context
31 
32   Collective
33 
34   Input Parameter:
35 . comm - communicator on which the star forest will operate
36 
37   Output Parameter:
38 . sf - new star forest context
39 
40   Options Database Keys:
41 + -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
42 . -sf_type window                - Use MPI-3 one-sided window for communication
43 . -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
44 - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)
45 
46   Level: intermediate
47 
48   Note:
49   When one knows the communication graph matches one of the predefined patterns, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
50   `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
51   `SF`s are optimized and have better performance than the general `SF`s.
52 
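  Example Usage:
  A minimal sketch of the typical life cycle (the graph itself would be set with `PetscSFSetGraph()` or `PetscSFSetGraphWithPattern()`):
.vb
  PetscSF sf;

  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetFromOptions(sf));
  /* ... describe the communication graph ... */
  PetscCall(PetscSFSetUp(sf));
  /* ... communicate, e.g. with PetscSFBcastBegin()/PetscSFBcastEnd() ... */
  PetscCall(PetscSFDestroy(&sf));
.ve
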
53 .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
54 @*/
55 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
56 {
57   PetscSF b;
58 
59   PetscFunctionBegin;
60   PetscAssertPointer(sf, 2);
61   PetscCall(PetscSFInitializePackage());
62 
63   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
64   b->nroots    = -1;
65   b->nleaves   = -1;
66   b->minleaf   = PETSC_INT_MAX;
67   b->maxleaf   = PETSC_INT_MIN;
68   b->nranks    = -1;
69   b->rankorder = PETSC_TRUE;
70   b->ingroup   = MPI_GROUP_NULL;
71   b->outgroup  = MPI_GROUP_NULL;
72   b->graphset  = PETSC_FALSE;
73 #if defined(PETSC_HAVE_DEVICE)
74   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
75   b->use_stream_aware_mpi = PETSC_FALSE;
76   b->unknown_input_stream = PETSC_FALSE;
77   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda */
78   b->backend = PETSCSF_BACKEND_KOKKOS;
79   #elif defined(PETSC_HAVE_CUDA)
80   b->backend = PETSCSF_BACKEND_CUDA;
81   #elif defined(PETSC_HAVE_HIP)
82   b->backend = PETSCSF_BACKEND_HIP;
83   #endif
84 
85   #if defined(PETSC_HAVE_NVSHMEM)
86   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
87   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
88   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
89   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
90   #endif
91 #endif
92   b->vscat.from_n = -1;
93   b->vscat.to_n   = -1;
94   b->vscat.unit   = MPIU_SCALAR;
95   *sf             = b;
96   PetscFunctionReturn(PETSC_SUCCESS);
97 }
98 
99 /*@
100   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
101 
102   Collective
103 
104   Input Parameter:
105 . sf - star forest
106 
107   Level: advanced
108 
109 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
110 @*/
111 PetscErrorCode PetscSFReset(PetscSF sf)
112 {
113   PetscFunctionBegin;
114   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
115   PetscTryTypeMethod(sf, Reset);
116   PetscCall(PetscSFDestroy(&sf->rankssf));
117 
118   sf->nroots   = -1;
119   sf->nleaves  = -1;
120   sf->minleaf  = PETSC_INT_MAX;
121   sf->maxleaf  = PETSC_INT_MIN;
122   sf->mine     = NULL;
123   sf->remote   = NULL;
124   sf->graphset = PETSC_FALSE;
125   PetscCall(PetscFree(sf->mine_alloc));
126   PetscCall(PetscFree(sf->remote_alloc));
127   sf->nranks = -1;
128   PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
129   sf->degreeknown = PETSC_FALSE;
130   PetscCall(PetscFree(sf->degree));
131   if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
132   if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
133 
134   if (sf->multi) sf->multi->multi = NULL;
135   PetscCall(PetscSFDestroy(&sf->multi));
136 
137   PetscCall(PetscLayoutDestroy(&sf->map));
138 
139 #if defined(PETSC_HAVE_DEVICE)
140   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
141 #endif
142 
143   sf->setupcalled = PETSC_FALSE;
144   PetscFunctionReturn(PETSC_SUCCESS);
145 }
146 
147 /*@
148   PetscSFSetType - Set the `PetscSF` communication implementation
149 
150   Collective
151 
152   Input Parameters:
153 + sf   - the `PetscSF` context
154 - type - a known method
155 .vb
156     PETSCSFWINDOW - MPI-2/3 one-sided
157     PETSCSFBASIC - basic implementation using MPI-1 two-sided
158 .ve
159 
160   Options Database Key:
161 . -sf_type <type> - Sets the method; for example `basic` or `window`. Use -help for a list of available methods
162 
163   Level: intermediate
164 
165   Notes:
166   See `PetscSFType` for possible values
167 
168 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
169 @*/
170 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
171 {
172   PetscBool match;
173   PetscErrorCode (*r)(PetscSF);
174 
175   PetscFunctionBegin;
176   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
177   PetscAssertPointer(type, 2);
178 
179   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
180   if (match) PetscFunctionReturn(PETSC_SUCCESS);
181 
182   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
183   PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
184   /* Destroy the previous PetscSF implementation context */
185   PetscTryTypeMethod(sf, Destroy);
186   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
187   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
188   PetscCall((*r)(sf));
189   PetscFunctionReturn(PETSC_SUCCESS);
190 }
191 
192 /*@
193   PetscSFGetType - Get the `PetscSF` communication implementation
194 
195   Not Collective
196 
197   Input Parameter:
198 . sf - the `PetscSF` context
199 
200   Output Parameter:
201 . type - the `PetscSF` type name
202 
203   Level: intermediate
204 
205 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
206 @*/
207 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
208 {
209   PetscFunctionBegin;
210   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
211   PetscAssertPointer(type, 2);
212   *type = ((PetscObject)sf)->type_name;
213   PetscFunctionReturn(PETSC_SUCCESS);
214 }
215 
216 /*@
217   PetscSFDestroy - destroy a star forest
218 
219   Collective
220 
221   Input Parameter:
222 . sf - address of star forest
223 
224   Level: intermediate
225 
226 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
227 @*/
228 PetscErrorCode PetscSFDestroy(PetscSF *sf)
229 {
230   PetscFunctionBegin;
231   if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
232   PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1);
233   if (--((PetscObject)*sf)->refct > 0) {
234     *sf = NULL;
235     PetscFunctionReturn(PETSC_SUCCESS);
236   }
237   PetscCall(PetscSFReset(*sf));
238   PetscTryTypeMethod(*sf, Destroy);
239   PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
240   if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
241 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
242   if ((*sf)->use_stream_aware_mpi) {
243     PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
244     PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
245   }
246 #endif
247   PetscCall(PetscHeaderDestroy(sf));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
252 {
253   PetscInt           i, nleaves;
254   PetscMPIInt        size;
255   const PetscInt    *ilocal;
256   const PetscSFNode *iremote;
257 
258   PetscFunctionBegin;
259   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
260   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
261   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
262   for (i = 0; i < nleaves; i++) {
263     const PetscInt rank   = iremote[i].rank;
264     const PetscInt remote = iremote[i].index;
265     const PetscInt leaf   = ilocal ? ilocal[i] : i;
266     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
267     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
268     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
269   }
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 /*@
274   PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication
275 
276   Collective
277 
278   Input Parameter:
279 . sf - star forest communication object
280 
281   Level: beginner
282 
283 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
284 @*/
285 PetscErrorCode PetscSFSetUp(PetscSF sf)
286 {
287   PetscFunctionBegin;
288   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
289   PetscSFCheckGraphSet(sf, 1);
290   if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
291   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
292   PetscCall(PetscSFCheckGraphValid_Private(sf));
293   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
294   PetscTryTypeMethod(sf, SetUp);
295 #if defined(PETSC_HAVE_CUDA)
296   if (sf->backend == PETSCSF_BACKEND_CUDA) {
297     sf->ops->Malloc = PetscSFMalloc_CUDA;
298     sf->ops->Free   = PetscSFFree_CUDA;
299   }
300 #endif
301 #if defined(PETSC_HAVE_HIP)
302   if (sf->backend == PETSCSF_BACKEND_HIP) {
303     sf->ops->Malloc = PetscSFMalloc_HIP;
304     sf->ops->Free   = PetscSFFree_HIP;
305   }
306 #endif
307 
308 #if defined(PETSC_HAVE_KOKKOS)
309   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
310     sf->ops->Malloc = PetscSFMalloc_Kokkos;
311     sf->ops->Free   = PetscSFFree_Kokkos;
312   }
313 #endif
314   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
315   sf->setupcalled = PETSC_TRUE;
316   PetscFunctionReturn(PETSC_SUCCESS);
317 }
318 
319 /*@
320   PetscSFSetFromOptions - set `PetscSF` options using the options database
321 
322   Logically Collective
323 
324   Input Parameter:
325 . sf - star forest
326 
327   Options Database Keys:
328 + -sf_type                      - implementation type, see `PetscSFSetType()`
329 . -sf_rank_order                - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
330 . -sf_monitor                   - Monitor the MPI communication performed in `PetscSF` operations
331 . -sf_unknown_input_stream      - Assume the caller computed the input root/leafdata on device streams unknown to `PetscSF`, so `PetscSF`
332                                   must synchronize with the device before processing the data (default: false).
333                                   This option only takes effect with `-use_gpu_aware_mpi 1`.
334 . -sf_use_stream_aware_mpi      - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
335                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
336 - -sf_backend <cuda,hip,kokkos> - Select the device backend `PetscSF` uses. Currently `PetscSF` has these backends: cuda, hip and kokkos.
337                                   On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with kokkos being the default when available. On
338                                   other devices, kokkos is the only available backend. See the example below.
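
  Example Usage:
  An illustrative command line selecting the neighbor implementation and the kokkos backend (the program name is hypothetical):
.vb
  ./myapp -sf_type neighbor -sf_neighbor_persistent -sf_backend kokkos
.ve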
339 
340   Level: intermediate
341 
342 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
343 @*/
344 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
345 {
346   PetscSFType deft;
347   char        type[256];
348   PetscBool   flg;
349 
350   PetscFunctionBegin;
351   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
352   PetscObjectOptionsBegin((PetscObject)sf);
353   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
354   PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
355   PetscCall(PetscSFSetType(sf, flg ? type : deft));
356   PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
357   PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL));
358 #if defined(PETSC_HAVE_DEVICE)
359   {
360     char      backendstr[32] = {0};
361     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
362     /* Change the defaults set in PetscSFCreate() with command line options */
363     PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
364     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
365     PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
366     PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
367     PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
368     PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
369   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
370     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
371     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
372     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
373     else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
374   #elif defined(PETSC_HAVE_KOKKOS)
375     PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
376   #endif
377 
378   #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
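    /* Bind PETSc's default CUDA stream to an MPIX stream (an MPICH extension) and derive a stream-aware communicator from it */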
379     if (sf->use_stream_aware_mpi) {
380       MPI_Info info;
381 
382       PetscCallMPI(MPI_Info_create(&info));
383       PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
384       PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
385       PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
386       PetscCallMPI(MPI_Info_free(&info));
387       PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
388     }
389   #endif
390   }
391 #endif
392   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
393   PetscOptionsEnd();
394   PetscFunctionReturn(PETSC_SUCCESS);
395 }
396 
397 /*@
398   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
399 
400   Logically Collective
401 
402   Input Parameters:
403 + sf  - star forest
404 - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)
405 
406   Level: advanced
407 
408 .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
409 @*/
410 PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
411 {
412   PetscFunctionBegin;
413   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
414   PetscValidLogicalCollectiveBool(sf, flg, 2);
415   PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
416   sf->rankorder = flg;
417   PetscFunctionReturn(PETSC_SUCCESS);
418 }
419 
420 /*@
421   PetscSFSetGraph - Set a parallel star forest
422 
423   Collective
424 
425   Input Parameters:
426 + sf         - star forest
427 . nroots     - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
428 . nleaves    - number of leaf vertices on the current process, each of these references a root on any process
429 . ilocal     - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
430                during setup in debug mode)
431 . localmode  - copy mode for `ilocal`
432 . iremote    - remote locations of root vertices for each leaf on the current process, length is `nleaves`
433                (locations must be >= 0, enforced during setup in debug mode)
434 - remotemode - copy mode for `iremote`
435 
436   Level: intermediate
437 
438   Notes:
439   Leaf indices in `ilocal` must be unique, otherwise an error occurs.
440 
441   Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
442   In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
443   PETSc might modify the respective array;
444   if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
445   Only if `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).
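
  Example Usage:
  A two-process sketch, assuming `sf` lives on a communicator of exactly two processes; each process owns two roots and has two
  leaves referencing the other process's roots (the leaves are contiguous, hence `ilocal` is `NULL`):
.vb
  PetscSFNode remote[2];
  PetscMPIInt rank;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
  remote[0].rank  = 1 - rank;
  remote[0].index = 0;
  remote[1].rank  = 1 - rank;
  remote[1].index = 1;
  /* remote[] is a stack array, so it must be copied by the SF */
  PetscCall(PetscSFSetGraph(sf, 2, 2, NULL, PETSC_OWN_POINTER, remote, PETSC_COPY_VALUES));
.ve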
446 
447   Fortran Notes:
448   In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
449 
450   Developer Notes:
451   We sort the leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
452   This also allows one to compare the leaf sets of two `PetscSF`s easily.
453 
454 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
455 @*/
456 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode)
457 {
458   PetscBool unique, contiguous;
459 
460   PetscFunctionBegin;
461   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
462   if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
463   if (nleaves > 0) PetscAssertPointer(iremote, 6);
464   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
465   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
466   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
467    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
468   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
469   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
470 
471   if (sf->nroots >= 0) { /* Reset only if graph already set */
472     PetscCall(PetscSFReset(sf));
473   }
474 
475   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
476   if (PetscDefined(USE_DEBUG)) {
477     PetscMPIInt size;
478 
479     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
480     for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
481   }
482 
483   sf->nroots  = nroots;
484   sf->nleaves = nleaves;
485 
486   if (localmode == PETSC_COPY_VALUES && ilocal) {
487     PetscInt *tlocal = NULL;
488 
489     PetscCall(PetscMalloc1(nleaves, &tlocal));
490     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
491     ilocal = tlocal;
492   }
493   if (remotemode == PETSC_COPY_VALUES) {
494     PetscSFNode *tremote = NULL;
495 
496     PetscCall(PetscMalloc1(nleaves, &tremote));
497     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
498     iremote = tremote;
499   }
500 
501   if (nleaves && ilocal) {
502     PetscSFNode work;
503 
504     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
505     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
506     unique = PetscNot(unique);
507     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
508     sf->minleaf = ilocal[0];
509     sf->maxleaf = ilocal[nleaves - 1];
510     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
511   } else {
512     sf->minleaf = 0;
513     sf->maxleaf = nleaves - 1;
514     unique      = PETSC_TRUE;
515     contiguous  = PETSC_TRUE;
516   }
517 
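  /* Contiguous leaves [0, nleaves) are represented implicitly by a NULL ilocal, so any explicit array can be dropped */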
518   if (contiguous) {
519     if (localmode == PETSC_USE_POINTER) {
520       ilocal = NULL;
521     } else {
522       PetscCall(PetscFree(ilocal));
523     }
524   }
525   sf->mine = ilocal;
526   if (localmode == PETSC_USE_POINTER) {
527     sf->mine_alloc = NULL;
528   } else {
529     sf->mine_alloc = ilocal;
530   }
537   sf->remote = iremote;
538   if (remotemode == PETSC_USE_POINTER) {
539     sf->remote_alloc = NULL;
540   } else {
541     sf->remote_alloc = iremote;
542   }
543   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
544   sf->graphset = PETSC_TRUE;
545   PetscFunctionReturn(PETSC_SUCCESS);
546 }
547 
548 /*@
549   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
550 
551   Collective
552 
553   Input Parameters:
554 + sf      - The `PetscSF`
555 . map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
556 - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
557 
558   Level: intermediate
559 
560   Notes:
561   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
562   `n` and `N` are the local and global sizes of `x` respectively.
563 
564   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it copies `x` to
565   sequential vectors `y` on all MPI processes.
566 
567   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it copies `x` to a
568   sequential vector `y` on rank 0.
569 
570   In above cases, entries of `x` are roots and entries of `y` are leaves.
571 
572   With `PETSCSF_PATTERN_ALLTOALL`, map is insignificant. Suppose NP is size of `sf`'s communicator. The routine
573   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
574   of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does
575   not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
576   items with `MPI_Type_contiguous` and use that as the <unit> argument in SF routines.
577 
578   In this case, roots and leaves are symmetric.
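
  Example Usage:
  A sketch of the alltoall pattern that sends two `PetscScalar`s along each edge by defining a unit datatype; `rootdata` and
  `leafdata` are assumed to be arrays of 2*NP entries:
.vb
  MPI_Datatype unit;

  PetscCall(PetscSFSetGraphWithPattern(sf, NULL, PETSCSF_PATTERN_ALLTOALL));
  PetscCallMPI(MPI_Type_contiguous(2, MPIU_SCALAR, &unit));
  PetscCallMPI(MPI_Type_commit(&unit));
  PetscCall(PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE));
  PetscCallMPI(MPI_Type_free(&unit));
.ve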
579 
580 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
581  @*/
582 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
583 {
584   MPI_Comm    comm;
585   PetscInt    n, N, res[2];
586   PetscMPIInt rank, size;
587   PetscSFType type;
588 
589   PetscFunctionBegin;
590   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
591   if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
592   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
593   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
594   PetscCallMPI(MPI_Comm_rank(comm, &rank));
595   PetscCallMPI(MPI_Comm_size(comm, &size));
596 
597   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
598     PetscInt sizei = size;
599 
600     type = PETSCSFALLTOALL;
601     PetscCall(PetscLayoutCreate(comm, &sf->map));
602     PetscCall(PetscLayoutSetLocalSize(sf->map, size));
603     PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei)));
604     PetscCall(PetscLayoutSetUp(sf->map));
605   } else {
606     PetscCall(PetscLayoutGetLocalSize(map, &n));
607     PetscCall(PetscLayoutGetSize(map, &N));
608     res[0] = n;
609     res[1] = -n;
610     /* Check whether n is the same on all ranks so that we can optimize for that case */
611     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
612     if (res[0] == -res[1]) { /* same n */
613       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
614     } else {
615       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
616     }
617     PetscCall(PetscLayoutReference(map, &sf->map));
618   }
619   PetscCall(PetscSFSetType(sf, type));
620 
621   sf->pattern = pattern;
622   sf->mine    = NULL; /* Contiguous */
623 
624   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
625      Also set other easy stuff.
626    */
627   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
628     sf->nleaves = N;
629     sf->nroots  = n;
630     sf->nranks  = size;
631     sf->minleaf = 0;
632     sf->maxleaf = N - 1;
633   } else if (pattern == PETSCSF_PATTERN_GATHER) {
634     sf->nleaves = rank ? 0 : N;
635     sf->nroots  = n;
636     sf->nranks  = rank ? 0 : size;
637     sf->minleaf = 0;
638     sf->maxleaf = rank ? -1 : N - 1;
639   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
640     sf->nleaves = size;
641     sf->nroots  = size;
642     sf->nranks  = size;
643     sf->minleaf = 0;
644     sf->maxleaf = size - 1;
645   }
646   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
647   sf->graphset = PETSC_TRUE;
648   PetscFunctionReturn(PETSC_SUCCESS);
649 }
650 
651 /*@
652   PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map
653 
654   Collective
655 
656   Input Parameter:
657 . sf - star forest to invert
658 
659   Output Parameter:
660 . isf - inverse of `sf`
661 
662   Level: advanced
663 
664   Notes:
665   All roots must have degree 1.
666 
667   The local space may be a permutation, but cannot be sparse.
668 
669 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
670 @*/
671 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
672 {
673   PetscMPIInt     rank;
674   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
675   const PetscInt *ilocal;
676   PetscSFNode    *roots, *leaves;
677 
678   PetscFunctionBegin;
679   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
680   PetscSFCheckGraphSet(sf, 1);
681   PetscAssertPointer(isf, 2);
682 
683   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
684   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
685 
686   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
687   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
688   for (i = 0; i < maxlocal; i++) {
689     leaves[i].rank  = rank;
690     leaves[i].index = i;
691   }
692   for (i = 0; i < nroots; i++) {
693     roots[i].rank  = -1;
694     roots[i].index = -1;
695   }
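  /* Send each leaf's location (rank, local index) to its root; a root still holding rank == -1 afterwards has no leaf, i.e. degree 0 */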
696   PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
697   PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
698 
699   /* Check whether our leaves are sparse */
700   for (i = 0, count = 0; i < nroots; i++)
701     if (roots[i].rank >= 0) count++;
702   if (count == nroots) newilocal = NULL;
703   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
704     for (i = 0, count = 0; i < nroots; i++) {
705       if (roots[i].rank >= 0) {
706         newilocal[count]   = i;
707         roots[count].rank  = roots[i].rank;
708         roots[count].index = roots[i].index;
709         count++;
710       }
711     }
712   }
713 
714   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
715   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
716   PetscCall(PetscFree2(roots, leaves));
717   PetscFunctionReturn(PETSC_SUCCESS);
718 }
719 
720 /*@
721   PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
722 
723   Collective
724 
725   Input Parameters:
726 + sf  - communication object to duplicate
727 - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
728 
729   Output Parameter:
730 . newsf - new communication object
731 
732   Level: beginner
733 
734 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
735 @*/
736 PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
737 {
738   PetscSFType  type;
739   MPI_Datatype dtype = MPIU_SCALAR;
740 
741   PetscFunctionBegin;
742   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
743   PetscValidLogicalCollectiveEnum(sf, opt, 2);
744   PetscAssertPointer(newsf, 3);
745   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
746   PetscCall(PetscSFGetType(sf, &type));
747   if (type) PetscCall(PetscSFSetType(*newsf, type));
748   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
749   if (opt == PETSCSF_DUPLICATE_GRAPH) {
750     PetscSFCheckGraphSet(sf, 1);
751     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
752       PetscInt           nroots, nleaves;
753       const PetscInt    *ilocal;
754       const PetscSFNode *iremote;
755       PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
756       PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
757     } else {
758       PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
759     }
760   }
761   /* Since oldtype is committed, so is newtype, according to MPI */
762   if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
763   (*newsf)->vscat.bs     = sf->vscat.bs;
764   (*newsf)->vscat.unit   = dtype;
765   (*newsf)->vscat.to_n   = sf->vscat.to_n;
766   (*newsf)->vscat.from_n = sf->vscat.from_n;
767   /* Do not copy lsf. Build it on demand since it is rarely used */
768 
769 #if defined(PETSC_HAVE_DEVICE)
770   (*newsf)->backend              = sf->backend;
771   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
772   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
773   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
774 #endif
775   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
776   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
777   PetscFunctionReturn(PETSC_SUCCESS);
778 }
779 
780 /*@C
781   PetscSFGetGraph - Get the graph specifying a parallel star forest
782 
783   Not Collective
784 
785   Input Parameter:
786 . sf - star forest
787 
788   Output Parameters:
789 + nroots  - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
790 . nleaves - number of leaf vertices on the current process, each of these references a root on any process
791 . ilocal  - locations of leaves in leafdata buffers (a returned value of `NULL` means the leaves are in contiguous storage)
792 - iremote - remote locations of root vertices for each leaf on the current process
793 
794   Level: intermediate
795 
796   Notes:
797   We are not currently requiring that the graph is set, thus `nroots` is returned as -1 if it has not been set yet
798 
799   The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()`
800 
801   Fortran Note:
802   Use `PetscSFRestoreGraph()` when access to the arrays is no longer needed
803 
804 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
805 @*/
806 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt *ilocal[], const PetscSFNode *iremote[])
807 {
808   PetscFunctionBegin;
809   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
810   if (sf->ops->GetGraph) {
811     PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
812   } else {
813     if (nroots) *nroots = sf->nroots;
814     if (nleaves) *nleaves = sf->nleaves;
815     if (ilocal) *ilocal = sf->mine;
816     if (iremote) *iremote = sf->remote;
817   }
818   PetscFunctionReturn(PETSC_SUCCESS);
819 }
820 
821 /*@
822   PetscSFGetLeafRange - Get the active leaf ranges
823 
824   Not Collective
825 
826   Input Parameter:
827 . sf - star forest
828 
829   Output Parameters:
830 + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
831 - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
832 
833   Level: developer
834 
835 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
836 @*/
837 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
838 {
839   PetscFunctionBegin;
840   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
841   PetscSFCheckGraphSet(sf, 1);
842   if (minleaf) *minleaf = sf->minleaf;
843   if (maxleaf) *maxleaf = sf->maxleaf;
844   PetscFunctionReturn(PETSC_SUCCESS);
845 }
846 
847 /*@
848   PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
849 
850   Collective
851 
852   Input Parameters:
853 + A    - the star forest
854 . obj  - Optional object that provides the prefix for the option names
855 - name - command line option
856 
857   Level: intermediate
858 
859   Note:
860   See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
861 
862 .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
863 @*/
864 PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
865 {
866   PetscFunctionBegin;
867   PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
868   PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
869   PetscFunctionReturn(PETSC_SUCCESS);
870 }
871 
872 /*@
873   PetscSFView - view a star forest
874 
875   Collective
876 
877   Input Parameters:
878 + sf     - star forest
879 - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
880 
881   Level: beginner
882 
883 .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
884 @*/
885 PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
886 {
887   PetscBool         isascii;
888   PetscViewerFormat format;
889 
890   PetscFunctionBegin;
891   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
892   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
893   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
894   PetscCheckSameComm(sf, 1, viewer, 2);
895   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
896   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
897   if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
898     PetscMPIInt rank;
899     PetscInt    j;
900 
901     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
902     PetscCall(PetscViewerASCIIPushTab(viewer));
903     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
904       if (!sf->graphset) {
905         PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
906         PetscCall(PetscViewerASCIIPopTab(viewer));
907         PetscFunctionReturn(PETSC_SUCCESS);
908       }
909       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
910       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
911       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
912       for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
913       PetscCall(PetscViewerFlush(viewer));
914       PetscCall(PetscViewerGetFormat(viewer, &format));
915       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
916         PetscMPIInt *tmpranks, *perm;
917 
918         PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
919         PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
920         for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
921         PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
922         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
923         for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
924           PetscMPIInt i = perm[ii];
925 
926           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
927           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
928         }
929         PetscCall(PetscFree2(tmpranks, perm));
930       }
931       PetscCall(PetscViewerFlush(viewer));
932       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
933     }
934     PetscCall(PetscViewerASCIIPopTab(viewer));
935   }
936   PetscTryTypeMethod(sf, View, viewer);
937   PetscFunctionReturn(PETSC_SUCCESS);
938 }
939 
940 /*@C
941   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
942 
943   Not Collective
944 
945   Input Parameter:
946 . sf - star forest
947 
948   Output Parameters:
949 + nranks  - number of ranks referenced by local part
950 . ranks   - [`nranks`] array of ranks
951 . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
952 . rmine   - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank, or `NULL`
953 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank, or `NULL`
954 
955   Level: developer
956 
957 .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
958 @*/
959 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[])
960 {
961   PetscFunctionBegin;
962   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
963   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
964   if (sf->ops->GetRootRanks) {
965     PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
966   } else {
967     /* The generic implementation */
968     if (nranks) *nranks = sf->nranks;
969     if (ranks) *ranks = sf->ranks;
970     if (roffset) *roffset = sf->roffset;
971     if (rmine) *rmine = sf->rmine;
972     if (rremote) *rremote = sf->rremote;
973   }
974   PetscFunctionReturn(PETSC_SUCCESS);
975 }
976 
977 /*@C
978   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
979 
980   Not Collective
981 
982   Input Parameter:
983 . sf - star forest
984 
985   Output Parameters:
986 + niranks  - number of leaf ranks referencing roots on this process
987 . iranks   - [`niranks`] array of ranks
988 . ioffset  - [`niranks`+1] offset in `irootloc` for each rank
989 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank
990 
991   Level: developer
992 
993 .seealso: `PetscSF`, `PetscSFGetRootRanks()`
994 @*/
995 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[])
996 {
997   PetscFunctionBegin;
998   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
999   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
1000   if (sf->ops->GetLeafRanks) {
1001     PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
1002   } else {
1003     PetscSFType type;
1004     PetscCall(PetscSFGetType(sf, &type));
1005     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
1006   }
1007   PetscFunctionReturn(PETSC_SUCCESS);
1008 }
1009 
1010 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1011 {
1012   PetscInt i;
1013   for (i = 0; i < n; i++) {
1014     if (needle == list[i]) return PETSC_TRUE;
1015   }
1016   return PETSC_FALSE;
1017 }
1018 
1019 /*@C
1020   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.
1021 
1022   Collective
1023 
1024   Input Parameters:
1025 + sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1026 - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
1027 
1028   Level: developer
1029 
1030 .seealso: `PetscSF`, `PetscSFGetRootRanks()`
1031 @*/
1032 PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1033 {
1034   PetscHMapI    table;
1035   PetscHashIter pos;
1036   PetscMPIInt   size, groupsize, *groupranks, *ranks;
1037   PetscInt     *rcount;
1038   PetscInt      irank, sfnrank, ranksi;
1039   PetscMPIInt   i, orank = -1;
1040 
1041   PetscFunctionBegin;
1042   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1043   PetscSFCheckGraphSet(sf, 1);
1044   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1045   PetscCall(PetscHMapICreateWithSize(10, &table));
1046   for (i = 0; i < sf->nleaves; i++) {
1047     /* Log 1-based rank */
1048     PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
1049   }
1050   PetscCall(PetscHMapIGetSize(table, &sfnrank));
1051   PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
1052   PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
1053   PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1054   PetscHashIterBegin(table, pos);
1055   for (i = 0; i < sf->nranks; i++) {
1056     PetscHashIterGetKey(table, pos, ranksi);
1057     PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
1058     PetscHashIterGetVal(table, pos, rcount[i]);
1059     PetscHashIterNext(table, pos);
1060     ranks[i]--; /* Convert back to 0-based */
1061   }
1062   PetscCall(PetscHMapIDestroy(&table));
1063 
1064   /* We expect that dgroup is reliably "small" while nranks could be large */
1065   {
1066     MPI_Group    group = MPI_GROUP_NULL;
1067     PetscMPIInt *dgroupranks;
1068 
1069     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1070     PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
1071     PetscCall(PetscMalloc1(groupsize, &dgroupranks));
1072     PetscCall(PetscMalloc1(groupsize, &groupranks));
1073     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1074     if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
1075     PetscCallMPI(MPI_Group_free(&group));
1076     PetscCall(PetscFree(dgroupranks));
1077   }
1078 
1079   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
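  /* Two-pointer scan: i moves backward until it finds a distinguished rank, sf->ndranks moves forward until it finds a
     non-distinguished one, and the two entries (with their counts) are swapped until the pointers meet */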
1080   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1081     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1082       if (InList(ranks[i], groupsize, groupranks)) break;
1083     }
1084     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1085       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1086     }
1087     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1088       PetscMPIInt tmprank;
1089       PetscInt    tmpcount;
1090 
1091       tmprank             = ranks[i];
1092       tmpcount            = rcount[i];
1093       ranks[i]            = ranks[sf->ndranks];
1094       rcount[i]           = rcount[sf->ndranks];
1095       ranks[sf->ndranks]  = tmprank;
1096       rcount[sf->ndranks] = tmpcount;
1097       sf->ndranks++;
1098     }
1099   }
1100   PetscCall(PetscFree(groupranks));
1101   PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
1102   if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
1103   sf->roffset[0] = 0;
1104   for (i = 0; i < sf->nranks; i++) {
1105     PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
1106     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1107     rcount[i]          = 0;
1108   }
1109   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1110     /* short circuit */
1111     if (orank != sf->remote[i].rank) {
1112       /* Search for index of iremote[i].rank in sf->ranks */
1113       PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank));
1114       PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank));
1115       if (irank < 0) {
1116         PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1117         if (irank >= 0) irank += sf->ndranks;
1118       }
1119     }
1120     PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank);
1121     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1122     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1123     rcount[irank]++;
1124   }
1125   PetscCall(PetscFree2(rcount, ranks));
1126   PetscFunctionReturn(PETSC_SUCCESS);
1127 }
1128 
1129 /*@C
1130   PetscSFGetGroups - gets incoming and outgoing process groups
1131 
1132   Collective
1133 
1134   Input Parameter:
1135 . sf - star forest
1136 
1137   Output Parameters:
1138 + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1139 - outgoing - group of destination processes for outgoing edges (roots that I reference)
1140 
1141   Level: developer
1142 
1143 .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1144 @*/
1145 PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1146 {
1147   MPI_Group group = MPI_GROUP_NULL;
1148 
1149   PetscFunctionBegin;
1150   PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
1151   if (sf->ingroup == MPI_GROUP_NULL) {
1152     PetscInt        i;
1153     const PetscInt *indegree;
1154     PetscMPIInt     rank, *outranks, *inranks, indegree0;
1155     PetscSFNode    *remote;
1156     PetscSF         bgcount;
1157 
1158     /* Compute the number of incoming ranks */
1159     PetscCall(PetscMalloc1(sf->nranks, &remote));
1160     for (i = 0; i < sf->nranks; i++) {
1161       remote[i].rank  = sf->ranks[i];
1162       remote[i].index = 0;
1163     }
1164     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
1165     PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1166     PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
1167     PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
1168     /* Enumerate the incoming ranks */
1169     PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
1170     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1171     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1172     PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
1173     PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
1174     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1175     PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
1176     PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
1177     PetscCallMPI(MPI_Group_free(&group));
1178     PetscCall(PetscFree2(inranks, outranks));
1179     PetscCall(PetscSFDestroy(&bgcount));
1180   }
1181   *incoming = sf->ingroup;
1182 
1183   if (sf->outgroup == MPI_GROUP_NULL) {
1184     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1185     PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
1186     PetscCallMPI(MPI_Group_free(&group));
1187   }
1188   *outgoing = sf->outgroup;
1189   PetscFunctionReturn(PETSC_SUCCESS);
1190 }
1191 
1192 /*@
1193   PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
1194 
1195   Collective
1196 
1197   Input Parameter:
1198 . sf - star forest
1199 
1200   Output Parameter:
1201 . rsf - the star forest with a single root per process to perform communications
1202 
1203   Level: developer
1204 
1205 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
1206 @*/
1207 PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
1208 {
1209   PetscFunctionBegin;
1210   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1211   PetscAssertPointer(rsf, 2);
1212   if (!sf->rankssf) {
1213     PetscSFNode       *rremotes;
1214     const PetscMPIInt *ranks;
1215     PetscMPIInt        nranks;
1216 
1217     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
1218     PetscCall(PetscMalloc1(nranks, &rremotes));
1219     for (PetscInt i = 0; i < nranks; i++) {
1220       rremotes[i].rank  = ranks[i];
1221       rremotes[i].index = 0;
1222     }
1223     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
1224     PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
1225   }
1226   *rsf = sf->rankssf;
1227   PetscFunctionReturn(PETSC_SUCCESS);
1228 }
1229 
1230 /*@
1231   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
1232 
1233   Collective
1234 
1235   Input Parameter:
1236 . sf - star forest that may contain roots with 0 or with more than 1 vertex
1237 
1238   Output Parameter:
1239 . multi - star forest with split roots, such that each root has degree exactly 1
1240 
1241   Level: developer
1242 
1243   Note:
1244   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
1245   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1246   edge, it is a candidate for future optimization that might involve its removal.
1247 
1248 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1249 @*/
1250 PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1251 {
1252   PetscFunctionBegin;
1253   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1254   PetscAssertPointer(multi, 2);
1255   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1256     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1257     *multi           = sf->multi;
1258     sf->multi->multi = sf->multi;
1259     PetscFunctionReturn(PETSC_SUCCESS);
1260   }
1261   if (!sf->multi) {
1262     const PetscInt *indegree;
1263     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
1264     PetscSFNode    *remote;
1265     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1266     PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
1267     PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
1268     PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
1269     inoffset[0] = 0;
1270     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1271     for (i = 0; i < maxlocal; i++) outones[i] = 1;
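    /* Each leaf fetches its root's current offset and adds 1 to it, so every incoming edge of a root claims a distinct slot */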
1272     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1273     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1274     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1275     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1276       for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
1277     }
1278     PetscCall(PetscMalloc1(sf->nleaves, &remote));
1279     for (i = 0; i < sf->nleaves; i++) {
1280       remote[i].rank  = sf->remote[i].rank;
1281       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1282     }
1283     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1284     sf->multi->multi = sf->multi;
1285     PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1286     if (sf->rankorder) { /* Sort the ranks */
1287       PetscMPIInt  rank;
1288       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1289       PetscSFNode *newremote;
1290       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1291       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1292       PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
1293       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1294       PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1295       PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1296       /* Sort the incoming ranks at each vertex, build the inverse map */
1297       for (i = 0; i < sf->nroots; i++) {
1298         PetscInt j;
1299         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1300         PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
1301         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1302       }
1303       PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1304       PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1305       PetscCall(PetscMalloc1(sf->nleaves, &newremote));
1306       for (i = 0; i < sf->nleaves; i++) {
1307         newremote[i].rank  = sf->remote[i].rank;
1308         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1309       }
1310       PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
1311       PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
1312     }
1313     PetscCall(PetscFree3(inoffset, outones, outoffset));
1314   }
1315   *multi = sf->multi;
1316   PetscFunctionReturn(PETSC_SUCCESS);
1317 }
1318 
1319 /*@C
1320   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
1321 
1322   Collective
1323 
1324   Input Parameters:
1325 + sf        - original star forest
1326 . nselected - number of selected roots on this process
1327 - selected  - indices of the selected roots on this process
1328 
1329   Output Parameter:
1330 . esf - new star forest
1331 
1332   Level: advanced
1333 
1334   Note:
1335   To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
1336   be done by calling `PetscSFGetGraph()`.
1337 
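  Example:
  A minimal sketch (the selected root indices are illustrative); `ilocal` of the embedded `PetscSF` lists the leaves that still participate\:
.vb
  PetscSF         esf;
  PetscInt        selected[] = {0, 1}, nleaves;
  const PetscInt *ilocal;

  PetscCall(PetscSFCreateEmbeddedRootSF(sf, 2, selected, &esf));
  PetscCall(PetscSFGetGraph(esf, NULL, &nleaves, &ilocal, NULL)); /* surviving leaves and their indices */
  /* ... communicate on esf ... */
  PetscCall(PetscSFDestroy(&esf));
.ve
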
1338 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1339 @*/
1340 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1341 {
1342   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1343   const PetscInt    *ilocal;
1344   signed char       *rootdata, *leafdata, *leafmem;
1345   const PetscSFNode *iremote;
1346   PetscSFNode       *new_iremote;
1347   MPI_Comm           comm;
1348 
1349   PetscFunctionBegin;
1350   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1351   PetscSFCheckGraphSet(sf, 1);
1352   if (nselected) PetscAssertPointer(selected, 3);
1353   PetscAssertPointer(esf, 4);
1354 
1355   PetscCall(PetscSFSetUp(sf));
1356   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
1357   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1358   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1359 
1360   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1361     PetscBool dups;
1362     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
1363     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1364     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1365   }
1366 
1367   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1368   else {
1369     /* A generic version of creating embedded sf */
1370     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1371     maxlocal = maxleaf - minleaf + 1;
1372     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
1373     leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
1374     /* Tag selected roots and bcast to leaves */
1375     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1376     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1377     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1378 
1379     /* Build esf with leaves that are still connected */
1380     esf_nleaves = 0;
1381     for (i = 0; i < nleaves; i++) {
1382       j = ilocal ? ilocal[i] : i;
1383       /* esf_nleaves += leafdata[j] should work in theory, but fails in practice due to SFWindow bugs
1384          in PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1385       */
1386       esf_nleaves += (leafdata[j] ? 1 : 0);
1387     }
1388     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
1389     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1390     for (i = n = 0; i < nleaves; i++) {
1391       j = ilocal ? ilocal[i] : i;
1392       if (leafdata[j]) {
1393         new_ilocal[n]        = j;
1394         new_iremote[n].rank  = iremote[i].rank;
1395         new_iremote[n].index = iremote[i].index;
1396         ++n;
1397       }
1398     }
1399     PetscCall(PetscSFCreate(comm, esf));
1400     PetscCall(PetscSFSetFromOptions(*esf));
1401     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1402     PetscCall(PetscFree2(rootdata, leafmem));
1403   }
1404   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
1405   PetscFunctionReturn(PETSC_SUCCESS);
1406 }
1407 
1408 /*@C
1409   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
1410 
1411   Collective
1412 
1413   Input Parameters:
1414 + sf        - original star forest
1415 . nselected - number of selected leaves on this process
1416 - selected  - indices of the selected leaves on this process
1417 
1418   Output Parameter:
1419 . newsf - new star forest
1420 
1421   Level: advanced
1422 
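  Example:
  A minimal sketch (the selected leaf indices are illustrative)\:
.vb
  PetscSF  newsf;
  PetscInt selected[] = {0, 2}; /* keep only leaves 0 and 2 */

  PetscCall(PetscSFCreateEmbeddedLeafSF(sf, 2, selected, &newsf));
  /* ... communicate on newsf ... */
  PetscCall(PetscSFDestroy(&newsf));
.ve
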
1423 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1424 @*/
1425 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1426 {
1427   const PetscSFNode *iremote;
1428   PetscSFNode       *new_iremote;
1429   const PetscInt    *ilocal;
1430   PetscInt           i, nroots, *leaves, *new_ilocal;
1431   MPI_Comm           comm;
1432 
1433   PetscFunctionBegin;
1434   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1435   PetscSFCheckGraphSet(sf, 1);
1436   if (nselected) PetscAssertPointer(selected, 3);
1437   PetscAssertPointer(newsf, 4);
1438 
1439   /* Sort selected[], remove duplicates, and put the result in leaves[] */
1440   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1441   PetscCall(PetscMalloc1(nselected, &leaves));
1442   PetscCall(PetscArraycpy(leaves, selected, nselected));
1443   PetscCall(PetscSortRemoveDupsInt(&nselected, leaves)); /* leaves[] is an unsorted copy, so sort before removing duplicates */
1444   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1445 
1446   /* Optimize the routine only when sf is set up, so that we can reuse its communication pattern */
1447   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1448   else {
1449     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1450     PetscCall(PetscMalloc1(nselected, &new_ilocal));
1451     PetscCall(PetscMalloc1(nselected, &new_iremote));
1452     for (i = 0; i < nselected; ++i) {
1453       const PetscInt l     = leaves[i];
1454       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1455       new_iremote[i].rank  = iremote[l].rank;
1456       new_iremote[i].index = iremote[l].index;
1457     }
1458     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1459     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1460   }
1461   PetscCall(PetscFree(leaves));
1462   PetscFunctionReturn(PETSC_SUCCESS);
1463 }
1464 
1465 /*@C
1466   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
1467 
1468   Collective
1469 
1470   Input Parameters:
1471 + sf       - star forest on which to communicate
1472 . unit     - data type associated with each node
1473 . rootdata - buffer to broadcast
1474 - op       - operation to use for reduction
1475 
1476   Output Parameter:
1477 . leafdata - buffer to be reduced with values from each leaf's respective root
1478 
1479   Level: intermediate
1480 
1481   Note:
1482   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1483   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, you should
1484   use `PetscSFBcastWithMemTypeBegin()` instead.
1485 
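  Example:
  A minimal sketch broadcasting one `PetscInt` per node, assuming `rootdata` and `leafdata` are sized for this `sf`'s roots and leaves\:
.vb
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
  /* ... independent local work may overlap the communication here ... */
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
.ve
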
1486 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1487 @*/
1488 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1489 {
1490   PetscMemType rootmtype, leafmtype;
1491 
1492   PetscFunctionBegin;
1493   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1494   PetscCall(PetscSFSetUp(sf));
1495   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1496   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1497   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1498   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1499   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1500   PetscFunctionReturn(PETSC_SUCCESS);
1501 }
1502 
1503 /*@C
1504   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
1505   to `PetscSFBcastEnd()`
1506 
1507   Collective
1508 
1509   Input Parameters:
1510 + sf        - star forest on which to communicate
1511 . unit      - data type associated with each node
1512 . rootmtype - memory type of rootdata
1513 . rootdata  - buffer to broadcast
1514 . leafmtype - memory type of leafdata
1515 - op        - operation to use for reduction
1516 
1517   Output Parameter:
1518 . leafdata - buffer to be reduced with values from each leaf's respective root
1519 
1520   Level: intermediate
1521 
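  Example:
  A minimal sketch where both buffers are known to reside in host memory, so the memory-type detection cost is avoided\:
.vb
  PetscCall(PetscSFBcastWithMemTypeBegin(sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, rootdata, PETSC_MEMTYPE_HOST, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
.ve
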
1522 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1523 @*/
1524 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1525 {
1526   PetscFunctionBegin;
1527   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1528   PetscCall(PetscSFSetUp(sf));
1529   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1530   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1531   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1532   PetscFunctionReturn(PETSC_SUCCESS);
1533 }
1534 
1535 /*@C
1536   PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
1537 
1538   Collective
1539 
1540   Input Parameters:
1541 + sf       - star forest
1542 . unit     - data type
1543 . rootdata - buffer to broadcast
1544 - op       - operation to use for reduction
1545 
1546   Output Parameter:
1547 . leafdata - buffer to be reduced with values from each leaf's respective root
1548 
1549   Level: intermediate
1550 
1551 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1552 @*/
1553 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1554 {
1555   PetscFunctionBegin;
1556   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1557   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1558   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1559   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
1560   PetscFunctionReturn(PETSC_SUCCESS);
1561 }
1562 
1563 /*@C
1564   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`
1565 
1566   Collective
1567 
1568   Input Parameters:
1569 + sf       - star forest
1570 . unit     - data type
1571 . leafdata - values to reduce
1572 - op       - reduction operation
1573 
1574   Output Parameter:
1575 . rootdata - result of reduction of values from all leaves of each root
1576 
1577   Level: intermediate
1578 
1579   Note:
1580   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1581   are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, you should
1582   use `PetscSFReduceWithMemTypeBegin()` instead.
1583 
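  Example:
  A minimal sketch summing leaf contributions into `rootdata` (which should be initialized beforehand, since `MPI_SUM` accumulates into it)\:
.vb
  PetscCall(PetscSFReduceBegin(sf, MPIU_SCALAR, leafdata, rootdata, MPI_SUM));
  /* ... independent local work may overlap the communication here ... */
  PetscCall(PetscSFReduceEnd(sf, MPIU_SCALAR, leafdata, rootdata, MPI_SUM));
.ve
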
1584 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
1585 @*/
1586 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1587 {
1588   PetscMemType rootmtype, leafmtype;
1589 
1590   PetscFunctionBegin;
1591   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1592   PetscCall(PetscSFSetUp(sf));
1593   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1594   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1595   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1596   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1597   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1598   PetscFunctionReturn(PETSC_SUCCESS);
1599 }
1600 
1601 /*@C
1602   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1603 
1604   Collective
1605 
1606   Input Parameters:
1607 + sf        - star forest
1608 . unit      - data type
1609 . leafmtype - memory type of leafdata
1610 . leafdata  - values to reduce
1611 . rootmtype - memory type of rootdata
1612 - op        - reduction operation
1613 
1614   Output Parameter:
1615 . rootdata - result of reduction of values from all leaves of each root
1616 
1617   Level: intermediate
1618 
1619 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1620 @*/
1621 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1622 {
1623   PetscFunctionBegin;
1624   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1625   PetscCall(PetscSFSetUp(sf));
1626   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1627   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1628   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1629   PetscFunctionReturn(PETSC_SUCCESS);
1630 }
1631 
1632 /*@C
1633   PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
1634 
1635   Collective
1636 
1637   Input Parameters:
1638 + sf       - star forest
1639 . unit     - data type
1640 . leafdata - values to reduce
1641 - op       - reduction operation
1642 
1643   Output Parameter:
1644 . rootdata - result of reduction of values from all leaves of each root
1645 
1646   Level: intermediate
1647 
1648 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
1649 @*/
1650 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1651 {
1652   PetscFunctionBegin;
1653   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1654   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1655   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1656   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1657   PetscFunctionReturn(PETSC_SUCCESS);
1658 }
1659 
1660 /*@C
1661   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1662   to be completed with `PetscSFFetchAndOpEnd()`
1663 
1664   Collective
1665 
1666   Input Parameters:
1667 + sf       - star forest
1668 . unit     - data type
1669 . leafdata - leaf values to use in reduction
1670 - op       - operation to use for reduction
1671 
1672   Output Parameters:
1673 + rootdata   - root values to be updated; the input state is seen by the first process to perform an update
1674 - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1675 
1676   Level: advanced
1677 
1678   Note:
1679   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1680   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1681   not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
1682   integers.
1683 
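  Example:
  A common use is assigning each leaf a unique slot at its root. In this sketch (buffer names are illustrative), `rootoffsets` holds starting offsets,
  `ones` holds a 1 for every leaf, and each leaf receives in `leafoffsets` the root value immediately before its own atomic increment\:
.vb
  PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootoffsets, ones, leafoffsets, MPI_SUM));
  PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootoffsets, ones, leafoffsets, MPI_SUM));
.ve
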
1684 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1685 @*/
1686 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1687 {
1688   PetscMemType rootmtype, leafmtype, leafupdatemtype;
1689 
1690   PetscFunctionBegin;
1691   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1692   PetscCall(PetscSFSetUp(sf));
1693   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1694   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1695   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1696   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
1697   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1698   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1699   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1700   PetscFunctionReturn(PETSC_SUCCESS);
1701 }
1702 
1703 /*@C
1704   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1705   applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`
1706 
1707   Collective
1708 
1709   Input Parameters:
1710 + sf              - star forest
1711 . unit            - data type
1712 . rootmtype       - memory type of rootdata
1713 . leafmtype       - memory type of leafdata
1714 . leafdata        - leaf values to use in reduction
1715 . leafupdatemtype - memory type of leafupdate
1716 - op              - operation to use for reduction
1717 
1718   Output Parameters:
1719 + rootdata   - root values to be updated; the input state is seen by the first process to perform an update
1720 - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1721 
1722   Level: advanced
1723 
1724   Note:
1725   See `PetscSFFetchAndOpBegin()` for more details.
1726 
1727 .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1728 @*/
1729 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1730 {
1731   PetscFunctionBegin;
1732   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1733   PetscCall(PetscSFSetUp(sf));
1734   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1735   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1736   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1737   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1738   PetscFunctionReturn(PETSC_SUCCESS);
1739 }
1740 
1741 /*@C
1742   PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
1743   to fetch values from roots and update atomically by applying operation using my leaf value
1744 
1745   Collective
1746 
1747   Input Parameters:
1748 + sf       - star forest
1749 . unit     - data type
1750 . leafdata - leaf values to use in reduction
1751 - op       - operation to use for reduction
1752 
1753   Output Parameters:
1754 + rootdata   - root values to be updated; the input state is seen by the first process to perform an update
1755 - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1756 
1757   Level: advanced
1758 
1759 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1760 @*/
1761 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1762 {
1763   PetscFunctionBegin;
1764   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1765   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1766   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1767   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1768   PetscFunctionReturn(PETSC_SUCCESS);
1769 }
1770 
1771 /*@C
1772   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
1773 
1774   Collective
1775 
1776   Input Parameter:
1777 . sf - star forest
1778 
1779   Output Parameter:
1780 . degree - degree of each root vertex
1781 
1782   Level: advanced
1783 
1784   Note:
1785   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1786 
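  Example:
  A minimal sketch; `degree` must not be freed by the caller\:
.vb
  const PetscInt *degree;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  /* degree[i] is now the number of leaves connected to root i */
.ve
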
1787 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1788 @*/
1789 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
1790 {
1791   PetscFunctionBegin;
1792   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1793   PetscSFCheckGraphSet(sf, 1);
1794   PetscAssertPointer(degree, 2);
1795   if (!sf->degreeknown) {
1796     PetscInt i, nroots = sf->nroots, maxlocal;
1797     PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1798     maxlocal = sf->maxleaf - sf->minleaf + 1;
1799     PetscCall(PetscMalloc1(nroots, &sf->degree));
1800     PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1801     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1802     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1803     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1804   }
1805   *degree = NULL;
1806   PetscFunctionReturn(PETSC_SUCCESS);
1807 }
1808 
1809 /*@C
1810   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
1811 
1812   Collective
1813 
1814   Input Parameter:
1815 . sf - star forest
1816 
1817   Output Parameter:
1818 . degree - degree of each root vertex
1819 
1820   Level: developer
1821 
1822   Note:
1823   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1824 
1825 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1826 @*/
1827 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[])
1828 {
1829   PetscFunctionBegin;
1830   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1831   PetscSFCheckGraphSet(sf, 1);
1832   PetscAssertPointer(degree, 2);
1833   if (!sf->degreeknown) {
1834     PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1835     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1836     PetscCall(PetscFree(sf->degreetmp));
1837     sf->degreeknown = PETSC_TRUE;
1838   }
1839   *degree = sf->degree;
1840   PetscFunctionReturn(PETSC_SUCCESS);
1841 }
1842 
1843 /*@C
1844   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
1845   Each multi-root is assigned the index of the corresponding original root.
1846 
1847   Collective
1848 
1849   Input Parameters:
1850 + sf     - star forest
1851 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`
1852 
1853   Output Parameters:
1854 + nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
1855 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1856 
1857   Level: developer
1858 
1859   Note:
1860   The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
1861 
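  Example:
  A minimal sketch mapping each multi-root back to its original root index\:
.vb
  const PetscInt *degree;
  PetscInt        nmr, *morig;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  PetscCall(PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmr, &morig));
  /* morig[k] is the original root index of multi-root k */
  PetscCall(PetscFree(morig));
.ve
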
1862 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1863 @*/
1864 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1865 {
1866   PetscSF  msf;
1867   PetscInt k = 0, nroots, nmroots;
1868 
1869   PetscFunctionBegin;
1870   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1871   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1872   if (nroots) PetscAssertPointer(degree, 2);
1873   if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
1874   PetscAssertPointer(multiRootsOrigNumbering, 4);
1875   PetscCall(PetscSFGetMultiSF(sf, &msf));
1876   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1877   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1878   for (PetscInt i = 0; i < nroots; i++) {
1879     if (!degree[i]) continue;
1880     for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1881   }
1882   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check failed");
1883   if (nMultiRoots) *nMultiRoots = nmroots;
1884   PetscFunctionReturn(PETSC_SUCCESS);
1885 }
1886 
1887 /*@C
1888   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
1889 
1890   Collective
1891 
1892   Input Parameters:
1893 + sf       - star forest
1894 . unit     - data type
1895 - leafdata - leaf data to gather to roots
1896 
1897   Output Parameter:
1898 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1899 
1900   Level: intermediate
1901 
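  Example:
  A minimal sketch, assuming `leafdata` holds one `PetscScalar` per leaf; the multi-root buffer needs one unit per incoming leaf, i.e. the sum of the root degrees\:
.vb
  const PetscInt *degree;
  PetscInt        i, nroots, ndata = 0;
  PetscScalar    *multirootdata;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  for (i = 0; i < nroots; i++) ndata += degree[i];
  PetscCall(PetscMalloc1(ndata, &multirootdata));
  PetscCall(PetscSFGatherBegin(sf, MPIU_SCALAR, leafdata, multirootdata));
  PetscCall(PetscSFGatherEnd(sf, MPIU_SCALAR, leafdata, multirootdata));
.ve
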
1902 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1903 @*/
1904 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1905 {
1906   PetscSF multi = NULL;
1907 
1908   PetscFunctionBegin;
1909   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1910   PetscCall(PetscSFSetUp(sf));
1911   PetscCall(PetscSFGetMultiSF(sf, &multi));
1912   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1913   PetscFunctionReturn(PETSC_SUCCESS);
1914 }
1915 
1916 /*@C
1917   PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
1918 
1919   Collective
1920 
1921   Input Parameters:
1922 + sf       - star forest
1923 . unit     - data type
1924 - leafdata - leaf data to gather to roots
1925 
1926   Output Parameter:
1927 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1928 
1929   Level: intermediate
1930 
1931 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1932 @*/
1933 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1934 {
1935   PetscSF multi = NULL;
1936 
1937   PetscFunctionBegin;
1938   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1939   PetscCall(PetscSFGetMultiSF(sf, &multi));
1940   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1941   PetscFunctionReturn(PETSC_SUCCESS);
1942 }
1943 
1944 /*@C
1945   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
1946 
1947   Collective
1948 
1949   Input Parameters:
1950 + sf            - star forest
1951 . unit          - data type
1952 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1953 
1954   Output Parameter:
1955 . leafdata - leaf data to be updated with personal data from each respective root
1956 
1957   Level: intermediate
1958 
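  Example:
  A minimal sketch, assuming `multirootdata` was sized and filled as in the `PetscSFGatherBegin()` example\:
.vb
  PetscCall(PetscSFScatterBegin(sf, MPIU_SCALAR, multirootdata, leafdata));
  PetscCall(PetscSFScatterEnd(sf, MPIU_SCALAR, multirootdata, leafdata));
.ve
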
1959 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1960 @*/
1961 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1962 {
1963   PetscSF multi = NULL;
1964 
1965   PetscFunctionBegin;
1966   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1967   PetscCall(PetscSFSetUp(sf));
1968   PetscCall(PetscSFGetMultiSF(sf, &multi));
1969   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1970   PetscFunctionReturn(PETSC_SUCCESS);
1971 }
1972 
1973 /*@C
1974   PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
1975 
1976   Collective
1977 
1978   Input Parameters:
1979 + sf            - star forest
1980 . unit          - data type
1981 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1982 
1983   Output Parameter:
1984 . leafdata - leaf data to be updated with personal data from each respective root
1985 
1986   Level: intermediate
1987 
1988 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1989 @*/
1990 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1991 {
1992   PetscSF multi = NULL;
1993 
1994   PetscFunctionBegin;
1995   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1996   PetscCall(PetscSFGetMultiSF(sf, &multi));
1997   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1998   PetscFunctionReturn(PETSC_SUCCESS);
1999 }
2000 
2001 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
2002 {
2003   PetscInt        i, n, nleaves;
2004   const PetscInt *ilocal = NULL;
2005   PetscHSetI      seen;
2006 
2007   PetscFunctionBegin;
2008   if (PetscDefined(USE_DEBUG)) {
2009     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
2010     PetscCall(PetscHSetICreate(&seen));
2011     for (i = 0; i < nleaves; i++) {
2012       const PetscInt leaf = ilocal ? ilocal[i] : i;
2013       PetscCall(PetscHSetIAdd(seen, leaf));
2014     }
2015     PetscCall(PetscHSetIGetSize(seen, &n));
2016     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
2017     PetscCall(PetscHSetIDestroy(&seen));
2018   }
2019   PetscFunctionReturn(PETSC_SUCCESS);
2020 }
2021 
2022 /*@
2023   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
2024 
2025   Input Parameters:
2026 + sfA - The first `PetscSF`
2027 - sfB - The second `PetscSF`
2028 
2029   Output Parameter:
2030 . sfBA - The composite `PetscSF`
2031 
2032   Level: developer
2033 
2034   Notes:
2035   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2036   forests, i.e. the same leaf is not connected with different roots.
2037 
2038   `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
2039   a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
2040   nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
2041   `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
2042 
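  Example:
  A minimal sketch (buffer names are illustrative); broadcasting on `sfBA` is equivalent to broadcasting on `sfA` and then on `sfB`, on connected nodes\:
.vb
  PetscSF sfBA;

  PetscCall(PetscSFCompose(sfA, sfB, &sfBA));
  PetscCall(PetscSFBcastBegin(sfBA, MPIU_INT, rootdataA, leafdataB, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfBA, MPIU_INT, rootdataA, leafdataB, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sfBA));
.ve
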
2043 .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2044 @*/
2045 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2046 {
2047   const PetscSFNode *remotePointsA, *remotePointsB;
2048   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
2049   const PetscInt    *localPointsA, *localPointsB;
2050   PetscInt          *localPointsBA;
2051   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
2052   PetscBool          denseB;
2053 
2054   PetscFunctionBegin;
2055   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
2056   PetscSFCheckGraphSet(sfA, 1);
2057   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
2058   PetscSFCheckGraphSet(sfB, 2);
2059   PetscCheckSameComm(sfA, 1, sfB, 2);
2060   PetscAssertPointer(sfBA, 3);
2061   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2062   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2063 
2064   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2065   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2066   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
2067      numRootsB; otherwise, garbage will be broadcast.
2068      Example (comm size = 1):
2069      sfA: 0 <- (0, 0)
2070      sfB: 100 <- (0, 0)
2071           101 <- (0, 1)
2072      Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
2073      of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
2074      receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
2075      remotePointsA; if not recast, point 101 would receive a garbage value.             */
2076   PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
2077   for (i = 0; i < numRootsB; i++) {
2078     reorderedRemotePointsA[i].rank  = -1;
2079     reorderedRemotePointsA[i].index = -1;
2080   }
2081   for (i = 0; i < numLeavesA; i++) {
2082     PetscInt localp = localPointsA ? localPointsA[i] : i;
2083 
2084     if (localp >= numRootsB) continue;
2085     reorderedRemotePointsA[localp] = remotePointsA[i];
2086   }
2087   remotePointsA = reorderedRemotePointsA;
2088   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2089   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
2090   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2091     leafdataB[i].rank  = -1;
2092     leafdataB[i].index = -1;
2093   }
2094   PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2095   PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2096   PetscCall(PetscFree(reorderedRemotePointsA));
2097 
2098   denseB = (PetscBool)!localPointsB;
2099   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2100     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
2101     else numLeavesBA++;
2102   }
2103   if (denseB) {
2104     localPointsBA  = NULL;
2105     remotePointsBA = leafdataB;
2106   } else {
2107     PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
2108     PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
2109     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2110       const PetscInt l = localPointsB ? localPointsB[i] : i;
2111 
2112       if (leafdataB[l - minleaf].rank == -1) continue;
2113       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
2114       localPointsBA[numLeavesBA]  = l;
2115       numLeavesBA++;
2116     }
2117     PetscCall(PetscFree(leafdataB));
2118   }
2119   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2120   PetscCall(PetscSFSetFromOptions(*sfBA));
2121   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2122   PetscFunctionReturn(PETSC_SUCCESS);
2123 }
2124 
2125 /*@
2126   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
2127 
2128   Input Parameters:
2129 + sfA - The first `PetscSF`
2130 - sfB - The second `PetscSF`
2131 
2132   Output Parameter:
2133 . sfBA - The composite `PetscSF`.
2134 
2135   Level: developer
2136 
2137   Notes:
2138   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2139   forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
2140   second `PetscSF` must have degree 1, i.e., no root has more than one leaf connected.
2141 
2142   `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
2143   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2144   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
2145   on `sfA`, then
2146   a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.
2147 
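  Example:
  A minimal sketch; a broadcast on `sfBA` acts like a broadcast on `sfA` followed by a reduction on `sfB`, on connected roots\:
.vb
  PetscSF sfBA;

  PetscCall(PetscSFComposeInverse(sfA, sfB, &sfBA));
  /* ... communicate on sfBA ... */
  PetscCall(PetscSFDestroy(&sfBA));
.ve
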
2148 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2149 @*/
2150 PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2151 {
2152   const PetscSFNode *remotePointsA, *remotePointsB;
2153   PetscSFNode       *remotePointsBA;
2154   const PetscInt    *localPointsA, *localPointsB;
2155   PetscSFNode       *reorderedRemotePointsA = NULL;
2156   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2157   MPI_Op             op;
2158 #if defined(PETSC_USE_64BIT_INDICES)
2159   PetscBool iswin;
2160 #endif
2161 
2162   PetscFunctionBegin;
2163   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
2164   PetscSFCheckGraphSet(sfA, 1);
2165   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
2166   PetscSFCheckGraphSet(sfB, 2);
2167   PetscCheckSameComm(sfA, 1, sfB, 2);
2168   PetscAssertPointer(sfBA, 3);
2169   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2170   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2171 
2172   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2173   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2174 
2175   /* TODO: Check roots of sfB have degree of 1 */
2176   /* Once we implement it, we can replace the MPI_MAXLOC
2177      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2178      We use MPI_MAXLOC only to have a deterministic output from this routine if
2179      the root condition is not met.
2180    */
2181   op = MPI_MAXLOC;
2182 #if defined(PETSC_USE_64BIT_INDICES)
2183   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2184   PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
2185   if (iswin) op = MPI_REPLACE;
2186 #endif
2187 
2188   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2189   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
2190   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2191     reorderedRemotePointsA[i].rank  = -1;
2192     reorderedRemotePointsA[i].index = -1;
2193   }
2194   if (localPointsA) {
2195     for (i = 0; i < numLeavesA; i++) {
2196       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2197       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2198     }
2199   } else {
2200     for (i = 0; i < numLeavesA; i++) {
2201       if (i > maxleaf || i < minleaf) continue;
2202       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2203     }
2204   }
2205 
2206   PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
2207   PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
2208   for (i = 0; i < numRootsB; i++) {
2209     remotePointsBA[i].rank  = -1;
2210     remotePointsBA[i].index = -1;
2211   }
2212 
2213   PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2214   PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2215   PetscCall(PetscFree(reorderedRemotePointsA));
2216   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2217     if (remotePointsBA[i].rank == -1) continue;
2218     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
2219     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2220     localPointsBA[numLeavesBA]        = i;
2221     numLeavesBA++;
2222   }
2223   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2224   PetscCall(PetscSFSetFromOptions(*sfBA));
2225   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2226   PetscFunctionReturn(PETSC_SUCCESS);
2227 }
2228 
2229 /*
2230   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
2231 
2232   Input Parameter:
2233 . sf - The global `PetscSF`
2234 
2235   Output Parameter:
2236 . out - The local `PetscSF`
2237 
2238 .seealso: `PetscSF`, `PetscSFCreate()`
2239  */
2240 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2241 {
2242   MPI_Comm           comm;
2243   PetscMPIInt        myrank;
2244   const PetscInt    *ilocal;
2245   const PetscSFNode *iremote;
2246   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
2247   PetscSFNode       *liremote;
2248   PetscSF            lsf;
2249 
2250   PetscFunctionBegin;
2251   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2252   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2253   else {
2254     PetscMPIInt irank;
2255 
2256     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2257     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
2258     PetscCallMPI(MPI_Comm_rank(comm, &myrank));
2259 
2260     /* Find out local edges and build a local SF */
2261     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
2262     for (i = lnleaves = 0; i < nleaves; i++) {
2263       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2264       if (irank == myrank) lnleaves++;
2265     }
2266     PetscCall(PetscMalloc1(lnleaves, &lilocal));
2267     PetscCall(PetscMalloc1(lnleaves, &liremote));
2268 
2269     for (i = j = 0; i < nleaves; i++) {
2270       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2271       if (irank == myrank) {
2272         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2273         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
2274         liremote[j].index = iremote[i].index;
2275         j++;
2276       }
2277     }
2278     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
2279     PetscCall(PetscSFSetFromOptions(lsf));
2280     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
2281     PetscCall(PetscSFSetUp(lsf));
2282     *out = lsf;
2283   }
2284   PetscFunctionReturn(PETSC_SUCCESS);
2285 }
2286 
2287 /* Similar to PetscSFBcast, but only broadcasts to leaves on rank 0 */
2288 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2289 {
2290   PetscMemType rootmtype, leafmtype;
2291 
2292   PetscFunctionBegin;
2293   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2294   PetscCall(PetscSFSetUp(sf));
2295   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2296   PetscCall(PetscGetMemType(rootdata, &rootmtype));
2297   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2298   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2299   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2300   PetscFunctionReturn(PETSC_SUCCESS);
2301 }
2302 
2303 /*@
2304   PetscSFConcatenate - concatenate multiple `PetscSF`s into one
2305 
2306   Input Parameters:
2307 + comm        - the communicator
2308 . nsfs        - the number of input `PetscSF`
2309 . sfs         - the array of input `PetscSF`
2310 . rootMode    - the root mode specifying how roots are handled
2311 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2312 
2313   Output Parameter:
2314 . newsf - The resulting `PetscSF`
2315 
2316   Level: advanced
2317 
2318   Notes:
2319   The communicator of all `PetscSF`s in `sfs` must be `comm`.
2320 
2321   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
2322 
2323   The offsets in `leafOffsets` are added to the original leaf indices.
2324 
2325   If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
2326   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
2327 
2328   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2329   In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2330 
2331   All root modes retain the essential connectivity condition.
2332   If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2333   Parameter `rootMode` controls how the input root spaces are combined.
2334   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2335   and is also the same in the output `PetscSF`.
2336   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2337   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2338   roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
2339   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2340   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2341   the original root ranks are ignored.
2342   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2343   the output `PetscSF`'s root layout is such that the local number of roots is the sum of the input `PetscSF`s' local numbers of roots on each rank,
2344   to preserve load balance.
2345   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.
2346 
2347   Example:
2348   We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
2349 .vb
2350   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
2351   for m in {local,global,shared}; do
2352     mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
2353   done
2354 .ve
2355   we generate two identical `PetscSF`s sf_0 and sf_1,
2356 .vb
2357   PetscSF Object: sf_0 2 MPI processes
2358     type: basic
2359     rank #leaves #roots
2360     [ 0]       4      2
2361     [ 1]       4      2
2362     leaves      roots       roots in global numbering
2363     ( 0,  0) <- ( 0,  0)  =   0
2364     ( 0,  1) <- ( 0,  1)  =   1
2365     ( 0,  2) <- ( 1,  0)  =   2
2366     ( 0,  3) <- ( 1,  1)  =   3
2367     ( 1,  0) <- ( 0,  0)  =   0
2368     ( 1,  1) <- ( 0,  1)  =   1
2369     ( 1,  2) <- ( 1,  0)  =   2
2370     ( 1,  3) <- ( 1,  1)  =   3
2371 .ve
2372   and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
2373 .vb
2374   rootMode = local:
2375   PetscSF Object: result_sf 2 MPI processes
2376     type: basic
2377     rank #leaves #roots
2378     [ 0]       8      4
2379     [ 1]       8      4
2380     leaves      roots       roots in global numbering
2381     ( 0,  0) <- ( 0,  0)  =   0
2382     ( 0,  1) <- ( 0,  1)  =   1
2383     ( 0,  2) <- ( 1,  0)  =   4
2384     ( 0,  3) <- ( 1,  1)  =   5
2385     ( 0,  4) <- ( 0,  2)  =   2
2386     ( 0,  5) <- ( 0,  3)  =   3
2387     ( 0,  6) <- ( 1,  2)  =   6
2388     ( 0,  7) <- ( 1,  3)  =   7
2389     ( 1,  0) <- ( 0,  0)  =   0
2390     ( 1,  1) <- ( 0,  1)  =   1
2391     ( 1,  2) <- ( 1,  0)  =   4
2392     ( 1,  3) <- ( 1,  1)  =   5
2393     ( 1,  4) <- ( 0,  2)  =   2
2394     ( 1,  5) <- ( 0,  3)  =   3
2395     ( 1,  6) <- ( 1,  2)  =   6
2396     ( 1,  7) <- ( 1,  3)  =   7
2397 
2398   rootMode = global:
2399   PetscSF Object: result_sf 2 MPI processes
2400     type: basic
2401     rank #leaves #roots
2402     [ 0]       8      4
2403     [ 1]       8      4
2404     leaves      roots       roots in global numbering
2405     ( 0,  0) <- ( 0,  0)  =   0
2406     ( 0,  1) <- ( 0,  1)  =   1
2407     ( 0,  2) <- ( 0,  2)  =   2
2408     ( 0,  3) <- ( 0,  3)  =   3
2409     ( 0,  4) <- ( 1,  0)  =   4
2410     ( 0,  5) <- ( 1,  1)  =   5
2411     ( 0,  6) <- ( 1,  2)  =   6
2412     ( 0,  7) <- ( 1,  3)  =   7
2413     ( 1,  0) <- ( 0,  0)  =   0
2414     ( 1,  1) <- ( 0,  1)  =   1
2415     ( 1,  2) <- ( 0,  2)  =   2
2416     ( 1,  3) <- ( 0,  3)  =   3
2417     ( 1,  4) <- ( 1,  0)  =   4
2418     ( 1,  5) <- ( 1,  1)  =   5
2419     ( 1,  6) <- ( 1,  2)  =   6
2420     ( 1,  7) <- ( 1,  3)  =   7
2421 
2422   rootMode = shared:
2423   PetscSF Object: result_sf 2 MPI processes
2424     type: basic
2425     rank #leaves #roots
2426     [ 0]       8      2
2427     [ 1]       8      2
2428     leaves      roots       roots in global numbering
2429     ( 0,  0) <- ( 0,  0)  =   0
2430     ( 0,  1) <- ( 0,  1)  =   1
2431     ( 0,  2) <- ( 1,  0)  =   2
2432     ( 0,  3) <- ( 1,  1)  =   3
2433     ( 0,  4) <- ( 0,  0)  =   0
2434     ( 0,  5) <- ( 0,  1)  =   1
2435     ( 0,  6) <- ( 1,  0)  =   2
2436     ( 0,  7) <- ( 1,  1)  =   3
2437     ( 1,  0) <- ( 0,  0)  =   0
2438     ( 1,  1) <- ( 0,  1)  =   1
2439     ( 1,  2) <- ( 1,  0)  =   2
2440     ( 1,  3) <- ( 1,  1)  =   3
2441     ( 1,  4) <- ( 0,  0)  =   0
2442     ( 1,  5) <- ( 0,  1)  =   1
2443     ( 1,  6) <- ( 1,  0)  =   2
2444     ( 1,  7) <- ( 1,  1)  =   3
2445 .ve
2446 
2447 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2448 @*/
2449 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2450 {
2451   PetscInt     i, s, nLeaves, nRoots;
2452   PetscInt    *leafArrayOffsets;
2453   PetscInt    *ilocal_new;
2454   PetscSFNode *iremote_new;
2455   PetscBool    all_ilocal_null = PETSC_FALSE;
2456   PetscLayout  glayout         = NULL;
2457   PetscInt    *gremote         = NULL;
2458   PetscMPIInt  rank, size;
2459 
2460   PetscFunctionBegin;
2461   if (PetscDefined(USE_DEBUG)) {
2462     PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2463 
2464     PetscCall(PetscSFCreate(comm, &dummy));
2465     PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
2466     PetscAssertPointer(sfs, 3);
2467     for (i = 0; i < nsfs; i++) {
2468       PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
2469       PetscCheckSameComm(dummy, 1, sfs[i], 3);
2470     }
2471     PetscValidLogicalCollectiveEnum(dummy, rootMode, 4);
2472     if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
2473     PetscAssertPointer(newsf, 6);
2474     PetscCall(PetscSFDestroy(&dummy));
2475   }
2476   if (!nsfs) {
2477     PetscCall(PetscSFCreate(comm, newsf));
2478     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2479     PetscFunctionReturn(PETSC_SUCCESS);
2480   }
2481   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2482   PetscCallMPI(MPI_Comm_size(comm, &size));
2483 
2484   /* Calculate leaf array offsets */
2485   PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2486   leafArrayOffsets[0] = 0;
2487   for (s = 0; s < nsfs; s++) {
2488     PetscInt nl;
2489 
2490     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2491     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2492   }
2493   nLeaves = leafArrayOffsets[nsfs];
2494 
2495   /* Calculate number of roots */
2496   switch (rootMode) {
2497   case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
2498     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2499     if (PetscDefined(USE_DEBUG)) {
2500       for (s = 1; s < nsfs; s++) {
2501         PetscInt nr;
2502 
2503         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2504         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
2505       }
2506     }
2507   } break;
2508   case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
2509     /* Calculate also global layout in this case */
2510     PetscInt    *nls;
2511     PetscLayout *lts;
2512     PetscInt   **inds;
2513     PetscInt     j;
2514     PetscInt     rootOffset = 0;
2515 
2516     PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
2517     PetscCall(PetscLayoutCreate(comm, &glayout));
2518     glayout->bs = 1;
2519     glayout->n  = 0;
2520     glayout->N  = 0;
2521     for (s = 0; s < nsfs; s++) {
2522       PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
2523       glayout->n += lts[s]->n;
2524       glayout->N += lts[s]->N;
2525     }
2526     PetscCall(PetscLayoutSetUp(glayout));
2527     PetscCall(PetscMalloc1(nLeaves, &gremote));
2528     for (s = 0, j = 0; s < nsfs; s++) {
2529       for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
2530       rootOffset += lts[s]->N;
2531       PetscCall(PetscLayoutDestroy(&lts[s]));
2532       PetscCall(PetscFree(inds[s]));
2533     }
2534     PetscCall(PetscFree3(lts, nls, inds));
2535     nRoots = glayout->N;
2536   } break;
2537   case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
2538     /* nRoots calculated later in this case */
2539     break;
2540   default:
2541     SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
2542   }
2543 
2544   if (!leafOffsets) {
2545     all_ilocal_null = PETSC_TRUE;
2546     for (s = 0; s < nsfs; s++) {
2547       const PetscInt *ilocal;
2548 
2549       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2550       if (ilocal) {
2551         all_ilocal_null = PETSC_FALSE;
2552         break;
2553       }
2554     }
2555     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2556   }
2557 
2558   /* Renumber and concatenate local leaves */
2559   ilocal_new = NULL;
2560   if (!all_ilocal_null) {
2561     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2562     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2563     for (s = 0; s < nsfs; s++) {
2564       const PetscInt *ilocal;
2565       PetscInt       *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
2566       PetscInt        i, nleaves_l;
2567 
2568       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2569       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2570     }
2571   }
2572 
2573   /* Renumber and concatenate remote roots */
2574   if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
2575     PetscInt rootOffset = 0;
2576 
2577     PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2578     for (i = 0; i < nLeaves; i++) {
2579       iremote_new[i].rank  = -1;
2580       iremote_new[i].index = -1;
2581     }
2582     for (s = 0; s < nsfs; s++) {
2583       PetscInt           i, nl, nr;
2584       PetscSF            tmp_sf;
2585       const PetscSFNode *iremote;
2586       PetscSFNode       *tmp_rootdata;
2587       PetscSFNode       *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);
2588 
2589       PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
2590       /* create a helper SF with the same graph but contiguous leaves */
2591       PetscCall(PetscSFCreate(comm, &tmp_sf));
2592       PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
2593       PetscCall(PetscSFSetUp(tmp_sf));
2594       PetscCall(PetscMalloc1(nr, &tmp_rootdata));
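      /* LOCAL: roots of the input SFs are stacked on each rank, so shift each index by the
         local root counts of the preceding SFs; SHARED: all SFs address the same root space,
         so each root keeps its original index */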
2595       if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2596         for (i = 0; i < nr; i++) {
2597           tmp_rootdata[i].index = i + rootOffset;
2598           tmp_rootdata[i].rank  = rank;
2599         }
2600         rootOffset += nr;
2601       } else {
2602         for (i = 0; i < nr; i++) {
2603           tmp_rootdata[i].index = i;
2604           tmp_rootdata[i].rank  = rank;
2605         }
2606       }
2607       PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2608       PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2609       PetscCall(PetscSFDestroy(&tmp_sf));
2610       PetscCall(PetscFree(tmp_rootdata));
2611     }
2612     if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above
2613 
2614     /* Build the new SF */
2615     PetscCall(PetscSFCreate(comm, newsf));
2616     PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
2617   } else {
2618     /* Build the new SF */
2619     PetscCall(PetscSFCreate(comm, newsf));
2620     PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
2621   }
2622   PetscCall(PetscSFSetUp(*newsf));
2623   PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
2624   PetscCall(PetscLayoutDestroy(&glayout));
2625   PetscCall(PetscFree(gremote));
2626   PetscCall(PetscFree(leafArrayOffsets));
2627   PetscFunctionReturn(PETSC_SUCCESS);
2628 }
2629 
2630 /*@
2631   PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications.
2632 
2633   Collective
2634 
2635   Input Parameters:
2636 + sf       - star forest
2637 . unit     - the data type contained within the root and leaf data
2638 . rootdata - root data that will be used for multiple PetscSF communications
2639 - leafdata - leaf data that will be used for multiple PetscSF communications
2640 
2641   Level: advanced
2642 
2643   Notes:
2644   Implementations of `PetscSF` can make optimizations
2645   for repeated communication using the same memory regions, but these optimizations
2646   can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed.
2647   The intended pattern is
2648 
2649 .vb
2650   PetscMalloc2(nroots, &rootdata, nleaves, &leafdata);
2651 
2652   PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
2653   // repeated use of rootdata and leafdata will now be optimized
2654 
2655   PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE);
2656   PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE);
2657   // ...
2658   PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM);
2659   PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM);
2660   // ... (other communications)
2661 
2662   // rootdata and leafdata must be deregistered before freeing
2663   // skipping this can lead to undefined behavior including
2664   // deadlocks
2665   PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
2666 
2667   // it is now safe to free rootdata and leafdata
2668   PetscFree2(rootdata, leafdata);
2669 .ve
2670 
2671   Registering `rootdata` and `leafdata` is optional: omitting it does not cause an
2672   error, but it prevents optimizations that reduce the setup time of each
2673   communication.  Currently, the only implementation of `PetscSF` that benefits from
2674   `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`.  For the default
2675   `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`.
2676 
2677 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
2678 @*/
2679 PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
2680 {
2681   PetscFunctionBegin;
2682   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
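  /* PetscTryMethod() is a no-op when the implementation (e.g. the default PETSCSFBASIC) does not provide this hook */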
2683   PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
2684   PetscFunctionReturn(PETSC_SUCCESS);
2685 }
2686 
2687 /*@
2688   PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded.
2689 
2690   Collective
2691 
2692   Input Parameters:
2693 + sf       - star forest
2694 . unit     - the data type contained within the root and leaf data
2695 . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
2696 - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`
2697 
2698   Level: advanced
2699 
2700   Notes:
2701   See `PetscSFRegisterPersistent()` for the intended usage pattern; in particular, deregister before freeing `rootdata` or `leafdata`.
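
  A condensed sketch of the pairing, mirroring the example in `PetscSFRegisterPersistent()`:

.vb
  PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
  // ... repeated communication using rootdata and leafdata ...
  PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
  PetscFree2(rootdata, leafdata); // safe to free only after deregistration
.ve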
2702 
2703 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
2704 @*/
2705 PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
2706 {
2707   PetscFunctionBegin;
2708   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2709   PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
2710   PetscFunctionReturn(PETSC_SUCCESS);
2711 }
2712 
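/* Return in size the extent in bytes of one unit of the given MPI datatype;
   datatypes with a nonzero (true) lower bound are not supported */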
2713 PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
2714 {
2715   MPI_Aint lb, lb_true, bytes, bytes_true;
2716 
2717   PetscFunctionBegin;
2718   PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
2719   PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
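  /* PetscSF addresses contiguous buffers as arrays of units, so only datatypes with zero lower bound are supported */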
2720   PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound; write to petsc-maint@mcs.anl.gov if you want this feature");
2721   *size = bytes;
2722   PetscFunctionReturn(PETSC_SUCCESS);
2723 }
2724