xref: /petsc/src/vec/is/sf/interface/sf.c (revision 7dcfde4d07706b324bec70f4acb301e88d0e45fe)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petsc/private/hashmapi.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <cuda_runtime.h>
8   #include <petscdevice_cuda.h>
9 #endif
10 
11 #if defined(PETSC_HAVE_HIP)
12   #include <hip/hip_runtime.h>
13 #endif
14 
15 #if defined(PETSC_CLANG_STATIC_ANALYZER)
16 extern void PetscSFCheckGraphSet(PetscSF, int);
17 #else
18   #if defined(PETSC_USE_DEBUG)
19     #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
20   #else
21     #define PetscSFCheckGraphSet(sf, arg) \
22       do { \
23       } while (0)
24   #endif
25 #endif
26 
27 const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
28 const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
29 
30 /*@
31   PetscSFCreate - create a star forest communication context
32 
33   Collective
34 
35   Input Parameter:
36 . comm - communicator on which the star forest will operate
37 
38   Output Parameter:
39 . sf - new star forest context
40 
41   Options Database Keys:
42 + -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
43 . -sf_type window                - Use MPI-3 one-sided window for communication
44 . -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
45 - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)
46 
47   Level: intermediate
48 
49   Note:
50   When one knows the communication graph is one of the predefined patterns, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
51   `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
52   `PetscSF`s are optimized and have better performance than the general `PetscSF`s.
53 
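  Example Usage:
  A minimal sketch of the usual lifecycle. The one-leaf graph is illustrative, and `rank` is assumed to hold the result of `MPI_Comm_rank()` on `PETSC_COMM_WORLD`:
.vb
  PetscSF     sf;
  PetscSFNode remote;

  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetFromOptions(sf));
  remote.rank  = 0; /* every process attaches one leaf to root 0 on rank 0 */
  remote.index = 0;
  PetscCall(PetscSFSetGraph(sf, rank == 0 ? 1 : 0, 1, NULL, PETSC_COPY_VALUES, &remote, PETSC_COPY_VALUES));
  PetscCall(PetscSFSetUp(sf));
  /* ... PetscSFBcastBegin()/PetscSFBcastEnd(), PetscSFReduceBegin()/PetscSFReduceEnd() ... */
  PetscCall(PetscSFDestroy(&sf));
.ve
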
54 .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
55 @*/
56 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
57 {
58   PetscSF b;
59 
60   PetscFunctionBegin;
61   PetscAssertPointer(sf, 2);
62   PetscCall(PetscSFInitializePackage());
63 
64   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
65   b->nroots    = -1;
66   b->nleaves   = -1;
67   b->minleaf   = PETSC_INT_MAX;
68   b->maxleaf   = PETSC_INT_MIN;
69   b->nranks    = -1;
70   b->rankorder = PETSC_TRUE;
71   b->ingroup   = MPI_GROUP_NULL;
72   b->outgroup  = MPI_GROUP_NULL;
73   b->graphset  = PETSC_FALSE;
74 #if defined(PETSC_HAVE_DEVICE)
75   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
76   b->use_stream_aware_mpi = PETSC_FALSE;
77   b->unknown_input_stream = PETSC_FALSE;
78   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda */
79   b->backend = PETSCSF_BACKEND_KOKKOS;
80   #elif defined(PETSC_HAVE_CUDA)
81   b->backend = PETSCSF_BACKEND_CUDA;
82   #elif defined(PETSC_HAVE_HIP)
83   b->backend = PETSCSF_BACKEND_HIP;
84   #endif
85 
86   #if defined(PETSC_HAVE_NVSHMEM)
87   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
88   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
89   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
90   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
91   #endif
92 #endif
93   b->vscat.from_n = -1;
94   b->vscat.to_n   = -1;
95   b->vscat.unit   = MPIU_SCALAR;
96   *sf             = b;
97   PetscFunctionReturn(PETSC_SUCCESS);
98 }
99 
100 /*@
101   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
102 
103   Collective
104 
105   Input Parameter:
106 . sf - star forest
107 
108   Level: advanced
109 
110 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
111 @*/
112 PetscErrorCode PetscSFReset(PetscSF sf)
113 {
114   PetscFunctionBegin;
115   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
116   PetscTryTypeMethod(sf, Reset);
117   PetscCall(PetscSFDestroy(&sf->rankssf));
118 
119   sf->nroots   = -1;
120   sf->nleaves  = -1;
121   sf->minleaf  = PETSC_INT_MAX;
122   sf->maxleaf  = PETSC_INT_MIN;
123   sf->mine     = NULL;
124   sf->remote   = NULL;
125   sf->graphset = PETSC_FALSE;
126   PetscCall(PetscFree(sf->mine_alloc));
127   PetscCall(PetscFree(sf->remote_alloc));
128   sf->nranks = -1;
129   PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
130   sf->degreeknown = PETSC_FALSE;
131   PetscCall(PetscFree(sf->degree));
132   if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
133   if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
134 
135   if (sf->multi) sf->multi->multi = NULL;
136   PetscCall(PetscSFDestroy(&sf->multi));
137 
138   PetscCall(PetscLayoutDestroy(&sf->map));
139 
140 #if defined(PETSC_HAVE_DEVICE)
141   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
142 #endif
143 
144   sf->setupcalled = PETSC_FALSE;
145   PetscFunctionReturn(PETSC_SUCCESS);
146 }
147 
148 /*@
149   PetscSFSetType - Set the `PetscSF` communication implementation
150 
151   Collective
152 
153   Input Parameters:
154 + sf   - the `PetscSF` context
155 - type - a known method
156 .vb
157     PETSCSFWINDOW - MPI-2/3 one-sided
158     PETSCSFBASIC - basic implementation using MPI-1 two-sided
159 .ve
160 
161   Options Database Key:
162 . -sf_type <type> - Sets the method; for example `basic` or `window`. Use -help for a list of available methods
163 
164   Level: intermediate
165 
166   Notes:
167   See `PetscSFType` for possible values
168 
169 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
170 @*/
171 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
172 {
173   PetscBool match;
174   PetscErrorCode (*r)(PetscSF);
175 
176   PetscFunctionBegin;
177   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
178   PetscAssertPointer(type, 2);
179 
180   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
181   if (match) PetscFunctionReturn(PETSC_SUCCESS);
182 
183   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
184   PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
185   /* Destroy the previous PetscSF implementation context */
186   PetscTryTypeMethod(sf, Destroy);
187   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
188   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
189   PetscCall((*r)(sf));
190   PetscFunctionReturn(PETSC_SUCCESS);
191 }
192 
193 /*@
194   PetscSFGetType - Get the `PetscSF` communication implementation
195 
196   Not Collective
197 
198   Input Parameter:
199 . sf - the `PetscSF` context
200 
201   Output Parameter:
202 . type - the `PetscSF` type name
203 
204   Level: intermediate
205 
206 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
207 @*/
208 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
209 {
210   PetscFunctionBegin;
211   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
212   PetscAssertPointer(type, 2);
213   *type = ((PetscObject)sf)->type_name;
214   PetscFunctionReturn(PETSC_SUCCESS);
215 }
216 
217 /*@
218   PetscSFDestroy - destroy a star forest
219 
220   Collective
221 
222   Input Parameter:
223 . sf - address of star forest
224 
225   Level: intermediate
226 
227 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
228 @*/
229 PetscErrorCode PetscSFDestroy(PetscSF *sf)
230 {
231   PetscFunctionBegin;
232   if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
233   PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1);
234   if (--((PetscObject)*sf)->refct > 0) {
235     *sf = NULL;
236     PetscFunctionReturn(PETSC_SUCCESS);
237   }
238   PetscCall(PetscSFReset(*sf));
239   PetscTryTypeMethod(*sf, Destroy);
240   PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
241   if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
242 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
243   if ((*sf)->use_stream_aware_mpi) {
244     PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
245     PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
246   }
247 #endif
248   PetscCall(PetscHeaderDestroy(sf));
249   PetscFunctionReturn(PETSC_SUCCESS);
250 }
251 
252 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
253 {
254   PetscInt           i, nleaves;
255   PetscMPIInt        size;
256   const PetscInt    *ilocal;
257   const PetscSFNode *iremote;
258 
259   PetscFunctionBegin;
260   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
261   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
262   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
263   for (i = 0; i < nleaves; i++) {
264     const PetscInt rank   = iremote[i].rank;
265     const PetscInt remote = iremote[i].index;
266     const PetscInt leaf   = ilocal ? ilocal[i] : i;
267     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
268     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
269     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
270   }
271   PetscFunctionReturn(PETSC_SUCCESS);
272 }
273 
274 /*@
275   PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication
276 
277   Collective
278 
279   Input Parameter:
280 . sf - star forest communication object
281 
282   Level: beginner
283 
284 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
285 @*/
286 PetscErrorCode PetscSFSetUp(PetscSF sf)
287 {
288   PetscFunctionBegin;
289   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
290   PetscSFCheckGraphSet(sf, 1);
291   if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
292   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
293   PetscCall(PetscSFCheckGraphValid_Private(sf));
294   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
295   PetscTryTypeMethod(sf, SetUp);
296 #if defined(PETSC_HAVE_CUDA)
297   if (sf->backend == PETSCSF_BACKEND_CUDA) {
298     sf->ops->Malloc = PetscSFMalloc_CUDA;
299     sf->ops->Free   = PetscSFFree_CUDA;
300   }
301 #endif
302 #if defined(PETSC_HAVE_HIP)
303   if (sf->backend == PETSCSF_BACKEND_HIP) {
304     sf->ops->Malloc = PetscSFMalloc_HIP;
305     sf->ops->Free   = PetscSFFree_HIP;
306   }
307 #endif
308 
309 #if defined(PETSC_HAVE_KOKKOS)
310   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
311     sf->ops->Malloc = PetscSFMalloc_Kokkos;
312     sf->ops->Free   = PetscSFFree_Kokkos;
313   }
314 #endif
315   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
316   sf->setupcalled = PETSC_TRUE;
317   PetscFunctionReturn(PETSC_SUCCESS);
318 }
319 
320 /*@
321   PetscSFSetFromOptions - set `PetscSF` options using the options database
322 
323   Logically Collective
324 
325   Input Parameter:
326 . sf - star forest
327 
328   Options Database Keys:
329 + -sf_type                      - implementation type, see `PetscSFSetType()`
330 . -sf_rank_order                - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
331 . -sf_unknown_input_stream      - Assume callers of `PetscSF` computed the input root/leafdata on streams unknown to `PetscSF`, so `PetscSF` will
332                                   synchronize streams before processing the data (default: false). If false, callers are assumed to have used the
333                                   default CUDA stream, and no stream synchronization is needed between `PetscSF` and its caller.
334 . -sf_use_stream_aware_mpi      - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
335                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
336 
337 - -sf_backend <cuda,hip,kokkos> - Select the device backend `PetscSF` uses. Currently `PetscSF` has these backends: cuda, hip and kokkos.
338                                   On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
339                                   kokkos is the only available backend.
340 
341   Level: intermediate
342 
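  Example Usage:
  A minimal sketch; the run-time option values shown are illustrative:
.vb
  PetscSF sf;

  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetFromOptions(sf)); /* honors -sf_type, -sf_rank_order, -sf_backend, ... */
.ve
  and run the program with, e.g., "-sf_type neighbor".
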
343 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
344 @*/
345 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
346 {
347   PetscSFType deft;
348   char        type[256];
349   PetscBool   flg;
350 
351   PetscFunctionBegin;
352   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
353   PetscObjectOptionsBegin((PetscObject)sf);
354   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
355   PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
356   PetscCall(PetscSFSetType(sf, flg ? type : deft));
357   PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
358 #if defined(PETSC_HAVE_DEVICE)
359   {
360     char      backendstr[32] = {0};
361     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
362     /* Change the defaults set in PetscSFCreate() with command line options */
363     PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
364     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
365     PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
366     PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
367     PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
368     PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
369   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
370     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
371     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
372     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
373     else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
374   #elif defined(PETSC_HAVE_KOKKOS)
375     PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
376   #endif
377 
378   #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
379     if (sf->use_stream_aware_mpi) {
380       MPI_Info info;
381 
382       PetscCallMPI(MPI_Info_create(&info));
383       PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
384       PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
385       PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
386       PetscCallMPI(MPI_Info_free(&info));
387       PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
388     }
389   #endif
390   }
391 #endif
392   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
393   PetscOptionsEnd();
394   PetscFunctionReturn(PETSC_SUCCESS);
395 }
396 
397 /*@
398   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
399 
400   Logically Collective
401 
402   Input Parameters:
403 + sf  - star forest
404 - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)
405 
406   Level: advanced
407 
408 .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
409 @*/
410 PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
411 {
412   PetscFunctionBegin;
413   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
414   PetscValidLogicalCollectiveBool(sf, flg, 2);
415   PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
416   sf->rankorder = flg;
417   PetscFunctionReturn(PETSC_SUCCESS);
418 }
419 
420 /*@
421   PetscSFSetGraph - Set a parallel star forest
422 
423   Collective
424 
425   Input Parameters:
426 + sf         - star forest
427 . nroots     - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
428 . nleaves    - number of leaf vertices on the current process, each of these references a root on any process
429 . ilocal     - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
430 during setup in debug mode)
431 . localmode  - copy mode for `ilocal`
432 . iremote    - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
433 during setup in debug mode)
434 - remotemode - copy mode for `iremote`
435 
436   Level: intermediate
437 
438   Notes:
439   Leaf indices in `ilocal` must be unique, otherwise an error occurs.
440 
441   Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
442   In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
443   PETSc might modify the respective array;
444   if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
445   Only if `PETSC_COPY_VALUES` is used is the respective array guaranteed to stay intact, and a const array can be passed (though a cast to non-const is needed).
446 
447   Fortran Notes:
448   In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
449 
450   Developer Notes:
451   We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
452   This also makes it easy to compare the leaf sets of two `PetscSF`s.
453 
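  Example Usage:
  A minimal sketch for exactly two MPI processes, where each process attaches two contiguous leaves to roots 0 and 1 on the other rank; `rank` is assumed to come from `MPI_Comm_rank()`:
.vb
  PetscSFNode remote[2];

  for (PetscInt i = 0; i < 2; i++) {
    remote[i].rank  = 1 - rank; /* the other process owns the referenced roots */
    remote[i].index = i;
  }
  PetscCall(PetscSFSetGraph(sf, 2, 2, NULL, PETSC_COPY_VALUES, remote, PETSC_COPY_VALUES)); /* NULL ilocal: contiguous leaves */
.ve
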
454 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
455 @*/
456 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
457 {
458   PetscBool unique, contiguous;
459 
460   PetscFunctionBegin;
461   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
462   if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
463   if (nleaves > 0) PetscAssertPointer(iremote, 6);
464   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
465   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
466   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
467    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
468   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
469   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
470 
471   if (sf->nroots >= 0) { /* Reset only if graph already set */
472     PetscCall(PetscSFReset(sf));
473   }
474 
475   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
476   if (PetscDefined(USE_DEBUG)) {
477     PetscMPIInt size;
478 
479     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
480     for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); }
481   }
482 
483   sf->nroots  = nroots;
484   sf->nleaves = nleaves;
485 
486   if (localmode == PETSC_COPY_VALUES && ilocal) {
487     PetscInt *tlocal = NULL;
488 
489     PetscCall(PetscMalloc1(nleaves, &tlocal));
490     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
491     ilocal = tlocal;
492   }
493   if (remotemode == PETSC_COPY_VALUES) {
494     PetscSFNode *tremote = NULL;
495 
496     PetscCall(PetscMalloc1(nleaves, &tremote));
497     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
498     iremote = tremote;
499   }
500 
501   if (nleaves && ilocal) {
502     PetscSFNode work;
503 
504     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
505     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
506     unique = PetscNot(unique);
507     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
508     sf->minleaf = ilocal[0];
509     sf->maxleaf = ilocal[nleaves - 1];
510     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
511   } else {
512     sf->minleaf = 0;
513     sf->maxleaf = nleaves - 1;
514     unique      = PETSC_TRUE;
515     contiguous  = PETSC_TRUE;
516   }
517 
518   if (contiguous) {
519     if (localmode == PETSC_USE_POINTER) {
520       ilocal = NULL;
521     } else {
522       PetscCall(PetscFree(ilocal));
523     }
524   }
525   sf->mine = ilocal;
526   if (localmode == PETSC_USE_POINTER) {
527     sf->mine_alloc = NULL;
528   } else {
529     sf->mine_alloc = ilocal;
530   }
531   if (PetscDefined(USE_DEBUG)) {
532     PetscMPIInt size;
533 
534     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
535     for (PetscInt i = 0; i < nleaves; i++) { PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values"); }
536   }
537   sf->remote = iremote;
538   if (remotemode == PETSC_USE_POINTER) {
539     sf->remote_alloc = NULL;
540   } else {
541     sf->remote_alloc = iremote;
542   }
543   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
544   sf->graphset = PETSC_TRUE;
545   PetscFunctionReturn(PETSC_SUCCESS);
546 }
547 
548 /*@
549   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
550 
551   Collective
552 
553   Input Parameters:
554 + sf      - The `PetscSF`
555 . map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
556 - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
557 
558   Level: intermediate
559 
560   Notes:
561   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
562   `n` and `N` are the local and global sizes of `x` respectively.
563 
564   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that, if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, `x` is copied to
565   sequential vectors `y` on all MPI processes.
566 
567   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that, if one does `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it, `x` is copied to a
568   sequential vector `y` on rank 0.
569 
570   In the above cases, entries of `x` are roots and entries of `y` are leaves.
571 
572   With `PETSCSF_PATTERN_ALLTOALL`, `map` is insignificant. Suppose NP is the size of `sf`'s communicator. The routine
573   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
574   of rank j, where 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. This does
575   not mean one cannot send multiple items; one just needs to create a new MPI datatype for the multiple data
576   items with `MPI_Type_contiguous()` and use that as the <unit> argument in `PetscSF` routines.
577 
578   In this case, roots and leaves are symmetric.
579 
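  Example Usage:
  A minimal allgather-style sketch, assuming `map` is the `PetscLayout` of an MPI vector `x`; the buffer names are illustrative:
.vb
  PetscSF      sf;
  PetscScalar *rootdata, *leafdata; /* allocate n entries for rootdata and N entries for leafdata (not shown) */

  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER));
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
.ve
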
580 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
581  @*/
582 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
583 {
584   MPI_Comm    comm;
585   PetscInt    n, N, res[2];
586   PetscMPIInt rank, size;
587   PetscSFType type;
588 
589   PetscFunctionBegin;
590   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
591   if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
592   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
593   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
594   PetscCallMPI(MPI_Comm_rank(comm, &rank));
595   PetscCallMPI(MPI_Comm_size(comm, &size));
596 
597   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
598     type = PETSCSFALLTOALL;
599     PetscCall(PetscLayoutCreate(comm, &sf->map));
600     PetscCall(PetscLayoutSetLocalSize(sf->map, size));
601     PetscCall(PetscLayoutSetSize(sf->map, (PetscInt)size * size));
602     PetscCall(PetscLayoutSetUp(sf->map));
603   } else {
604     PetscCall(PetscLayoutGetLocalSize(map, &n));
605     PetscCall(PetscLayoutGetSize(map, &N));
606     res[0] = n;
607     res[1] = -n;
608     /* Check if n is the same over all ranks so that we can optimize for that case */
609     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
610     if (res[0] == -res[1]) { /* same n */
611       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
612     } else {
613       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
614     }
615     PetscCall(PetscLayoutReference(map, &sf->map));
616   }
617   PetscCall(PetscSFSetType(sf, type));
618 
619   sf->pattern = pattern;
620   sf->mine    = NULL; /* Contiguous */
621 
622   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
623      Also set other easy stuff.
624    */
625   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
626     sf->nleaves = N;
627     sf->nroots  = n;
628     sf->nranks  = size;
629     sf->minleaf = 0;
630     sf->maxleaf = N - 1;
631   } else if (pattern == PETSCSF_PATTERN_GATHER) {
632     sf->nleaves = rank ? 0 : N;
633     sf->nroots  = n;
634     sf->nranks  = rank ? 0 : size;
635     sf->minleaf = 0;
636     sf->maxleaf = rank ? -1 : N - 1;
637   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
638     sf->nleaves = size;
639     sf->nroots  = size;
640     sf->nranks  = size;
641     sf->minleaf = 0;
642     sf->maxleaf = size - 1;
643   }
644   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
645   sf->graphset = PETSC_TRUE;
646   PetscFunctionReturn(PETSC_SUCCESS);
647 }
648 
649 /*@
650   PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map
651 
652   Collective
653 
654   Input Parameter:
655 . sf - star forest to invert
656 
657   Output Parameter:
658 . isf - inverse of `sf`
659 
660   Level: advanced
661 
662   Notes:
663   All roots must have degree 1.
664 
665   The local space may be a permutation, but cannot be sparse.
666 
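  Example Usage:
  A minimal sketch; the buffer names refer to `sf`'s own root/leaf spaces and are illustrative. A broadcast on `isf` moves data in the same direction as a reduction on `sf`:
.vb
  PetscSF isf;

  PetscCall(PetscSFCreateInverseSF(sf, &isf));
  /* leaves of sf serve as roots of isf, and roots of sf serve as its leaves */
  PetscCall(PetscSFBcastBegin(isf, MPIU_INT, leafdata, rootdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(isf, MPIU_INT, leafdata, rootdata, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&isf));
.ve
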
667 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
668 @*/
669 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
670 {
671   PetscMPIInt     rank;
672   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
673   const PetscInt *ilocal;
674   PetscSFNode    *roots, *leaves;
675 
676   PetscFunctionBegin;
677   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
678   PetscSFCheckGraphSet(sf, 1);
679   PetscAssertPointer(isf, 2);
680 
681   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
682   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
683 
684   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
685   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
686   for (i = 0; i < maxlocal; i++) {
687     leaves[i].rank  = rank;
688     leaves[i].index = i;
689   }
690   for (i = 0; i < nroots; i++) {
691     roots[i].rank  = -1;
692     roots[i].index = -1;
693   }
694   PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
695   PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
696 
697   /* Check whether our leaves are sparse */
698   for (i = 0, count = 0; i < nroots; i++)
699     if (roots[i].rank >= 0) count++;
700   if (count == nroots) newilocal = NULL;
701   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
        PetscCall(PetscMalloc1(count, &newilocal));
702     for (i = 0, count = 0; i < nroots; i++) {
703       if (roots[i].rank >= 0) {
704         newilocal[count]   = i;
705         roots[count].rank  = roots[i].rank;
706         roots[count].index = roots[i].index;
707         count++;
708       }
709     }
710   }
711 
712   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
713   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
714   PetscCall(PetscFree2(roots, leaves));
715   PetscFunctionReturn(PETSC_SUCCESS);
716 }
717 
718 /*@
719   PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
720 
721   Collective
722 
723   Input Parameters:
724 + sf  - communication object to duplicate
725 - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
726 
727   Output Parameter:
728 . newsf - new communication object
729 
730   Level: beginner
731 
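  Example Usage:
  A minimal sketch duplicating both the configuration and the graph:
.vb
  PetscSF newsf;

  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_GRAPH, &newsf));
  PetscCall(PetscSFSetUp(newsf)); /* newsf now communicates independently of sf */
  PetscCall(PetscSFDestroy(&newsf));
.ve
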
732 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
733 @*/
734 PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
735 {
736   PetscSFType  type;
737   MPI_Datatype dtype = MPIU_SCALAR;
738 
739   PetscFunctionBegin;
740   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
741   PetscValidLogicalCollectiveEnum(sf, opt, 2);
742   PetscAssertPointer(newsf, 3);
743   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
744   PetscCall(PetscSFGetType(sf, &type));
745   if (type) PetscCall(PetscSFSetType(*newsf, type));
746   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
747   if (opt == PETSCSF_DUPLICATE_GRAPH) {
748     PetscSFCheckGraphSet(sf, 1);
749     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
750       PetscInt           nroots, nleaves;
751       const PetscInt    *ilocal;
752       const PetscSFNode *iremote;
753       PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
754       PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
755     } else {
756       PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
757     }
758   }
759   /* Since oldtype is committed, so is newtype, according to MPI */
760   if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
761   (*newsf)->vscat.bs     = sf->vscat.bs;
762   (*newsf)->vscat.unit   = dtype;
763   (*newsf)->vscat.to_n   = sf->vscat.to_n;
764   (*newsf)->vscat.from_n = sf->vscat.from_n;
765   /* Do not copy lsf. Build it on demand since it is rarely used */
766 
767 #if defined(PETSC_HAVE_DEVICE)
768   (*newsf)->backend              = sf->backend;
769   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
770   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
771   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
772 #endif
773   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
774   /* Don't do PetscSFSetUp() since the new sf's graph might not have been set yet. */
775   PetscFunctionReturn(PETSC_SUCCESS);
776 }
777 
778 /*@C
779   PetscSFGetGraph - Get the graph specifying a parallel star forest
780 
781   Not Collective
782 
783   Input Parameter:
784 . sf - star forest
785 
786   Output Parameters:
787 + nroots  - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
788 . nleaves - number of leaf vertices on the current process, each of these references a root on any process
789 . ilocal  - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
790 - iremote - remote locations of root vertices for each leaf on the current process
791 
792   Level: intermediate
793 
794   Notes:
795   We do not currently require that the graph be set, thus `nroots` is returned as -1 if it has not been set yet
796 
797   The returned `ilocal` and `iremote` might contain values in a different order than the ones passed to `PetscSFSetGraph()`
798 
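  Example Usage:
  A minimal sketch iterating over the leaves; note the `NULL` `ilocal` convention for contiguous leaf storage:
.vb
  PetscInt           nroots, nleaves;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
  for (PetscInt i = 0; i < nleaves; i++) {
    const PetscInt leaf = ilocal ? ilocal[i] : i;
    /* leaf is the location in the leafdata buffer; iremote[i].rank and iremote[i].index identify the referenced root */
  }
.ve
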
799   Fortran Notes:
800   The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you
801   want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array.
802 
803   To check for a `NULL` `ilocal` use
804 $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
805 
806 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
807 @*/
808 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
809 {
810   PetscFunctionBegin;
811   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
812   if (sf->ops->GetGraph) {
813     PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
814   } else {
815     if (nroots) *nroots = sf->nroots;
816     if (nleaves) *nleaves = sf->nleaves;
817     if (ilocal) *ilocal = sf->mine;
818     if (iremote) *iremote = sf->remote;
819   }
820   PetscFunctionReturn(PETSC_SUCCESS);
821 }
822 
823 /*@
824   PetscSFGetLeafRange - Get the active leaf ranges
825 
826   Not Collective
827 
828   Input Parameter:
829 . sf - star forest
830 
831   Output Parameters:
832 + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
833 - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
834 
835   Level: developer
836 
837 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
838 @*/
839 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
840 {
841   PetscFunctionBegin;
842   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
843   PetscSFCheckGraphSet(sf, 1);
844   if (minleaf) *minleaf = sf->minleaf;
845   if (maxleaf) *maxleaf = sf->maxleaf;
846   PetscFunctionReturn(PETSC_SUCCESS);
847 }
848 
849 /*@
850   PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
851 
852   Collective
853 
854   Input Parameters:
855 + A    - the star forest
856 . obj  - Optional object that provides the prefix for the option names
857 - name - command line option
858 
859   Level: intermediate
860 
861   Note:
862   See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
863 
864 .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
865 @*/
866 PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
867 {
868   PetscFunctionBegin;
869   PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
870   PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
871   PetscFunctionReturn(PETSC_SUCCESS);
872 }
873 
874 /*@
875   PetscSFView - view a star forest
876 
877   Collective
878 
879   Input Parameters:
880 + sf     - star forest
881 - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
882 
883   Level: beginner
884 
885 .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
886 @*/
887 PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
888 {
889   PetscBool         iascii;
890   PetscViewerFormat format;
891 
892   PetscFunctionBegin;
893   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
894   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
895   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
896   PetscCheckSameComm(sf, 1, viewer, 2);
897   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
898   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
899   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
900     PetscMPIInt rank;
901     PetscInt    j;
902 
903     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
904     PetscCall(PetscViewerASCIIPushTab(viewer));
905     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
906       if (!sf->graphset) {
907         PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
908         PetscCall(PetscViewerASCIIPopTab(viewer));
909         PetscFunctionReturn(PETSC_SUCCESS);
910       }
911       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
912       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
913       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
914       for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%d,%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, (PetscMPIInt)sf->remote[i].rank, sf->remote[i].index));
915       PetscCall(PetscViewerFlush(viewer));
916       PetscCall(PetscViewerGetFormat(viewer, &format));
917       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
918         PetscMPIInt *tmpranks, *perm;
919 
920         PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
921         PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
922         for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
923         PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
924         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
925         for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
926           PetscMPIInt i = perm[ii];
927 
928           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
929           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
930         }
931         PetscCall(PetscFree2(tmpranks, perm));
932       }
933       PetscCall(PetscViewerFlush(viewer));
934       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
935     }
936     PetscCall(PetscViewerASCIIPopTab(viewer));
937   }
938   PetscTryTypeMethod(sf, View, viewer);
939   PetscFunctionReturn(PETSC_SUCCESS);
940 }
941 
942 /*@C
943   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
944 
945   Not Collective
946 
947   Input Parameter:
948 . sf - star forest
949 
950   Output Parameters:
951 + nranks  - number of ranks referenced by local part
952 . ranks   - [`nranks`] array of ranks
953 . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
954 . rmine   - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank, or `NULL`
955 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank, or `NULL`
956 
957   Level: developer
958 
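  Example Usage:
  A minimal sketch walking the per-rank ranges into `rmine`/`rremote`:
.vb
  PetscMPIInt        nranks;
  const PetscMPIInt *ranks;
  const PetscInt    *roffset, *rmine, *rremote;

  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
  for (PetscMPIInt r = 0; r < nranks; r++) {
    for (PetscInt j = roffset[r]; j < roffset[r + 1]; j++) {
      /* local leaf rmine[j] references root rremote[j] owned by rank ranks[r] */
    }
  }
.ve
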
959 .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
960 @*/
961 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
962 {
963   PetscFunctionBegin;
964   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
965   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
966   if (sf->ops->GetRootRanks) {
967     PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
968   } else {
969     /* The generic implementation */
970     if (nranks) *nranks = sf->nranks;
971     if (ranks) *ranks = sf->ranks;
972     if (roffset) *roffset = sf->roffset;
973     if (rmine) *rmine = sf->rmine;
974     if (rremote) *rremote = sf->rremote;
975   }
976   PetscFunctionReturn(PETSC_SUCCESS);
977 }
978 
979 /*@C
980   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
981 
982   Not Collective
983 
984   Input Parameter:
985 . sf - star forest
986 
987   Output Parameters:
988 + niranks  - number of leaf ranks referencing roots on this process
989 . iranks   - [`niranks`] array of ranks
990 . ioffset  - [`niranks`+1] offset in `irootloc` for each rank
991 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank
992 
993   Level: developer
994 
995 .seealso: `PetscSF`, `PetscSFGetRootRanks()`
996 @*/
997 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
998 {
999   PetscFunctionBegin;
1000   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1001   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
1002   if (sf->ops->GetLeafRanks) {
1003     PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
1004   } else {
1005     PetscSFType type;
1006     PetscCall(PetscSFGetType(sf, &type));
1007     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
1008   }
1009   PetscFunctionReturn(PETSC_SUCCESS);
1010 }
1011 
1012 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1013 {
1014   PetscInt i;
1015   for (i = 0; i < n; i++) {
1016     if (needle == list[i]) return PETSC_TRUE;
1017   }
1018   return PETSC_FALSE;
1019 }
1020 
1021 /*@C
1022   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.
1023 
1024   Collective
1025 
1026   Input Parameters:
1027 + sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1028 - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
1029 
1030   Level: developer
1031 
1032 .seealso: `PetscSF`, `PetscSFGetRootRanks()`
1033 @*/
1034 PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1035 {
1036   PetscHMapI    table;
1037   PetscHashIter pos;
1038   PetscMPIInt   size, groupsize, *groupranks, *ranks;
1039   PetscInt     *rcount;
1040   PetscInt      irank, sfnrank, ranksi;
1041   PetscMPIInt   i, orank = -1;
1042 
1043   PetscFunctionBegin;
1044   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1045   PetscSFCheckGraphSet(sf, 1);
1046   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1047   PetscCall(PetscHMapICreateWithSize(10, &table));
1048   for (i = 0; i < sf->nleaves; i++) {
1049     /* Log 1-based rank */
1050     PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
1051   }
1052   PetscCall(PetscHMapIGetSize(table, &sfnrank));
1053   PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
1054   PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
1055   PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1056   PetscHashIterBegin(table, pos);
1057   for (i = 0; i < sf->nranks; i++) {
1058     PetscHashIterGetKey(table, pos, ranksi);
1059     PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
1060     PetscHashIterGetVal(table, pos, rcount[i]);
1061     PetscHashIterNext(table, pos);
1062     ranks[i]--; /* Convert back to 0-based */
1063   }
1064   PetscCall(PetscHMapIDestroy(&table));
1065 
1066   /* We expect that dgroup is reliably "small" while nranks could be large */
1067   {
1068     MPI_Group    group = MPI_GROUP_NULL;
1069     PetscMPIInt *dgroupranks;
1070 
1071     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1072     PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
1073     PetscCall(PetscMalloc1(groupsize, &dgroupranks));
1074     PetscCall(PetscMalloc1(groupsize, &groupranks));
1075     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1076     if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
1077     PetscCallMPI(MPI_Group_free(&group));
1078     PetscCall(PetscFree(dgroupranks));
1079   }
1080 
1081   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1082   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1083     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1084       if (InList(ranks[i], groupsize, groupranks)) break;
1085     }
1086     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1087       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1088     }
1089     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1090       PetscMPIInt tmprank;
1091       PetscInt    tmpcount;
1092 
1093       tmprank             = ranks[i];
1094       tmpcount            = rcount[i];
1095       ranks[i]            = ranks[sf->ndranks];
1096       rcount[i]           = rcount[sf->ndranks];
1097       ranks[sf->ndranks]  = tmprank;
1098       rcount[sf->ndranks] = tmpcount;
1099       sf->ndranks++;
1100     }
1101   }
1102   PetscCall(PetscFree(groupranks));
1103   PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
1104   if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
1105   sf->roffset[0] = 0;
1106   for (i = 0; i < sf->nranks; i++) {
1107     PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
1108     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1109     rcount[i]          = 0;
1110   }
1111   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1112     /* short circuit */
1113     if (orank != sf->remote[i].rank) {
1114       /* Search for index of iremote[i].rank in sf->ranks */
1115       PetscCall(PetscFindMPIInt((PetscMPIInt)sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
1116       if (irank < 0) {
1117         PetscCall(PetscFindMPIInt((PetscMPIInt)sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1118         if (irank >= 0) irank += sf->ndranks;
1119       }
1120       orank = (PetscMPIInt)sf->remote[i].rank;
1121     }
1122     PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", (PetscMPIInt)sf->remote[i].rank);
1123     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1124     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1125     rcount[irank]++;
1126   }
1127   PetscCall(PetscFree2(rcount, ranks));
1128   PetscFunctionReturn(PETSC_SUCCESS);
1129 }
1130 
1131 /*@C
1132   PetscSFGetGroups - gets incoming and outgoing process groups
1133 
1134   Collective
1135 
1136   Input Parameter:
1137 . sf - star forest
1138 
1139   Output Parameters:
1140 + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1141 - outgoing - group of destination processes for outgoing edges (roots that I reference)
1142 
1143   Level: developer
1144 
1145 .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1146 @*/
1147 PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1148 {
1149   MPI_Group group = MPI_GROUP_NULL;
1150 
1151   PetscFunctionBegin;
1152   PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
1153   if (sf->ingroup == MPI_GROUP_NULL) {
1154     PetscInt        i;
1155     const PetscInt *indegree;
1156     PetscMPIInt     rank, *outranks, *inranks, indegree0;
1157     PetscSFNode    *remote;
1158     PetscSF         bgcount;
1159 
1160     /* Compute the number of incoming ranks */
1161     PetscCall(PetscMalloc1(sf->nranks, &remote));
1162     for (i = 0; i < sf->nranks; i++) {
1163       remote[i].rank  = sf->ranks[i];
1164       remote[i].index = 0;
1165     }
1166     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
1167     PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1168     PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
1169     PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
1170     /* Enumerate the incoming ranks */
1171     PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
1172     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1173     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1174     PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
1175     PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
1176     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1177     PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
1178     PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
1179     PetscCallMPI(MPI_Group_free(&group));
1180     PetscCall(PetscFree2(inranks, outranks));
1181     PetscCall(PetscSFDestroy(&bgcount));
1182   }
1183   *incoming = sf->ingroup;
1184 
1185   if (sf->outgroup == MPI_GROUP_NULL) {
1186     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1187     PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
1188     PetscCallMPI(MPI_Group_free(&group));
1189   }
1190   *outgoing = sf->outgroup;
1191   PetscFunctionReturn(PETSC_SUCCESS);
1192 }
1193 
1194 /*@
1195   PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
1196 
1197   Collective
1198 
1199   Input Parameter:
1200 . sf - star forest
1201 
1202   Output Parameter:
1203 . rsf - the star forest with a single root per process to perform communications
1204 
1205   Level: developer
1206 
1207 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
1208 @*/
1209 PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
1210 {
1211   PetscFunctionBegin;
1212   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1213   PetscAssertPointer(rsf, 2);
1214   if (!sf->rankssf) {
1215     PetscSFNode       *rremotes;
1216     const PetscMPIInt *ranks;
1217     PetscMPIInt        nranks;
1218 
1219     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
1220     PetscCall(PetscMalloc1(nranks, &rremotes));
1221     for (PetscInt i = 0; i < nranks; i++) {
1222       rremotes[i].rank  = ranks[i];
1223       rremotes[i].index = 0;
1224     }
1225     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
1226     PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
1227   }
1228   *rsf = sf->rankssf;
1229   PetscFunctionReturn(PETSC_SUCCESS);
1230 }
1231 
1232 /*@
1233   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
1234 
1235   Collective
1236 
1237   Input Parameter:
1238 . sf - star forest that may contain roots with 0 or with more than 1 vertex
1239 
1240   Output Parameter:
1241 . multi - star forest with split roots, such that each root has degree exactly 1
1242 
1243   Level: developer
1244 
1245   Note:
1246   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
1247   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1248   edge, it is a candidate for future optimization that might involve its removal.
1249 
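  Example Usage:
  A minimal gather sketch; the multi-`PetscSF` is used implicitly by `PetscSFGatherBegin()`, and the buffer names are illustrative:
.vb
  const PetscInt *degree;
  PetscInt       *leafdata, *multirootdata; /* size multirootdata by the sum of degree[] over the local roots (not shown) */

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  PetscCall(PetscSFGatherBegin(sf, MPIU_INT, leafdata, multirootdata));
  PetscCall(PetscSFGatherEnd(sf, MPIU_INT, leafdata, multirootdata));
.ve
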
1250 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1251 @*/
1252 PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1253 {
1254   PetscFunctionBegin;
1255   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1256   PetscAssertPointer(multi, 2);
1257   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1258     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1259     *multi           = sf->multi;
1260     sf->multi->multi = sf->multi;
1261     PetscFunctionReturn(PETSC_SUCCESS);
1262   }
1263   if (!sf->multi) {
1264     const PetscInt *indegree;
1265     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
1266     PetscSFNode    *remote;
1267     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1268     PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
1269     PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
1270     PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
1271     inoffset[0] = 0;
1272     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1273     for (i = 0; i < maxlocal; i++) outones[i] = 1;
1274     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1275     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1276     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1277     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1278       for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
1279     }
1280     PetscCall(PetscMalloc1(sf->nleaves, &remote));
1281     for (i = 0; i < sf->nleaves; i++) {
1282       remote[i].rank  = sf->remote[i].rank;
1283       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1284     }
1285     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1286     sf->multi->multi = sf->multi;
1287     PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1288     if (sf->rankorder) { /* Sort the ranks */
1289       PetscMPIInt  rank;
1290       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1291       PetscSFNode *newremote;
1292       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1293       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1294       PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
1295       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1296       PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1297       PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1298       /* Sort the incoming ranks at each vertex, build the inverse map */
1299       for (i = 0; i < sf->nroots; i++) {
1300         PetscInt j;
1301         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1302         PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
1303         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1304       }
1305       PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1306       PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1307       PetscCall(PetscMalloc1(sf->nleaves, &newremote));
1308       for (i = 0; i < sf->nleaves; i++) {
1309         newremote[i].rank  = sf->remote[i].rank;
1310         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1311       }
1312       PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
1313       PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
1314     }
1315     PetscCall(PetscFree3(inoffset, outones, outoffset));
1316   }
1317   *multi = sf->multi;
1318   PetscFunctionReturn(PETSC_SUCCESS);
1319 }
1320 
1321 /*@C
  PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`; does not remap indices
1323 
1324   Collective
1325 
1326   Input Parameters:
1327 + sf        - original star forest
1328 . nselected - number of selected roots on this process
1329 - selected  - indices of the selected roots on this process
1330 
1331   Output Parameter:
1332 . esf - new star forest
1333 
1334   Level: advanced
1335 
1336   Note:
  To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
  be done by calling `PetscSFGetGraph()`, as in the sketch below.
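
  Example:
  A minimal sketch (assuming `sf` is set up and `selected` holds `nselected` valid local root indices)\:
.vb
  PetscSF         esf;
  PetscInt        nleaves;
  const PetscInt *ilocal;

  PetscCall(PetscSFCreateEmbeddedRootSF(sf, nselected, selected, &esf));
  PetscCall(PetscSFGetGraph(esf, NULL, &nleaves, &ilocal, NULL)); /* ilocal[] lists the leaves still participating */
  /* ... communicate with esf ... */
  PetscCall(PetscSFDestroy(&esf));
.ve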
1339 
1340 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1341 @*/
1342 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1343 {
1344   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1345   const PetscInt    *ilocal;
1346   signed char       *rootdata, *leafdata, *leafmem;
1347   const PetscSFNode *iremote;
1348   PetscSFNode       *new_iremote;
1349   MPI_Comm           comm;
1350 
1351   PetscFunctionBegin;
1352   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1353   PetscSFCheckGraphSet(sf, 1);
1354   if (nselected) PetscAssertPointer(selected, 3);
1355   PetscAssertPointer(esf, 4);
1356 
1357   PetscCall(PetscSFSetUp(sf));
1358   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
1359   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1360   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1361 
1362   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1363     PetscBool dups;
1364     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
1365     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1366     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1367   }
1368 
1369   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1370   else {
1371     /* A generic version of creating embedded sf */
1372     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1373     maxlocal = maxleaf - minleaf + 1;
1374     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
1375     leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
1376     /* Tag selected roots and bcast to leaves */
1377     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1378     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1379     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1380 
1381     /* Build esf with leaves that are still connected */
1382     esf_nleaves = 0;
1383     for (i = 0; i < nleaves; i++) {
1384       j = ilocal ? ilocal[i] : i;
      /* esf_nleaves += leafdata[j] should work in theory, but it failed due to SFWindow bugs
         in PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
      */
1388       esf_nleaves += (leafdata[j] ? 1 : 0);
1389     }
1390     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
1391     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1392     for (i = n = 0; i < nleaves; i++) {
1393       j = ilocal ? ilocal[i] : i;
1394       if (leafdata[j]) {
1395         new_ilocal[n]        = j;
1396         new_iremote[n].rank  = iremote[i].rank;
1397         new_iremote[n].index = iremote[i].index;
1398         ++n;
1399       }
1400     }
1401     PetscCall(PetscSFCreate(comm, esf));
1402     PetscCall(PetscSFSetFromOptions(*esf));
1403     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1404     PetscCall(PetscFree2(rootdata, leafmem));
1405   }
1406   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
1407   PetscFunctionReturn(PETSC_SUCCESS);
1408 }
1409 
1410 /*@C
  PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`; does not remap indices
1412 
1413   Collective
1414 
1415   Input Parameters:
1416 + sf        - original star forest
1417 . nselected - number of selected leaves on this process
1418 - selected  - indices of the selected leaves on this process
1419 
1420   Output Parameter:
1421 . newsf - new star forest
1422 
1423   Level: advanced
1424 
1425 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1426 @*/
1427 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1428 {
1429   const PetscSFNode *iremote;
1430   PetscSFNode       *new_iremote;
1431   const PetscInt    *ilocal;
1432   PetscInt           i, nroots, *leaves, *new_ilocal;
1433   MPI_Comm           comm;
1434 
1435   PetscFunctionBegin;
1436   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1437   PetscSFCheckGraphSet(sf, 1);
1438   if (nselected) PetscAssertPointer(selected, 3);
1439   PetscAssertPointer(newsf, 4);
1440 
  /* Remove duplicates from selected[] and put the results in leaves[] */
1442   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1443   PetscCall(PetscMalloc1(nselected, &leaves));
1444   PetscCall(PetscArraycpy(leaves, selected, nselected));
1445   PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
1446   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1447 
1448   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1449   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1450   else {
1451     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1452     PetscCall(PetscMalloc1(nselected, &new_ilocal));
1453     PetscCall(PetscMalloc1(nselected, &new_iremote));
1454     for (i = 0; i < nselected; ++i) {
1455       const PetscInt l     = leaves[i];
1456       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1457       new_iremote[i].rank  = iremote[l].rank;
1458       new_iremote[i].index = iremote[l].index;
1459     }
1460     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1461     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1462   }
1463   PetscCall(PetscFree(leaves));
1464   PetscFunctionReturn(PETSC_SUCCESS);
1465 }
1466 
1467 /*@C
1468   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
1469 
1470   Collective
1471 
1472   Input Parameters:
1473 + sf       - star forest on which to communicate
1474 . unit     - data type associated with each node
1475 . rootdata - buffer to broadcast
1476 - op       - operation to use for reduction
1477 
1478   Output Parameter:
1479 . leafdata - buffer to be reduced with values from each leaf's respective root
1480 
1481   Level: intermediate
1482 
1483   Note:
  When PETSc is configured with device support, it will use its own mechanism to determine whether the given data pointers
  are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
  use `PetscSFBcastWithMemTypeBegin()` instead.
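
  Example:
  A typical broadcast of one `PetscInt` per node, with the buffers assumed to be allocated to match the graph\:
.vb
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
  /* unrelated work may be overlapped with the communication here */
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
.ve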
1487 
1488 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1489 @*/
1490 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1491 {
1492   PetscMemType rootmtype, leafmtype;
1493 
1494   PetscFunctionBegin;
1495   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1496   PetscCall(PetscSFSetUp(sf));
1497   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1498   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1499   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1500   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1501   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1502   PetscFunctionReturn(PETSC_SUCCESS);
1503 }
1504 
1505 /*@C
1506   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
1507   to `PetscSFBcastEnd()`
1508 
1509   Collective
1510 
1511   Input Parameters:
1512 + sf        - star forest on which to communicate
1513 . unit      - data type associated with each node
1514 . rootmtype - memory type of rootdata
1515 . rootdata  - buffer to broadcast
1516 . leafmtype - memory type of leafdata
1517 - op        - operation to use for reduction
1518 
1519   Output Parameter:
1520 . leafdata - buffer to be reduced with values from each leaf's respective root
1521 
1522   Level: intermediate
1523 
1524 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1525 @*/
1526 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1527 {
1528   PetscFunctionBegin;
1529   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1530   PetscCall(PetscSFSetUp(sf));
1531   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1532   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1533   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1534   PetscFunctionReturn(PETSC_SUCCESS);
1535 }
1536 
1537 /*@C
1538   PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
1539 
1540   Collective
1541 
1542   Input Parameters:
1543 + sf       - star forest
1544 . unit     - data type
1545 . rootdata - buffer to broadcast
1546 - op       - operation to use for reduction
1547 
1548   Output Parameter:
1549 . leafdata - buffer to be reduced with values from each leaf's respective root
1550 
1551   Level: intermediate
1552 
1553 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1554 @*/
1555 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1556 {
1557   PetscFunctionBegin;
1558   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1559   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1560   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1561   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
1562   PetscFunctionReturn(PETSC_SUCCESS);
1563 }
1564 
1565 /*@C
1566   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`
1567 
1568   Collective
1569 
1570   Input Parameters:
1571 + sf       - star forest
1572 . unit     - data type
1573 . leafdata - values to reduce
1574 - op       - reduction operation
1575 
1576   Output Parameter:
1577 . rootdata - result of reduction of values from all leaves of each root
1578 
1579   Level: intermediate
1580 
1581   Note:
  When PETSc is configured with device support, it will use its own mechanism to determine whether the given data pointers
  are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
  use `PetscSFReduceWithMemTypeBegin()` instead.
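
  Example:
  A typical sum of leaf values into their roots, with buffers assumed to be allocated to match the graph; note that with `MPI_SUM` the
  leaf contributions are combined into the existing contents of `rootdata`, so it is usually zeroed first\:
.vb
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
.ve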
1585 
1586 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
1587 @*/
1588 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1589 {
1590   PetscMemType rootmtype, leafmtype;
1591 
1592   PetscFunctionBegin;
1593   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1594   PetscCall(PetscSFSetUp(sf));
1595   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1596   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1597   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1598   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1599   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1600   PetscFunctionReturn(PETSC_SUCCESS);
1601 }
1602 
1603 /*@C
1604   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1605 
1606   Collective
1607 
1608   Input Parameters:
1609 + sf        - star forest
1610 . unit      - data type
1611 . leafmtype - memory type of leafdata
1612 . leafdata  - values to reduce
1613 . rootmtype - memory type of rootdata
1614 - op        - reduction operation
1615 
1616   Output Parameter:
1617 . rootdata - result of reduction of values from all leaves of each root
1618 
1619   Level: intermediate
1620 
1621 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1622 @*/
1623 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1624 {
1625   PetscFunctionBegin;
1626   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1627   PetscCall(PetscSFSetUp(sf));
1628   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1629   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1630   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1631   PetscFunctionReturn(PETSC_SUCCESS);
1632 }
1633 
1634 /*@C
1635   PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
1636 
1637   Collective
1638 
1639   Input Parameters:
1640 + sf       - star forest
1641 . unit     - data type
1642 . leafdata - values to reduce
1643 - op       - reduction operation
1644 
1645   Output Parameter:
1646 . rootdata - result of reduction of values from all leaves of each root
1647 
1648   Level: intermediate
1649 
1650 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
1651 @*/
1652 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1653 {
1654   PetscFunctionBegin;
1655   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1656   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1657   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1658   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1659   PetscFunctionReturn(PETSC_SUCCESS);
1660 }
1661 
1662 /*@C
1663   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1664   to be completed with `PetscSFFetchAndOpEnd()`
1665 
1666   Collective
1667 
1668   Input Parameters:
1669 + sf       - star forest
1670 . unit     - data type
1671 . leafdata - leaf values to use in reduction
1672 - op       - operation to use for reduction
1673 
1674   Output Parameters:
1675 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1676 - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1677 
1678   Level: advanced
1679 
1680   Note:
1681   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1682   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
  not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such as
  integers.
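
  Example:
  A common use is dynamic offset allocation, in which each leaf atomically claims a slot at its root; `PetscSFGetMultiSF()` builds its
  offsets this way internally. A sketch with illustrative buffer names\:
.vb
  /* rootoffset[] holds the starting offset of each root; ones[] is filled with 1s */
  PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootoffset, ones, leafoffset, MPI_SUM));
  PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootoffset, ones, leafoffset, MPI_SUM));
  /* leafoffset[i] now holds the root value observed just before this leaf's increment */
.ve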
1685 
1686 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1687 @*/
1688 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1689 {
1690   PetscMemType rootmtype, leafmtype, leafupdatemtype;
1691 
1692   PetscFunctionBegin;
1693   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1694   PetscCall(PetscSFSetUp(sf));
1695   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1696   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1697   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1698   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
1699   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1700   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1701   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1702   PetscFunctionReturn(PETSC_SUCCESS);
1703 }
1704 
1705 /*@C
1706   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1707   applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`
1708 
1709   Collective
1710 
1711   Input Parameters:
1712 + sf              - star forest
1713 . unit            - data type
1714 . rootmtype       - memory type of rootdata
1715 . leafmtype       - memory type of leafdata
1716 . leafdata        - leaf values to use in reduction
1717 . leafupdatemtype - memory type of leafupdate
1718 - op              - operation to use for reduction
1719 
1720   Output Parameters:
1721 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1722 - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1723 
1724   Level: advanced
1725 
1726   Note:
1727   See `PetscSFFetchAndOpBegin()` for more details.
1728 
1729 .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1730 @*/
1731 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1732 {
1733   PetscFunctionBegin;
1734   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1735   PetscCall(PetscSFSetUp(sf));
1736   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1737   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1738   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1739   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1740   PetscFunctionReturn(PETSC_SUCCESS);
1741 }
1742 
1743 /*@C
1744   PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
1745   to fetch values from roots and update atomically by applying operation using my leaf value
1746 
1747   Collective
1748 
1749   Input Parameters:
1750 + sf       - star forest
1751 . unit     - data type
1752 . leafdata - leaf values to use in reduction
1753 - op       - operation to use for reduction
1754 
1755   Output Parameters:
1756 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1757 - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1758 
1759   Level: advanced
1760 
1761 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1762 @*/
1763 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1764 {
1765   PetscFunctionBegin;
1766   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1767   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1768   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1769   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1770   PetscFunctionReturn(PETSC_SUCCESS);
1771 }
1772 
1773 /*@C
1774   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
1775 
1776   Collective
1777 
1778   Input Parameter:
1779 . sf - star forest
1780 
1781   Output Parameter:
1782 . degree - degree of each root vertex
1783 
1784   Level: advanced
1785 
1786   Note:
1787   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
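
  Example:
  A minimal sketch\:
.vb
  const PetscInt *degree;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  /* degree[i] is now the number of leaves connected to root i; do not free degree */
.ve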
1788 
1789 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1790 @*/
1791 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
1792 {
1793   PetscFunctionBegin;
1794   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1795   PetscSFCheckGraphSet(sf, 1);
1796   PetscAssertPointer(degree, 2);
1797   if (!sf->degreeknown) {
1798     PetscInt i, nroots = sf->nroots, maxlocal;
1799     PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1800     maxlocal = sf->maxleaf - sf->minleaf + 1;
1801     PetscCall(PetscMalloc1(nroots, &sf->degree));
1802     PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1803     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1804     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1805     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1806   }
1807   *degree = NULL;
1808   PetscFunctionReturn(PETSC_SUCCESS);
1809 }
1810 
1811 /*@C
1812   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
1813 
1814   Collective
1815 
1816   Input Parameter:
1817 . sf - star forest
1818 
1819   Output Parameter:
1820 . degree - degree of each root vertex
1821 
1822   Level: developer
1823 
1824   Note:
1825   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1826 
1827 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1828 @*/
1829 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
1830 {
1831   PetscFunctionBegin;
1832   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1833   PetscSFCheckGraphSet(sf, 1);
1834   PetscAssertPointer(degree, 2);
1835   if (!sf->degreeknown) {
1836     PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1837     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1838     PetscCall(PetscFree(sf->degreetmp));
1839     sf->degreeknown = PETSC_TRUE;
1840   }
1841   *degree = sf->degree;
1842   PetscFunctionReturn(PETSC_SUCCESS);
1843 }
1844 
1845 /*@C
  PetscSFComputeMultiRootOriginalNumbering - Returns the original numbering of multi-roots (roots of the multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
  Each multi-root is assigned the index of the corresponding original root.
1848 
1849   Collective
1850 
1851   Input Parameters:
1852 + sf     - star forest
1853 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`
1854 
1855   Output Parameters:
1856 + nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
1857 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1858 
1859   Level: developer
1860 
1861   Note:
1862   The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
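
  Example:
  A sketch chaining this routine with the degree computation (`nmr` and `morig` are illustrative names)\:
.vb
  const PetscInt *degree;
  PetscInt        nmr, *morig;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  PetscCall(PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmr, &morig));
  /* morig[k] is the original root index of multi-root k */
  PetscCall(PetscFree(morig));
.ve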
1863 
1864 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1865 @*/
1866 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1867 {
1868   PetscSF  msf;
1869   PetscInt k = 0, nroots, nmroots;
1870 
1871   PetscFunctionBegin;
1872   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1873   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1874   if (nroots) PetscAssertPointer(degree, 2);
1875   if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
1876   PetscAssertPointer(multiRootsOrigNumbering, 4);
1877   PetscCall(PetscSFGetMultiSF(sf, &msf));
1878   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1879   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1880   for (PetscInt i = 0; i < nroots; i++) {
1881     if (!degree[i]) continue;
1882     for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1883   }
  PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Sanity check failed: sum of root degrees does not match the number of multi-roots");
1885   if (nMultiRoots) *nMultiRoots = nmroots;
1886   PetscFunctionReturn(PETSC_SUCCESS);
1887 }
1888 
1889 /*@C
1890   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
1891 
1892   Collective
1893 
1894   Input Parameters:
1895 + sf       - star forest
1896 . unit     - data type
1897 - leafdata - leaf data to gather to roots
1898 
1899   Output Parameter:
. multirootdata - root buffer to gather into; the amount of space per root is equal to its degree
1901 
1902   Level: intermediate
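
  Example:
  A sketch gathering one `PetscInt` from every leaf; the multi-root buffer length is the sum of the local root degrees
  (see `PetscSFComputeDegreeBegin()`)\:
.vb
  PetscCall(PetscSFGatherBegin(sf, MPIU_INT, leafdata, multirootdata));
  PetscCall(PetscSFGatherEnd(sf, MPIU_INT, leafdata, multirootdata));
.ve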
1903 
1904 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1905 @*/
1906 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1907 {
1908   PetscSF multi = NULL;
1909 
1910   PetscFunctionBegin;
1911   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1912   PetscCall(PetscSFSetUp(sf));
1913   PetscCall(PetscSFGetMultiSF(sf, &multi));
1914   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1915   PetscFunctionReturn(PETSC_SUCCESS);
1916 }
1917 
1918 /*@C
1919   PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
1920 
1921   Collective
1922 
1923   Input Parameters:
1924 + sf       - star forest
1925 . unit     - data type
1926 - leafdata - leaf data to gather to roots
1927 
1928   Output Parameter:
. multirootdata - root buffer to gather into; the amount of space per root is equal to its degree
1930 
1931   Level: intermediate
1932 
1933 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1934 @*/
1935 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1936 {
1937   PetscSF multi = NULL;
1938 
1939   PetscFunctionBegin;
1940   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1941   PetscCall(PetscSFGetMultiSF(sf, &multi));
1942   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1943   PetscFunctionReturn(PETSC_SUCCESS);
1944 }
1945 
1946 /*@C
1947   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
1948 
1949   Collective
1950 
1951   Input Parameters:
1952 + sf            - star forest
1953 . unit          - data type
1954 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1955 
1956   Output Parameter:
. leafdata - leaf data to be updated with personal data from each respective root
1958 
1959   Level: intermediate
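
  Example:
  A sketch scattering a distinct `PetscInt` from each multi-root slot to its leaf (buffer layout as in `PetscSFGatherBegin()`)\:
.vb
  PetscCall(PetscSFScatterBegin(sf, MPIU_INT, multirootdata, leafdata));
  PetscCall(PetscSFScatterEnd(sf, MPIU_INT, multirootdata, leafdata));
.ve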
1960 
1961 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1962 @*/
1963 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1964 {
1965   PetscSF multi = NULL;
1966 
1967   PetscFunctionBegin;
1968   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1969   PetscCall(PetscSFSetUp(sf));
1970   PetscCall(PetscSFGetMultiSF(sf, &multi));
1971   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1972   PetscFunctionReturn(PETSC_SUCCESS);
1973 }
1974 
1975 /*@C
1976   PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
1977 
1978   Collective
1979 
1980   Input Parameters:
1981 + sf            - star forest
1982 . unit          - data type
1983 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1984 
1985   Output Parameter:
. leafdata - leaf data to be updated with personal data from each respective root
1987 
1988   Level: intermediate
1989 
1990 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1991 @*/
1992 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1993 {
1994   PetscSF multi = NULL;
1995 
1996   PetscFunctionBegin;
1997   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1998   PetscCall(PetscSFGetMultiSF(sf, &multi));
1999   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
2000   PetscFunctionReturn(PETSC_SUCCESS);
2001 }
2002 
2003 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
2004 {
2005   PetscInt        i, n, nleaves;
2006   const PetscInt *ilocal = NULL;
2007   PetscHSetI      seen;
2008 
2009   PetscFunctionBegin;
2010   if (PetscDefined(USE_DEBUG)) {
2011     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
2012     PetscCall(PetscHSetICreate(&seen));
2013     for (i = 0; i < nleaves; i++) {
2014       const PetscInt leaf = ilocal ? ilocal[i] : i;
2015       PetscCall(PetscHSetIAdd(seen, leaf));
2016     }
2017     PetscCall(PetscHSetIGetSize(seen, &n));
2018     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
2019     PetscCall(PetscHSetIDestroy(&seen));
2020   }
2021   PetscFunctionReturn(PETSC_SUCCESS);
2022 }
2023 
2024 /*@
2025   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
2026 
2027   Input Parameters:
2028 + sfA - The first `PetscSF`
2029 - sfB - The second `PetscSF`
2030 
2031   Output Parameter:
2032 . sfBA - The composite `PetscSF`
2033 
2034   Level: developer
2035 
2036   Notes:
2037   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2038   forests, i.e. the same leaf is not connected with different roots.
2039 
2040   `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
2041   a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
2042   nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
2043   `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
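
  Example:
  An illustrative single-process composition, writing edges as leaf <- (rank, index)\:
.vb
  sfA:  leaf 0 <- (0, 3)
  sfB:  leaf 7 <- (0, 0)
  sfBA: leaf 7 <- (0, 3)   (path: sfA root 3 -> sfA leaf 0 = sfB root 0 -> sfB leaf 7)
.ve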
2044 
2045 .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2046 @*/
2047 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2048 {
2049   const PetscSFNode *remotePointsA, *remotePointsB;
2050   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
2051   const PetscInt    *localPointsA, *localPointsB;
2052   PetscInt          *localPointsBA;
2053   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
2054   PetscBool          denseB;
2055 
2056   PetscFunctionBegin;
2057   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
2058   PetscSFCheckGraphSet(sfA, 1);
2059   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
2060   PetscSFCheckGraphSet(sfB, 2);
2061   PetscCheckSameComm(sfA, 1, sfB, 2);
2062   PetscAssertPointer(sfBA, 3);
2063   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2064   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2065 
2066   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2067   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2068   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
     numRootsB; otherwise, garbage will be broadcast.
2070      Example (comm size = 1):
2071      sfA: 0 <- (0, 0)
2072      sfB: 100 <- (0, 0)
2073           101 <- (0, 1)
     Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
     of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
     receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
     remotePointsA; if not recast, point 101 would receive a garbage value.             */
2078   PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
2079   for (i = 0; i < numRootsB; i++) {
2080     reorderedRemotePointsA[i].rank  = -1;
2081     reorderedRemotePointsA[i].index = -1;
2082   }
2083   for (i = 0; i < numLeavesA; i++) {
2084     PetscInt localp = localPointsA ? localPointsA[i] : i;
2085 
2086     if (localp >= numRootsB) continue;
2087     reorderedRemotePointsA[localp] = remotePointsA[i];
2088   }
2089   remotePointsA = reorderedRemotePointsA;
2090   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2091   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
2092   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2093     leafdataB[i].rank  = -1;
2094     leafdataB[i].index = -1;
2095   }
2096   PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2097   PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2098   PetscCall(PetscFree(reorderedRemotePointsA));
2099 
2100   denseB = (PetscBool)!localPointsB;
2101   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2102     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
2103     else numLeavesBA++;
2104   }
2105   if (denseB) {
2106     localPointsBA  = NULL;
2107     remotePointsBA = leafdataB;
2108   } else {
2109     PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
2110     PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
2111     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2112       const PetscInt l = localPointsB ? localPointsB[i] : i;
2113 
2114       if (leafdataB[l - minleaf].rank == -1) continue;
2115       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
2116       localPointsBA[numLeavesBA]  = l;
2117       numLeavesBA++;
2118     }
2119     PetscCall(PetscFree(leafdataB));
2120   }
2121   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2122   PetscCall(PetscSFSetFromOptions(*sfBA));
2123   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2124   PetscFunctionReturn(PETSC_SUCCESS);
2125 }
2126 
2127 /*@
2128   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
2129 
2130   Input Parameters:
2131 + sfA - The first `PetscSF`
2132 - sfB - The second `PetscSF`
2133 
2134   Output Parameter:
2135 . sfBA - The composite `PetscSF`.
2136 
2137   Level: developer
2138 
2139   Notes:
2140   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
  second `PetscSF` must have a degree of 1, i.e., no root has more than one leaf connected.
2143 
2144   `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
2145   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2146   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
2147   on `sfA`, then
2148   a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.
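
  Example:
  An illustrative single-process case, writing edges as leaf <- (rank, index); the shared leaf 5 links `sfA`'s root 2 to `sfB`'s root 9\:
.vb
  sfA:  leaf 5 <- (0, 2)
  sfB:  leaf 5 <- (0, 9)
  sfBA: leaf 9 <- (0, 2)
.ve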
2149 
2150 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2151 @*/
2152 PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2153 {
2154   const PetscSFNode *remotePointsA, *remotePointsB;
2155   PetscSFNode       *remotePointsBA;
2156   const PetscInt    *localPointsA, *localPointsB;
2157   PetscSFNode       *reorderedRemotePointsA = NULL;
2158   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2159   MPI_Op             op;
2160 #if defined(PETSC_USE_64BIT_INDICES)
2161   PetscBool iswin;
2162 #endif
2163 
2164   PetscFunctionBegin;
2165   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
2166   PetscSFCheckGraphSet(sfA, 1);
2167   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
2168   PetscSFCheckGraphSet(sfB, 2);
2169   PetscCheckSameComm(sfA, 1, sfB, 2);
2170   PetscAssertPointer(sfBA, 3);
2171   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2172   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2173 
2174   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2175   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2176 
  /* TODO: Check that roots of sfB have degree of 1 */
  /* Once we implement that check, we can replace MPI_MAXLOC
     with MPI_REPLACE; when every root of sfB has exactly one leaf, the two have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
2183   op = MPI_MAXLOC;
2184 #if defined(PETSC_USE_64BIT_INDICES)
2185   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2186   PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
2187   if (iswin) op = MPI_REPLACE;
2188 #endif
2189 
2190   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2191   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
2192   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2193     reorderedRemotePointsA[i].rank  = -1;
2194     reorderedRemotePointsA[i].index = -1;
2195   }
2196   if (localPointsA) {
2197     for (i = 0; i < numLeavesA; i++) {
2198       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2199       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2200     }
2201   } else {
2202     for (i = 0; i < numLeavesA; i++) {
2203       if (i > maxleaf || i < minleaf) continue;
2204       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2205     }
2206   }
2207 
2208   PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
2209   PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
2210   for (i = 0; i < numRootsB; i++) {
2211     remotePointsBA[i].rank  = -1;
2212     remotePointsBA[i].index = -1;
2213   }
2214 
2215   PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2216   PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2217   PetscCall(PetscFree(reorderedRemotePointsA));
2218   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2219     if (remotePointsBA[i].rank == -1) continue;
2220     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
2221     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2222     localPointsBA[numLeavesBA]        = i;
2223     numLeavesBA++;
2224   }
2225   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2226   PetscCall(PetscSFSetFromOptions(*sfBA));
2227   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2228   PetscFunctionReturn(PETSC_SUCCESS);
2229 }
2230 
2231 /*
2232   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
2233 
2234   Input Parameter:
2235 . sf - The global `PetscSF`
2236 
2237   Output Parameter:
2238 . out - The local `PetscSF`
2239 
2240 .seealso: `PetscSF`, `PetscSFCreate()`
2241  */
2242 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2243 {
2244   MPI_Comm           comm;
2245   PetscMPIInt        myrank;
2246   const PetscInt    *ilocal;
2247   const PetscSFNode *iremote;
2248   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
2249   PetscSFNode       *liremote;
2250   PetscSF            lsf;
2251 
2252   PetscFunctionBegin;
2253   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2254   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2255   else {
2256     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2257     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
2258     PetscCallMPI(MPI_Comm_rank(comm, &myrank));
2259 
2260     /* Find out local edges and build a local SF */
2261     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
2262     for (i = lnleaves = 0; i < nleaves; i++) {
2263       if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
2264     }
2265     PetscCall(PetscMalloc1(lnleaves, &lilocal));
2266     PetscCall(PetscMalloc1(lnleaves, &liremote));
2267 
2268     for (i = j = 0; i < nleaves; i++) {
2269       if (iremote[i].rank == (PetscInt)myrank) {
2270         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2271         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
2272         liremote[j].index = iremote[i].index;
2273         j++;
2274       }
2275     }
2276     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
2277     PetscCall(PetscSFSetFromOptions(lsf));
2278     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
2279     PetscCall(PetscSFSetUp(lsf));
2280     *out = lsf;
2281   }
2282   PetscFunctionReturn(PETSC_SUCCESS);
2283 }
2284 
2285 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2286 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2287 {
2288   PetscMemType rootmtype, leafmtype;
2289 
2290   PetscFunctionBegin;
2291   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2292   PetscCall(PetscSFSetUp(sf));
2293   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2294   PetscCall(PetscGetMemType(rootdata, &rootmtype));
2295   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2296   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2297   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2298   PetscFunctionReturn(PETSC_SUCCESS);
2299 }
2300 
2301 /*@
2302   PetscSFConcatenate - concatenate multiple `PetscSF` into one
2303 
2304   Input Parameters:
2305 + comm        - the communicator
2306 . nsfs        - the number of input `PetscSF`
2307 . sfs         - the array of input `PetscSF`
2308 . rootMode    - the root mode specifying how roots are handled
2309 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2310 
2311   Output Parameter:
2312 . newsf - The resulting `PetscSF`
2313 
2314   Level: advanced
2315 
2316   Notes:
  The communicator of all `PetscSF`s in `sfs` must be `comm`.
2318 
2319   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
2320 
2321   The offsets in `leafOffsets` are added to the original leaf indices.
2322 
2323   If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
2324   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
2325 
2326   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
  In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2328 
2329   All root modes retain the essential connectivity condition.
2330   If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2331   Parameter `rootMode` controls how the input root spaces are combined.
2332   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2333   and is also the same in the output `PetscSF`.
2334   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2335   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2336   roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
2337   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2338   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2339   the original root ranks are ignored.
2340   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
  the output `PetscSF`'s root layout is such that the local number of roots is the sum of the input `PetscSF`s' local numbers of roots on each rank,
  preserving the load balance.
2343   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.
2344 
2345   Example:
2346   We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
2347 .vb
2348   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
2349   for m in {local,global,shared}; do
2350     mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
2351   done
2352 .ve
2353   we generate two identical `PetscSF`s sf_0 and sf_1,
2354 .vb
2355   PetscSF Object: sf_0 2 MPI processes
2356     type: basic
2357     rank #leaves #roots
2358     [ 0]       4      2
2359     [ 1]       4      2
2360     leaves      roots       roots in global numbering
2361     ( 0,  0) <- ( 0,  0)  =   0
2362     ( 0,  1) <- ( 0,  1)  =   1
2363     ( 0,  2) <- ( 1,  0)  =   2
2364     ( 0,  3) <- ( 1,  1)  =   3
2365     ( 1,  0) <- ( 0,  0)  =   0
2366     ( 1,  1) <- ( 0,  1)  =   1
2367     ( 1,  2) <- ( 1,  0)  =   2
2368     ( 1,  3) <- ( 1,  1)  =   3
2369 .ve
2370   and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
2371 .vb
2372   rootMode = local:
2373   PetscSF Object: result_sf 2 MPI processes
2374     type: basic
2375     rank #leaves #roots
2376     [ 0]       8      4
2377     [ 1]       8      4
2378     leaves      roots       roots in global numbering
2379     ( 0,  0) <- ( 0,  0)  =   0
2380     ( 0,  1) <- ( 0,  1)  =   1
2381     ( 0,  2) <- ( 1,  0)  =   4
2382     ( 0,  3) <- ( 1,  1)  =   5
2383     ( 0,  4) <- ( 0,  2)  =   2
2384     ( 0,  5) <- ( 0,  3)  =   3
2385     ( 0,  6) <- ( 1,  2)  =   6
2386     ( 0,  7) <- ( 1,  3)  =   7
2387     ( 1,  0) <- ( 0,  0)  =   0
2388     ( 1,  1) <- ( 0,  1)  =   1
2389     ( 1,  2) <- ( 1,  0)  =   4
2390     ( 1,  3) <- ( 1,  1)  =   5
2391     ( 1,  4) <- ( 0,  2)  =   2
2392     ( 1,  5) <- ( 0,  3)  =   3
2393     ( 1,  6) <- ( 1,  2)  =   6
2394     ( 1,  7) <- ( 1,  3)  =   7
2395 
2396   rootMode = global:
2397   PetscSF Object: result_sf 2 MPI processes
2398     type: basic
2399     rank #leaves #roots
2400     [ 0]       8      4
2401     [ 1]       8      4
2402     leaves      roots       roots in global numbering
2403     ( 0,  0) <- ( 0,  0)  =   0
2404     ( 0,  1) <- ( 0,  1)  =   1
2405     ( 0,  2) <- ( 0,  2)  =   2
2406     ( 0,  3) <- ( 0,  3)  =   3
2407     ( 0,  4) <- ( 1,  0)  =   4
2408     ( 0,  5) <- ( 1,  1)  =   5
2409     ( 0,  6) <- ( 1,  2)  =   6
2410     ( 0,  7) <- ( 1,  3)  =   7
2411     ( 1,  0) <- ( 0,  0)  =   0
2412     ( 1,  1) <- ( 0,  1)  =   1
2413     ( 1,  2) <- ( 0,  2)  =   2
2414     ( 1,  3) <- ( 0,  3)  =   3
2415     ( 1,  4) <- ( 1,  0)  =   4
2416     ( 1,  5) <- ( 1,  1)  =   5
2417     ( 1,  6) <- ( 1,  2)  =   6
2418     ( 1,  7) <- ( 1,  3)  =   7
2419 
2420   rootMode = shared:
2421   PetscSF Object: result_sf 2 MPI processes
2422     type: basic
2423     rank #leaves #roots
2424     [ 0]       8      2
2425     [ 1]       8      2
2426     leaves      roots       roots in global numbering
2427     ( 0,  0) <- ( 0,  0)  =   0
2428     ( 0,  1) <- ( 0,  1)  =   1
2429     ( 0,  2) <- ( 1,  0)  =   2
2430     ( 0,  3) <- ( 1,  1)  =   3
2431     ( 0,  4) <- ( 0,  0)  =   0
2432     ( 0,  5) <- ( 0,  1)  =   1
2433     ( 0,  6) <- ( 1,  0)  =   2
2434     ( 0,  7) <- ( 1,  1)  =   3
2435     ( 1,  0) <- ( 0,  0)  =   0
2436     ( 1,  1) <- ( 0,  1)  =   1
2437     ( 1,  2) <- ( 1,  0)  =   2
2438     ( 1,  3) <- ( 1,  1)  =   3
2439     ( 1,  4) <- ( 0,  0)  =   0
2440     ( 1,  5) <- ( 0,  1)  =   1
2441     ( 1,  6) <- ( 1,  0)  =   2
2442     ( 1,  7) <- ( 1,  1)  =   3
2443 .ve
2444 
2445 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2446 @*/
2447 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2448 {
2449   PetscInt     i, s, nLeaves, nRoots;
2450   PetscInt    *leafArrayOffsets;
2451   PetscInt    *ilocal_new;
2452   PetscSFNode *iremote_new;
2453   PetscBool    all_ilocal_null = PETSC_FALSE;
2454   PetscLayout  glayout         = NULL;
2455   PetscInt    *gremote         = NULL;
2456   PetscMPIInt  rank, size;
2457 
2458   PetscFunctionBegin;
2459   if (PetscDefined(USE_DEBUG)) {
2460     PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2461 
2462     PetscCall(PetscSFCreate(comm, &dummy));
2463     PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
2464     PetscAssertPointer(sfs, 3);
2465     for (i = 0; i < nsfs; i++) {
2466       PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
2467       PetscCheckSameComm(dummy, 1, sfs[i], 3);
2468     }
2469     PetscValidLogicalCollectiveEnum(dummy, rootMode, 4);
2470     if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
2471     PetscAssertPointer(newsf, 6);
2472     PetscCall(PetscSFDestroy(&dummy));
2473   }
2474   if (!nsfs) {
2475     PetscCall(PetscSFCreate(comm, newsf));
2476     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2477     PetscFunctionReturn(PETSC_SUCCESS);
2478   }
2479   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2480   PetscCallMPI(MPI_Comm_size(comm, &size));
2481 
2482   /* Calculate leaf array offsets */
2483   PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2484   leafArrayOffsets[0] = 0;
2485   for (s = 0; s < nsfs; s++) {
2486     PetscInt nl;
2487 
2488     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2489     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2490   }
2491   nLeaves = leafArrayOffsets[nsfs];
2492 
2493   /* Calculate number of roots */
2494   switch (rootMode) {
2495   case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
2496     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2497     if (PetscDefined(USE_DEBUG)) {
2498       for (s = 1; s < nsfs; s++) {
2499         PetscInt nr;
2500 
2501         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2502         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
2503       }
2504     }
2505   } break;
2506   case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
2507     /* Calculate also global layout in this case */
2508     PetscInt    *nls;
2509     PetscLayout *lts;
2510     PetscInt   **inds;
2511     PetscInt     j;
2512     PetscInt     rootOffset = 0;
2513 
2514     PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
2515     PetscCall(PetscLayoutCreate(comm, &glayout));
2516     glayout->bs = 1;
2517     glayout->n  = 0;
2518     glayout->N  = 0;
2519     for (s = 0; s < nsfs; s++) {
2520       PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
2521       glayout->n += lts[s]->n;
2522       glayout->N += lts[s]->N;
2523     }
2524     PetscCall(PetscLayoutSetUp(glayout));
2525     PetscCall(PetscMalloc1(nLeaves, &gremote));
2526     for (s = 0, j = 0; s < nsfs; s++) {
2527       for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
2528       rootOffset += lts[s]->N;
2529       PetscCall(PetscLayoutDestroy(&lts[s]));
2530       PetscCall(PetscFree(inds[s]));
2531     }
2532     PetscCall(PetscFree3(lts, nls, inds));
2533     nRoots = glayout->N;
2534   } break;
2535   case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
2536     /* nRoots calculated later in this case */
2537     break;
2538   default:
2539     SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
2540   }
2541 
2542   if (!leafOffsets) {
2543     all_ilocal_null = PETSC_TRUE;
2544     for (s = 0; s < nsfs; s++) {
2545       const PetscInt *ilocal;
2546 
2547       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2548       if (ilocal) {
2549         all_ilocal_null = PETSC_FALSE;
2550         break;
2551       }
2552     }
2553     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2554   }
2555 
2556   /* Renumber and concatenate local leaves */
2557   ilocal_new = NULL;
2558   if (!all_ilocal_null) {
2559     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2560     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2561     for (s = 0; s < nsfs; s++) {
2562       const PetscInt *ilocal;
2563       PetscInt       *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
2564       PetscInt        i, nleaves_l;
2565 
2566       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2567       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2568     }
2569   }
2570 
2571   /* Renumber and concatenate remote roots */
2572   if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
2573     PetscInt rootOffset = 0;
2574 
2575     PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2576     for (i = 0; i < nLeaves; i++) {
2577       iremote_new[i].rank  = -1;
2578       iremote_new[i].index = -1;
2579     }
2580     for (s = 0; s < nsfs; s++) {
2581       PetscInt           i, nl, nr;
2582       PetscSF            tmp_sf;
2583       const PetscSFNode *iremote;
2584       PetscSFNode       *tmp_rootdata;
2585       PetscSFNode       *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);
2586 
2587       PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
2588       PetscCall(PetscSFCreate(comm, &tmp_sf));
2589       /* create helper SF with contiguous leaves */
2590       PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
2591       PetscCall(PetscSFSetUp(tmp_sf));
2592       PetscCall(PetscMalloc1(nr, &tmp_rootdata));
2593       if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2594         for (i = 0; i < nr; i++) {
2595           tmp_rootdata[i].index = i + rootOffset;
2596           tmp_rootdata[i].rank  = rank;
2597         }
2598         rootOffset += nr;
2599       } else {
2600         for (i = 0; i < nr; i++) {
2601           tmp_rootdata[i].index = i;
2602           tmp_rootdata[i].rank  = rank;
2603         }
2604       }
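      /* push each root's new (rank, index) pair to its leaves; the values received on the
         leaf side are exactly the iremote entries of the concatenated SF */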
2605       PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2606       PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2607       PetscCall(PetscSFDestroy(&tmp_sf));
2608       PetscCall(PetscFree(tmp_rootdata));
2609     }
2610     if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above
2611 
2612     /* Build the new SF */
2613     PetscCall(PetscSFCreate(comm, newsf));
2614     PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
2615   } else {
2616     /* Build the new SF */
2617     PetscCall(PetscSFCreate(comm, newsf));
2618     PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
2619   }
2620   PetscCall(PetscSFSetUp(*newsf));
2621   PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
2622   PetscCall(PetscLayoutDestroy(&glayout));
2623   PetscCall(PetscFree(gremote));
2624   PetscCall(PetscFree(leafArrayOffsets));
2625   PetscFunctionReturn(PETSC_SUCCESS);
2626 }
2627 
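/*
  A minimal usage sketch of PetscSFConcatenate() above (sfA and sfB are hypothetical
  star forests with ilocal == NULL, which is what permits passing leafOffsets as NULL;
  the SHARED root mode additionally assumes they have the same number of local roots):

    PetscSF sfs[2] = {sfA, sfB}, sfAB;
    PetscCall(PetscSFConcatenate(comm, 2, sfs, PETSCSF_CONCATENATE_ROOTMODE_SHARED, NULL, &sfAB));
    // ... use sfAB ...
    PetscCall(PetscSFDestroy(&sfAB));
*/
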
2628 /*@
2629   PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated PetscSF communications.
2630 
2631   Collective
2632 
2633   Input Parameters:
2634 + sf       - star forest
2635 . unit     - the data type contained within the root and leaf data
2636 . rootdata - root data that will be used for multiple `PetscSF` communications
2637 - leafdata - leaf data that will be used for multiple `PetscSF` communications
2638 
2639   Level: advanced
2640 
2641   Notes:
2642   Implementations of `PetscSF` can make optimizations
2643   for repeated communication using the same memory regions, but these optimizations
2644   can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed.
2645   The intended pattern is
2646 
2647 .vb
2648   PetscMalloc2(nroots, &rootdata, nleaves, &leafdata);
2649 
2650   PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
2651   // repeated use of rootdata and leafdata will now be optimized
2652 
2653   PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE);
2654   PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE);
2655   // ...
2656   PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM);
2657   PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM);
2658   // ... (other communications)
2659 
2660   // rootdata and leafdata must be deregistered before freeing
2661   // skipping this can lead to undefined behavior including
2662   // deadlocks
2663   PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
2664 
2665   // it is now safe to free rootdata and leafdata
2666   PetscFree2(rootdata, leafdata);
2667 .ve
2668 
2669   If you do not register `rootdata` and `leafdata`, it will not cause an error,
2670   but optimizations that reduce the setup time of each communication cannot be
2671   made.  Currently, the only implementation of `PetscSF` that benefits from
2672   `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`; for the default
2673   `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`.
2674 
2675 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
2676 @*/
2677 PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
2678 {
2679   PetscFunctionBegin;
2680   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2681   PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
2682   PetscFunctionReturn(PETSC_SUCCESS);
2683 }
2684 
2685 /*@
2686   PetscSFDeregisterPersistent - Signal that repeated usage of root and leaf data for PetscSF communication has concluded.
2687 
2688   Collective
2689 
2690   Input Parameters:
2691 + sf       - star forest
2692 . unit     - the data type contained within the root and leaf data
2693 . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
2694 - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`
2695 
2696   Level: advanced
2697 
2698   Notes:
2699   See `PetscSFRegisterPersistent()` for when/how to use this function.
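
  The same `unit`, `rootdata`, and `leafdata` that were passed to `PetscSFRegisterPersistent()`
  should be passed here, as in this minimal sketch:

.vb
  PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
  // it is now safe to free rootdata and leafdata
  PetscFree2(rootdata, leafdata);
.ve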
2700 
2701 .seealso: `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
2702 @*/
2703 PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
2704 {
2705   PetscFunctionBegin;
2706   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2707   PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
2708   PetscFunctionReturn(PETSC_SUCCESS);
2709 }
2710 
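/* Get the extent in bytes of one unit of the given MPI datatype; datatypes with a
   nonzero lower bound are not supported */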
2711 PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
2712 {
2713   MPI_Aint lb, lb_true, bytes, bytes_true;
2714 
2715   PetscFunctionBegin;
2716   PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
2717   PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
2718   PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write to petsc-maint@mcs.anl.gov if you want this feature");
2719   *size = bytes;
2720   PetscFunctionReturn(PETSC_SUCCESS);
2721 }
2722