xref: /petsc/src/vec/is/sf/interface/sf.c (revision d756bedd70a89ca052be956bccd75c5761cb2ab4)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petsc/private/hashmapi.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <petscdevice_cuda.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 #endif
13 
14 #if defined(PETSC_CLANG_STATIC_ANALYZER)
15 extern void PetscSFCheckGraphSet(PetscSF, int);
16 #else
17   #if defined(PETSC_USE_DEBUG)
18     #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
19   #else
20     #define PetscSFCheckGraphSet(sf, arg) \
21       do { \
22       } while (0)
23   #endif
24 #endif
25 
26 const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
27 const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
28 
29 /*@
30   PetscSFCreate - create a star forest communication context
31 
32   Collective
33 
34   Input Parameter:
35 . comm - communicator on which the star forest will operate
36 
37   Output Parameter:
38 . sf - new star forest context
39 
40   Options Database Key:
41 + -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
42 . -sf_type window                - Use MPI-3 one-sided window for communication
43 . -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
44 - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)
45 
46   Level: intermediate
47 
48   Note:
49   When one knows the communication graph is one of the predefined graphs, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
50   `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
51   `PetscSF`s are optimized and have better performance than the general `PetscSF`s.
52 
53 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
54 @*/
55 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
56 {
57   PetscSF b;
58 
59   PetscFunctionBegin;
60   PetscAssertPointer(sf, 2);
61   PetscCall(PetscSFInitializePackage());
62 
63   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
   /* Mark the graph as unset: negative counts and inverted leaf-range sentinels
      (minleaf = PETSC_INT_MAX, maxleaf = PETSC_INT_MIN) until PetscSFSetGraph() runs */
64   b->nroots    = -1;
65   b->nleaves   = -1;
66   b->minleaf   = PETSC_INT_MAX;
67   b->maxleaf   = PETSC_INT_MIN;
68   b->nranks    = -1;
69   b->rankorder = PETSC_TRUE;
70   b->ingroup   = MPI_GROUP_NULL;
71   b->outgroup  = MPI_GROUP_NULL;
72   b->graphset  = PETSC_FALSE;
73 #if defined(PETSC_HAVE_DEVICE)
74   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
75   b->use_stream_aware_mpi = PETSC_FALSE;
76   b->unknown_input_stream = PETSC_FALSE;
77   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
78   b->backend = PETSCSF_BACKEND_KOKKOS;
79   #elif defined(PETSC_HAVE_CUDA)
80   b->backend = PETSCSF_BACKEND_CUDA;
81   #elif defined(PETSC_HAVE_HIP)
82   b->backend = PETSCSF_BACKEND_HIP;
83   #endif
84 
85   #if defined(PETSC_HAVE_NVSHMEM)
86   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
87   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
88   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
89   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
90   #endif
91 #endif
   /* VecScatter bookkeeping: sizes unknown (-1) until a scatter context is built */
92   b->vscat.from_n = -1;
93   b->vscat.to_n   = -1;
94   b->vscat.unit   = MPIU_SCALAR;
95   *sf             = b;
96   PetscFunctionReturn(PETSC_SUCCESS);
97 }
98 
99 /*@
100   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
101 
102   Collective
103 
104   Input Parameter:
105 . sf - star forest
106 
107   Level: advanced
108 
109 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
110 @*/
111 PetscErrorCode PetscSFReset(PetscSF sf)
112 {
113   PetscFunctionBegin;
114   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
    /* Let the implementation release its own communication state first */
115   PetscTryTypeMethod(sf, Reset);
116   PetscCall(PetscSFDestroy(&sf->rankssf));
117 
    /* Return graph-related fields to the same "unset" values PetscSFCreate() uses */
118   sf->nroots   = -1;
119   sf->nleaves  = -1;
120   sf->minleaf  = PETSC_INT_MAX;
121   sf->maxleaf  = PETSC_INT_MIN;
122   sf->mine     = NULL;
123   sf->remote   = NULL;
124   sf->graphset = PETSC_FALSE;
125   PetscCall(PetscFree(sf->mine_alloc));
126   PetscCall(PetscFree(sf->remote_alloc));
127   sf->nranks = -1;
128   PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
129   sf->degreeknown = PETSC_FALSE;
130   PetscCall(PetscFree(sf->degree));
131   if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
132   if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
133 
    /* Break the back-pointer cycle between an SF and its multi-SF before destroying it */
134   if (sf->multi) sf->multi->multi = NULL;
135   PetscCall(PetscSFDestroy(&sf->multi));
136 
137   PetscCall(PetscLayoutDestroy(&sf->map));
138 
139 #if defined(PETSC_HAVE_DEVICE)
140   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
141 #endif
142 
    /* A fresh PetscSFSetUp() is required before the SF can be used again */
143   sf->setupcalled = PETSC_FALSE;
144   PetscFunctionReturn(PETSC_SUCCESS);
145 }
146 
147 /*@
148   PetscSFSetType - Set the `PetscSF` communication implementation
149 
150   Collective
151 
152   Input Parameters:
153 + sf   - the `PetscSF` context
154 - type - a known method
155 .vb
156     PETSCSFWINDOW - MPI-2/3 one-sided
157     PETSCSFBASIC - basic implementation using MPI-1 two-sided
158 .ve
159 
160   Options Database Key:
161 . -sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods
162 
163   Level: intermediate
164 
165   Notes:
166   See `PetscSFType` for possible values
167 
168 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`
169 @*/
170 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
171 {
172   PetscBool match;
173   PetscErrorCode (*r)(PetscSF);
174 
175   PetscFunctionBegin;
176   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
177   PetscAssertPointer(type, 2);
178 
179   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
180   if (match) PetscFunctionReturn(PETSC_SUCCESS);
181 
182   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
183   PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
184   /* Destroy the previous PetscSF implementation context */
185   PetscTryTypeMethod(sf, Destroy);
186   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
187   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
188   PetscCall((*r)(sf));
189   PetscFunctionReturn(PETSC_SUCCESS);
190 }
191 
192 /*@
193   PetscSFGetType - Get the `PetscSF` communication implementation
194 
195   Not Collective
196 
197   Input Parameter:
198 . sf - the `PetscSF` context
199 
200   Output Parameter:
201 . type - the `PetscSF` type name
202 
203   Level: intermediate
204 
205 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
206 @*/
207 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
208 {
209   PetscFunctionBegin;
210   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
211   PetscAssertPointer(type, 2);
212   *type = ((PetscObject)sf)->type_name;
213   PetscFunctionReturn(PETSC_SUCCESS);
214 }
215 
216 /*@
217   PetscSFDestroy - destroy a star forest
218 
219   Collective
220 
221   Input Parameter:
222 . sf - address of star forest
223 
224   Level: intermediate
225 
226 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
227 @*/
228 PetscErrorCode PetscSFDestroy(PetscSF *sf)
229 {
230   PetscFunctionBegin;
231   if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
232   PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1);
    /* Standard PETSc reference counting: only the last reference tears the object down */
233   if (--((PetscObject)*sf)->refct > 0) {
234     *sf = NULL;
235     PetscFunctionReturn(PETSC_SUCCESS);
236   }
237   PetscCall(PetscSFReset(*sf));
238   PetscTryTypeMethod(*sf, Destroy);
239   PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
    /* bs > 1 means vscat.unit is a derived datatype owned by this SF (see PetscSFDuplicate) */
240   if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
241 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
242   if ((*sf)->use_stream_aware_mpi) {
243     PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
244     PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
245   }
246 #endif
247   PetscCall(PetscHeaderDestroy(sf));
248   PetscFunctionReturn(PETSC_SUCCESS);
249 }
250 
251 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
252 {
253   PetscInt           i, nleaves;
254   PetscMPIInt        size;
255   const PetscInt    *ilocal;
256   const PetscSFNode *iremote;
257 
258   PetscFunctionBegin;
259   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
260   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
261   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
262   for (i = 0; i < nleaves; i++) {
263     const PetscInt rank   = iremote[i].rank;
264     const PetscInt remote = iremote[i].index;
265     const PetscInt leaf   = ilocal ? ilocal[i] : i;
266     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
267     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
268     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
269   }
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 /*@
274   PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication
275 
276   Collective
277 
278   Input Parameter:
279 . sf - star forest communication object
280 
281   Level: beginner
282 
283 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
284 @*/
285 PetscErrorCode PetscSFSetUp(PetscSF sf)
286 {
287   PetscFunctionBegin;
288   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
289   PetscSFCheckGraphSet(sf, 1);
290   if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS); /* setup is idempotent */
291   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
    /* In debug builds, validate ranks/indices/leaf locations before building structures */
292   PetscCall(PetscSFCheckGraphValid_Private(sf));
293   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
294   PetscTryTypeMethod(sf, SetUp);
    /* Install the allocator/deallocator pair matching the selected device backend */
295 #if defined(PETSC_HAVE_CUDA)
296   if (sf->backend == PETSCSF_BACKEND_CUDA) {
297     sf->ops->Malloc = PetscSFMalloc_CUDA;
298     sf->ops->Free   = PetscSFFree_CUDA;
299   }
300 #endif
301 #if defined(PETSC_HAVE_HIP)
302   if (sf->backend == PETSCSF_BACKEND_HIP) {
303     sf->ops->Malloc = PetscSFMalloc_HIP;
304     sf->ops->Free   = PetscSFFree_HIP;
305   }
306 #endif
307 
308 #if defined(PETSC_HAVE_KOKKOS)
309   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
310     sf->ops->Malloc = PetscSFMalloc_Kokkos;
311     sf->ops->Free   = PetscSFFree_Kokkos;
312   }
313 #endif
314   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
315   sf->setupcalled = PETSC_TRUE;
316   PetscFunctionReturn(PETSC_SUCCESS);
317 }
318 
319 /*@
320   PetscSFSetFromOptions - set `PetscSF` options using the options database
321 
322   Logically Collective
323 
324   Input Parameter:
325 . sf - star forest
326 
327   Options Database Keys:
328 + -sf_type                      - implementation type, see `PetscSFSetType()`
329 . -sf_rank_order                - sort composite points for gathers and scatters in MPI rank order, gathers are non-deterministic otherwise
330 . -sf_use_default_stream        - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
331                                   use the default stream to process data. Therefore, no stream synchronization is needed between `PetscSF` and its caller (default: true).
332                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
333 . -sf_use_stream_aware_mpi      - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
334                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
335 
336 - -sf_backend <cuda,hip,kokkos> - Select the device backend `PetscSF` uses. Currently `PetscSF` has these backends: cuda, hip and kokkos.
337                                   On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
338                                   the only one available is kokkos.
339 
340   Level: intermediate
341 
342 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
343 @*/
344 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
345 {
346   PetscSFType deft;
347   char        type[256];
348   PetscBool   flg;
349 
350   PetscFunctionBegin;
351   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
352   PetscObjectOptionsBegin((PetscObject)sf);
    /* Use the current type (if any) as the default offered by -sf_type */
353   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
354   PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
355   PetscCall(PetscSFSetType(sf, flg ? type : deft));
356   PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
357   PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL));
358 #if defined(PETSC_HAVE_DEVICE)
359   {
360     char      backendstr[32] = {0};
361     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
362     /* Change the defaults set in PetscSFCreate() with command line options */
363     PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
364     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
365     PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
366     PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
367     PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
368     PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
    /* Accept only backends that are configured into this build; error on anything else the user set */
369   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
370     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
371     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
372     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
373     else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
374   #elif defined(PETSC_HAVE_KOKKOS)
375     PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
376   #endif
377 
378   #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
379     if (sf->use_stream_aware_mpi) {
380       MPI_Info info;
381 
      /* Attach PETSc's default CUDA stream to an MPIX stream and derive a
         stream-aware communicator from the SF's communicator */
382       PetscCallMPI(MPI_Info_create(&info));
383       PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
384       PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
385       PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
386       PetscCallMPI(MPI_Info_free(&info));
387       PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
388     }
389   #endif
390   }
391 #endif
392   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
393   PetscOptionsEnd();
394   PetscFunctionReturn(PETSC_SUCCESS);
395 }
396 
397 /*@
398   PetscSFSetRankOrder - sort multi-points for gathers and scatters by MPI rank order
399 
400   Logically Collective
401 
402   Input Parameters:
403 + sf  - star forest
404 - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (false has a lower setup cost, but is non-deterministic)
405 
406   Level: advanced
407 
408 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
409 @*/
410 PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
411 {
412   PetscFunctionBegin;
413   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
414   PetscValidLogicalCollectiveBool(sf, flg, 2);
415   PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
416   sf->rankorder = flg;
417   PetscFunctionReturn(PETSC_SUCCESS);
418 }
419 
420 /*@
421   PetscSFSetGraph - Set a parallel star forest
422 
423   Collective
424 
425   Input Parameters:
426 + sf         - star forest
427 . nroots     - number of root vertices on the current MPI process (these are possible targets for other process to attach leaves)
428 . nleaves    - number of leaf vertices on the current MPI process, each of these references a root on any process
429 . ilocal     - locations of leaves in leafdata buffers (locations must be >= 0, enforced during setup in debug mode), pass `NULL` for contiguous storage (same as passing (0, 1, 2, ..., nleaves-1))
430 . localmode  - copy mode for `ilocal`
431 . iremote    - remote locations of root vertices for each leaf on the current process, length is `nleaves` (locations must be >= 0, enforced during setup in debug mode)
432 - remotemode - copy mode for `iremote`
433 
434   Level: intermediate
435 
436   Notes:
437   Leaf indices in `ilocal` must be unique, otherwise an error occurs.
438 
439   Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
440   In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
441   PETSc might modify the respective array;
442   if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
443   If `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).
444 
445   `PetscSFSetGraphWithPattern()` is an alternative approach to provide certain communication patterns that have extra optimizations.
446 
447   Fortran Notes:
448   In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
449 
450   Developer Notes:
451   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
452   This also allows to compare leaf sets of two `PetscSF`s easily.
453 
454 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`, `PetscSFSetGraphWithPattern()`
455 @*/
456 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode)
457 {
458   PetscBool unique, contiguous;
459 
460   PetscFunctionBegin;
461   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
462   if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
463   if (nleaves > 0) PetscAssertPointer(iremote, 6);
464   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
465   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
466   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
467    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
468   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
469   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
470 
471   if (sf->nroots >= 0) { /* Reset only if graph already set */
472     PetscCall(PetscSFReset(sf));
473   }
474 
475   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
476   if (PetscDefined(USE_DEBUG)) {
477     PetscMPIInt size;
478 
479     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
480     for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
481   }
482 
483   sf->nroots  = nroots;
484   sf->nleaves = nleaves;
485 
486   if (localmode == PETSC_COPY_VALUES && ilocal) {
487     PetscInt *tlocal = NULL;
488 
489     PetscCall(PetscMalloc1(nleaves, &tlocal));
490     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
491     ilocal = tlocal;
492   }
493   if (remotemode == PETSC_COPY_VALUES) {
494     PetscSFNode *tremote = NULL;
495 
496     PetscCall(PetscMalloc1(nleaves, &tremote));
497     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
498     iremote = tremote;
499   }
500 
501   if (nleaves && ilocal) {
502     PetscSFNode work;
503 
504     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
505     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
506     unique = PetscNot(unique);
507     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
508     sf->minleaf = ilocal[0];
509     sf->maxleaf = ilocal[nleaves - 1];
510     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
511   } else {
512     sf->minleaf = 0;
513     sf->maxleaf = nleaves - 1;
514     unique      = PETSC_TRUE;
515     contiguous  = PETSC_TRUE;
516   }
517 
518   if (contiguous) {
519     if (localmode == PETSC_USE_POINTER) {
520       ilocal = NULL;
521     } else {
522       PetscCall(PetscFree(ilocal));
523     }
524   }
525   sf->mine = ilocal;
526   if (localmode == PETSC_USE_POINTER) {
527     sf->mine_alloc = NULL;
528   } else {
529     sf->mine_alloc = ilocal;
530   }
531   if (PetscDefined(USE_DEBUG)) {
532     PetscMPIInt size;
533 
534     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
535     for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
536   }
537   sf->remote = iremote;
538   if (remotemode == PETSC_USE_POINTER) {
539     sf->remote_alloc = NULL;
540   } else {
541     sf->remote_alloc = iremote;
542   }
543   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
544   sf->graphset = PETSC_TRUE;
545   PetscFunctionReturn(PETSC_SUCCESS);
546 }
547 
548 /*@
549   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
550 
551   Collective
552 
553   Input Parameters:
554 + sf      - The `PetscSF`
555 . map     - Layout of roots over all processes (not used when pattern is `PETSCSF_PATTERN_ALLTOALL`)
556 - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
557 
558   Level: intermediate
559 
560   Notes:
561   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `root` and its `PetscLayout` is `map`.
562   `n` and `N` are the local and global sizes of `root` respectively.
563 
564   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on it, it will copy `root` to
565   sequential vectors `leaves` on all MPI processes.
566 
567   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on it, it will copy `root` to a
568   sequential vector `leaves` on MPI rank 0.
569 
570   With `PETSCSF_PATTERN_ALLTOALL`, map is not used. Suppose NP is the size of `sf`'s communicator. The routine
571   creates a graph where every MPI process has NP leaves and NP roots. On MPI rank i, its leaf j is connected to root i
572   of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does
573   not mean one can not send multiple items. One needs to create a new MPI datatype for the multiple data
574   items with `MPI_Type_contiguous` and use that as the <unit> argument in the `PetscSF` routines. In this case, roots and leaves are symmetric.
575 
576 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
577  @*/
578 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
579 {
580   MPI_Comm    comm;
581   PetscInt    n, N, res[2];
582   PetscMPIInt rank, size;
583   PetscSFType type;
584 
585   PetscFunctionBegin;
586   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
587   PetscValidLogicalCollectiveEnum(sf, pattern, 3);
588   if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
589   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
590   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
591   PetscCallMPI(MPI_Comm_rank(comm, &rank));
592   PetscCallMPI(MPI_Comm_size(comm, &size));
593 
594   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
595     PetscInt sizei = size; /* widen to PetscInt so the PetscSqr() below is computed in PetscInt */
596 
597     type = PETSCSFALLTOALL;
      /* map is ignored for alltoall; build our own layout with `size` roots per process (size^2 total) */
598     PetscCall(PetscLayoutCreate(comm, &sf->map));
599     PetscCall(PetscLayoutSetLocalSize(sf->map, size));
600     PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei)));
601     PetscCall(PetscLayoutSetUp(sf->map));
602   } else {
603     PetscCall(PetscLayoutGetLocalSize(map, &n));
604     PetscCall(PetscLayoutGetSize(map, &N));
605     res[0] = n;
606     res[1] = -n;
607     /* Check if n are same over all ranks so that we can optimize it */
608     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
609     if (res[0] == -res[1]) { /* same n */
610       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
611     } else {
612       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
613     }
614     PetscCall(PetscLayoutReference(map, &sf->map));
615   }
616   PetscCall(PetscSFSetType(sf, type));
617 
618   sf->pattern = pattern;
619   sf->mine    = NULL; /* Contiguous */
620 
621   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
622      Also set other easy stuff.
623    */
624   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
625     sf->nleaves = N;
626     sf->nroots  = n;
627     sf->nranks  = size;
628     sf->minleaf = 0;
629     sf->maxleaf = N - 1;
630   } else if (pattern == PETSCSF_PATTERN_GATHER) {
      /* Only rank 0 receives leaves in a gather; other ranks have an empty leaf range */
631     sf->nleaves = rank ? 0 : N;
632     sf->nroots  = n;
633     sf->nranks  = rank ? 0 : size;
634     sf->minleaf = 0;
635     sf->maxleaf = rank ? -1 : N - 1;
636   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
637     sf->nleaves = size;
638     sf->nroots  = size;
639     sf->nranks  = size;
640     sf->minleaf = 0;
641     sf->maxleaf = size - 1;
642   }
643   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
644   sf->graphset = PETSC_TRUE;
645   PetscFunctionReturn(PETSC_SUCCESS);
646 }
647 
648 /*@
649   PetscSFCreateInverseSF - given a `PetscSF` in which all roots have degree 1 (exactly one leaf), creates the inverse map
650 
651   Collective
652 
653   Input Parameter:
654 . sf - star forest to invert
655 
656   Output Parameter:
657 . isf - inverse of `sf`
658 
659   Level: advanced
660 
661   Notes:
662   All roots must have degree 1.
663 
664   The local space may be a permutation, but cannot be sparse.
665 
666 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
667 @*/
668 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
669 {
670   PetscMPIInt     rank;
671   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
672   const PetscInt *ilocal;
673   PetscSFNode    *roots, *leaves;
674 
675   PetscFunctionBegin;
676   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
677   PetscSFCheckGraphSet(sf, 1);
678   PetscAssertPointer(isf, 2);
679 
680   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
681   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
682 
683   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
684   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
    /* Each leaf slot records its own (rank, index); the reduce below delivers that
       pair to the root it points at, i.e. each root learns its unique leaf */
685   for (i = 0; i < maxlocal; i++) {
686     leaves[i].rank  = rank;
687     leaves[i].index = i;
688   }
    /* Sentinel: roots untouched by any leaf keep rank = -1 */
689   for (i = 0; i < nroots; i++) {
690     roots[i].rank  = -1;
691     roots[i].index = -1;
692   }
693   PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
694   PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
695 
696   /* Check whether our leaves are sparse */
697   for (i = 0, count = 0; i < nroots; i++)
698     if (roots[i].rank >= 0) count++;
699   if (count == nroots) newilocal = NULL;
700   else { // Index for sparse leaves and compact "roots" array (which is to become our leaves).
701     PetscCall(PetscMalloc1(count, &newilocal));
702     for (i = 0, count = 0; i < nroots; i++) {
703       if (roots[i].rank >= 0) {
704         newilocal[count]   = i;
705         roots[count].rank  = roots[i].rank;
706         roots[count].index = roots[i].index;
707         count++;
708       }
709     }
710   }
711 
    /* newilocal ownership transfers to *isf (PETSC_OWN_POINTER); roots is copied */
712   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
713   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
714   PetscCall(PetscFree2(roots, leaves));
715   PetscFunctionReturn(PETSC_SUCCESS);
716 }
717 
718 /*@
719   PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
720 
721   Collective
722 
723   Input Parameters:
724 + sf  - communication object to duplicate
725 - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
726 
727   Output Parameter:
728 . newsf - new communication object
729 
730   Level: beginner
731 
732 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
733 @*/
734 PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
735 {
736   PetscSFType  type;
737   MPI_Datatype dtype = MPIU_SCALAR;
738 
739   PetscFunctionBegin;
740   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
741   PetscValidLogicalCollectiveEnum(sf, opt, 2);
742   PetscAssertPointer(newsf, 3);
743   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
744   PetscCall(PetscSFGetType(sf, &type));
745   if (type) PetscCall(PetscSFSetType(*newsf, type));
746   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
747   if (opt == PETSCSF_DUPLICATE_GRAPH) {
748     PetscSFCheckGraphSet(sf, 1);
749     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
750       PetscInt           nroots, nleaves;
751       const PetscInt    *ilocal;
752       const PetscSFNode *iremote;
      /* Deep-copy the general graph; the casts only strip const for the PETSC_COPY_VALUES call */
753       PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
754       PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
755     } else {
756       PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
757     }
758   }
759   /* Since oldtype is committed, so is newtype, according to MPI */
760   if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
761   (*newsf)->vscat.bs     = sf->vscat.bs;
762   (*newsf)->vscat.unit   = dtype;
763   (*newsf)->vscat.to_n   = sf->vscat.to_n;
764   (*newsf)->vscat.from_n = sf->vscat.from_n;
765   /* Do not copy lsf. Build it on demand since it is rarely used */
766 
767 #if defined(PETSC_HAVE_DEVICE)
768   (*newsf)->backend              = sf->backend;
769   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
770   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
771   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
772 #endif
773   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
774   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
775   PetscFunctionReturn(PETSC_SUCCESS);
776 }
777 
778 /*@C
779   PetscSFGetGraph - Get the graph specifying a parallel star forest
780 
781   Not Collective
782 
783   Input Parameter:
784 . sf - star forest
785 
786   Output Parameters:
787 + nroots  - number of root vertices on the current process (these are possible targets for other MPI process to attach leaves)
788 . nleaves - number of leaf vertices on the current process, each of these references a root on any MPI process
789 . ilocal  - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
790 - iremote - remote locations of root vertices for each leaf on the current process
791 
792   Level: intermediate
793 
794   Notes:
795   We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet
796 
797   The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()`
798 
799   Fortran Note:
800   Use `PetscSFRestoreGraph()` when access to the arrays is no longer needed
801 
802 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
803 @*/
804 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt *ilocal[], const PetscSFNode *iremote[])
805 {
806   PetscFunctionBegin;
807   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
808   if (sf->ops->GetGraph) {
809     PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
810   } else {
811     if (nroots) *nroots = sf->nroots;
812     if (nleaves) *nleaves = sf->nleaves;
813     if (ilocal) *ilocal = sf->mine;
814     if (iremote) *iremote = sf->remote;
815   }
816   PetscFunctionReturn(PETSC_SUCCESS);
817 }
818 
819 /*@
820   PetscSFGetLeafRange - Get the active leaf ranges
821 
822   Not Collective
823 
824   Input Parameter:
825 . sf - star forest
826 
827   Output Parameters:
828 + minleaf - minimum active leaf on this MPI process. Returns 0 if there are no leaves.
829 - maxleaf - maximum active leaf on this MPI process. Returns -1 if there are no leaves.
830 
831   Level: developer
832 
833 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
834 @*/
835 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
836 {
837   PetscFunctionBegin;
838   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
839   PetscSFCheckGraphSet(sf, 1);
840   if (minleaf) *minleaf = sf->minleaf;
841   if (maxleaf) *maxleaf = sf->maxleaf;
842   PetscFunctionReturn(PETSC_SUCCESS);
843 }
844 
845 /*@
846   PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
847 
848   Collective
849 
850   Input Parameters:
851 + A    - the star forest
852 . obj  - Optional object that provides the prefix for the option names
853 - name - command line option
854 
855   Level: intermediate
856 
857   Note:
858   See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
859 
860 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
861 @*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
  /* Thin wrapper: viewer/format selection is handled by the generic PetscObject machinery */
  PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
  PetscFunctionReturn(PETSC_SUCCESS);
}
869 
870 /*@
871   PetscSFView - view a star forest
872 
873   Collective
874 
875   Input Parameters:
876 + sf     - star forest
877 - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
878 
879   Level: beginner
880 
881 .seealso: [](sec_petscsf), `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
882 @*/
PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
{
  PetscBool         isascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Default to the communicator's stdout viewer when none is given */
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  PetscCheckSameComm(sf, 1, viewer, 2);
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        /* Nothing to print; unwind the tab pushed above before returning */
        PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      /* Per-rank output must go through the synchronized printf API so ranks do not interleave */
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
      for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer, &format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks, *perm;

        /* Sort a copy of the rank list (tracking the permutation) so edges print in increasing rank order */
        PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
        PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
        for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
        for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
          PetscMPIInt i = perm[ii];

          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
          for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
        }
        PetscCall(PetscFree2(tmpranks, perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  /* Let the implementation view its own internals */
  PetscTryTypeMethod(sf, View, viewer);
  PetscFunctionReturn(PETSC_SUCCESS);
}
937 
938 /*@C
939   PetscSFGetRootRanks - Get the root MPI ranks and number of vertices referenced by leaves on this process
940 
941   Not Collective
942 
943   Input Parameter:
944 . sf - star forest
945 
946   Output Parameters:
947 + nranks  - number of MPI processes referenced by local part
948 . ranks   - [`nranks`] array of MPI ranks
949 . roffset - [`nranks`+1] offset in `rmine` and `rremote` for each MPI process
950 . rmine   - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote MPI process, or `NULL`
951 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote MPI process, or `NULL`
952 
953   Level: developer
954 
955 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetLeafRanks()`
956 @*/
957 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[])
958 {
959   PetscFunctionBegin;
960   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
961   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
962   if (sf->ops->GetRootRanks) {
963     PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
964   } else {
965     /* The generic implementation */
966     if (nranks) *nranks = sf->nranks;
967     if (ranks) *ranks = sf->ranks;
968     if (roffset) *roffset = sf->roffset;
969     if (rmine) *rmine = sf->rmine;
970     if (rremote) *rremote = sf->rremote;
971   }
972   PetscFunctionReturn(PETSC_SUCCESS);
973 }
974 
975 /*@C
976   PetscSFGetLeafRanks - Get leaf MPI ranks referencing roots on this process
977 
978   Not Collective
979 
980   Input Parameter:
981 . sf - star forest
982 
983   Output Parameters:
984 + niranks  - number of leaf MPI processes referencing roots on this process
985 . iranks   - [`niranks`] array of MPI ranks
986 . ioffset  - [`niranks`+1] offset in `irootloc` for each MPI process
987 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf MPI process
988 
989   Level: developer
990 
991 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()`
992 @*/
993 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[])
994 {
995   PetscFunctionBegin;
996   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
997   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
998   if (sf->ops->GetLeafRanks) {
999     PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
1000   } else {
1001     PetscSFType type;
1002     PetscCall(PetscSFGetType(sf, &type));
1003     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
1004   }
1005   PetscFunctionReturn(PETSC_SUCCESS);
1006 }
1007 
1008 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1009 {
1010   PetscInt i;
1011   for (i = 0; i < n; i++) {
1012     if (needle == list[i]) return PETSC_TRUE;
1013   }
1014   return PETSC_FALSE;
1015 }
1016 
1017 /*@C
1018   PetscSFSetUpRanks - Set up data structures associated with MPI ranks; this is for internal use by `PetscSF` implementations.
1019 
1020   Collective
1021 
1022   Input Parameters:
1023 + sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1024 - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
1025 
1026   Level: developer
1027 
1028 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()`
1029 @*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
{
  PetscHMapI    table;
  PetscHashIter pos;
  PetscMPIInt   size, groupsize, *groupranks, *ranks;
  PetscInt     *rcount;
  PetscInt      irank, sfnrank, ranksi;
  PetscMPIInt   i, orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  /* Count, per remote rank, how many of our leaves reference it */
  PetscCall(PetscHMapICreateWithSize(10, &table));
  for (i = 0; i < sf->nleaves; i++) {
    /* Log 1-based rank */
    PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
  }
  PetscCall(PetscHMapIGetSize(table, &sfnrank));
  PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
  /* Flatten the hash table into parallel (rank, count) arrays */
  PetscHashIterBegin(table, pos);
  for (i = 0; i < sf->nranks; i++) {
    PetscHashIterGetKey(table, pos, ranksi);
    PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
    PetscHashIterGetVal(table, pos, rcount[i]);
    PetscHashIterNext(table, pos);
    ranks[i]--; /* Convert back to 0-based */
  }
  PetscCall(PetscHMapIDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group    group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;

    /* Translate the distinguished group's members into ranks of this SF's communicator */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
    PetscCall(PetscMalloc1(groupsize, &dgroupranks));
    PetscCall(PetscMalloc1(groupsize, &groupranks));
    for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
    for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i], groupsize, groupranks)) break;
    }
    for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscMPIInt tmprank;
      PetscInt    tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition by rank so PetscFindMPIInt() below can binary-search each half */
  PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
  if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
  /* Prefix-sum the counts into roffset[]; rcount[] is then reused as a per-rank fill cursor */
  sf->roffset[0] = 0;
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
    sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
    rcount[i]          = 0;
  }
  /* Bucket every leaf into its owning rank's segment of rmine[]/rremote[] */
  for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank));
      PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank));
      if (irank < 0) {
        /* Not in the distinguished partition; search the non-distinguished one */
        PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
        if (irank >= 0) irank += sf->ndranks;
      }
    }
    PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount, ranks));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1126 
1127 /*@C
1128   PetscSFGetGroups - gets incoming and outgoing process groups
1129 
1130   Collective
1131 
1132   Input Parameter:
1133 . sf - star forest
1134 
1135   Output Parameters:
1136 + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1137 - outgoing - group of destination processes for outgoing edges (roots that I reference)
1138 
1139   Level: developer
1140 
1141 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1142 @*/
PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
{
  MPI_Group group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) { /* Lazily build and cache the incoming group */
    PetscInt        i;
    const PetscInt *indegree;
    PetscMPIInt     rank, *outranks, *inranks, indegree0;
    PetscSFNode    *remote;
    PetscSF         bgcount;

    /* Compute the number of incoming ranks */
    PetscCall(PetscMalloc1(sf->nranks, &remote));
    for (i = 0; i < sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    /* bgcount is a helper SF with one local root and one leaf per referenced rank */
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
    PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
    /* Enumerate the incoming ranks */
    PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
    PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
    PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks, outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) { /* The outgoing group is simply the ranks this process references */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1190 
1191 /*@
1192   PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
1193 
1194   Collective
1195 
1196   Input Parameter:
1197 . sf - star forest
1198 
1199   Output Parameter:
1200 . rsf - the star forest with a single root per MPI process to perform communications
1201 
1202   Level: developer
1203 
1204 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
1205 @*/
1206 PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
1207 {
1208   PetscFunctionBegin;
1209   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1210   PetscAssertPointer(rsf, 2);
1211   if (!sf->rankssf) {
1212     PetscSFNode       *rremotes;
1213     const PetscMPIInt *ranks;
1214     PetscMPIInt        nranks;
1215 
1216     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
1217     PetscCall(PetscMalloc1(nranks, &rremotes));
1218     for (PetscInt i = 0; i < nranks; i++) {
1219       rremotes[i].rank  = ranks[i];
1220       rremotes[i].index = 0;
1221     }
1222     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
1223     PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
1224   }
1225   *rsf = sf->rankssf;
1226   PetscFunctionReturn(PETSC_SUCCESS);
1227 }
1228 
1229 /*@
1230   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
1231 
1232   Collective
1233 
1234   Input Parameter:
1235 . sf - star forest that may contain roots with 0 or with more than 1 vertex
1236 
1237   Output Parameter:
1238 . multi - star forest with split roots, such that each root has degree exactly 1 (has one leaf)
1239 
1240   Level: developer
1241 
1242   Note:
1243   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
1244   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1245   edge, it is a candidate for future optimization that might involve its removal.
1246 
1247 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1248 @*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(multi, 2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    *multi           = sf->multi;
    sf->multi->multi = sf->multi;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!sf->multi) { /* Lazily build and cache the multi-SF */
    const PetscInt *indegree;
    PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
    /* Root degrees give the size of each root's slot in the multi-root space */
    PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
    PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
    inoffset[0] = 0;
    /* Prefix-sum the degrees to obtain each root's offset into the multi-root space */
    for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
    for (i = 0; i < maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands every leaf a distinct index within its root's slot */
    PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
      for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
    }
    PetscCall(PetscMalloc1(sf->nleaves, &remote));
    /* Retarget each leaf at its private multi-root entry */
    for (i = 0; i < sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt  rank;
      PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
      /* Gather the contributing leaf rank of every multi-root entry */
      for (i = 0; i < maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i = 0; i < sf->nroots; i++) {
        PetscInt j;
        for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
        for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell every leaf its new (rank-sorted) index in the multi-root space */
      PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves, &newremote));
      for (i = 0; i < sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
    }
    PetscCall(PetscFree3(inoffset, outones, outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1317 
1318 /*@C
1319   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
1320 
1321   Collective
1322 
1323   Input Parameters:
1324 + sf        - original star forest
1325 . nselected - number of selected roots on this MPI process
1326 - selected  - indices of the selected roots on this MPI process
1327 
1328   Output Parameter:
1329 . esf - new star forest
1330 
1331   Level: advanced
1332 
1333   Note:
1334   To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
  be done by calling `PetscSFGetGraph()`.
1336 
1337 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1338 @*/
PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *esf)
{
  PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata, *leafdata, *leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (nselected) PetscAssertPointer(selected, 3);
  PetscAssertPointer(esf, 4);

  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
    PetscBool dups;
    PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
    PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
    for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
  }

  if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
  else {
    /* A generic version of creating embedded sf */
    PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
    maxlocal = maxleaf - minleaf + 1;
    PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
    /* leafdata is leafmem shifted so it can be indexed directly by (possibly offset) leaf numbers */
    leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
    /* Tag selected roots and bcast to leaves */
    for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
    PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));

    /* Build esf with leaves that are still connected */
    esf_nleaves = 0;
    for (i = 0; i < nleaves; i++) {
      j = ilocal ? ilocal[i] : i;
      /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
         with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
      */
      esf_nleaves += (leafdata[j] ? 1 : 0);
    }
    PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
    PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
    /* Copy over the surviving edges, keeping their original leaf and root indices */
    for (i = n = 0; i < nleaves; i++) {
      j = ilocal ? ilocal[i] : i;
      if (leafdata[j]) {
        new_ilocal[n]        = j;
        new_iremote[n].rank  = iremote[i].rank;
        new_iremote[n].index = iremote[i].index;
        ++n;
      }
    }
    PetscCall(PetscSFCreate(comm, esf));
    PetscCall(PetscSFSetFromOptions(*esf));
    PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
    PetscCall(PetscFree2(rootdata, leafmem));
  }
  PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1406 
1407 /*@C
1408   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
1409 
1410   Collective
1411 
1412   Input Parameters:
1413 + sf        - original star forest
1414 . nselected - number of selected leaves on this MPI process
1415 - selected  - indices of the selected leaves on this MPI process
1416 
1417   Output Parameter:
1418 . newsf - new star forest
1419 
1420   Level: advanced
1421 
1422 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1423 @*/
1424 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *newsf)
1425 {
1426   const PetscSFNode *iremote;
1427   PetscSFNode       *new_iremote;
1428   const PetscInt    *ilocal;
1429   PetscInt           i, nroots, *leaves, *new_ilocal;
1430   MPI_Comm           comm;
1431 
1432   PetscFunctionBegin;
1433   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1434   PetscSFCheckGraphSet(sf, 1);
1435   if (nselected) PetscAssertPointer(selected, 3);
1436   PetscAssertPointer(newsf, 4);
1437 
1438   /* Uniq selected[] and put results in leaves[] */
1439   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1440   PetscCall(PetscMalloc1(nselected, &leaves));
1441   PetscCall(PetscArraycpy(leaves, selected, nselected));
1442   PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
1443   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1444 
1445   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1446   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1447   else {
1448     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1449     PetscCall(PetscMalloc1(nselected, &new_ilocal));
1450     PetscCall(PetscMalloc1(nselected, &new_iremote));
1451     for (i = 0; i < nselected; ++i) {
1452       const PetscInt l     = leaves[i];
1453       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1454       new_iremote[i].rank  = iremote[l].rank;
1455       new_iremote[i].index = iremote[l].index;
1456     }
1457     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1458     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1459   }
1460   PetscCall(PetscFree(leaves));
1461   PetscFunctionReturn(PETSC_SUCCESS);
1462 }
1463 
1464 /*@C
1465   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
1466 
1467   Collective
1468 
1469   Input Parameters:
1470 + sf       - star forest on which to communicate
1471 . unit     - data type associated with each node
1472 . rootdata - buffer to broadcast
1473 - op       - operation to use for reduction
1474 
1475   Output Parameter:
1476 . leafdata - buffer to be reduced with values from each leaf's respective root
1477 
1478   Level: intermediate
1479 
1480   Note:
1481   When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers
1482   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the memory type, you should
1483   use `PetscSFBcastWithMemTypeBegin()` instead.
1484 
1485 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1486 @*/
PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscMemType rootmtype, leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* NOTE(review): when sf->vscat.logging is set the event is presumably logged by the VecScatter layer instead — confirm */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  /* Detect host vs device memory at runtime; callers that already know the memory types can
     use PetscSFBcastWithMemTypeBegin() to skip this (potentially costly) query */
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1501 
1502 /*@C
1503   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
1504   to `PetscSFBcastEnd()`
1505 
1506   Collective
1507 
1508   Input Parameters:
1509 + sf        - star forest on which to communicate
1510 . unit      - data type associated with each node
1511 . rootmtype - memory type of `rootdata`
1512 . rootdata  - buffer to broadcast
1513 . leafmtype - memory type of `leafdata`
1514 - op        - operation to use for reduction
1515 
1516   Output Parameter:
1517 . leafdata - buffer to be reduced with values from each leaf's respective root
1518 
1519   Level: intermediate
1520 
1521 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1522 @*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  /* Caller supplies rootmtype/leafmtype explicitly, avoiding the PetscGetMemType() queries done by PetscSFBcastBegin() */
  PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1533 
1534 /*@C
1535   PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
1536 
1537   Collective
1538 
1539   Input Parameters:
1540 + sf       - star forest
1541 . unit     - data type
1542 . rootdata - buffer to broadcast
1543 - op       - operation to use for reduction
1544 
1545   Output Parameter:
1546 . leafdata - buffer to be reduced with values from each leaf's respective root
1547 
1548   Level: intermediate
1549 
1550 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1551 @*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
  /* Complete the communication started by PetscSFBcastBegin()/PetscSFBcastWithMemTypeBegin() with the same arguments */
  PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1561 
1562 /*@C
1563   PetscSFReduceBegin - begin reduction (communication) of `leafdata` into `rootdata`, to be completed with call to `PetscSFReduceEnd()`
1564 
1565   Collective
1566 
1567   Input Parameters:
1568 + sf       - star forest
1569 . unit     - data type
1570 . leafdata - values to reduce (communicate)
1571 - op       - reduction operation
1572 
1573   Output Parameter:
1574 . rootdata - result of reduction of values (`leafdata`) from all leaves to each root
1575 
1576   Level: intermediate
1577 
1578   Note:
1579   When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers
1580   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the memory type, you should
1581   use `PetscSFReduceWithMemTypeBegin()` instead.
1582 
1583 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`, `MPI_Datatype`
1584 @*/
1585 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1586 {
1587   PetscMemType rootmtype, leafmtype;
1588 
1589   PetscFunctionBegin;
1590   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1591   PetscCall(PetscSFSetUp(sf));
1592   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1593   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1594   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1595   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1596   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1597   PetscFunctionReturn(PETSC_SUCCESS);
1598 }
1599 
1600 /*@C
1601   PetscSFReduceWithMemTypeBegin - begin reduction of `leafdata` into `rootdata` with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1602 
1603   Collective
1604 
1605   Input Parameters:
1606 + sf        - star forest
1607 . unit      - data type
1608 . leafmtype - memory type of `leafdata`
1609 . leafdata  - values to reduce
1610 . rootmtype - memory type of `rootdata`
1611 - op        - reduction operation
1612 
1613   Output Parameter:
1614 . rootdata - result of reduction of values from all leaves of each root
1615 
1616   Level: intermediate
1617 
1618 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1619 @*/
1620 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1621 {
1622   PetscFunctionBegin;
1623   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1624   PetscCall(PetscSFSetUp(sf));
1625   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1626   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1627   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1628   PetscFunctionReturn(PETSC_SUCCESS);
1629 }
1630 
1631 /*@C
1632   PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
1633 
1634   Collective
1635 
1636   Input Parameters:
1637 + sf       - star forest
1638 . unit     - data type
1639 . leafdata - values to reduce
1640 - op       - reduction operation
1641 
1642   Output Parameter:
1643 . rootdata - result of reduction of values from all leaves of each root
1644 
1645   Level: intermediate
1646 
1647 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
1648 @*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
  /* Complete the communication started by PetscSFReduceBegin()/PetscSFReduceWithMemTypeBegin() with the same arguments */
  PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1658 
1659 /*@C
1660   PetscSFFetchAndOpBegin - begin operation that fetches values from `rootdata` and updates it atomically by applying operation using `leafdata`,
1661   to be completed with `PetscSFFetchAndOpEnd()`
1662 
1663   Collective
1664 
1665   Input Parameters:
1666 + sf       - star forest
1667 . unit     - data type
1668 . leafdata - leaf values to use in reduction
1669 - op       - operation to use for reduction
1670 
1671   Output Parameters:
1672 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1673 - leafupdate - state at each leaf's respective root immediately prior to atomic update
1674 
1675   Level: advanced
1676 
1677   Note:
1678   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1679   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1680   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1681   integers.
1682 
1683 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1684 @*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscMemType rootmtype, leafmtype, leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  /* Determine the memory type of all three buffers at runtime; see PetscSFFetchAndOpWithMemTypeBegin() to avoid this cost */
  PetscCall(PetscGetMemType(rootdata, &rootmtype));
  PetscCall(PetscGetMemType(leafdata, &leafmtype));
  PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
  /* The type method receives a single leaf memory type, so leafdata and leafupdate must live in the same kind of memory */
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1701 
1702 /*@C
1703   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1704   applying operation using `leafdata`, to be completed with `PetscSFFetchAndOpEnd()`
1705 
1706   Collective
1707 
1708   Input Parameters:
1709 + sf              - star forest
1710 . unit            - data type
1711 . rootmtype       - memory type of `rootdata`
1712 . leafmtype       - memory type of `leafdata`
1713 . leafdata        - leaf values to use in reduction
1714 . leafupdatemtype - memory type of `leafupdate`
1715 - op              - operation to use for reduction
1716 
1717   Output Parameters:
1718 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1719 - leafupdate - state at each leaf's respective root immediately prior to atomic update
1720 
1721   Level: advanced
1722 
1723   Note:
1724   See `PetscSFFetchAndOpBegin()` for more details.
1725 
1726 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1727 @*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  /* The type method receives a single leaf memory type, so leafdata and leafupdate must live in the same kind of memory */
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1739 
1740 /*@C
1741   PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
1742   to fetch values from roots and update atomically by applying operation using `leafdata`
1743 
1744   Collective
1745 
1746   Input Parameters:
1747 + sf       - star forest
1748 . unit     - data type
1749 . leafdata - leaf values to use in reduction
1750 - op       - operation to use for reduction
1751 
1752   Output Parameters:
1753 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1754 - leafupdate - state at each leaf's respective root immediately prior to atomic update
1755 
1756   Level: advanced
1757 
1758 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1759 @*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  /* Complete the communication started by PetscSFFetchAndOpBegin()/PetscSFFetchAndOpWithMemTypeBegin() with the same arguments */
  PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1769 
1770 /*@C
1771   PetscSFComputeDegreeBegin - begin computation of the degree of each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
1772 
1773   Collective
1774 
1775   Input Parameter:
1776 . sf - star forest
1777 
1778   Output Parameter:
1779 . degree - degree (the number of leaves) of each root vertex
1780 
1781   Level: advanced
1782 
1783   Note:
1784   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1785 
1786 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1787 @*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree is non-NULL only between a Begin and its matching End, so a second Begin is a nesting error */
    PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf - sf->minleaf + 1; /* size of the contiguous local leaf range [minleaf, maxleaf] */
    PetscCall(PetscMalloc1(nroots, &sf->degree));
    PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i = 0; i < nroots; i++) sf->degree[i] = 0;
    for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
    /* Reducing a 1 from every leaf with MPI_SUM yields each root's leaf count; degreetmp is offset
       by -minleaf so the reduce can index it by absolute leaf number */
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
  }
  *degree = NULL; /* result is only available after PetscSFComputeDegreeEnd() */
  PetscFunctionReturn(PETSC_SUCCESS);
}
1807 
1808 /*@C
1809   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
1810 
1811   Collective
1812 
1813   Input Parameter:
1814 . sf - star forest
1815 
1816   Output Parameter:
1817 . degree - degree of each root vertex
1818 
1819   Level: developer
1820 
1821   Note:
1822   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1823 
1824 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1825 @*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(degree, 2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Complete the MPI_SUM reduction started in Begin, using the same -minleaf offset */
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    sf->degreeknown = PETSC_TRUE; /* cache the result so subsequent Begin/End pairs skip the communication */
  }
  *degree = sf->degree; /* owned by sf and freed by PetscSFDestroy(); callers must not PetscFree() it */
  PetscFunctionReturn(PETSC_SUCCESS);
}
1841 
1842 /*@C
1843   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
1844   Each multi-root is assigned index of the corresponding original root.
1845 
1846   Collective
1847 
1848   Input Parameters:
1849 + sf     - star forest
1850 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()` and `PetscSFComputeDegreeEnd()`
1851 
1852   Output Parameters:
1853 + nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
1854 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1855 
1856   Level: developer
1857 
1858   Note:
1859   The returned array `multiRootsOrigNumbering` should be destroyed with `PetscFree()` when no longer needed.
1860 
1861 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1862 @*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF  msf;
  PetscInt k = 0, nroots, nmroots;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  if (nroots) PetscAssertPointer(degree, 2);
  if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
  PetscAssertPointer(multiRootsOrigNumbering, 4);
  PetscCall(PetscSFGetMultiSF(sf, &msf));
  PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
  PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering)); /* caller frees with PetscFree() */
  /* Walk the original roots in order, writing index i into the next degree[i] output slots;
     roots with zero degree contribute no multi-roots */
  for (PetscInt i = 0; i < nroots; i++) {
    if (!degree[i]) continue;
    for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
  }
  /* The total degree over all roots must equal the multi-SF's root count */
  PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1885 
1886 /*@C
1887   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
1888 
1889   Collective
1890 
1891   Input Parameters:
1892 + sf       - star forest
1893 . unit     - data type
1894 - leafdata - leaf data to gather to roots
1895 
1896   Output Parameter:
1897 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has)
1898 
1899   Level: intermediate
1900 
1901 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1902 @*/
1903 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1904 {
1905   PetscSF multi = NULL;
1906 
1907   PetscFunctionBegin;
1908   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1909   PetscCall(PetscSFSetUp(sf));
1910   PetscCall(PetscSFGetMultiSF(sf, &multi));
1911   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1912   PetscFunctionReturn(PETSC_SUCCESS);
1913 }
1914 
1915 /*@C
1916   PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
1917 
1918   Collective
1919 
1920   Input Parameters:
1921 + sf       - star forest
1922 . unit     - data type
1923 - leafdata - leaf data to gather to roots
1924 
1925   Output Parameter:
1926 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has)
1927 
1928   Level: intermediate
1929 
1930 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1931 @*/
1932 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1933 {
1934   PetscSF multi = NULL;
1935 
1936   PetscFunctionBegin;
1937   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1938   PetscCall(PetscSFGetMultiSF(sf, &multi));
1939   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1940   PetscFunctionReturn(PETSC_SUCCESS);
1941 }
1942 
1943 /*@C
1944   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
1945 
1946   Collective
1947 
1948   Input Parameters:
1949 + sf            - star forest
1950 . unit          - data type
1951 - multirootdata - root buffer to send to each leaf, one unit of data is provided to each leaf thus the amount of space per root is equal to its degree (which is the number of leaves that root has)
1952 
1953   Output Parameter:
1954 . leafdata - leaf data to be update with personal data from each respective root
1955 
1956   Level: intermediate
1957 
1958 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1959 @*/
1960 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1961 {
1962   PetscSF multi = NULL;
1963 
1964   PetscFunctionBegin;
1965   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1966   PetscCall(PetscSFSetUp(sf));
1967   PetscCall(PetscSFGetMultiSF(sf, &multi));
1968   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1969   PetscFunctionReturn(PETSC_SUCCESS);
1970 }
1971 
1972 /*@C
1973   PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
1974 
1975   Collective
1976 
1977   Input Parameters:
1978 + sf            - star forest
1979 . unit          - data type
1980 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1981 
1982   Output Parameter:
1983 . leafdata - leaf data to be update with personal data from each respective root
1984 
1985   Level: intermediate
1986 
1987 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1988 @*/
1989 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1990 {
1991   PetscSF multi = NULL;
1992 
1993   PetscFunctionBegin;
1994   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1995   PetscCall(PetscSFGetMultiSF(sf, &multi));
1996   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1997   PetscFunctionReturn(PETSC_SUCCESS);
1998 }
1999 
2000 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
2001 {
2002   PetscInt        i, n, nleaves;
2003   const PetscInt *ilocal = NULL;
2004   PetscHSetI      seen;
2005 
2006   PetscFunctionBegin;
2007   if (PetscDefined(USE_DEBUG)) {
2008     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
2009     PetscCall(PetscHSetICreate(&seen));
2010     for (i = 0; i < nleaves; i++) {
2011       const PetscInt leaf = ilocal ? ilocal[i] : i;
2012       PetscCall(PetscHSetIAdd(seen, leaf));
2013     }
2014     PetscCall(PetscHSetIGetSize(seen, &n));
2015     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
2016     PetscCall(PetscHSetIDestroy(&seen));
2017   }
2018   PetscFunctionReturn(PETSC_SUCCESS);
2019 }
2020 
2021 /*@
2022   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
2023 
2024   Input Parameters:
2025 + sfA - The first `PetscSF`
2026 - sfB - The second `PetscSF`
2027 
2028   Output Parameter:
2029 . sfBA - The composite `PetscSF`
2030 
2031   Level: developer
2032 
2033   Notes:
2034   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2035   forests, i.e. the same leaf is not connected with different roots.
2036 
2037   `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
2038   a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
2039   nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
2040   `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfB`, on connected nodes.
2041 
2042 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2043 @*/
PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
  const PetscInt    *localPointsA, *localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscAssertPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
  /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
     numRootsB; otherwise, garbage will be broadcasted.
     Example (comm size = 1):
     sfA: 0 <- (0, 0)
     sfB: 100 <- (0, 0)
          101 <- (0, 1)
     Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
     of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
     receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
     remotePointsA; if not recast, point 101 would receive a garbage value.             */
  PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
  /* Initialize to (-1, -1) to mark roots of sfB that no leaf of sfA points through */
  for (i = 0; i < numRootsB; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  /* Re-key sfA's remote points by sfA's local leaf index, which serves as sfB's root index */
  for (i = 0; i < numLeavesA; i++) {
    PetscInt localp = localPointsA ? localPointsA[i] : i;

    if (localp >= numRootsB) continue; /* this leaf of sfA is outside sfB's root space, so it cannot contribute */
    reorderedRemotePointsA[localp] = remotePointsA[i];
  }
  remotePointsA = reorderedRemotePointsA;
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
  /* Initialize to (-1, -1) so leaves of sfB whose root was not reached from sfA stay marked invalid */
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    leafdataB[i].rank  = -1;
    leafdataB[i].index = -1;
  }
  /* Broadcast sfA's (reordered) root locations through sfB: each leaf of sfB learns its ultimate root in sfA.
     leafdataB is offset by -minleaf so the bcast can index it by absolute leaf number. */
  PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* denseB: sfB's leaves are contiguous (NULL ilocal) and all of them are connected, so leafdataB can be reused directly */
  denseB = (PetscBool)!localPointsB;
  for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership is transferred to *sfBA via PETSC_OWN_POINTER below */
  } else {
    /* Compact: keep only connected leaves, preserving their original leaf numbering */
    PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
    for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l - minleaf].rank == -1) continue; /* unconnected leaf: drop it from the composite graph */
      remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
      localPointsBA[numLeavesBA]  = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2123 
2124 /*@
2125   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
2126 
2127   Input Parameters:
2128 + sfA - The first `PetscSF`
2129 - sfB - The second `PetscSF`
2130 
2131   Output Parameter:
2132 . sfBA - The composite `PetscSF`.
2133 
2134   Level: developer
2135 
2136   Notes:
2137   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2138   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
2139   second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.
2140 
2141   `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
2142   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2143   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()`
2144   on `sfA`, then
2145   a `PetscSFReduceBegin()` and `PetscSFReduceEnd()` on `sfB`, on connected roots.
2146 
2147 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2148 @*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA, *localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL; /* sfA's remote points reindexed densely over sfB's leaf range [minleaf, maxleaf] */
  PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
  MPI_Op             op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscAssertPointer(sfBA, 3);
  /* Both inputs must be true star forests (no duplicated leaves), as required by the composition */
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  /* Scatter sfA's remote points into a dense array indexed by leaf number, so that sfB's
     leaf l reads its datum at reorderedRemotePointsA[l - minleaf]; unmatched slots keep rank -1 */
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i = 0; i < numLeavesA; i++) {
      /* sfA leaves outside sfB's leaf range cannot be connected through sfB; skip them */
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* localPointsA == NULL means contiguous leaf storage: leaf i has leaf number i */
    for (i = 0; i < numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  /* Initialize root data with rank -1 so that roots of sfB with no connected leaf stay marked unconnected */
  PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
  PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
  for (i = 0; i < numRootsB; i++) {
    remotePointsBA[i].rank  = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce sfA's (rank, index) pairs from sfB's leaves onto sfB's roots: each connected root of sfB
     receives the root of sfA reachable through the shared leaf */
  PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
  PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* Compact in place, dropping roots of sfB that were never reached (rank still -1) */
  for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA]        = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2227 
2228 /*
2229   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
2230 
2231   Input Parameter:
2232 . sf - The global `PetscSF`
2233 
2234   Output Parameter:
2235 . out - The local `PetscSF`
2236 
2237 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`
2238  */
2239 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2240 {
2241   MPI_Comm           comm;
2242   PetscMPIInt        myrank;
2243   const PetscInt    *ilocal;
2244   const PetscSFNode *iremote;
2245   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
2246   PetscSFNode       *liremote;
2247   PetscSF            lsf;
2248 
2249   PetscFunctionBegin;
2250   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2251   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2252   else {
2253     PetscMPIInt irank;
2254 
2255     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2256     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
2257     PetscCallMPI(MPI_Comm_rank(comm, &myrank));
2258 
2259     /* Find out local edges and build a local SF */
2260     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
2261     for (i = lnleaves = 0; i < nleaves; i++) {
2262       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2263       if (irank == myrank) lnleaves++;
2264     }
2265     PetscCall(PetscMalloc1(lnleaves, &lilocal));
2266     PetscCall(PetscMalloc1(lnleaves, &liremote));
2267 
2268     for (i = j = 0; i < nleaves; i++) {
2269       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2270       if (irank == myrank) {
2271         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2272         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
2273         liremote[j].index = iremote[i].index;
2274         j++;
2275       }
2276     }
2277     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
2278     PetscCall(PetscSFSetFromOptions(lsf));
2279     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
2280     PetscCall(PetscSFSetUp(lsf));
2281     *out = lsf;
2282   }
2283   PetscFunctionReturn(PETSC_SUCCESS);
2284 }
2285 
2286 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2287 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2288 {
2289   PetscMemType rootmtype, leafmtype;
2290 
2291   PetscFunctionBegin;
2292   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2293   PetscCall(PetscSFSetUp(sf));
2294   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2295   PetscCall(PetscGetMemType(rootdata, &rootmtype));
2296   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2297   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2298   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2299   PetscFunctionReturn(PETSC_SUCCESS);
2300 }
2301 
2302 /*@
2303   PetscSFConcatenate - concatenate multiple `PetscSF` into a new `PetscSF`
2304 
2305   Input Parameters:
2306 + comm        - the communicator
2307 . nsfs        - the number of input `PetscSF`
2308 . sfs         - the array of input `PetscSF`
2309 . rootMode    - the root mode specifying how roots are handled
2310 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2311 
2312   Output Parameter:
2313 . newsf - The resulting `PetscSF`
2314 
2315   Level: advanced
2316 
2317   Notes:
  The communicator of all `PetscSF`s in `sfs` must be `comm`.
2319 
2320   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
2321 
2322   The offsets in `leafOffsets` are added to the original leaf indices.
2323 
2324   If all input `PetscSF`s use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
2325   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
2326 
2327   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2328   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2329 
2330   All root modes retain the essential connectivity condition.
2331 
2332   If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2333 
2334   Parameter `rootMode` controls how the input root spaces are combined.
2335   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2336   and is also the same in the output `PetscSF`.
2337 
2338   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2339   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2340   roots of sfs[0], sfs[1], sfs[2], ... are joined on each MPI process separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
2341   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2342   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2343   the original root MPI processes are ignored.
2344   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2345   the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each MPI process
2346   to keep the load balancing.
2347   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different MPI processes.
2348 
2349   Example:
2350   We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
2351 .vb
2352   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
2353   for m in {local,global,shared}; do
2354     mpiexec -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
2355   done
2356 .ve
2357   we generate two identical `PetscSF`s sf_0 and sf_1,
2358 .vb
2359   PetscSF Object: sf_0 2 MPI processes
2360     type: basic
2361     rank #leaves #roots
2362     [ 0]       4      2
2363     [ 1]       4      2
2364     leaves      roots       roots in global numbering
2365     ( 0,  0) <- ( 0,  0)  =   0
2366     ( 0,  1) <- ( 0,  1)  =   1
2367     ( 0,  2) <- ( 1,  0)  =   2
2368     ( 0,  3) <- ( 1,  1)  =   3
2369     ( 1,  0) <- ( 0,  0)  =   0
2370     ( 1,  1) <- ( 0,  1)  =   1
2371     ( 1,  2) <- ( 1,  0)  =   2
2372     ( 1,  3) <- ( 1,  1)  =   3
2373 .ve
2374   and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
2375 .vb
2376   rootMode = local:
2377   PetscSF Object: result_sf 2 MPI processes
2378     type: basic
2379     rank #leaves #roots
2380     [ 0]       8      4
2381     [ 1]       8      4
2382     leaves      roots       roots in global numbering
2383     ( 0,  0) <- ( 0,  0)  =   0
2384     ( 0,  1) <- ( 0,  1)  =   1
2385     ( 0,  2) <- ( 1,  0)  =   4
2386     ( 0,  3) <- ( 1,  1)  =   5
2387     ( 0,  4) <- ( 0,  2)  =   2
2388     ( 0,  5) <- ( 0,  3)  =   3
2389     ( 0,  6) <- ( 1,  2)  =   6
2390     ( 0,  7) <- ( 1,  3)  =   7
2391     ( 1,  0) <- ( 0,  0)  =   0
2392     ( 1,  1) <- ( 0,  1)  =   1
2393     ( 1,  2) <- ( 1,  0)  =   4
2394     ( 1,  3) <- ( 1,  1)  =   5
2395     ( 1,  4) <- ( 0,  2)  =   2
2396     ( 1,  5) <- ( 0,  3)  =   3
2397     ( 1,  6) <- ( 1,  2)  =   6
2398     ( 1,  7) <- ( 1,  3)  =   7
2399 
2400   rootMode = global:
2401   PetscSF Object: result_sf 2 MPI processes
2402     type: basic
2403     rank #leaves #roots
2404     [ 0]       8      4
2405     [ 1]       8      4
2406     leaves      roots       roots in global numbering
2407     ( 0,  0) <- ( 0,  0)  =   0
2408     ( 0,  1) <- ( 0,  1)  =   1
2409     ( 0,  2) <- ( 0,  2)  =   2
2410     ( 0,  3) <- ( 0,  3)  =   3
2411     ( 0,  4) <- ( 1,  0)  =   4
2412     ( 0,  5) <- ( 1,  1)  =   5
2413     ( 0,  6) <- ( 1,  2)  =   6
2414     ( 0,  7) <- ( 1,  3)  =   7
2415     ( 1,  0) <- ( 0,  0)  =   0
2416     ( 1,  1) <- ( 0,  1)  =   1
2417     ( 1,  2) <- ( 0,  2)  =   2
2418     ( 1,  3) <- ( 0,  3)  =   3
2419     ( 1,  4) <- ( 1,  0)  =   4
2420     ( 1,  5) <- ( 1,  1)  =   5
2421     ( 1,  6) <- ( 1,  2)  =   6
2422     ( 1,  7) <- ( 1,  3)  =   7
2423 
2424   rootMode = shared:
2425   PetscSF Object: result_sf 2 MPI processes
2426     type: basic
2427     rank #leaves #roots
2428     [ 0]       8      2
2429     [ 1]       8      2
2430     leaves      roots       roots in global numbering
2431     ( 0,  0) <- ( 0,  0)  =   0
2432     ( 0,  1) <- ( 0,  1)  =   1
2433     ( 0,  2) <- ( 1,  0)  =   2
2434     ( 0,  3) <- ( 1,  1)  =   3
2435     ( 0,  4) <- ( 0,  0)  =   0
2436     ( 0,  5) <- ( 0,  1)  =   1
2437     ( 0,  6) <- ( 1,  0)  =   2
2438     ( 0,  7) <- ( 1,  1)  =   3
2439     ( 1,  0) <- ( 0,  0)  =   0
2440     ( 1,  1) <- ( 0,  1)  =   1
2441     ( 1,  2) <- ( 1,  0)  =   2
2442     ( 1,  3) <- ( 1,  1)  =   3
2443     ( 1,  4) <- ( 0,  0)  =   0
2444     ( 1,  5) <- ( 0,  1)  =   1
2445     ( 1,  6) <- ( 1,  0)  =   2
2446     ( 1,  7) <- ( 1,  1)  =   3
2447 .ve
2448 
2449 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2450 @*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt     i, s, nLeaves, nRoots;
  PetscInt    *leafArrayOffsets; /* prefix sums of the inputs' leaf counts; leafArrayOffsets[s] is where sfs[s]'s leaves start in the concatenated arrays */
  PetscInt    *ilocal_new;
  PetscSFNode *iremote_new;
  PetscBool    all_ilocal_null = PETSC_FALSE;
  PetscLayout  glayout         = NULL; /* only used for PETSCSF_CONCATENATE_ROOTMODE_GLOBAL */
  PetscInt    *gremote         = NULL; /* only used for PETSCSF_CONCATENATE_ROOTMODE_GLOBAL */
  PetscMPIInt  rank, size;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    PetscCall(PetscSFCreate(comm, &dummy));
    PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
    PetscAssertPointer(sfs, 3);
    for (i = 0; i < nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
      PetscCheckSameComm(dummy, 1, sfs[i], 3);
    }
    PetscValidLogicalCollectiveEnum(dummy, rootMode, 4);
    if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
    PetscAssertPointer(newsf, 6);
    PetscCall(PetscSFDestroy(&dummy));
  }
  /* Concatenating zero SFs yields an empty SF */
  if (!nsfs) {
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Calculate leaf array offsets */
  PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
  leafArrayOffsets[0] = 0;
  for (s = 0; s < nsfs; s++) {
    PetscInt nl;

    PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
    leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* Calculate number of roots */
  switch (rootMode) {
  case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
    /* All inputs share the same root space, so take sfs[0]'s root count (and check agreement in debug mode) */
    PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
    if (PetscDefined(USE_DEBUG)) {
      for (s = 1; s < nsfs; s++) {
        PetscInt nr;

        PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
      }
    }
  } break;
  case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
    /* Calculate also global layout in this case */
    PetscInt    *nls;
    PetscLayout *lts;
    PetscInt   **inds;
    PetscInt     j;
    PetscInt     rootOffset = 0;

    PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
    PetscCall(PetscLayoutCreate(comm, &glayout));
    glayout->bs = 1;
    glayout->n  = 0;
    glayout->N  = 0;
    /* Sum the inputs' local and global root counts to form the concatenated layout */
    for (s = 0; s < nsfs; s++) {
      PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
      glayout->n += lts[s]->n;
      glayout->N += lts[s]->N;
    }
    PetscCall(PetscLayoutSetUp(glayout));
    /* Shift each input's global root indices by the global sizes of the preceding inputs */
    PetscCall(PetscMalloc1(nLeaves, &gremote));
    for (s = 0, j = 0; s < nsfs; s++) {
      for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
      rootOffset += lts[s]->N;
      PetscCall(PetscLayoutDestroy(&lts[s]));
      PetscCall(PetscFree(inds[s]));
    }
    PetscCall(PetscFree3(lts, nls, inds));
    nRoots = glayout->N;
  } break;
  case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
    /* nRoots calculated later in this case */
    break;
  default:
    SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
  }

  /* leafOffsets == NULL is only legal when every input uses contiguous leaf storage (ilocal == NULL) */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s = 0; s < nsfs; s++) {
      const PetscInt *ilocal;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL; /* stays NULL (contiguous storage) when all inputs have ilocal = NULL */
  if (!all_ilocal_null) {
    PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
    for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
    for (s = 0; s < nsfs; s++) {
      const PetscInt *ilocal;
      PetscInt       *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
      PetscInt        i, nleaves_l;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
      /* Each input's leaf indices are shifted by the user-provided offset for that input */
      for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots */
  if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
    PetscInt rootOffset = 0;

    PetscCall(PetscMalloc1(nLeaves, &iremote_new));
    for (i = 0; i < nLeaves; i++) {
      iremote_new[i].rank  = -1;
      iremote_new[i].index = -1;
    }
    for (s = 0; s < nsfs; s++) {
      PetscInt           i, nl, nr;
      PetscSF            tmp_sf;
      const PetscSFNode *iremote;
      PetscSFNode       *tmp_rootdata;
      PetscSFNode       *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);

      PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
      PetscCall(PetscSFCreate(comm, &tmp_sf));
      /* create helper SF with contiguous leaves */
      PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
      PetscCall(PetscSFSetUp(tmp_sf));
      PetscCall(PetscMalloc1(nr, &tmp_rootdata));
      if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
        /* LOCAL: roots of successive inputs are renumbered contiguously per process */
        for (i = 0; i < nr; i++) {
          tmp_rootdata[i].index = i + rootOffset;
          tmp_rootdata[i].rank  = rank;
        }
        rootOffset += nr;
      } else {
        /* SHARED: roots keep their original (common) numbering */
        for (i = 0; i < nr; i++) {
          tmp_rootdata[i].index = i;
          tmp_rootdata[i].rank  = rank;
        }
      }
      /* Broadcast each root's new (rank, index) to its leaves, writing straight into this input's slice of iremote_new */
      PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
      PetscCall(PetscSFDestroy(&tmp_sf));
      PetscCall(PetscFree(tmp_rootdata));
    }
    if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above

    /* Build the new SF */
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
  } else {
    /* Build the new SF */
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
  }
  PetscCall(PetscSFSetUp(*newsf));
  PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
  PetscCall(PetscLayoutDestroy(&glayout));
  PetscCall(PetscFree(gremote));
  PetscCall(PetscFree(leafArrayOffsets));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2631 
2632 /*@
2633   PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated `PetscSF` communications.
2634 
2635   Collective
2636 
2637   Input Parameters:
2638 + sf       - star forest
2639 . unit     - the data type contained within `rootdata` and `leafdata`
2640 . rootdata - root data that will be used for multiple `PetscSF` communications
2641 - leafdata - leaf data that will be used for multiple `PetscSF` communications
2642 
2643   Level: advanced
2644 
2645   Notes:
2646   Implementations of `PetscSF` can make optimizations
2647   for repeated communication using the same memory regions, but these optimizations
2648   can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed.
2649   The intended pattern is
2650 
2651 .vb
2652   PetscMalloc2(nroots, &rootdata, nleaves, &leafdata);
2653 
2654   PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
2655   // repeated use of rootdata and leafdata will now be optimized
2656 
2657   PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE);
2658   PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE);
2659   // ...
2660   PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM);
2661   PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM);
2662   // ... (other communications)
2663 
2664   // rootdata and leafdata must be deregistered before freeing
2665   // skipping this can lead to undefined behavior including
2666   // deadlocks
2667   PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
2668 
2669   // it is now safe to free rootdata and leafdata
2670   PetscFree2(rootdata, leafdata);
2671 .ve
2672 
2673   If you do not register `rootdata` and `leafdata` it will not cause an error,
2674   but optimizations that reduce the setup time for each communication cannot be
2675   made.  Currently, the only implementation of `PetscSF` that benefits from
2676   `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`.  For the default
2677   `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`.
2678 
2679 .seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
2680 @*/
PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Dispatch to the implementation if it provides persistent registration (currently only PETSCSFWINDOW benefits); otherwise this is a no-op */
  PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2688 
2689 /*@
2690   PetscSFDeregisterPersistent - Signal that repeated usage of `rootdata` and `leafdata` for `PetscSF` communication has concluded.
2691 
2692   Collective
2693 
2694   Input Parameters:
2695 + sf       - star forest
2696 . unit     - the data type contained within the `rootdata` and `leafdata`
2697 . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
2698 - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`
2699 
2700   Level: advanced
2701 
2702   Note:
2703   See `PetscSFRegisterPersistent()` for when and how to use this function.
2704 
2705 .seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
2706 @*/
PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Dispatch to the implementation if it provides persistent deregistration; otherwise this is a no-op */
  PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2714 
2715 PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
2716 {
2717   MPI_Aint lb, lb_true, bytes, bytes_true;
2718 
2719   PetscFunctionBegin;
2720   PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
2721   PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
2722   PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
2723   *size = bytes;
2724   PetscFunctionReturn(PETSC_SUCCESS);
2725 }
2726