xref: /petsc/src/vec/is/sf/interface/sf.c (revision 0da4d79be8ca4b371773b94100cfa6921df903e2)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petsc/private/hashmapi.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <petscdevice_cuda.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 #endif
13 
#if defined(PETSC_CLANG_STATIC_ANALYZER)
/* Present the macro as a real prototype so the static analyzer does not flag an unknown call */
extern void PetscSFCheckGraphSet(PetscSF, int);
#else
  #if defined(PETSC_USE_DEBUG)
    /* Debug builds: raise PETSC_ERR_ARG_WRONGSTATE if the graph of 'sf' (argument number 'arg') has not been set */
    #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
  #else
    /* Optimized builds: the check compiles away to an empty statement */
    #define PetscSFCheckGraphSet(sf, arg) \
      do { \
      } while (0)
  #endif
#endif
25 
/* String tables for option processing/printing; entries parallel the PetscSFDuplicateOption and
   PetscSFConcatenateRootMode enum values, followed by the enum name, its prefix, and a NULL sentinel */
const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
28 
29 /*@
30   PetscSFCreate - create a star forest communication context
31 
32   Collective
33 
34   Input Parameter:
35 . comm - communicator on which the star forest will operate
36 
37   Output Parameter:
38 . sf - new star forest context
39 
40   Options Database Key:
41 + -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
42 . -sf_type window                - Use MPI-3 one-sided window for communication
43 . -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
44 - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)
45 
46   Level: intermediate
47 
48   Note:
  When one knows the communication graph is one of the predefined graphs, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
50   `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
51   `PetscSF`s are optimized and have better performance than the general `PetscSF`s.
52 
53 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
54 @*/
PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
{
  PetscSF b;

  PetscFunctionBegin;
  PetscAssertPointer(sf, 2);
  PetscCall(PetscSFInitializePackage());

  PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
  /* "No graph set yet" state: negative counts and an inverted (empty) leaf range */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_INT_MAX;
  b->maxleaf   = PETSC_INT_MIN;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi; /* inherit the library-wide setting */
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->unknown_input_stream = PETSC_FALSE;
  /* Pick a default device backend; overridable later via -sf_backend (see PetscSFSetFromOptions()) */
  #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
  b->backend = PETSCSF_BACKEND_KOKKOS;
  #elif defined(PETSC_HAVE_CUDA)
  b->backend = PETSCSF_BACKEND_CUDA;
  #elif defined(PETSC_HAVE_HIP)
  b->backend = PETSCSF_BACKEND_HIP;
  #endif

  #if defined(PETSC_HAVE_NVSHMEM)
  b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
  b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
  #endif
#endif
  /* Vector-scatter bookkeeping: -1 marks sizes not yet determined */
  b->vscat.from_n = -1;
  b->vscat.to_n   = -1;
  b->vscat.unit   = MPIU_SCALAR;
  *sf             = b;
  PetscFunctionReturn(PETSC_SUCCESS);
}
98 
99 /*@
100   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
101 
102   Collective
103 
104   Input Parameter:
105 . sf - star forest
106 
107   Level: advanced
108 
109 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
110 @*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Let the implementation release its own data first */
  PetscTryTypeMethod(sf, Reset);
  PetscCall(PetscSFDestroy(&sf->rankssf));

  /* Return the graph-describing fields to their PetscSFCreate() defaults */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_INT_MAX;
  sf->maxleaf  = PETSC_INT_MIN;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine/remote may alias mine_alloc/remote_alloc; only the *_alloc pointers are owned and freed */
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));

  /* Detach the multi-SF's back-reference before destroying it */
  if (sf->multi) sf->multi->multi = NULL;
  PetscCall(PetscSFDestroy(&sf->multi));

  PetscCall(PetscLayoutDestroy(&sf->map));

#if defined(PETSC_HAVE_DEVICE)
  /* Free the device-side copies of rmine */
  for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
#endif

  /* A fresh PetscSFSetUp() is required before the SF can communicate again */
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
146 
147 /*@
148   PetscSFSetType - Set the `PetscSF` communication implementation
149 
150   Collective
151 
152   Input Parameters:
153 + sf   - the `PetscSF` context
154 - type - a known method
155 .vb
156     PETSCSFWINDOW - MPI-2/3 one-sided
157     PETSCSFBASIC - basic implementation using MPI-1 two-sided
158 .ve
159 
160   Options Database Key:
161 . -sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods
162 
163   Level: intermediate
164 
165   Notes:
166   See `PetscSFType` for possible values
167 
168 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`
169 @*/
170 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
171 {
172   PetscBool match;
173   PetscErrorCode (*r)(PetscSF);
174 
175   PetscFunctionBegin;
176   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
177   PetscAssertPointer(type, 2);
178 
179   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
180   if (match) PetscFunctionReturn(PETSC_SUCCESS);
181 
182   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
183   PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
184   /* Destroy the previous PetscSF implementation context */
185   PetscTryTypeMethod(sf, Destroy);
186   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
187   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
188   PetscCall((*r)(sf));
189   PetscFunctionReturn(PETSC_SUCCESS);
190 }
191 
192 /*@
193   PetscSFGetType - Get the `PetscSF` communication implementation
194 
195   Not Collective
196 
197   Input Parameter:
198 . sf - the `PetscSF` context
199 
200   Output Parameter:
201 . type - the `PetscSF` type name
202 
203   Level: intermediate
204 
205 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
206 @*/
207 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
208 {
209   PetscFunctionBegin;
210   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
211   PetscAssertPointer(type, 2);
212   *type = ((PetscObject)sf)->type_name;
213   PetscFunctionReturn(PETSC_SUCCESS);
214 }
215 
216 /*@
217   PetscSFDestroy - destroy a star forest
218 
219   Collective
220 
221   Input Parameter:
222 . sf - address of star forest
223 
224   Level: intermediate
225 
226 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
227 @*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
  PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1);
  /* Drop one reference; only the last reference actually destroys the object */
  if (--((PetscObject)*sf)->refct > 0) {
    *sf = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscSFReset(*sf));
  PetscTryTypeMethod(*sf, Destroy);
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* vscat.unit holds a duplicated datatype only when bs > 1 (see PetscSFDuplicate()), so free only then */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
#if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
  /* Release the MPIX stream and stream-attached communicator created in PetscSFSetFromOptions() */
  if ((*sf)->use_stream_aware_mpi) {
    PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
    PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
  }
#endif
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
250 
251 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
252 {
253   PetscInt           i, nleaves;
254   PetscMPIInt        size;
255   const PetscInt    *ilocal;
256   const PetscSFNode *iremote;
257 
258   PetscFunctionBegin;
259   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
260   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
261   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
262   for (i = 0; i < nleaves; i++) {
263     const PetscInt rank   = iremote[i].rank;
264     const PetscInt remote = iremote[i].index;
265     const PetscInt leaf   = ilocal ? ilocal[i] : i;
266     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
267     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
268     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
269   }
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
273 /*@
274   PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication
275 
276   Collective
277 
278   Input Parameter:
279 . sf - star forest communication object
280 
281   Level: beginner
282 
283 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
284 @*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS); /* setup runs at most once until the next reset */
  PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
  PetscCall(PetscSFCheckGraphValid_Private(sf)); /* debug-only rank/index validation */
  if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
  PetscTryTypeMethod(sf, SetUp);
  /* Install the device allocation routines matching the selected backend */
#if defined(PETSC_HAVE_CUDA)
  if (sf->backend == PETSCSF_BACKEND_CUDA) {
    sf->ops->Malloc = PetscSFMalloc_CUDA;
    sf->ops->Free   = PetscSFFree_CUDA;
  }
#endif
#if defined(PETSC_HAVE_HIP)
  if (sf->backend == PETSCSF_BACKEND_HIP) {
    sf->ops->Malloc = PetscSFMalloc_HIP;
    sf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
    sf->ops->Malloc = PetscSFMalloc_Kokkos;
    sf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
318 
319 /*@
320   PetscSFSetFromOptions - set `PetscSF` options using the options database
321 
322   Logically Collective
323 
324   Input Parameter:
325 . sf - star forest
326 
327   Options Database Keys:
328 + -sf_type                      - implementation type, see `PetscSFSetType()`
329 . -sf_rank_order                - sort composite points for gathers and scatters in MPI rank order, gathers are non-deterministic otherwise
. -sf_unknown_input_stream      - Assume the input root/leafdata was computed on arbitrary streams unknown to `PetscSF`, so that `PetscSF`
                                  will synchronize streams before processing the data (default: false).
                                  This option only matters together with `-use_gpu_aware_mpi 1`.
333 . -sf_use_stream_aware_mpi      - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
334                                   If true, this option only works with `-use_gpu_aware_mpi 1`.
335 
- -sf_backend <cuda,hip,kokkos> - Select the device backend `PetscSF` uses. Currently `PetscSF` has these backends: cuda, hip and kokkos.
                                  On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                                  the only available backend is kokkos.
339 
340   Level: intermediate
341 
342 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
343 @*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType deft;
  char        type[256];
  PetscBool   flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscObjectOptionsBegin((PetscObject)sf);
  /* Default to the current type if one is already set, otherwise PETSCSFBASIC */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
  PetscCall(PetscSFSetType(sf, flg ? type : deft));
  PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
  PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL));
#if defined(PETSC_HAVE_DEVICE)
  {
    char      backendstr[32] = {0};
    PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
    PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
    PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
    /* Backend names are matched case-insensitively */
    PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
    PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
    PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
  #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
  #elif defined(PETSC_HAVE_KOKKOS)
    /* Kokkos-only builds: any explicit -sf_backend other than kokkos is an error */
    PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
  #endif

  #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
    if (sf->use_stream_aware_mpi) {
      MPI_Info info;

      /* Wrap PETSc's default CUDA stream in an MPIX stream and build a stream-attached communicator;
         both are freed in PetscSFDestroy() */
      PetscCallMPI(MPI_Info_create(&info));
      PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
      PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
      PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
      PetscCallMPI(MPI_Info_free(&info));
      PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
    }
  #endif
  }
#endif
  PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
396 
397 /*@
398   PetscSFSetRankOrder - sort multi-points for gathers and scatters by MPI rank order
399 
400   Logically Collective
401 
402   Input Parameters:
403 + sf  - star forest
404 - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (false has a lower setup cost, but is non-deterministic)
405 
406   Level: advanced
407 
408 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
409 @*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveBool(sf, flg, 2);
  /* Once the multi-SF exists the ordering is baked in, so this must be called before it is built */
  PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(PETSC_SUCCESS);
}
419 
420 /*@
421   PetscSFSetGraph - Set a parallel star forest
422 
423   Collective
424 
425   Input Parameters:
426 + sf         - star forest
427 . nroots     - number of root vertices on the current MPI process (these are possible targets for other process to attach leaves)
428 . nleaves    - number of leaf vertices on the current MPI process, each of these references a root on any process
429 . ilocal     - locations of leaves in leafdata buffers (locations must be >= 0, enforced during setup in debug mode), pass `NULL` for contiguous storage (same as passing (0, 1, 2, ..., nleaves-1))
430 . localmode  - copy mode for `ilocal`
431 . iremote    - remote locations of root vertices for each leaf on the current process, length is `nleaves' (locations must be >= 0, enforced during setup in debug mode)
432 - remotemode - copy mode for `iremote`
433 
434   Level: intermediate
435 
436   Notes:
437   Leaf indices in `ilocal` must be unique, otherwise an error occurs.
438 
439   Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
440   In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
441   PETSc might modify the respective array;
442   if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
443   If `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).
444 
445   `PetscSFSetGraphWithPattern()` is an alternative approach to provide certain communication patterns that have extra optimizations.
446 
447   Fortran Notes:
448   In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
449 
450   Developer Notes:
451   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
452   This also allows to compare leaf sets of two `PetscSF`s easily.
453 
454 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`, `PetscSFSetGraphWithPattern()`
455 @*/
PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode)
{
  PetscBool unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
  if (nleaves > 0) PetscAssertPointer(iremote, 6);
  PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
  PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
  /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
   * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
  PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
  PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
  /* Debug-only: ranks must lie in [-1, size); -1 is tolerated here though
     PetscSFCheckGraphValid_Private() later requires >= 0 -- NOTE(review): confirm intended semantics of -1 */
  if (PetscDefined(USE_DEBUG)) {
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
    for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
  }

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES, switch to private copies so the caller's arrays stay intact below */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves, &tlocal));
    PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves, &tremote));
    PetscCall(PetscArraycpy(tremote, iremote, nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode work;

    /* Sort leaves (permuting iremote along) to get min/max leaves, detect duplicates and contiguity */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    unique = PetscNot(unique);
    PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves - 1];
    contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
  } else {
    /* NULL ilocal already means contiguous leaves 0..nleaves-1 */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* A contiguous 0..nleaves-1 leaf set is canonically stored as ilocal == NULL */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  sf->mine = ilocal;
  /* mine_alloc/remote_alloc record ownership: NULL means the caller retains it (PETSC_USE_POINTER) */
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc = NULL;
  } else {
    sf->mine_alloc = ilocal;
  }
  /* NOTE(review): repeats the earlier rank check, now on the (possibly copied and permuted) iremote */
  if (PetscDefined(USE_DEBUG)) {
    PetscMPIInt size;

    PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
    for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
  }
  sf->remote = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc = NULL;
  } else {
    sf->remote_alloc = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
547 
548 /*@
549   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
550 
551   Collective
552 
553   Input Parameters:
554 + sf      - The `PetscSF`
555 . map     - Layout of roots over all processes (not used when pattern is `PETSCSF_PATTERN_ALLTOALL`)
556 - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
557 
558   Level: intermediate
559 
560   Notes:
561   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `root` and its `PetscLayout` is `map`.
562   `n` and `N` are the local and global sizes of `root` respectively.
563 
564   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on it, it will copy `root` to
565   sequential vectors `leaves` on all MPI processes.
566 
567   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on it, it will copy `root` to a
568   sequential vector `leaves` on MPI rank 0.
569 
570   With `PETSCSF_PATTERN_ALLTOALL`, map is not used. Suppose NP is the size of `sf`'s communicator. The routine
571   creates a graph where every MPI process has NP leaves and NP roots. On MPI rank i, its leaf j is connected to root i
572   of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does
573   not mean one can not send multiple items. One needs to create a new MPI datatype for the multiple data
574   items with `MPI_Type_contiguous` and use that as the <unit> argument in the `PetscSF` routines. In this case, roots and leaves are symmetric.
575 
576 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
577  @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
{
  MPI_Comm    comm;
  PetscInt    n, N, res[2];
  PetscMPIInt rank, size;
  PetscSFType type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, pattern, 3);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    PetscInt sizei = size; /* widen to PetscInt so PetscSqr() below is computed in PetscInt arithmetic */

    type = PETSCSFALLTOALL;
    /* Build the (size x size) layout the all-to-all implementation uses; the user map is ignored */
    PetscCall(PetscLayoutCreate(comm, &sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map, size));
    PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei)));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map, &n));
    PetscCall(PetscLayoutGetSize(map, &N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
    } else {
      /* Varying local sizes need the "v" variants */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map, &sf->map));
  }
  PetscCall(PetscSFSetType(sf, type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    /* Every rank receives all N roots as leaves */
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* One leaf and one root per peer rank */
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
647 
648 /*@
649   PetscSFCreateInverseSF - given a `PetscSF` in which all roots have degree 1 (exactly one leaf), creates the inverse map
650 
651   Collective
652 
653   Input Parameter:
654 . sf - star forest to invert
655 
656   Output Parameter:
657 . isf - inverse of `sf`
658 
659   Level: advanced
660 
661   Notes:
662   All roots must have degree 1.
663 
664   The local space may be a permutation, but cannot be sparse.
665 
666 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
667 @*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
{
  PetscMPIInt     rank;
  PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots, *leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(isf, 2);

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
  maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
  PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
  /* Each local leaf slot is labeled with its own (rank, index) ... */
  for (i = 0; i < maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* ... while roots start out as (-1, -1) so roots never written to can be detected below */
  for (i = 0; i < nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's identity to its root; since every root has degree 1, MPI_REPLACE
     leaves each reached root holding its unique leaf's (rank, index) */
  PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i = 0, count = 0; i < nroots; i++)
    if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* dense: contiguous leaf numbering suffices */
  else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    PetscCall(PetscMalloc1(count, &newilocal));
    for (i = 0, count = 0; i < nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* The inverse has maxlocal roots (old leaf space) and 'count' leaves (old reached roots);
     newilocal ownership transfers to *isf, roots is copied and freed below */
  PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
  PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots, leaves));
  PetscFunctionReturn(PETSC_SUCCESS);
}
716 
717 /*@
718   PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
719 
720   Collective
721 
722   Input Parameters:
723 + sf  - communication object to duplicate
724 - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
725 
726   Output Parameter:
727 . newsf - new communication object
728 
729   Level: beginner
730 
731 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
732 @*/
PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
{
  PetscSFType  type;
  MPI_Datatype dtype = MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, opt, 2);
  PetscAssertPointer(newsf, 3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
  PetscCall(PetscSFGetType(sf, &type));
  if (type) PetscCall(PetscSFSetType(*newsf, type));
  (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf, 1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt           nroots, nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      /* General graph: deep-copy the leaf and root arrays (casts drop const for PETSC_COPY_VALUES, which does not modify them) */
      PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
      PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
    } else {
      /* Patterned graph: recreate from the stored pattern and layout */
      PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream = sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(PETSC_SUCCESS);
}
776 
777 /*@C
778   PetscSFGetGraph - Get the graph specifying a parallel star forest
779 
780   Not Collective
781 
782   Input Parameter:
783 . sf - star forest
784 
785   Output Parameters:
+ nroots  - number of root vertices on the current process (these are possible targets for other MPI processes to attach leaves)
787 . nleaves - number of leaf vertices on the current process, each of these references a root on any MPI process
788 . ilocal  - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
789 - iremote - remote locations of root vertices for each leaf on the current process
790 
791   Level: intermediate
792 
793   Notes:
794   We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet
795 
796   The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()`
797 
798   Fortran Note:
799   Use `PetscSFRestoreGraph()` when access to the arrays is no longer needed
800 
801 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
802 @*/
803 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt *ilocal[], const PetscSFNode *iremote[])
804 {
805   PetscFunctionBegin;
806   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
807   if (sf->ops->GetGraph) {
808     PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
809   } else {
810     if (nroots) *nroots = sf->nroots;
811     if (nleaves) *nleaves = sf->nleaves;
812     if (ilocal) *ilocal = sf->mine;
813     if (iremote) *iremote = sf->remote;
814   }
815   PetscFunctionReturn(PETSC_SUCCESS);
816 }
817 
818 /*@
819   PetscSFGetLeafRange - Get the active leaf ranges
820 
821   Not Collective
822 
823   Input Parameter:
824 . sf - star forest
825 
826   Output Parameters:
827 + minleaf - minimum active leaf on this MPI process. Returns 0 if there are no leaves.
828 - maxleaf - maximum active leaf on this MPI process. Returns -1 if there are no leaves.
829 
830   Level: developer
831 
832 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
833 @*/
834 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
835 {
836   PetscFunctionBegin;
837   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
838   PetscSFCheckGraphSet(sf, 1);
839   if (minleaf) *minleaf = sf->minleaf;
840   if (maxleaf) *maxleaf = sf->maxleaf;
841   PetscFunctionReturn(PETSC_SUCCESS);
842 }
843 
844 /*@
845   PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
846 
847   Collective
848 
849   Input Parameters:
850 + A    - the star forest
851 . obj  - Optional object that provides the prefix for the option names
852 - name - command line option
853 
854   Level: intermediate
855 
856   Note:
857   See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
858 
859 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
860 @*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
  /* Defer to the generic object machinery, which parses the option and dispatches to PetscSFView() */
  PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
  PetscFunctionReturn(PETSC_SUCCESS);
}
868 
869 /*@
870   PetscSFView - view a star forest
871 
872   Collective
873 
874   Input Parameters:
875 + sf     - star forest
876 - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
877 
878   Level: beginner
879 
880 .seealso: [](sec_petscsf), `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
881 @*/
PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
{
  PetscBool         isascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Default to stdout on the SF's communicator when no viewer was supplied */
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  PetscCheckSameComm(sf, 1, viewer, 2);
  /* Setup is needed so rank/offset arrays exist for the detailed output below */
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* Nothing to print per-edge until the graph is set; note the early return pops the tab first */
      if (!sf->graphset) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      /* Synchronized printing keeps per-rank output ordered by rank */
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
      for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer, &format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* Print edges grouped by remote rank, in ascending rank order; sort a copy so sf->ranks is untouched */
        PetscMPIInt *tmpranks, *perm;

        PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
        PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
        for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
        for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
          PetscMPIInt i = perm[ii]; /* position of the ii-th smallest rank in the unsorted arrays */

          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
          for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
        }
        PetscCall(PetscFree2(tmpranks, perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  /* Let the implementation print type-specific information (optional method) */
  PetscTryTypeMethod(sf, View, viewer);
  PetscFunctionReturn(PETSC_SUCCESS);
}
936 
937 /*@C
938   PetscSFGetRootRanks - Get the root MPI ranks and number of vertices referenced by leaves on this process
939 
940   Not Collective
941 
942   Input Parameter:
943 . sf - star forest
944 
945   Output Parameters:
946 + nranks  - number of MPI processes referenced by local part
947 . ranks   - [`nranks`] array of MPI ranks
948 . roffset - [`nranks`+1] offset in `rmine` and `rremote` for each MPI process
949 . rmine   - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote MPI process, or `NULL`
950 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote MPI process, or `NULL`
951 
952   Level: developer
953 
954 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetLeafRanks()`
955 @*/
956 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[])
957 {
958   PetscFunctionBegin;
959   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
960   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
961   if (sf->ops->GetRootRanks) {
962     PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
963   } else {
964     /* The generic implementation */
965     if (nranks) *nranks = sf->nranks;
966     if (ranks) *ranks = sf->ranks;
967     if (roffset) *roffset = sf->roffset;
968     if (rmine) *rmine = sf->rmine;
969     if (rremote) *rremote = sf->rremote;
970   }
971   PetscFunctionReturn(PETSC_SUCCESS);
972 }
973 
974 /*@C
975   PetscSFGetLeafRanks - Get leaf MPI ranks referencing roots on this process
976 
977   Not Collective
978 
979   Input Parameter:
980 . sf - star forest
981 
982   Output Parameters:
983 + niranks  - number of leaf MPI processes referencing roots on this process
984 . iranks   - [`niranks`] array of MPI ranks
985 . ioffset  - [`niranks`+1] offset in `irootloc` for each MPI process
986 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf MPI process
987 
988   Level: developer
989 
990 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()`
991 @*/
992 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[])
993 {
994   PetscFunctionBegin;
995   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
996   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
997   if (sf->ops->GetLeafRanks) {
998     PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
999   } else {
1000     PetscSFType type;
1001     PetscCall(PetscSFGetType(sf, &type));
1002     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
1003   }
1004   PetscFunctionReturn(PETSC_SUCCESS);
1005 }
1006 
1007 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1008 {
1009   PetscInt i;
1010   for (i = 0; i < n; i++) {
1011     if (needle == list[i]) return PETSC_TRUE;
1012   }
1013   return PETSC_FALSE;
1014 }
1015 
1016 /*@C
1017   PetscSFSetUpRanks - Set up data structures associated with MPI ranks; this is for internal use by `PetscSF` implementations.
1018 
1019   Collective
1020 
1021   Input Parameters:
1022 + sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1023 - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
1024 
1025   Level: developer
1026 
1027 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()`
1028 @*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
{
  PetscHMapI    table;  /* maps (remote rank + 1) -> number of my leaves referencing that rank */
  PetscHashIter pos;
  PetscMPIInt   size, groupsize, *groupranks, *ranks;
  PetscInt     *rcount; /* per-rank leaf counts, parallel to ranks[] */
  PetscInt      irank, sfnrank, ranksi;
  PetscMPIInt   i, orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  /* Count, via a hash table, how many of my leaves reference each remote rank */
  PetscCall(PetscHMapICreateWithSize(10, &table));
  for (i = 0; i < sf->nleaves; i++) {
    /* Log 1-based rank (0 is the hash table's empty key) */
    PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
  }
  PetscCall(PetscHMapIGetSize(table, &sfnrank));
  PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
  /* Flatten the hash table into parallel arrays ranks[]/rcount[] (unordered at this point) */
  PetscHashIterBegin(table, pos);
  for (i = 0; i < sf->nranks; i++) {
    PetscHashIterGetKey(table, pos, ranksi);
    PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
    PetscHashIterGetVal(table, pos, rcount[i]);
    PetscHashIterNext(table, pos);
    ranks[i]--; /* Convert back to 0-based */
  }
  PetscCall(PetscHMapIDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group    group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;

    /* Translate the distinguished group's members into ranks of the SF's communicator */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
    PetscCall(PetscMalloc1(groupsize, &dgroupranks));
    PetscCall(PetscMalloc1(groupsize, &groupranks));
    for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
    for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i], groupsize, groupranks)) break;
    }
    for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscMPIInt tmprank;
      PetscInt    tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition by rank so PetscFindMPIInt() below can binary-search them */
  PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
  if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
  /* Turn the counts into CSR-style offsets; rcount[] is reset and reused as a fill cursor below */
  sf->roffset[0] = 0;
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
    sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
    rcount[i]          = 0;
  }
  /* Bucket each leaf's local (rmine) and remote (rremote) index by owning rank */
  for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
    /* short circuit: reuse the previous lookup while consecutive leaves reference the same rank */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks; try the distinguished partition first */
      PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank));
      PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
        if (irank >= 0) irank += sf->ndranks;
      }
    }
    PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount, ranks));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1125 
1126 /*@C
1127   PetscSFGetGroups - gets incoming and outgoing process groups
1128 
1129   Collective
1130 
1131   Input Parameter:
1132 . sf - star forest
1133 
1134   Output Parameters:
1135 + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1136 - outgoing - group of destination processes for outgoing edges (roots that I reference)
1137 
1138   Level: developer
1139 
1140 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1141 @*/
PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
{
  MPI_Group group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
  /* Both groups are built lazily and cached on the SF; destroyed with the SF */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt        i;
    const PetscInt *indegree;
    PetscMPIInt     rank, *outranks, *inranks, indegree0;
    PetscSFNode    *remote;
    PetscSF         bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one root here and
       one leaf per rank we reference, then the root's degree counts who references us */
    PetscCall(PetscMalloc1(sf->nranks, &remote));
    for (i = 0; i < sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
    PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
    /* Enumerate the incoming ranks: each referencing process gathers its own rank to us */
    PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
    PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
    PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks, outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing ranks are already known locally: exactly the ranks our leaves reference */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1189 
1190 /*@
1191   PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
1192 
1193   Collective
1194 
1195   Input Parameter:
1196 . sf - star forest
1197 
1198   Output Parameter:
1199 . rsf - the star forest with a single root per MPI process to perform communications
1200 
1201   Level: developer
1202 
1203 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
1204 @*/
1205 PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
1206 {
1207   PetscFunctionBegin;
1208   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1209   PetscAssertPointer(rsf, 2);
1210   if (!sf->rankssf) {
1211     PetscSFNode       *rremotes;
1212     const PetscMPIInt *ranks;
1213     PetscMPIInt        nranks;
1214 
1215     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
1216     PetscCall(PetscMalloc1(nranks, &rremotes));
1217     for (PetscInt i = 0; i < nranks; i++) {
1218       rremotes[i].rank  = ranks[i];
1219       rremotes[i].index = 0;
1220     }
1221     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
1222     PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
1223   }
1224   *rsf = sf->rankssf;
1225   PetscFunctionReturn(PETSC_SUCCESS);
1226 }
1227 
1228 /*@
1229   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
1230 
1231   Collective
1232 
1233   Input Parameter:
1234 . sf - star forest that may contain roots with 0 or with more than 1 vertex
1235 
1236   Output Parameter:
1237 . multi - star forest with split roots, such that each root has degree exactly 1 (has one leaf)
1238 
1239   Level: developer
1240 
1241   Note:
1242   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
1243   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1244   edge, it is a candidate for future optimization that might involve its removal.
1245 
1246 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1247 @*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(multi, 2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    *multi           = sf->multi;
    sf->multi->multi = sf->multi; /* the multi-SF is its own multi-SF */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
    /* indegree[r] = number of leaves attached to root r; the multi-SF splits each root
       into indegree[r] consecutive roots so every new root has exactly one leaf */
    PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
    PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
    /* inoffset[] = prefix sum of degrees: starting position of each root's slice of multi-roots */
    inoffset[0] = 0;
    for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
    for (i = 0; i < maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands each leaf a distinct slot (outoffset) within its root's slice */
    PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
      for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
    }
    /* Same leaf set as sf, but each leaf targets its private multi-root slot */
    PetscCall(PetscMalloc1(sf->nleaves, &remote));
    for (i = 0; i < sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt  rank;
      PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
      /* Gather at each multi-root the rank of the leaf that owns it */
      for (i = 0; i < maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i = 0; i < sf->nroots; i++) {
        PetscInt j;
        for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
        for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its rank-sorted slot, then rebuild the multi graph with those slots */
      PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves, &newremote));
      for (i = 0; i < sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
    }
    PetscCall(PetscFree3(inoffset, outones, outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1316 
1317 /*@C
1318   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
1319 
1320   Collective
1321 
1322   Input Parameters:
1323 + sf        - original star forest
1324 . nselected - number of selected roots on this MPI process
1325 - selected  - indices of the selected roots on this MPI process
1326 
1327   Output Parameter:
1328 . esf - new star forest
1329 
1330   Level: advanced
1331 
1332   Note:
1333   To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
  be done by calling `PetscSFGetGraph()`.
1335 
1336 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1337 @*/
PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *esf)
{
  PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata, *leafdata, *leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (nselected) PetscAssertPointer(selected, 3);
  PetscAssertPointer(esf, 4);

  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
    PetscBool dups;
    PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
    PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
    for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
  }

  /* Prefer an implementation-specific routine when available (optional method) */
  if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
  else {
    /* A generic version of creating embedded sf */
    PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
    maxlocal = maxleaf - minleaf + 1;
    PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
    /* leafdata is leafmem shifted so it can be indexed by the (possibly nonzero-based) leaf index */
    leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
    /* Tag selected roots and bcast to leaves */
    for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
    PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));

    /* Build esf with leaves that are still connected */
    esf_nleaves = 0;
    for (i = 0; i < nleaves; i++) {
      j = ilocal ? ilocal[i] : i;
      /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
         with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
      */
      esf_nleaves += (leafdata[j] ? 1 : 0);
    }
    PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
    PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
    /* Second pass: copy the surviving leaves, keeping their original local and remote indices */
    for (i = n = 0; i < nleaves; i++) {
      j = ilocal ? ilocal[i] : i;
      if (leafdata[j]) {
        new_ilocal[n]        = j;
        new_iremote[n].rank  = iremote[i].rank;
        new_iremote[n].index = iremote[i].index;
        ++n;
      }
    }
    PetscCall(PetscSFCreate(comm, esf));
    PetscCall(PetscSFSetFromOptions(*esf));
    PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
    PetscCall(PetscFree2(rootdata, leafmem));
  }
  PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1405 
1406 /*@C
1407   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
1408 
1409   Collective
1410 
1411   Input Parameters:
1412 + sf        - original star forest
1413 . nselected - number of selected leaves on this MPI process
1414 - selected  - indices of the selected leaves on this MPI process
1415 
1416   Output Parameter:
1417 . newsf - new star forest
1418 
1419   Level: advanced
1420 
1421 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1422 @*/
1423 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *newsf)
1424 {
1425   const PetscSFNode *iremote;
1426   PetscSFNode       *new_iremote;
1427   const PetscInt    *ilocal;
1428   PetscInt           i, nroots, *leaves, *new_ilocal;
1429   MPI_Comm           comm;
1430 
1431   PetscFunctionBegin;
1432   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1433   PetscSFCheckGraphSet(sf, 1);
1434   if (nselected) PetscAssertPointer(selected, 3);
1435   PetscAssertPointer(newsf, 4);
1436 
1437   /* Uniq selected[] and put results in leaves[] */
1438   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1439   PetscCall(PetscMalloc1(nselected, &leaves));
1440   PetscCall(PetscArraycpy(leaves, selected, nselected));
1441   PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
1442   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1443 
1444   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1445   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1446   else {
1447     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1448     PetscCall(PetscMalloc1(nselected, &new_ilocal));
1449     PetscCall(PetscMalloc1(nselected, &new_iremote));
1450     for (i = 0; i < nselected; ++i) {
1451       const PetscInt l     = leaves[i];
1452       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1453       new_iremote[i].rank  = iremote[l].rank;
1454       new_iremote[i].index = iremote[l].index;
1455     }
1456     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1457     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1458   }
1459   PetscCall(PetscFree(leaves));
1460   PetscFunctionReturn(PETSC_SUCCESS);
1461 }
1462 
1463 /*@C
1464   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
1465 
1466   Collective
1467 
1468   Input Parameters:
1469 + sf       - star forest on which to communicate
1470 . unit     - data type associated with each node
1471 . rootdata - buffer to broadcast
1472 - op       - operation to use for reduction
1473 
1474   Output Parameter:
1475 . leafdata - buffer to be reduced with values from each leaf's respective root
1476 
1477   Level: intermediate
1478 
1479   Note:
1480   When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers
1481   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the memory type, you should
1482   use `PetscSFBcastWithMemTypeBegin()` instead.
1483 
1484 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1485 @*/
1486 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1487 {
1488   PetscMemType rootmtype, leafmtype;
1489 
1490   PetscFunctionBegin;
1491   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1492   PetscCall(PetscSFSetUp(sf));
1493   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1494   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1495   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1496   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1497   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1498   PetscFunctionReturn(PETSC_SUCCESS);
1499 }
1500 
/*@C
  PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
  to `PetscSFBcastEnd()`

  Collective

  Input Parameters:
+ sf        - star forest on which to communicate
. unit      - data type associated with each node
. rootmtype - memory type of `rootdata`
. rootdata  - buffer to broadcast
. leafmtype - memory type of `leafdata`
- op        - operation to use for reduction

  Output Parameter:
. leafdata - buffer to be reduced with values from each leaf's respective root

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
@*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf)); /* make sure the communication pattern exists before starting the bcast */
  /* NOTE(review): sf->vscat.logging presumably means an enclosing VecScatter does its own event logging, so SF-level logging is skipped */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  /* Caller-supplied memory types avoid the PetscGetMemType() query done by PetscSFBcastBegin() */
  PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1532 
/*@C
  PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`

  Collective

  Input Parameters:
+ sf       - star forest
. unit     - data type
. rootdata - buffer to broadcast
- op       - operation to use for reduction

  Output Parameter:
. leafdata - buffer to be reduced with values from each leaf's respective root

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
@*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* NOTE(review): sf->vscat.logging presumably indicates an enclosing VecScatter logs events itself */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
  /* Complete the communication started by the matching *Begin call */
  PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1560 
1561 /*@C
1562   PetscSFReduceBegin - begin reduction (communication) of `leafdata` into `rootdata`, to be completed with call to `PetscSFReduceEnd()`
1563 
1564   Collective
1565 
1566   Input Parameters:
1567 + sf       - star forest
1568 . unit     - data type
1569 . leafdata - values to reduce (communicate)
1570 - op       - reduction operation
1571 
1572   Output Parameter:
1573 . rootdata - result of reduction of values (`leafdata`) from all leaves to each root
1574 
1575   Level: intermediate
1576 
1577   Note:
1578   When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers
1579   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the memory type, you should
1580   use `PetscSFReduceWithMemTypeBegin()` instead.
1581 
1582 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`, `MPI_Datatype`
1583 @*/
1584 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1585 {
1586   PetscMemType rootmtype, leafmtype;
1587 
1588   PetscFunctionBegin;
1589   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1590   PetscCall(PetscSFSetUp(sf));
1591   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1592   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1593   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1594   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1595   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1596   PetscFunctionReturn(PETSC_SUCCESS);
1597 }
1598 
1599 /*@C
1600   PetscSFReduceWithMemTypeBegin - begin reduction of `leafdata` into `rootdata` with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1601 
1602   Collective
1603 
1604   Input Parameters:
1605 + sf        - star forest
1606 . unit      - data type
1607 . leafmtype - memory type of `leafdata`
1608 . leafdata  - values to reduce
1609 . rootmtype - memory type of `rootdata`
1610 - op        - reduction operation
1611 
1612   Output Parameter:
1613 . rootdata - result of reduction of values from all leaves of each root
1614 
1615   Level: intermediate
1616 
1617 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1618 @*/
1619 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1620 {
1621   PetscFunctionBegin;
1622   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1623   PetscCall(PetscSFSetUp(sf));
1624   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1625   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1626   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1627   PetscFunctionReturn(PETSC_SUCCESS);
1628 }
1629 
/*@C
  PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`

  Collective

  Input Parameters:
+ sf       - star forest
. unit     - data type
. leafdata - values to reduce
- op       - reduction operation

  Output Parameter:
. rootdata - result of reduction of values from all leaves of each root

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* NOTE(review): sf->vscat.logging presumably indicates an enclosing VecScatter logs events itself */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
  /* Complete the communication started by the matching *Begin call */
  PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1657 
1658 /*@C
1659   PetscSFFetchAndOpBegin - begin operation that fetches values from `rootdata` and updates it atomically by applying operation using `leafdata`,
1660   to be completed with `PetscSFFetchAndOpEnd()`
1661 
1662   Collective
1663 
1664   Input Parameters:
1665 + sf       - star forest
1666 . unit     - data type
1667 . leafdata - leaf values to use in reduction
1668 - op       - operation to use for reduction
1669 
1670   Output Parameters:
1671 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1672 - leafupdate - state at each leaf's respective root immediately prior to atomic update
1673 
1674   Level: advanced
1675 
1676   Note:
1677   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1678   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1679   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1680   integers.
1681 
1682 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1683 @*/
1684 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1685 {
1686   PetscMemType rootmtype, leafmtype, leafupdatemtype;
1687 
1688   PetscFunctionBegin;
1689   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1690   PetscCall(PetscSFSetUp(sf));
1691   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1692   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1693   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1694   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
1695   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1696   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1697   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1698   PetscFunctionReturn(PETSC_SUCCESS);
1699 }
1700 
/*@C
  PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
  applying operation using `leafdata`, to be completed with `PetscSFFetchAndOpEnd()`

  Collective

  Input Parameters:
+ sf              - star forest
. unit            - data type
. rootmtype       - memory type of `rootdata`
. leafmtype       - memory type of `leafdata`
. leafdata        - leaf values to use in reduction
. leafupdatemtype - memory type of `leafupdate`
- op              - operation to use for reduction

  Output Parameters:
+ rootdata   - root values to be updated, input state is seen by first process to perform an update
- leafupdate - state at each leaf's respective root immediately prior to atomic update

  Level: advanced

  Note:
  See `PetscSFFetchAndOpBegin()` for more details.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf)); /* make sure the communication pattern exists before starting */
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  /* leafdata and leafupdate must be in the same memory space; implementations do not support mixing them */
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1738 
/*@C
  PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
  to fetch values from roots and update atomically by applying operation using `leafdata`

  Collective

  Input Parameters:
+ sf       - star forest
. unit     - data type
. leafdata - leaf values to use in reduction
- op       - operation to use for reduction

  Output Parameters:
+ rootdata   - root values to be updated, input state is seen by first process to perform an update
- leafupdate - state at each leaf's respective root immediately prior to atomic update

  Level: advanced

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  /* Complete the fetch-and-op communication started by the matching *Begin call */
  PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1768 
/*@C
  PetscSFComputeDegreeBegin - begin computation of the degree of each root vertex, to be completed with `PetscSFComputeDegreeEnd()`

  Collective

  Input Parameter:
. sf - star forest

  Output Parameter:
. degree - degree (the number of leaves) of each root vertex

  Level: advanced

  Note:
  The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree non-NULL means a previous Begin has not been matched by an End yet */
    PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf - sf->minleaf + 1; /* size of the contiguous leaf index range on this process */
    PetscCall(PetscMalloc1(nroots, &sf->degree));
    PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i = 0; i < nroots; i++) sf->degree[i] = 0;
    for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
    /* Reducing the constant 1 from every leaf with MPI_SUM counts the leaves of each root.
       degreetmp[] is indexed relative to minleaf, so shift the pointer by -minleaf to present
       a buffer addressable by absolute leaf index. */
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
  }
  /* The degrees are not available until PetscSFComputeDegreeEnd() */
  *degree = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1806 
/*@C
  PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`

  Collective

  Input Parameter:
. sf - star forest

  Output Parameter:
. degree - degree of each root vertex

  Level: developer

  Note:
  The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Finish the reduction started in PetscSFComputeDegreeBegin(); same -minleaf pointer shift as there */
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    /* cache the result; subsequent Begin/End pairs return the stored sf->degree without communication */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1840 
1841 /*@C
1842   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
1843   Each multi-root is assigned index of the corresponding original root.
1844 
1845   Collective
1846 
1847   Input Parameters:
1848 + sf     - star forest
1849 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()` and `PetscSFComputeDegreeEnd()`
1850 
1851   Output Parameters:
1852 + nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
1853 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1854 
1855   Level: developer
1856 
1857   Note:
1858   The returned array `multiRootsOrigNumbering` should be destroyed with `PetscFree()` when no longer needed.
1859 
1860 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1861 @*/
1862 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1863 {
1864   PetscSF  msf;
1865   PetscInt k = 0, nroots, nmroots;
1866 
1867   PetscFunctionBegin;
1868   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1869   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1870   if (nroots) PetscAssertPointer(degree, 2);
1871   if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
1872   PetscAssertPointer(multiRootsOrigNumbering, 4);
1873   PetscCall(PetscSFGetMultiSF(sf, &msf));
1874   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1875   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1876   for (PetscInt i = 0; i < nroots; i++) {
1877     if (!degree[i]) continue;
1878     for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1879   }
1880   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
1881   if (nMultiRoots) *nMultiRoots = nmroots;
1882   PetscFunctionReturn(PETSC_SUCCESS);
1883 }
1884 
1885 /*@C
1886   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
1887 
1888   Collective
1889 
1890   Input Parameters:
1891 + sf       - star forest
1892 . unit     - data type
1893 - leafdata - leaf data to gather to roots
1894 
1895   Output Parameter:
1896 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has)
1897 
1898   Level: intermediate
1899 
1900 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1901 @*/
1902 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1903 {
1904   PetscSF multi = NULL;
1905 
1906   PetscFunctionBegin;
1907   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1908   PetscCall(PetscSFSetUp(sf));
1909   PetscCall(PetscSFGetMultiSF(sf, &multi));
1910   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1911   PetscFunctionReturn(PETSC_SUCCESS);
1912 }
1913 
1914 /*@C
1915   PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
1916 
1917   Collective
1918 
1919   Input Parameters:
1920 + sf       - star forest
1921 . unit     - data type
1922 - leafdata - leaf data to gather to roots
1923 
1924   Output Parameter:
1925 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has)
1926 
1927   Level: intermediate
1928 
1929 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1930 @*/
1931 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1932 {
1933   PetscSF multi = NULL;
1934 
1935   PetscFunctionBegin;
1936   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1937   PetscCall(PetscSFGetMultiSF(sf, &multi));
1938   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1939   PetscFunctionReturn(PETSC_SUCCESS);
1940 }
1941 
1942 /*@C
1943   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
1944 
1945   Collective
1946 
1947   Input Parameters:
1948 + sf            - star forest
1949 . unit          - data type
1950 - multirootdata - root buffer to send to each leaf, one unit of data is provided to each leaf thus the amount of space per root is equal to its degree (which is the number of leaves that root has)
1951 
1952   Output Parameter:
1953 . leafdata - leaf data to be update with personal data from each respective root
1954 
1955   Level: intermediate
1956 
1957 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1958 @*/
1959 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1960 {
1961   PetscSF multi = NULL;
1962 
1963   PetscFunctionBegin;
1964   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1965   PetscCall(PetscSFSetUp(sf));
1966   PetscCall(PetscSFGetMultiSF(sf, &multi));
1967   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1968   PetscFunctionReturn(PETSC_SUCCESS);
1969 }
1970 
1971 /*@C
1972   PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
1973 
1974   Collective
1975 
1976   Input Parameters:
1977 + sf            - star forest
1978 . unit          - data type
1979 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1980 
1981   Output Parameter:
1982 . leafdata - leaf data to be update with personal data from each respective root
1983 
1984   Level: intermediate
1985 
1986 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1987 @*/
1988 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1989 {
1990   PetscSF multi = NULL;
1991 
1992   PetscFunctionBegin;
1993   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1994   PetscCall(PetscSFGetMultiSF(sf, &multi));
1995   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1996   PetscFunctionReturn(PETSC_SUCCESS);
1997 }
1998 
1999 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
2000 {
2001   PetscInt        i, n, nleaves;
2002   const PetscInt *ilocal = NULL;
2003   PetscHSetI      seen;
2004 
2005   PetscFunctionBegin;
2006   if (PetscDefined(USE_DEBUG)) {
2007     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
2008     PetscCall(PetscHSetICreate(&seen));
2009     for (i = 0; i < nleaves; i++) {
2010       const PetscInt leaf = ilocal ? ilocal[i] : i;
2011       PetscCall(PetscHSetIAdd(seen, leaf));
2012     }
2013     PetscCall(PetscHSetIGetSize(seen, &n));
2014     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
2015     PetscCall(PetscHSetIDestroy(&seen));
2016   }
2017   PetscFunctionReturn(PETSC_SUCCESS);
2018 }
2019 
/*@
  PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first `PetscSF`
- sfB - The second `PetscSF`

  Output Parameter:
. sfBA - The composite `PetscSF`

  Level: developer

  Notes:
  Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
  a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
  `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfB`, on connected nodes.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
  const PetscInt    *localPointsA, *localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscAssertPointer(sfBA, 3);
  /* true-star-forest requirement: every leaf appears at most once (debug-only checks) */
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
  /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
     numRootsB; otherwise, garbage will be broadcasted.
     Example (comm size = 1):
     sfA: 0 <- (0, 0)
     sfB: 100 <- (0, 0)
          101 <- (0, 1)
     Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
     of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
     receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
     remotePointsA; if not recast, point 101 would receive a garbage value.             */
  PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
  /* (-1, -1) marks roots of sfB that no leaf of sfA feeds */
  for (i = 0; i < numRootsB; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  /* place sfA's remote (root) info at the position of the sfB root its leaf coincides with */
  for (i = 0; i < numLeavesA; i++) {
    PetscInt localp = localPointsA ? localPointsA[i] : i;

    if (localp >= numRootsB) continue; /* leaf of sfA outside sfB's root space cannot be composed */
    reorderedRemotePointsA[localp] = remotePointsA[i];
  }
  remotePointsA = reorderedRemotePointsA;
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
  /* pre-fill with the (-1, -1) sentinel so leaves of sfB not reached by the bcast stay marked unconnected */
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    leafdataB[i].rank  = -1;
    leafdataB[i].index = -1;
  }
  /* push sfA's root info through sfB: each leaf of sfB learns which root of sfA (if any) reaches it */
  PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* denseB: sfB's leaves are contiguous from 0 AND every one of them is connected through sfA;
     then leafdataB can serve directly as the composed remote array with no ilocal */
  denseB = (PetscBool)!localPointsB;
  for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership passes to the new SF below */
  } else {
    /* compact: keep only the connected leaves, recording their local indices explicitly */
    PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
    for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l - minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
      localPointsBA[numLeavesBA]  = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2122 
2123 /*@
2124   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
2125 
2126   Input Parameters:
2127 + sfA - The first `PetscSF`
2128 - sfB - The second `PetscSF`
2129 
2130   Output Parameter:
2131 . sfBA - The composite `PetscSF`.
2132 
2133   Level: developer
2134 
2135   Notes:
2136   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2137   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
2138   second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.
2139 
2140   `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
2141   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2142   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()`
2143   on `sfA`, then
2144   a `PetscSFReduceBegin()` and `PetscSFReduceEnd()` on `sfB`, on connected roots.
2145 
2146 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2147 @*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA, *localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
  MPI_Op             op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscAssertPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  /* Lay out sfA's remote roots indexed by leaf number over sfB's leaf range, so they can be
     pushed through sfB below; (rank, index) = (-1, -1) marks "no sfA edge at this leaf" */
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i = 0; i < numLeavesA; i++) {
      /* a leaf of sfA outside sfB's leaf range cannot be on any path through sfB */
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* localPointsA == NULL means contiguous leaf storage: leaf i has index i */
    for (i = 0; i < numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  /* One slot per root of sfB; roots that receive nothing keep the (-1, -1) sentinel */
  PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
  PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
  for (i = 0; i < numRootsB; i++) {
    remotePointsBA[i].rank  = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce each sfA root reference from sfB's leaves back onto sfB's roots */
  PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
  PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* Compact away unconnected roots (rank still -1) to form the final leaf list of sfBA */
  for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA]        = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2226 
2227 /*
2228   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
2229 
2230   Input Parameter:
2231 . sf - The global `PetscSF`
2232 
2233   Output Parameter:
2234 . out - The local `PetscSF`
2235 
2236 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`
2237  */
2238 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2239 {
2240   MPI_Comm           comm;
2241   PetscMPIInt        myrank;
2242   const PetscInt    *ilocal;
2243   const PetscSFNode *iremote;
2244   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
2245   PetscSFNode       *liremote;
2246   PetscSF            lsf;
2247 
2248   PetscFunctionBegin;
2249   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2250   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2251   else {
2252     PetscMPIInt irank;
2253 
2254     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2255     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
2256     PetscCallMPI(MPI_Comm_rank(comm, &myrank));
2257 
2258     /* Find out local edges and build a local SF */
2259     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
2260     for (i = lnleaves = 0; i < nleaves; i++) {
2261       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2262       if (irank == myrank) lnleaves++;
2263     }
2264     PetscCall(PetscMalloc1(lnleaves, &lilocal));
2265     PetscCall(PetscMalloc1(lnleaves, &liremote));
2266 
2267     for (i = j = 0; i < nleaves; i++) {
2268       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2269       if (irank == myrank) {
2270         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2271         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
2272         liremote[j].index = iremote[i].index;
2273         j++;
2274       }
2275     }
2276     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
2277     PetscCall(PetscSFSetFromOptions(lsf));
2278     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
2279     PetscCall(PetscSFSetUp(lsf));
2280     *out = lsf;
2281   }
2282   PetscFunctionReturn(PETSC_SUCCESS);
2283 }
2284 
2285 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2286 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2287 {
2288   PetscMemType rootmtype, leafmtype;
2289 
2290   PetscFunctionBegin;
2291   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2292   PetscCall(PetscSFSetUp(sf));
2293   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2294   PetscCall(PetscGetMemType(rootdata, &rootmtype));
2295   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2296   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2297   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2298   PetscFunctionReturn(PETSC_SUCCESS);
2299 }
2300 
2301 /*@
2302   PetscSFConcatenate - concatenate multiple `PetscSF` into a new `PetscSF`
2303 
2304   Input Parameters:
2305 + comm        - the communicator
2306 . nsfs        - the number of input `PetscSF`
2307 . sfs         - the array of input `PetscSF`
2308 . rootMode    - the root mode specifying how roots are handled
2309 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2310 
2311   Output Parameter:
2312 . newsf - The resulting `PetscSF`
2313 
2314   Level: advanced
2315 
2316   Notes:
2317   The communicator of all `PetscSF`s in `sfs` must be comm.
2318 
2319   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
2320 
2321   The offsets in `leafOffsets` are added to the original leaf indices.
2322 
2323   If all input `PetscSF`s use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
2324   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
2325 
2326   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2327   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2328 
2329   All root modes retain the essential connectivity condition.
2330 
2331   If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2332 
2333   Parameter `rootMode` controls how the input root spaces are combined.
2334   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2335   and is also the same in the output `PetscSF`.
2336 
2337   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2338   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2339   roots of sfs[0], sfs[1], sfs[2], ... are joined on each MPI process separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
2340   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2341   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2342   the original root MPI processes are ignored.
2343   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2344   the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each MPI process
2345   to keep the load balancing.
2346   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different MPI processes.
2347 
2348   Example:
2349   We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
2350 .vb
2351   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
2352   for m in {local,global,shared}; do
2353     mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
2354   done
2355 .ve
2356   we generate two identical `PetscSF`s sf_0 and sf_1,
2357 .vb
2358   PetscSF Object: sf_0 2 MPI processes
2359     type: basic
2360     rank #leaves #roots
2361     [ 0]       4      2
2362     [ 1]       4      2
2363     leaves      roots       roots in global numbering
2364     ( 0,  0) <- ( 0,  0)  =   0
2365     ( 0,  1) <- ( 0,  1)  =   1
2366     ( 0,  2) <- ( 1,  0)  =   2
2367     ( 0,  3) <- ( 1,  1)  =   3
2368     ( 1,  0) <- ( 0,  0)  =   0
2369     ( 1,  1) <- ( 0,  1)  =   1
2370     ( 1,  2) <- ( 1,  0)  =   2
2371     ( 1,  3) <- ( 1,  1)  =   3
2372 .ve
2373   and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
2374 .vb
2375   rootMode = local:
2376   PetscSF Object: result_sf 2 MPI processes
2377     type: basic
2378     rank #leaves #roots
2379     [ 0]       8      4
2380     [ 1]       8      4
2381     leaves      roots       roots in global numbering
2382     ( 0,  0) <- ( 0,  0)  =   0
2383     ( 0,  1) <- ( 0,  1)  =   1
2384     ( 0,  2) <- ( 1,  0)  =   4
2385     ( 0,  3) <- ( 1,  1)  =   5
2386     ( 0,  4) <- ( 0,  2)  =   2
2387     ( 0,  5) <- ( 0,  3)  =   3
2388     ( 0,  6) <- ( 1,  2)  =   6
2389     ( 0,  7) <- ( 1,  3)  =   7
2390     ( 1,  0) <- ( 0,  0)  =   0
2391     ( 1,  1) <- ( 0,  1)  =   1
2392     ( 1,  2) <- ( 1,  0)  =   4
2393     ( 1,  3) <- ( 1,  1)  =   5
2394     ( 1,  4) <- ( 0,  2)  =   2
2395     ( 1,  5) <- ( 0,  3)  =   3
2396     ( 1,  6) <- ( 1,  2)  =   6
2397     ( 1,  7) <- ( 1,  3)  =   7
2398 
2399   rootMode = global:
2400   PetscSF Object: result_sf 2 MPI processes
2401     type: basic
2402     rank #leaves #roots
2403     [ 0]       8      4
2404     [ 1]       8      4
2405     leaves      roots       roots in global numbering
2406     ( 0,  0) <- ( 0,  0)  =   0
2407     ( 0,  1) <- ( 0,  1)  =   1
2408     ( 0,  2) <- ( 0,  2)  =   2
2409     ( 0,  3) <- ( 0,  3)  =   3
2410     ( 0,  4) <- ( 1,  0)  =   4
2411     ( 0,  5) <- ( 1,  1)  =   5
2412     ( 0,  6) <- ( 1,  2)  =   6
2413     ( 0,  7) <- ( 1,  3)  =   7
2414     ( 1,  0) <- ( 0,  0)  =   0
2415     ( 1,  1) <- ( 0,  1)  =   1
2416     ( 1,  2) <- ( 0,  2)  =   2
2417     ( 1,  3) <- ( 0,  3)  =   3
2418     ( 1,  4) <- ( 1,  0)  =   4
2419     ( 1,  5) <- ( 1,  1)  =   5
2420     ( 1,  6) <- ( 1,  2)  =   6
2421     ( 1,  7) <- ( 1,  3)  =   7
2422 
2423   rootMode = shared:
2424   PetscSF Object: result_sf 2 MPI processes
2425     type: basic
2426     rank #leaves #roots
2427     [ 0]       8      2
2428     [ 1]       8      2
2429     leaves      roots       roots in global numbering
2430     ( 0,  0) <- ( 0,  0)  =   0
2431     ( 0,  1) <- ( 0,  1)  =   1
2432     ( 0,  2) <- ( 1,  0)  =   2
2433     ( 0,  3) <- ( 1,  1)  =   3
2434     ( 0,  4) <- ( 0,  0)  =   0
2435     ( 0,  5) <- ( 0,  1)  =   1
2436     ( 0,  6) <- ( 1,  0)  =   2
2437     ( 0,  7) <- ( 1,  1)  =   3
2438     ( 1,  0) <- ( 0,  0)  =   0
2439     ( 1,  1) <- ( 0,  1)  =   1
2440     ( 1,  2) <- ( 1,  0)  =   2
2441     ( 1,  3) <- ( 1,  1)  =   3
2442     ( 1,  4) <- ( 0,  0)  =   0
2443     ( 1,  5) <- ( 0,  1)  =   1
2444     ( 1,  6) <- ( 1,  0)  =   2
2445     ( 1,  7) <- ( 1,  1)  =   3
2446 .ve
2447 
2448 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2449 @*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt     i, s, nLeaves, nRoots;
  PetscInt    *leafArrayOffsets; /* prefix sums of input leaf counts: position of each SF in the concatenated leaf arrays */
  PetscInt    *ilocal_new;
  PetscSFNode *iremote_new;
  PetscBool    all_ilocal_null = PETSC_FALSE;
  PetscLayout  glayout         = NULL; /* output root layout, used only for PETSCSF_CONCATENATE_ROOTMODE_GLOBAL */
  PetscInt    *gremote         = NULL; /* global root number per concatenated leaf, only for the GLOBAL root mode */
  PetscMPIInt  rank, size;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    PetscCall(PetscSFCreate(comm, &dummy));
    PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
    PetscAssertPointer(sfs, 3);
    for (i = 0; i < nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
      PetscCheckSameComm(dummy, 1, sfs[i], 3);
    }
    PetscValidLogicalCollectiveEnum(dummy, rootMode, 4);
    if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
    PetscAssertPointer(newsf, 6);
    PetscCall(PetscSFDestroy(&dummy));
  }
  /* Trivial case: no input SFs yields an empty SF */
  if (!nsfs) {
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* NOTE(review): 'size' is queried but not referenced anywhere below in this routine -- confirm and consider removing */
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Calculate leaf array offsets */
  PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
  leafArrayOffsets[0] = 0;
  for (s = 0; s < nsfs; s++) {
    PetscInt nl;

    PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
    leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* Calculate number of roots */
  switch (rootMode) {
  case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
    /* All inputs address the same root space; debug builds verify the sizes agree */
    PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
    if (PetscDefined(USE_DEBUG)) {
      for (s = 1; s < nsfs; s++) {
        PetscInt nr;

        PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
      }
    }
  } break;
  case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
    /* Calculate also global layout in this case */
    PetscInt    *nls;
    PetscLayout *lts;
    PetscInt   **inds;
    PetscInt     j;
    PetscInt     rootOffset = 0;

    /* The output local root count is the sum of the inputs' local root counts (keeps load balance) */
    PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
    PetscCall(PetscLayoutCreate(comm, &glayout));
    glayout->bs = 1;
    glayout->n  = 0;
    glayout->N  = 0;
    for (s = 0; s < nsfs; s++) {
      PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
      glayout->n += lts[s]->n;
      glayout->N += lts[s]->N;
    }
    PetscCall(PetscLayoutSetUp(glayout));
    /* Global root number of each concatenated leaf: each SF's global root indices are
       shifted by the total global root count of all preceding SFs */
    PetscCall(PetscMalloc1(nLeaves, &gremote));
    for (s = 0, j = 0; s < nsfs; s++) {
      for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
      rootOffset += lts[s]->N;
      PetscCall(PetscLayoutDestroy(&lts[s]));
      PetscCall(PetscFree(inds[s]));
    }
    PetscCall(PetscFree3(lts, nls, inds));
    nRoots = glayout->N;
  } break;
  case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
    /* nRoots calculated later in this case */
    break;
  default:
    SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
  }

  /* leafOffsets may be NULL only if every input SF uses contiguous leaf storage (ilocal == NULL) */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s = 0; s < nsfs; s++) {
      const PetscInt *ilocal;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL; /* stays NULL (contiguous storage) when all inputs are contiguous and leafOffsets == NULL */
  if (!all_ilocal_null) {
    PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
    for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
    for (s = 0; s < nsfs; s++) {
      const PetscInt *ilocal;
      PetscInt       *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
      PetscInt        i, nleaves_l;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
      /* shift each input leaf index by the user-provided offset for that SF */
      for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots */
  if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
    PetscInt rootOffset = 0;

    PetscCall(PetscMalloc1(nLeaves, &iremote_new));
    for (i = 0; i < nLeaves; i++) {
      iremote_new[i].rank  = -1;
      iremote_new[i].index = -1;
    }
    for (s = 0; s < nsfs; s++) {
      PetscInt           i, nl, nr;
      PetscSF            tmp_sf;
      const PetscSFNode *iremote;
      PetscSFNode       *tmp_rootdata;
      PetscSFNode       *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);

      PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
      PetscCall(PetscSFCreate(comm, &tmp_sf));
      /* create helper SF with contiguous leaves */
      PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
      PetscCall(PetscSFSetUp(tmp_sf));
      PetscCall(PetscMalloc1(nr, &tmp_rootdata));
      if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
        /* LOCAL: roots of successive SFs are stacked per process, so shift indices by the running offset */
        for (i = 0; i < nr; i++) {
          tmp_rootdata[i].index = i + rootOffset;
          tmp_rootdata[i].rank  = rank;
        }
        rootOffset += nr;
      } else {
        /* SHARED: all inputs address the same root space, so keep the original indices */
        for (i = 0; i < nr; i++) {
          tmp_rootdata[i].index = i;
          tmp_rootdata[i].rank  = rank;
        }
      }
      /* broadcast each root's new (rank, index) to its leaves, writing directly into iremote_new */
      PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
      PetscCall(PetscSFDestroy(&tmp_sf));
      PetscCall(PetscFree(tmp_rootdata));
    }
    if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above

    /* Build the new SF */
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
  } else {
    /* Build the new SF */
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
  }
  PetscCall(PetscSFSetUp(*newsf));
  PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
  PetscCall(PetscLayoutDestroy(&glayout));
  PetscCall(PetscFree(gremote));
  PetscCall(PetscFree(leafArrayOffsets));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2630 
2631 /*@
2632   PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated `PetscSF` communications.
2633 
2634   Collective
2635 
2636   Input Parameters:
2637 + sf       - star forest
2638 . unit     - the data type contained within `rootdata` and `leafdata`
2639 . rootdata - root data that will be used for multiple `PetscSF` communications
2640 - leafdata - leaf data that will be used for multiple `PetscSF` communications
2641 
2642   Level: advanced
2643 
2644   Notes:
2645   Implementations of `PetscSF` can make optimizations
2646   for repeated communication using the same memory regions, but these optimizations
2647   can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed.
2648   The intended pattern is
2649 
2650 .vb
2651   PetscMalloc2(nroots, &rootdata, nleaves, &leafdata);
2652 
2653   PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
2654   // repeated use of rootdata and leafdata will now be optimized
2655 
2656   PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE);
2657   PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE);
2658   // ...
2659   PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM);
2660   PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM);
2661   // ... (other communications)
2662 
2663   // rootdata and leafdata must be deregistered before freeing
2664   // skipping this can lead to undefined behavior including
2665   // deadlocks
2666   PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
2667 
2668   // it is now safe to free rootdata and leafdata
2669   PetscFree2(rootdata, leafdata);
2670 .ve
2671 
2672   If you do not register `rootdata` and `leafdata` it will not cause an error,
2673   but optimizations that reduce the setup time for each communication cannot be
2674   made.  Currently, the only implementation of `PetscSF` that benefits from
2675   `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`.  For the default
2676   `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`.
2677 
2678 .seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
2679 @*/
PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Forward to the implementation only if it composed PetscSFRegisterPersistent_C;
     PetscTryMethod() is a silent no-op otherwise, so implementations that cannot
     exploit persistent buffers (e.g. PETSCSFBASIC, per the docs above) are unaffected */
  PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2687 
2688 /*@
2689   PetscSFDeregisterPersistent - Signal that repeated usage of `rootdata` and `leafdata` for `PetscSF` communication has concluded.
2690 
2691   Collective
2692 
2693   Input Parameters:
2694 + sf       - star forest
2695 . unit     - the data type contained within the `rootdata` and `leafdata`
2696 . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
2697 - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`
2698 
2699   Level: advanced
2700 
2701   Note:
2702   See `PetscSFRegisterPersistent()` for when and how to use this function.
2703 
2704 .seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
2705 @*/
PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Forward to the implementation only if it composed PetscSFDeregisterPersistent_C;
     PetscTryMethod() is a silent no-op for implementations without persistent-buffer support */
  PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2713 
2714 PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
2715 {
2716   MPI_Aint lb, lb_true, bytes, bytes_true;
2717 
2718   PetscFunctionBegin;
2719   PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
2720   PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
2721   PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
2722   *size = bytes;
2723   PetscFunctionReturn(PETSC_SUCCESS);
2724 }
2725