xref: /petsc/src/vec/is/sf/interface/sf.c (revision f0edcda5f8dd35a8a57e6c38b630fdcc5eaed472)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petsc/private/hashmapi.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <petscdevice_cuda.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 #endif
13 
14 #if defined(PETSC_CLANG_STATIC_ANALYZER)
15 extern void PetscSFCheckGraphSet(PetscSF, int);
16 #else
17   #if defined(PETSC_USE_DEBUG)
18     #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
19   #else
20     #define PetscSFCheckGraphSet(sf, arg) \
21       do { \
22       } while (0)
23   #endif
24 #endif
25 
/* String tables for the PetscSFDuplicateOption and PetscSFConcatenateRootMode enums.
   Layout follows the PetscOptionsEnum() convention: value names first, then the enum
   type name, the common prefix of the C identifiers, and a NULL terminator. */
const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
28 
/*@
  PetscSFCreate - create a star forest communication context

  Collective

  Input Parameter:
. comm - communicator on which the star forest will operate

  Output Parameter:
. sf - new star forest context

  Options Database Key:
+ -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
. -sf_type window                - Use MPI-3 one-sided window for communication
. -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
- -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)

  Level: intermediate

  Note:
  When one knows the communication graph is one of the predefined graphs, such as `MPI_Alltoall()`, `MPI_Allgatherv()`,
  `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
  `PetscSF`s are optimized and have better performance than the general `PetscSF`s.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
{
  PetscSF b;

  PetscFunctionBegin;
  PetscAssertPointer(sf, 2);
  PetscCall(PetscSFInitializePackage());

  PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
  /* -1 marks "graph not yet set"; min/max leaf start at extreme sentinels so any real leaf narrows the range */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_INT_MAX;
  b->maxleaf   = PETSC_INT_MIN;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi; /* inherit the library-wide setting */
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->unknown_input_stream = PETSC_FALSE;
  #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
  b->backend = PETSCSF_BACKEND_KOKKOS;
  #elif defined(PETSC_HAVE_CUDA)
  b->backend = PETSCSF_BACKEND_CUDA;
  #elif defined(PETSC_HAVE_HIP)
  b->backend = PETSCSF_BACKEND_HIP;
  #endif

  #if defined(PETSC_HAVE_NVSHMEM)
  b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
  b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
  #endif
#endif
  /* VecScatter state; -1 sizes mean "not configured", unit defaults to one PetscScalar per entry */
  b->vscat.from_n = -1;
  b->vscat.to_n   = -1;
  b->vscat.unit   = MPIU_SCALAR;
  *sf             = b;
  PetscFunctionReturn(PETSC_SUCCESS);
}
98 
/*@
  PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

  Collective

  Input Parameter:
. sf - star forest

  Level: advanced

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscTryTypeMethod(sf, Reset); /* type-specific reset first, while the graph data is still intact */
  PetscCall(PetscSFDestroy(&sf->rankssf));

  /* Return the graph description to its PetscSFCreate() defaults */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_INT_MAX;
  sf->maxleaf  = PETSC_INT_MIN;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine/remote may alias mine_alloc/remote_alloc (see PetscSFSetGraph()); only the _alloc pointers are owned */
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));

  /* Detach the multi-SF's back-pointer to this sf before destroying it */
  if (sf->multi) sf->multi->multi = NULL;
  PetscCall(PetscSFDestroy(&sf->multi));

  PetscCall(PetscLayoutDestroy(&sf->map));

#if defined(PETSC_HAVE_DEVICE)
  /* Release the device-side mirrors of rmine */
  for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
#endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
146 
147 /*@
148   PetscSFSetType - Set the `PetscSF` communication implementation
149 
150   Collective
151 
152   Input Parameters:
153 + sf   - the `PetscSF` context
154 - type - a known method
155 .vb
156     PETSCSFWINDOW - MPI-2/3 one-sided
157     PETSCSFBASIC - basic implementation using MPI-1 two-sided
158 .ve
159 
160   Options Database Key:
161 . -sf_type <type> - Sets the method; for example `basic` or `window` use -help for a list of available methods
162 
163   Level: intermediate
164 
165   Notes:
166   See `PetscSFType` for possible values
167 
168 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`
169 @*/
170 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
171 {
172   PetscBool match;
173   PetscErrorCode (*r)(PetscSF);
174 
175   PetscFunctionBegin;
176   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
177   PetscAssertPointer(type, 2);
178 
179   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
180   if (match) PetscFunctionReturn(PETSC_SUCCESS);
181 
182   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
183   PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
184   /* Destroy the previous PetscSF implementation context */
185   PetscTryTypeMethod(sf, Destroy);
186   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
187   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
188   PetscCall((*r)(sf));
189   PetscFunctionReturn(PETSC_SUCCESS);
190 }
191 
/*@
  PetscSFGetType - Get the `PetscSF` communication implementation

  Not Collective

  Input Parameter:
. sf - the `PetscSF` context

  Output Parameter:
. type - the `PetscSF` type name

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(type, 2);
  /* Returns a borrowed pointer to the object's type name; NULL if no type has been set yet */
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(PETSC_SUCCESS);
}
215 
/*@
  PetscSFDestroy - destroy a star forest

  Collective

  Input Parameter:
. sf - address of star forest

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
  PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1);
  /* Only the last reference actually frees the object; earlier destroys just drop the count */
  if (--((PetscObject)*sf)->refct > 0) {
    *sf = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscSFReset(*sf));     /* free graph and communication data */
  PetscTryTypeMethod(*sf, Destroy); /* type-specific teardown */
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* vscat.unit is a duplicated datatype only when bs > 1; otherwise it aliases MPIU_SCALAR and must not be freed */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
#if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
  /* These were created in PetscSFSetFromOptions() when stream-aware MPI was requested */
  if ((*sf)->use_stream_aware_mpi) {
    PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
    PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
  }
#endif
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
250 
251 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
252 {
253   PetscInt           i, nleaves;
254   PetscMPIInt        size;
255   const PetscInt    *ilocal;
256   const PetscSFNode *iremote;
257 
258   PetscFunctionBegin;
259   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
260   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
261   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
262   for (i = 0; i < nleaves; i++) {
263     const PetscInt rank   = iremote[i].rank;
264     const PetscInt remote = iremote[i].index;
265     const PetscInt leaf   = ilocal ? ilocal[i] : i;
266     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
267     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
268     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
269   }
270   PetscFunctionReturn(PETSC_SUCCESS);
271 }
272 
/*@
  PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication

  Collective

  Input Parameter:
. sf - star forest communication object

  Level: beginner

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS); /* idempotent: second and later calls are no-ops */
  PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
  PetscCall(PetscSFCheckGraphValid_Private(sf)); /* debug-only validation of ranks/indices */
  if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
  PetscTryTypeMethod(sf, SetUp);
  /* Install the device allocator hooks matching the selected backend */
#if defined(PETSC_HAVE_CUDA)
  if (sf->backend == PETSCSF_BACKEND_CUDA) {
    sf->ops->Malloc = PetscSFMalloc_CUDA;
    sf->ops->Free   = PetscSFFree_CUDA;
  }
#endif
#if defined(PETSC_HAVE_HIP)
  if (sf->backend == PETSCSF_BACKEND_HIP) {
    sf->ops->Malloc = PetscSFMalloc_HIP;
    sf->ops->Free   = PetscSFFree_HIP;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
    sf->ops->Malloc = PetscSFMalloc_Kokkos;
    sf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
318 
/*@
  PetscSFSetFromOptions - set `PetscSF` options using the options database

  Logically Collective

  Input Parameter:
. sf - star forest

  Options Database Keys:
+ -sf_type                      - implementation type, see `PetscSFSetType()`
. -sf_rank_order                - sort composite points for gathers and scatters in MPI rank order, gathers are non-deterministic otherwise
. -sf_monitor                   - monitor the MPI communication in the `PetscSF`
. -sf_unknown_input_stream      - Assume the root/leafdata passed to `PetscSF` was computed on arbitrary (device) streams unknown to `PetscSF` (default: false)
. -sf_use_stream_aware_mpi      - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
                                  If true, this option only works with `-use_gpu_aware_mpi 1`.
- -sf_backend <cuda,hip,kokkos> - Select the device backend `PetscSF` uses. On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the
                                  default being kokkos. On other devices, the only available choice is kokkos.

  Level: intermediate

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType deft;
  char        type[256];
  PetscBool   flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscObjectOptionsBegin((PetscObject)sf);
  /* Default to the current type if one is set, otherwise to PETSCSFBASIC */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
  PetscCall(PetscSFSetType(sf, flg ? type : deft));
  PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
  PetscCall(PetscOptionsBool("-sf_monitor", "monitor the MPI communication in sf", NULL, sf->monitor, &sf->monitor, NULL));
#if defined(PETSC_HAVE_DEVICE)
  {
    char      backendstr[32] = {0};
    PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
    PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
    PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
    PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
    PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
    PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
  #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
  #elif defined(PETSC_HAVE_KOKKOS)
    /* Kokkos-only build: any explicit -sf_backend other than kokkos is an error */
    PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
  #endif

  #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
    /* Bind the default CUDA stream to an MPIX stream communicator; freed in PetscSFDestroy() */
    if (sf->use_stream_aware_mpi) {
      MPI_Info info;

      PetscCallMPI(MPI_Info_create(&info));
      PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
      PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
      PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
      PetscCallMPI(MPI_Info_free(&info));
      PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
    }
  #endif
  }
#endif
  PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}
396 
/*@
  PetscSFSetRankOrder - sort multi-points for gathers and scatters by MPI rank order

  Logically Collective

  Input Parameters:
+ sf  - star forest
- flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (false has a lower setup cost, but is non-deterministic)

  Level: advanced

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveBool(sf, flg, 2);
  /* The multi-SF bakes in the ordering when it is built, so the flag is frozen once sf->multi exists */
  PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(PETSC_SUCCESS);
}
419 
420 /*@
421   PetscSFSetGraph - Set a parallel star forest
422 
423   Collective
424 
425   Input Parameters:
426 + sf         - star forest
427 . nroots     - number of root vertices on the current MPI process (these are possible targets for other process to attach leaves)
428 . nleaves    - number of leaf vertices on the current MPI process, each of these references a root on any process
429 . ilocal     - locations of leaves in leafdata buffers (locations must be >= 0, enforced during setup in debug mode), pass `NULL` for contiguous storage (same as passing (0, 1, 2, ..., nleaves-1))
430 . localmode  - copy mode for `ilocal`
431 . iremote    - remote locations of root vertices for each leaf on the current process, length is `nleaves' (locations must be >= 0, enforced during setup in debug mode)
432 - remotemode - copy mode for `iremote`
433 
434   Level: intermediate
435 
436   Notes:
437   Leaf indices in `ilocal` must be unique, otherwise an error occurs.
438 
439   Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
440   In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
441   PETSc might modify the respective array;
442   if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
443   If `PETSC_COPY_VALUES` is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).
444 
445   `PetscSFSetGraphWithPattern()` is an alternative approach to provide certain communication patterns that have extra optimizations.
446 
447   Fortran Notes:
448   In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
449 
450   Developer Notes:
451   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
452   This also allows to compare leaf sets of two `PetscSF`s easily.
453 
454 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`, `PetscSFSetGraphWithPattern()`
455 @*/
456 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt ilocal[], PetscCopyMode localmode, PetscSFNode iremote[], PetscCopyMode remotemode)
457 {
458   PetscBool unique, contiguous;
459 
460   PetscFunctionBegin;
461   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
462   if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
463   if (nleaves > 0) PetscAssertPointer(iremote, 6);
464   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
465   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
466   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
467    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
468   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
469   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
470 
471   if (sf->nroots >= 0) { /* Reset only if graph already set */
472     PetscCall(PetscSFReset(sf));
473   }
474 
475   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
476   if (PetscDefined(USE_DEBUG)) {
477     PetscMPIInt size;
478 
479     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
480     for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
481   }
482 
483   sf->nroots  = nroots;
484   sf->nleaves = nleaves;
485 
486   if (localmode == PETSC_COPY_VALUES && ilocal) {
487     PetscInt *tlocal = NULL;
488 
489     PetscCall(PetscMalloc1(nleaves, &tlocal));
490     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
491     ilocal = tlocal;
492   }
493   if (remotemode == PETSC_COPY_VALUES) {
494     PetscSFNode *tremote = NULL;
495 
496     PetscCall(PetscMalloc1(nleaves, &tremote));
497     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
498     iremote = tremote;
499   }
500 
501   if (nleaves && ilocal) {
502     PetscSFNode work;
503 
504     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
505     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
506     unique = PetscNot(unique);
507     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
508     sf->minleaf = ilocal[0];
509     sf->maxleaf = ilocal[nleaves - 1];
510     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
511   } else {
512     sf->minleaf = 0;
513     sf->maxleaf = nleaves - 1;
514     unique      = PETSC_TRUE;
515     contiguous  = PETSC_TRUE;
516   }
517 
518   if (contiguous) {
519     if (localmode == PETSC_USE_POINTER) {
520       ilocal = NULL;
521     } else {
522       PetscCall(PetscFree(ilocal));
523     }
524   }
525   sf->mine = ilocal;
526   if (localmode == PETSC_USE_POINTER) {
527     sf->mine_alloc = NULL;
528   } else {
529     sf->mine_alloc = ilocal;
530   }
531   if (PetscDefined(USE_DEBUG)) {
532     PetscMPIInt size;
533 
534     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
535     for (PetscInt i = 0; i < nleaves; i++) PetscCheck(iremote[i].rank >= -1 && iremote[i].rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "iremote contains incorrect rank values");
536   }
537   sf->remote = iremote;
538   if (remotemode == PETSC_USE_POINTER) {
539     sf->remote_alloc = NULL;
540   } else {
541     sf->remote_alloc = iremote;
542   }
543   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
544   sf->graphset = PETSC_TRUE;
545   PetscFunctionReturn(PETSC_SUCCESS);
546 }
547 
/*@
  PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern

  Collective

  Input Parameters:
+ sf      - The `PetscSF`
. map     - Layout of roots over all processes (not used when pattern is `PETSCSF_PATTERN_ALLTOALL`)
- pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`

  Level: intermediate

  Notes:
  It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `root` and its `PetscLayout` is `map`.
  `n` and `N` are the local and global sizes of `root` respectively.

  With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on it, it will copy `root` to
  sequential vectors `leaves` on all MPI processes.

  With `PETSCSF_PATTERN_GATHER`, the routine creates a graph that if one does `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on it, it will copy `root` to a
  sequential vector `leaves` on MPI rank 0.

  With `PETSCSF_PATTERN_ALLTOALL`, map is not used. Suppose NP is the size of `sf`'s communicator. The routine
  creates a graph where every MPI process has NP leaves and NP roots. On MPI rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One needs to create a new MPI datatype for the multiple data
  items with `MPI_Type_contiguous` and use that as the <unit> argument in the `PetscSF` routines. In this case, roots and leaves are symmetric.

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
{
  MPI_Comm    comm;
  PetscInt    n, N, res[2];
  PetscMPIInt rank, size;
  PetscSFType type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    PetscInt sizei = size; /* widen before squaring to avoid PetscMPIInt overflow */

    /* All-to-all ignores the caller's map; build a size-per-rank layout internally */
    type = PETSCSFALLTOALL;
    PetscCall(PetscLayoutCreate(comm, &sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map, size));
    PetscCall(PetscLayoutSetSize(sf->map, PetscSqr(sizei)));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map, &n));
    PetscCall(PetscLayoutGetSize(map, &N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map, &sf->map));
  }
  PetscCall(PetscSFSetType(sf, type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves (the gathered copy); other ranks contribute roots only */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
646 
647 /*@
648   PetscSFCreateInverseSF - given a `PetscSF` in which all roots have degree 1 (exactly one leaf), creates the inverse map
649 
650   Collective
651 
652   Input Parameter:
653 . sf - star forest to invert
654 
655   Output Parameter:
656 . isf - inverse of `sf`
657 
658   Level: advanced
659 
660   Notes:
661   All roots must have degree 1.
662 
663   The local space may be a permutation, but cannot be sparse.
664 
665 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
666 @*/
667 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
668 {
669   PetscMPIInt     rank;
670   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
671   const PetscInt *ilocal;
672   PetscSFNode    *roots, *leaves;
673 
674   PetscFunctionBegin;
675   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
676   PetscSFCheckGraphSet(sf, 1);
677   PetscAssertPointer(isf, 2);
678 
679   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
680   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
681 
682   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
683   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
684   for (i = 0; i < maxlocal; i++) {
685     leaves[i].rank  = rank;
686     leaves[i].index = i;
687   }
688   for (i = 0; i < nroots; i++) {
689     roots[i].rank  = -1;
690     roots[i].index = -1;
691   }
692   PetscCall(PetscSFReduceBegin(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
693   PetscCall(PetscSFReduceEnd(sf, MPIU_SF_NODE, leaves, roots, MPI_REPLACE));
694 
695   /* Check whether our leaves are sparse */
696   for (i = 0, count = 0; i < nroots; i++)
697     if (roots[i].rank >= 0) count++;
698   if (count == nroots) newilocal = NULL;
699   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
700     for (i = 0, count = 0; i < nroots; i++) {
701       if (roots[i].rank >= 0) {
702         newilocal[count]   = i;
703         roots[count].rank  = roots[i].rank;
704         roots[count].index = roots[i].index;
705         count++;
706       }
707     }
708   }
709 
710   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
711   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
712   PetscCall(PetscFree2(roots, leaves));
713   PetscFunctionReturn(PETSC_SUCCESS);
714 }
715 
/*@
  PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph

  Collective

  Input Parameters:
+ sf  - communication object to duplicate
- opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)

  Output Parameter:
. newsf - new communication object

  Level: beginner

.seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
{
  PetscSFType  type;
  MPI_Datatype dtype = MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, opt, 2);
  PetscAssertPointer(newsf, 3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
  PetscCall(PetscSFGetType(sf, &type));
  if (type) PetscCall(PetscSFSetType(*newsf, type)); /* type may be unset if the source sf is not set up yet */
  (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf, 1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graph: deep-copy the leaf/remote arrays into the new sf */
      PetscInt           nroots, nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
      PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
    } else {
      /* Patterned graph: recreate from the pattern and layout */
      PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream = sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(PETSC_SUCCESS);
}
775 
776 /*@C
777   PetscSFGetGraph - Get the graph specifying a parallel star forest
778 
779   Not Collective
780 
781   Input Parameter:
782 . sf - star forest
783 
784   Output Parameters:
785 + nroots  - number of root vertices on the current process (these are possible targets for other MPI process to attach leaves)
786 . nleaves - number of leaf vertices on the current process, each of these references a root on any MPI process
787 . ilocal  - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
788 - iremote - remote locations of root vertices for each leaf on the current process
789 
790   Level: intermediate
791 
792   Notes:
793   We are not currently requiring that the graph is set, thus returning `nroots` = -1 if it has not been set yet
794 
795   The returned `ilocal` and `iremote` might contain values in different order than the input ones in `PetscSFSetGraph()`
796 
797   Fortran Note:
798   Use `PetscSFRestoreGraph()` when access to the arrays is no longer needed
799 
800 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
801 @*/
802 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt *ilocal[], const PetscSFNode *iremote[])
803 {
804   PetscFunctionBegin;
805   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
806   if (sf->ops->GetGraph) {
807     PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
808   } else {
809     if (nroots) *nroots = sf->nroots;
810     if (nleaves) *nleaves = sf->nleaves;
811     if (ilocal) *ilocal = sf->mine;
812     if (iremote) *iremote = sf->remote;
813   }
814   PetscFunctionReturn(PETSC_SUCCESS);
815 }
816 
817 /*@
818   PetscSFGetLeafRange - Get the active leaf ranges
819 
820   Not Collective
821 
822   Input Parameter:
823 . sf - star forest
824 
825   Output Parameters:
826 + minleaf - minimum active leaf on this MPI process. Returns 0 if there are no leaves.
827 - maxleaf - maximum active leaf on this MPI process. Returns -1 if there are no leaves.
828 
829   Level: developer
830 
831 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
832 @*/
833 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
834 {
835   PetscFunctionBegin;
836   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
837   PetscSFCheckGraphSet(sf, 1);
838   if (minleaf) *minleaf = sf->minleaf;
839   if (maxleaf) *maxleaf = sf->maxleaf;
840   PetscFunctionReturn(PETSC_SUCCESS);
841 }
842 
843 /*@
844   PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
845 
846   Collective
847 
848   Input Parameters:
849 + A    - the star forest
850 . obj  - Optional object that provides the prefix for the option names
851 - name - command line option
852 
853   Level: intermediate
854 
855   Note:
856   See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
857 
858 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
859 @*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
  /* Thin wrapper: viewer and format selection are handled entirely by PetscObjectViewFromOptions() */
  PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
  PetscFunctionReturn(PETSC_SUCCESS);
}
867 
868 /*@
869   PetscSFView - view a star forest
870 
871   Collective
872 
873   Input Parameters:
874 + sf     - star forest
875 - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
876 
877   Level: beginner
878 
879 .seealso: [](sec_petscsf), `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
880 @*/
PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
{
  PetscBool         isascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Default to stdout on the sf's communicator when no viewer is supplied */
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  PetscCheckSameComm(sf, 1, viewer, 2);
  /* Set up first (if possible) so rank information below is available */
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        /* Nothing more to print; pop the tab pushed above before bailing out */
        PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(PETSC_SUCCESS);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      /* Synchronized printing: each rank's summary and leaf -> (rank,index) edges appear in rank order */
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%d\n", rank, sf->nroots, sf->nleaves, sf->nranks));
      for (PetscInt i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer, &format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks, *perm;

        /* Sort a copy of the rank list (with a permutation) so edges are reported by increasing remote rank */
        PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
        PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
        for (PetscMPIInt i = 0; i < sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
        for (PetscMPIInt ii = 0; ii < sf->nranks; ii++) {
          PetscMPIInt i = perm[ii];

          /* roffset[] delimits each rank's slice of the rmine/rremote arrays */
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
          for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
        }
        PetscCall(PetscFree2(tmpranks, perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  /* Give the implementation a chance to view its own data */
  PetscTryTypeMethod(sf, View, viewer);
  PetscFunctionReturn(PETSC_SUCCESS);
}
935 
936 /*@C
937   PetscSFGetRootRanks - Get the root MPI ranks and number of vertices referenced by leaves on this process
938 
939   Not Collective
940 
941   Input Parameter:
942 . sf - star forest
943 
944   Output Parameters:
945 + nranks  - number of MPI processes referenced by local part
946 . ranks   - [`nranks`] array of MPI ranks
947 . roffset - [`nranks`+1] offset in `rmine` and `rremote` for each MPI process
948 . rmine   - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote MPI process, or `NULL`
949 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote MPI process, or `NULL`
950 
951   Level: developer
952 
953 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetLeafRanks()`
954 @*/
955 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscMPIInt *nranks, const PetscMPIInt *ranks[], const PetscInt *roffset[], const PetscInt *rmine[], const PetscInt *rremote[])
956 {
957   PetscFunctionBegin;
958   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
959   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
960   if (sf->ops->GetRootRanks) {
961     PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
962   } else {
963     /* The generic implementation */
964     if (nranks) *nranks = sf->nranks;
965     if (ranks) *ranks = sf->ranks;
966     if (roffset) *roffset = sf->roffset;
967     if (rmine) *rmine = sf->rmine;
968     if (rremote) *rremote = sf->rremote;
969   }
970   PetscFunctionReturn(PETSC_SUCCESS);
971 }
972 
973 /*@C
974   PetscSFGetLeafRanks - Get leaf MPI ranks referencing roots on this process
975 
976   Not Collective
977 
978   Input Parameter:
979 . sf - star forest
980 
981   Output Parameters:
982 + niranks  - number of leaf MPI processes referencing roots on this process
983 . iranks   - [`niranks`] array of MPI ranks
984 . ioffset  - [`niranks`+1] offset in `irootloc` for each MPI process
985 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf MPI process
986 
987   Level: developer
988 
989 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()`
990 @*/
991 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscMPIInt *niranks, const PetscMPIInt *iranks[], const PetscInt *ioffset[], const PetscInt *irootloc[])
992 {
993   PetscFunctionBegin;
994   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
995   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
996   if (sf->ops->GetLeafRanks) {
997     PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
998   } else {
999     PetscSFType type;
1000     PetscCall(PetscSFGetType(sf, &type));
1001     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
1002   }
1003   PetscFunctionReturn(PETSC_SUCCESS);
1004 }
1005 
1006 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1007 {
1008   PetscInt i;
1009   for (i = 0; i < n; i++) {
1010     if (needle == list[i]) return PETSC_TRUE;
1011   }
1012   return PETSC_FALSE;
1013 }
1014 
1015 /*@C
1016   PetscSFSetUpRanks - Set up data structures associated with MPI ranks; this is for internal use by `PetscSF` implementations.
1017 
1018   Collective
1019 
1020   Input Parameters:
1021 + sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1022 - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
1023 
1024   Level: developer
1025 
1026 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetRootRanks()`
1027 @*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
{
  PetscHMapI    table;
  PetscHashIter pos;
  PetscMPIInt   size, groupsize, *groupranks, *ranks;
  PetscInt     *rcount; /* per-rank leaf counts; later reused as fill cursors */
  PetscInt      irank, sfnrank, ranksi;
  PetscMPIInt   i, orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  /* Count the leaves pointing at each remote rank using a hash map rank -> count */
  PetscCall(PetscHMapICreateWithSize(10, &table));
  for (i = 0; i < sf->nleaves; i++) {
    /* Log 1-based rank */
    PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
  }
  PetscCall(PetscHMapIGetSize(table, &sfnrank));
  PetscCall(PetscMPIIntCast(sfnrank, &sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
  /* Flatten the hash map into the parallel arrays ranks[] / rcount[] */
  PetscHashIterBegin(table, pos);
  for (i = 0; i < sf->nranks; i++) {
    PetscHashIterGetKey(table, pos, ranksi);
    PetscCall(PetscMPIIntCast(ranksi, &ranks[i]));
    PetscHashIterGetVal(table, pos, rcount[i]);
    PetscHashIterNext(table, pos);
    ranks[i]--; /* Convert back to 0-based */
  }
  PetscCall(PetscHMapIDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group    group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;

    /* Translate the distinguished group's members into rank numbers of the sf's communicator */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
    PetscCall(PetscMalloc1(groupsize, &dgroupranks));
    PetscCall(PetscMalloc1(groupsize, &groupranks));
    for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
    for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i], groupsize, groupranks)) break;
    }
    for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscMPIInt tmprank;
      PetscInt    tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition by rank, keeping rcount[] matched up with ranks[] */
  PetscCall(PetscSortMPIIntWithIntArray(sf->ndranks, ranks, rcount));
  if (rcount) PetscCall(PetscSortMPIIntWithIntArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
  /* Prefix-sum the counts into the CSR-style offsets sf->roffset[], resetting rcount[] for reuse as cursors */
  sf->roffset[0] = 0;
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
    sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
    rcount[i]          = 0;
  }
  /* Bucket every leaf into sf->rmine[]/sf->rremote[] by its owning rank's slice */
  for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
    /* short circuit: reuse the previous lookup when consecutive leaves share a rank */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks: first among distinguished, then the rest */
      PetscCall(PetscMPIIntCast(sf->remote[i].rank, &orank));
      PetscCall(PetscFindMPIInt(orank, sf->ndranks, sf->ranks, &irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(orank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
        if (irank >= 0) irank += sf->ndranks;
      }
    }
    PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %d in array", orank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount, ranks));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1124 
1125 /*@C
1126   PetscSFGetGroups - gets incoming and outgoing process groups
1127 
1128   Collective
1129 
1130   Input Parameter:
1131 . sf - star forest
1132 
1133   Output Parameters:
1134 + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1135 - outgoing - group of destination processes for outgoing edges (roots that I reference)
1136 
1137   Level: developer
1138 
1139 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1140 @*/
PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
{
  MPI_Group group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) { /* Lazily build and cache the incoming group */
    PetscInt        i;
    const PetscInt *indegree;
    PetscMPIInt     rank, *outranks, *inranks, indegree0;
    PetscSFNode    *remote;
    PetscSF         bgcount;

    /* Compute the number of incoming ranks: build a helper sf with one local root and one
       leaf per root rank of sf, then count that root's degree */
    PetscCall(PetscMalloc1(sf->nranks, &remote));
    for (i = 0; i < sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
    PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
    /* Enumerate the incoming ranks: every process sends its own rank, and the gather collects
       one entry per incoming edge into inranks[] */
    PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
    PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCall(PetscMPIIntCast(indegree[0], &indegree0));
    PetscCallMPI(MPI_Group_incl(group, indegree0, inranks, &sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks, outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) { /* Lazily build and cache the outgoing group: simply the root ranks this process references */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1187 
1188 /*@
1189   PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
1190 
1191   Collective
1192 
1193   Input Parameter:
1194 . sf - star forest
1195 
1196   Output Parameter:
1197 . rsf - the star forest with a single root per MPI process to perform communications
1198 
1199   Level: developer
1200 
1201 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
1202 @*/
1203 PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
1204 {
1205   PetscFunctionBegin;
1206   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1207   PetscAssertPointer(rsf, 2);
1208   if (!sf->rankssf) {
1209     PetscSFNode       *rremotes;
1210     const PetscMPIInt *ranks;
1211     PetscMPIInt        nranks;
1212 
1213     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
1214     PetscCall(PetscMalloc1(nranks, &rremotes));
1215     for (PetscInt i = 0; i < nranks; i++) {
1216       rremotes[i].rank  = ranks[i];
1217       rremotes[i].index = 0;
1218     }
1219     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
1220     PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
1221   }
1222   *rsf = sf->rankssf;
1223   PetscFunctionReturn(PETSC_SUCCESS);
1224 }
1225 
1226 /*@
1227   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
1228 
1229   Collective
1230 
1231   Input Parameter:
1232 . sf - star forest that may contain roots with 0 or with more than 1 vertex
1233 
1234   Output Parameter:
1235 . multi - star forest with split roots, such that each root has degree exactly 1 (has one leaf)
1236 
1237   Level: developer
1238 
1239   Note:
1240   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
1241   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1242   edge, it is a candidate for future optimization that might involve its removal.
1243 
1244 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1245 @*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscAssertPointer(multi, 2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    *multi           = sf->multi;
    sf->multi->multi = sf->multi; /* the multi-sf is its own multi-sf */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!sf->multi) { /* Lazily build and cache the multi-sf */
    const PetscInt *indegree;
    PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
    /* Prefix-sum the root degrees into inoffset[]: root i's split copies occupy
       [inoffset[i], inoffset[i+1]) in the multi-sf's expanded root space */
    PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
    PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
    inoffset[0] = 0;
    for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
    for (i = 0; i < maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands each leaf a distinct slot (outoffset) inside its root's range */
    PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
      for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
    }
    /* Each leaf keeps its remote rank but now points at its private slot in the expanded root space */
    PetscCall(PetscMalloc1(sf->nleaves, &remote));
    for (i = 0; i < sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt  rank;
      PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
      /* Every leaf reports its own rank into its slot of the multi-root space */
      for (i = 0; i < maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i = 0; i < sf->nroots; i++) {
        PetscInt j;
        for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
        for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Broadcast each leaf its re-sorted slot, then rebuild the graph with the new indices */
      PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves, &newremote));
      for (i = 0; i < sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
    }
    PetscCall(PetscFree3(inoffset, outones, outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1314 
1315 /*@C
1316   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
1317 
1318   Collective
1319 
1320   Input Parameters:
1321 + sf        - original star forest
1322 . nselected - number of selected roots on this MPI process
1323 - selected  - indices of the selected roots on this MPI process
1324 
1325   Output Parameter:
1326 . esf - new star forest
1327 
1328   Level: advanced
1329 
1330   Note:
1331   To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
  be done by calling `PetscSFGetGraph()`.
1333 
1334 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1335 @*/
PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *esf)
{
  PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata, *leafdata, *leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  if (nselected) PetscAssertPointer(selected, 3);
  PetscAssertPointer(esf, 4);

  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));

  if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
    PetscBool dups;
    PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
    PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
    for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
  }

  if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
  else {
    /* A generic version of creating embedded sf */
    PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
    maxlocal = maxleaf - minleaf + 1;
    PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
    /* leafdata is addressed by absolute leaf index; shift the buffer so leafdata[minleaf] aliases leafmem[0] */
    leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
    /* Tag selected roots and bcast to leaves */
    for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
    PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));

    /* Build esf with leaves that are still connected */
    esf_nleaves = 0;
    for (i = 0; i < nleaves; i++) {
      j = ilocal ? ilocal[i] : i;
      /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
         with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
      */
      esf_nleaves += (leafdata[j] ? 1 : 0);
    }
    PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
    PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
    /* Keep surviving leaves with their original local and remote indices (no remapping) */
    for (i = n = 0; i < nleaves; i++) {
      j = ilocal ? ilocal[i] : i;
      if (leafdata[j]) {
        new_ilocal[n]        = j;
        new_iremote[n].rank  = iremote[i].rank;
        new_iremote[n].index = iremote[i].index;
        ++n;
      }
    }
    PetscCall(PetscSFCreate(comm, esf));
    PetscCall(PetscSFSetFromOptions(*esf));
    PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
    PetscCall(PetscFree2(rootdata, leafmem));
  }
  PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1403 
1404 /*@C
1405   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
1406 
1407   Collective
1408 
1409   Input Parameters:
1410 + sf        - original star forest
1411 . nselected - number of selected leaves on this MPI process
1412 - selected  - indices of the selected leaves on this MPI process
1413 
1414   Output Parameter:
1415 . newsf - new star forest
1416 
1417   Level: advanced
1418 
1419 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1420 @*/
1421 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt selected[], PetscSF *newsf)
1422 {
1423   const PetscSFNode *iremote;
1424   PetscSFNode       *new_iremote;
1425   const PetscInt    *ilocal;
1426   PetscInt           i, nroots, *leaves, *new_ilocal;
1427   MPI_Comm           comm;
1428 
1429   PetscFunctionBegin;
1430   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1431   PetscSFCheckGraphSet(sf, 1);
1432   if (nselected) PetscAssertPointer(selected, 3);
1433   PetscAssertPointer(newsf, 4);
1434 
1435   /* Uniq selected[] and put results in leaves[] */
1436   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1437   PetscCall(PetscMalloc1(nselected, &leaves));
1438   PetscCall(PetscArraycpy(leaves, selected, nselected));
1439   PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
1440   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1441 
1442   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1443   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1444   else {
1445     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1446     PetscCall(PetscMalloc1(nselected, &new_ilocal));
1447     PetscCall(PetscMalloc1(nselected, &new_iremote));
1448     for (i = 0; i < nselected; ++i) {
1449       const PetscInt l     = leaves[i];
1450       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1451       new_iremote[i].rank  = iremote[l].rank;
1452       new_iremote[i].index = iremote[l].index;
1453     }
1454     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1455     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1456   }
1457   PetscCall(PetscFree(leaves));
1458   PetscFunctionReturn(PETSC_SUCCESS);
1459 }
1460 
1461 /*@C
1462   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
1463 
1464   Collective
1465 
1466   Input Parameters:
1467 + sf       - star forest on which to communicate
1468 . unit     - data type associated with each node
1469 . rootdata - buffer to broadcast
1470 - op       - operation to use for reduction
1471 
1472   Output Parameter:
1473 . leafdata - buffer to be reduced with values from each leaf's respective root
1474 
1475   Level: intermediate
1476 
1477   Note:
1478   When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers
1479   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the memory type, you should
1480   use `PetscSFBcastWithMemTypeBegin()` instead.
1481 
1482 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1483 @*/
1484 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1485 {
1486   PetscMemType rootmtype, leafmtype;
1487 
1488   PetscFunctionBegin;
1489   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1490   PetscCall(PetscSFSetUp(sf));
1491   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1492   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1493   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1494   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1495   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1496   PetscFunctionReturn(PETSC_SUCCESS);
1497 }
1498 
1499 /*@C
1500   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
1501   to `PetscSFBcastEnd()`
1502 
1503   Collective
1504 
1505   Input Parameters:
1506 + sf        - star forest on which to communicate
1507 . unit      - data type associated with each node
1508 . rootmtype - memory type of `rootdata`
1509 . rootdata  - buffer to broadcast
1510 . leafmtype - memory type of `leafdata`
1511 - op        - operation to use for reduction
1512 
1513   Output Parameter:
1514 . leafdata - buffer to be reduced with values from each leaf's respective root
1515 
1516   Level: intermediate
1517 
1518 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1519 @*/
1520 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1521 {
1522   PetscFunctionBegin;
1523   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1524   PetscCall(PetscSFSetUp(sf));
1525   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1526   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1527   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1528   PetscFunctionReturn(PETSC_SUCCESS);
1529 }
1530 
1531 /*@C
1532   PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
1533 
1534   Collective
1535 
1536   Input Parameters:
1537 + sf       - star forest
1538 . unit     - data type
1539 . rootdata - buffer to broadcast
1540 - op       - operation to use for reduction
1541 
1542   Output Parameter:
1543 . leafdata - buffer to be reduced with values from each leaf's respective root
1544 
1545   Level: intermediate
1546 
1547 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1548 @*/
1549 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1550 {
1551   PetscFunctionBegin;
1552   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1553   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1554   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1555   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
1556   PetscFunctionReturn(PETSC_SUCCESS);
1557 }
1558 
1559 /*@C
1560   PetscSFReduceBegin - begin reduction (communication) of `leafdata` into `rootdata`, to be completed with call to `PetscSFReduceEnd()`
1561 
1562   Collective
1563 
1564   Input Parameters:
1565 + sf       - star forest
1566 . unit     - data type
1567 . leafdata - values to reduce (communicate)
1568 - op       - reduction operation
1569 
1570   Output Parameter:
1571 . rootdata - result of reduction of values (`leafdata`) from all leaves to each root
1572 
1573   Level: intermediate
1574 
1575   Note:
1576   When PETSc is configured with device support, it will use `PetscGetMemType()` to determine whether the given data pointers
1577   are host pointers or device pointers, which may incur a noticeable cost. If you already knew the memory type, you should
1578   use `PetscSFReduceWithMemTypeBegin()` instead.
1579 
1580 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`, `MPI_Datatype`
1581 @*/
1582 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1583 {
1584   PetscMemType rootmtype, leafmtype;
1585 
1586   PetscFunctionBegin;
1587   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1588   PetscCall(PetscSFSetUp(sf));
1589   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1590   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1591   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1592   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1593   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1594   PetscFunctionReturn(PETSC_SUCCESS);
1595 }
1596 
1597 /*@C
1598   PetscSFReduceWithMemTypeBegin - begin reduction of `leafdata` into `rootdata` with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1599 
1600   Collective
1601 
1602   Input Parameters:
1603 + sf        - star forest
1604 . unit      - data type
1605 . leafmtype - memory type of `leafdata`
1606 . leafdata  - values to reduce
1607 . rootmtype - memory type of `rootdata`
1608 - op        - reduction operation
1609 
1610   Output Parameter:
1611 . rootdata - result of reduction of values from all leaves of each root
1612 
1613   Level: intermediate
1614 
1615 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1616 @*/
1617 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1618 {
1619   PetscFunctionBegin;
1620   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1621   PetscCall(PetscSFSetUp(sf));
1622   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1623   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1624   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1625   PetscFunctionReturn(PETSC_SUCCESS);
1626 }
1627 
1628 /*@C
1629   PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
1630 
1631   Collective
1632 
1633   Input Parameters:
1634 + sf       - star forest
1635 . unit     - data type
1636 . leafdata - values to reduce
1637 - op       - reduction operation
1638 
1639   Output Parameter:
1640 . rootdata - result of reduction of values from all leaves of each root
1641 
1642   Level: intermediate
1643 
1644 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
1645 @*/
1646 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1647 {
1648   PetscFunctionBegin;
1649   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1650   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1651   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1652   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1653   PetscFunctionReturn(PETSC_SUCCESS);
1654 }
1655 
1656 /*@C
1657   PetscSFFetchAndOpBegin - begin operation that fetches values from `rootdata` and updates it atomically by applying operation using `leafdata`,
1658   to be completed with `PetscSFFetchAndOpEnd()`
1659 
1660   Collective
1661 
1662   Input Parameters:
1663 + sf       - star forest
1664 . unit     - data type
1665 . leafdata - leaf values to use in reduction
1666 - op       - operation to use for reduction
1667 
1668   Output Parameters:
1669 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1670 - leafupdate - state at each leaf's respective root immediately prior to atomic update
1671 
1672   Level: advanced
1673 
1674   Note:
1675   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1676   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1677   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1678   integers.
1679 
1680 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1681 @*/
1682 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1683 {
1684   PetscMemType rootmtype, leafmtype, leafupdatemtype;
1685 
1686   PetscFunctionBegin;
1687   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1688   PetscCall(PetscSFSetUp(sf));
1689   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1690   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1691   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1692   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
1693   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1694   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1695   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1696   PetscFunctionReturn(PETSC_SUCCESS);
1697 }
1698 
1699 /*@C
1700   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1701   applying operation using `leafdata`, to be completed with `PetscSFFetchAndOpEnd()`
1702 
1703   Collective
1704 
1705   Input Parameters:
1706 + sf              - star forest
1707 . unit            - data type
1708 . rootmtype       - memory type of `rootdata`
1709 . leafmtype       - memory type of `leafdata`
1710 . leafdata        - leaf values to use in reduction
1711 . leafupdatemtype - memory type of `leafupdate`
1712 - op              - operation to use for reduction
1713 
1714   Output Parameters:
1715 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1716 - leafupdate - state at each leaf's respective root immediately prior to atomic update
1717 
1718   Level: advanced
1719 
1720   Note:
1721   See `PetscSFFetchAndOpBegin()` for more details.
1722 
1723 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1724 @*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  /* Memory types are caller-supplied (no PetscGetMemType() queries, unlike PetscSFFetchAndOpBegin());
     implementations only handle leafdata and leafupdate residing in the same memory space */
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1736 
1737 /*@C
1738   PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
1739   to fetch values from roots and update atomically by applying operation using `leafdata`
1740 
1741   Collective
1742 
1743   Input Parameters:
1744 + sf       - star forest
1745 . unit     - data type
1746 . leafdata - leaf values to use in reduction
1747 - op       - operation to use for reduction
1748 
1749   Output Parameters:
1750 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1751 - leafupdate - state at each leaf's respective root immediately prior to atomic update
1752 
1753   Level: advanced
1754 
1755 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1756 @*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  /* Complete the communication started by the matching PetscSFFetchAndOp(WithMemType)Begin() */
  PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1766 
1767 /*@C
1768   PetscSFComputeDegreeBegin - begin computation of the degree of each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
1769 
1770   Collective
1771 
1772   Input Parameter:
1773 . sf - star forest
1774 
1775   Output Parameter:
1776 . degree - degree (the number of leaves) of each root vertex
1777 
1778   Level: advanced
1779 
1780   Note:
1781   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1782 
1783 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1784 @*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt *degree[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree non-NULL means a previous Begin has not been matched by an End yet */
    PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf - sf->minleaf + 1; /* size of the contiguous local leaf range [minleaf, maxleaf] */
    PetscCall(PetscMalloc1(nroots, &sf->degree));
    PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i = 0; i < nroots; i++) sf->degree[i] = 0;
    for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
    /* Reducing a 1 from every leaf into its root with MPI_SUM yields each root's degree.
       degreetmp is shifted by -minleaf because the reduce addresses leaf data by absolute leaf index. */
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
  }
  /* The result only becomes available in PetscSFComputeDegreeEnd() */
  *degree = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1804 
1805 /*@C
1806   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
1807 
1808   Collective
1809 
1810   Input Parameter:
1811 . sf - star forest
1812 
1813   Output Parameter:
1814 . degree - degree of each root vertex
1815 
1816   Level: developer
1817 
1818   Note:
1819   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1820 
1821 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1822 @*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt *degree[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscAssertPointer(degree, 2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Same shifted leaf buffer as in Begin; completes the MPI_SUM reduction into sf->degree */
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    /* Cache the result; subsequent Begin/End pairs become no-ops returning sf->degree */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1838 
1839 /*@C
1840   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
1841   Each multi-root is assigned index of the corresponding original root.
1842 
1843   Collective
1844 
1845   Input Parameters:
1846 + sf     - star forest
1847 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()` and `PetscSFComputeDegreeEnd()`
1848 
1849   Output Parameters:
1850 + nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
1851 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1852 
1853   Level: developer
1854 
1855   Note:
1856   The returned array `multiRootsOrigNumbering` should be destroyed with `PetscFree()` when no longer needed.
1857 
1858 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1859 @*/
1860 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1861 {
1862   PetscSF  msf;
1863   PetscInt k = 0, nroots, nmroots;
1864 
1865   PetscFunctionBegin;
1866   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1867   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1868   if (nroots) PetscAssertPointer(degree, 2);
1869   if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
1870   PetscAssertPointer(multiRootsOrigNumbering, 4);
1871   PetscCall(PetscSFGetMultiSF(sf, &msf));
1872   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1873   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1874   for (PetscInt i = 0; i < nroots; i++) {
1875     if (!degree[i]) continue;
1876     for (PetscInt j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1877   }
1878   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
1879   if (nMultiRoots) *nMultiRoots = nmroots;
1880   PetscFunctionReturn(PETSC_SUCCESS);
1881 }
1882 
1883 /*@C
1884   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
1885 
1886   Collective
1887 
1888   Input Parameters:
1889 + sf       - star forest
1890 . unit     - data type
1891 - leafdata - leaf data to gather to roots
1892 
1893   Output Parameter:
1894 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has)
1895 
1896   Level: intermediate
1897 
1898 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1899 @*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* A gather is a reduce over the multi-SF with MPI_REPLACE: each leaf lands in its own
     dedicated multi-root slot, so no combining of values occurs */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1911 
1912 /*@C
1913   PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
1914 
1915   Collective
1916 
1917   Input Parameters:
1918 + sf       - star forest
1919 . unit     - data type
1920 - leafdata - leaf data to gather to roots
1921 
1922   Output Parameter:
1923 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree (which is the number of leaves that root has)
1924 
1925   Level: intermediate
1926 
1927 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1928 @*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Complete the reduce over the multi-SF started in PetscSFGatherBegin() */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1939 
1940 /*@C
1941   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
1942 
1943   Collective
1944 
1945   Input Parameters:
1946 + sf            - star forest
1947 . unit          - data type
1948 - multirootdata - root buffer to send to each leaf, one unit of data is provided to each leaf thus the amount of space per root is equal to its degree (which is the number of leaves that root has)
1949 
1950   Output Parameter:
1951 . leafdata - leaf data to be updated with personal data from each respective root
1952 
1953   Level: intermediate
1954 
1955 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1956 @*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  /* A scatter is a bcast over the multi-SF with MPI_REPLACE: each leaf receives the value from
     its own dedicated multi-root slot */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1968 
1969 /*@C
1970   PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
1971 
1972   Collective
1973 
1974   Input Parameters:
1975 + sf            - star forest
1976 . unit          - data type
1977 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1978 
1979   Output Parameter:
1980 . leafdata - leaf data to be updated with personal data from each respective root
1981 
1982   Level: intermediate
1983 
1984 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1985 @*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
{
  PetscSF multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Complete the bcast over the multi-SF started in PetscSFScatterBegin() */
  PetscCall(PetscSFGetMultiSF(sf, &multi));
  PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1996 
1997 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1998 {
1999   PetscInt        i, n, nleaves;
2000   const PetscInt *ilocal = NULL;
2001   PetscHSetI      seen;
2002 
2003   PetscFunctionBegin;
2004   if (PetscDefined(USE_DEBUG)) {
2005     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
2006     PetscCall(PetscHSetICreate(&seen));
2007     for (i = 0; i < nleaves; i++) {
2008       const PetscInt leaf = ilocal ? ilocal[i] : i;
2009       PetscCall(PetscHSetIAdd(seen, leaf));
2010     }
2011     PetscCall(PetscHSetIGetSize(seen, &n));
2012     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
2013     PetscCall(PetscHSetIDestroy(&seen));
2014   }
2015   PetscFunctionReturn(PETSC_SUCCESS);
2016 }
2017 
2018 /*@
2019   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
2020 
2021   Input Parameters:
2022 + sfA - The first `PetscSF`
2023 - sfB - The second `PetscSF`
2024 
2025   Output Parameter:
2026 . sfBA - The composite `PetscSF`
2027 
2028   Level: developer
2029 
2030   Notes:
2031   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2032   forests, i.e. the same leaf is not connected with different roots.
2033 
2034   `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
2035   a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
2036   nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
2037   `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on `sfB`, on connected nodes.
2038 
2039 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2040 @*/
PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
  const PetscInt    *localPointsA, *localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscAssertPointer(sfBA, 3);
  /* Both inputs must be true star forests (each leaf connected to at most one root) */
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
  /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
     numRootsB; otherwise, garbage will be broadcasted.
     Example (comm size = 1):
     sfA: 0 <- (0, 0)
     sfB: 100 <- (0, 0)
          101 <- (0, 1)
     Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
     of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
     receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
     remotePointsA; if not recast, point 101 would receive a garbage value.             */
  PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
  for (i = 0; i < numRootsB; i++) {
    /* (-1, -1) marks "no incoming edge from sfA" */
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  /* Scatter sfA's remote roots into slots indexed by sfA's local leaf numbers, which are sfB's root numbers */
  for (i = 0; i < numLeavesA; i++) {
    PetscInt localp = localPointsA ? localPointsA[i] : i;

    if (localp >= numRootsB) continue;
    reorderedRemotePointsA[localp] = remotePointsA[i];
  }
  remotePointsA = reorderedRemotePointsA;
  /* Broadcast sfA's roots through sfB so every leaf of sfB learns which root of sfA (if any) reaches it.
     leafdataB covers sfB's leaf range [minleaf, maxleaf]; the -minleaf offset re-bases it to absolute leaf indices. */
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    leafdataB[i].rank  = -1;
    leafdataB[i].index = -1;
  }
  PetscCall(PetscSFBcastBegin(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB, MPIU_SF_NODE, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* If every leaf of sfB is connected (rank != -1) and leaves are contiguous from 0, the composed
     graph is dense and leafdataB can be used directly as the remote array (ilocal = NULL) */
  denseB = (PetscBool)!localPointsB;
  for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB transfers to the new SF below */
  } else {
    /* Sparse case: compact the connected leaves into freshly allocated arrays */
    PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
    for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l - minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
      localPointsBA[numLeavesBA]  = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  /* PETSC_OWN_POINTER: the new SF takes ownership of localPointsBA and remotePointsBA */
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2120 
2121 /*@
2122   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
2123 
2124   Input Parameters:
2125 + sfA - The first `PetscSF`
2126 - sfB - The second `PetscSF`
2127 
2128   Output Parameter:
2129 . sfBA - The composite `PetscSF`.
2130 
2131   Level: developer
2132 
2133   Notes:
2134   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2135   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
2136   second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.
2137 
2138   `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
2139   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2140   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()` and `PetscSFBcastEnd()`
2141   on `sfA`, then
2142   a `PetscSFReduceBegin()` and `PetscSFReduceEnd()` on `sfB`, on connected roots.
2143 
2144 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2145 @*/
2146 PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2147 {
2148   const PetscSFNode *remotePointsA, *remotePointsB;
2149   PetscSFNode       *remotePointsBA;
2150   const PetscInt    *localPointsA, *localPointsB;
2151   PetscSFNode       *reorderedRemotePointsA = NULL;
2152   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2153   MPI_Op             op;
2154 #if defined(PETSC_USE_64BIT_INDICES)
2155   PetscBool iswin;
2156 #endif
2157 
2158   PetscFunctionBegin;
2159   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
2160   PetscSFCheckGraphSet(sfA, 1);
2161   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
2162   PetscSFCheckGraphSet(sfB, 2);
2163   PetscCheckSameComm(sfA, 1, sfB, 2);
2164   PetscAssertPointer(sfBA, 3);
2165   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2166   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2167 
2168   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2169   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2170 
2171   /* TODO: Check roots of sfB have degree of 1 */
2172   /* Once we implement it, we can replace the MPI_MAXLOC
2173      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2174      We use MPI_MAXLOC only to have a deterministic output from this routine if
2175      the root condition is not meet.
2176    */
2177   op = MPI_MAXLOC;
2178 #if defined(PETSC_USE_64BIT_INDICES)
2179   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2180   PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
2181   if (iswin) op = MPI_REPLACE;
2182 #endif
2183 
2184   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2185   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
2186   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2187     reorderedRemotePointsA[i].rank  = -1;
2188     reorderedRemotePointsA[i].index = -1;
2189   }
2190   if (localPointsA) {
2191     for (i = 0; i < numLeavesA; i++) {
2192       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2193       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2194     }
2195   } else {
2196     for (i = 0; i < numLeavesA; i++) {
2197       if (i > maxleaf || i < minleaf) continue;
2198       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2199     }
2200   }
2201 
2202   PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
2203   PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
2204   for (i = 0; i < numRootsB; i++) {
2205     remotePointsBA[i].rank  = -1;
2206     remotePointsBA[i].index = -1;
2207   }
2208 
2209   PetscCall(PetscSFReduceBegin(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2210   PetscCall(PetscSFReduceEnd(sfB, MPIU_SF_NODE, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2211   PetscCall(PetscFree(reorderedRemotePointsA));
2212   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2213     if (remotePointsBA[i].rank == -1) continue;
2214     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
2215     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2216     localPointsBA[numLeavesBA]        = i;
2217     numLeavesBA++;
2218   }
2219   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2220   PetscCall(PetscSFSetFromOptions(*sfBA));
2221   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2222   PetscFunctionReturn(PETSC_SUCCESS);
2223 }
2224 
2225 /*
2226   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
2227 
2228   Input Parameter:
2229 . sf - The global `PetscSF`
2230 
2231   Output Parameter:
2232 . out - The local `PetscSF`
2233 
2234 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCreate()`
2235  */
2236 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2237 {
2238   MPI_Comm           comm;
2239   PetscMPIInt        myrank;
2240   const PetscInt    *ilocal;
2241   const PetscSFNode *iremote;
2242   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
2243   PetscSFNode       *liremote;
2244   PetscSF            lsf;
2245 
2246   PetscFunctionBegin;
2247   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2248   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2249   else {
2250     PetscMPIInt irank;
2251 
2252     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2253     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
2254     PetscCallMPI(MPI_Comm_rank(comm, &myrank));
2255 
2256     /* Find out local edges and build a local SF */
2257     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
2258     for (i = lnleaves = 0; i < nleaves; i++) {
2259       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2260       if (irank == myrank) lnleaves++;
2261     }
2262     PetscCall(PetscMalloc1(lnleaves, &lilocal));
2263     PetscCall(PetscMalloc1(lnleaves, &liremote));
2264 
2265     for (i = j = 0; i < nleaves; i++) {
2266       PetscCall(PetscMPIIntCast(iremote[i].rank, &irank));
2267       if (irank == myrank) {
2268         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2269         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
2270         liremote[j].index = iremote[i].index;
2271         j++;
2272       }
2273     }
2274     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
2275     PetscCall(PetscSFSetFromOptions(lsf));
2276     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
2277     PetscCall(PetscSFSetUp(lsf));
2278     *out = lsf;
2279   }
2280   PetscFunctionReturn(PETSC_SUCCESS);
2281 }
2282 
2283 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2284 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2285 {
2286   PetscMemType rootmtype, leafmtype;
2287 
2288   PetscFunctionBegin;
2289   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2290   PetscCall(PetscSFSetUp(sf));
2291   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2292   PetscCall(PetscGetMemType(rootdata, &rootmtype));
2293   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2294   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2295   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2296   PetscFunctionReturn(PETSC_SUCCESS);
2297 }
2298 
2299 /*@
2300   PetscSFConcatenate - concatenate multiple `PetscSF` into a new `PetscSF`
2301 
2302   Input Parameters:
2303 + comm        - the communicator
2304 . nsfs        - the number of input `PetscSF`
2305 . sfs         - the array of input `PetscSF`
2306 . rootMode    - the root mode specifying how roots are handled
2307 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2308 
2309   Output Parameter:
2310 . newsf - The resulting `PetscSF`
2311 
2312   Level: advanced
2313 
2314   Notes:
2315   The communicator of all `PetscSF`s in `sfs` must be comm.
2316 
2317   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
2318 
2319   The offsets in `leafOffsets` are added to the original leaf indices.
2320 
2321   If all input `PetscSF`s use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
2322   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
2323 
2324   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
  In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2326 
2327   All root modes retain the essential connectivity condition.
2328 
2329   If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2330 
2331   Parameter `rootMode` controls how the input root spaces are combined.
2332   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2333   and is also the same in the output `PetscSF`.
2334 
2335   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2336   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2337   roots of sfs[0], sfs[1], sfs[2], ... are joined on each MPI process separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
2338   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2339   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2340   the original root MPI processes are ignored.
2341   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2342   the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each MPI process
2343   to keep the load balancing.
2344   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different MPI processes.
2345 
2346   Example:
2347   We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
2348 .vb
2349   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
2350   for m in {local,global,shared}; do
2351     mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
2352   done
2353 .ve
2354   we generate two identical `PetscSF`s sf_0 and sf_1,
2355 .vb
2356   PetscSF Object: sf_0 2 MPI processes
2357     type: basic
2358     rank #leaves #roots
2359     [ 0]       4      2
2360     [ 1]       4      2
2361     leaves      roots       roots in global numbering
2362     ( 0,  0) <- ( 0,  0)  =   0
2363     ( 0,  1) <- ( 0,  1)  =   1
2364     ( 0,  2) <- ( 1,  0)  =   2
2365     ( 0,  3) <- ( 1,  1)  =   3
2366     ( 1,  0) <- ( 0,  0)  =   0
2367     ( 1,  1) <- ( 0,  1)  =   1
2368     ( 1,  2) <- ( 1,  0)  =   2
2369     ( 1,  3) <- ( 1,  1)  =   3
2370 .ve
2371   and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
2372 .vb
2373   rootMode = local:
2374   PetscSF Object: result_sf 2 MPI processes
2375     type: basic
2376     rank #leaves #roots
2377     [ 0]       8      4
2378     [ 1]       8      4
2379     leaves      roots       roots in global numbering
2380     ( 0,  0) <- ( 0,  0)  =   0
2381     ( 0,  1) <- ( 0,  1)  =   1
2382     ( 0,  2) <- ( 1,  0)  =   4
2383     ( 0,  3) <- ( 1,  1)  =   5
2384     ( 0,  4) <- ( 0,  2)  =   2
2385     ( 0,  5) <- ( 0,  3)  =   3
2386     ( 0,  6) <- ( 1,  2)  =   6
2387     ( 0,  7) <- ( 1,  3)  =   7
2388     ( 1,  0) <- ( 0,  0)  =   0
2389     ( 1,  1) <- ( 0,  1)  =   1
2390     ( 1,  2) <- ( 1,  0)  =   4
2391     ( 1,  3) <- ( 1,  1)  =   5
2392     ( 1,  4) <- ( 0,  2)  =   2
2393     ( 1,  5) <- ( 0,  3)  =   3
2394     ( 1,  6) <- ( 1,  2)  =   6
2395     ( 1,  7) <- ( 1,  3)  =   7
2396 
2397   rootMode = global:
2398   PetscSF Object: result_sf 2 MPI processes
2399     type: basic
2400     rank #leaves #roots
2401     [ 0]       8      4
2402     [ 1]       8      4
2403     leaves      roots       roots in global numbering
2404     ( 0,  0) <- ( 0,  0)  =   0
2405     ( 0,  1) <- ( 0,  1)  =   1
2406     ( 0,  2) <- ( 0,  2)  =   2
2407     ( 0,  3) <- ( 0,  3)  =   3
2408     ( 0,  4) <- ( 1,  0)  =   4
2409     ( 0,  5) <- ( 1,  1)  =   5
2410     ( 0,  6) <- ( 1,  2)  =   6
2411     ( 0,  7) <- ( 1,  3)  =   7
2412     ( 1,  0) <- ( 0,  0)  =   0
2413     ( 1,  1) <- ( 0,  1)  =   1
2414     ( 1,  2) <- ( 0,  2)  =   2
2415     ( 1,  3) <- ( 0,  3)  =   3
2416     ( 1,  4) <- ( 1,  0)  =   4
2417     ( 1,  5) <- ( 1,  1)  =   5
2418     ( 1,  6) <- ( 1,  2)  =   6
2419     ( 1,  7) <- ( 1,  3)  =   7
2420 
2421   rootMode = shared:
2422   PetscSF Object: result_sf 2 MPI processes
2423     type: basic
2424     rank #leaves #roots
2425     [ 0]       8      2
2426     [ 1]       8      2
2427     leaves      roots       roots in global numbering
2428     ( 0,  0) <- ( 0,  0)  =   0
2429     ( 0,  1) <- ( 0,  1)  =   1
2430     ( 0,  2) <- ( 1,  0)  =   2
2431     ( 0,  3) <- ( 1,  1)  =   3
2432     ( 0,  4) <- ( 0,  0)  =   0
2433     ( 0,  5) <- ( 0,  1)  =   1
2434     ( 0,  6) <- ( 1,  0)  =   2
2435     ( 0,  7) <- ( 1,  1)  =   3
2436     ( 1,  0) <- ( 0,  0)  =   0
2437     ( 1,  1) <- ( 0,  1)  =   1
2438     ( 1,  2) <- ( 1,  0)  =   2
2439     ( 1,  3) <- ( 1,  1)  =   3
2440     ( 1,  4) <- ( 0,  0)  =   0
2441     ( 1,  5) <- ( 0,  1)  =   1
2442     ( 1,  6) <- ( 1,  0)  =   2
2443     ( 1,  7) <- ( 1,  1)  =   3
2444 .ve
2445 
2446 .seealso: [](sec_petscsf), `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2447 @*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt     i, s, nLeaves, nRoots;
  PetscInt    *leafArrayOffsets; /* leafArrayOffsets[s] = position in the concatenated leaf arrays where sfs[s]'s leaves start */
  PetscInt    *ilocal_new;
  PetscSFNode *iremote_new;
  PetscBool    all_ilocal_null = PETSC_FALSE;
  PetscLayout  glayout         = NULL; /* only used for PETSCSF_CONCATENATE_ROOTMODE_GLOBAL */
  PetscInt    *gremote         = NULL; /* only used for PETSCSF_CONCATENATE_ROOTMODE_GLOBAL */
  PetscMPIInt  rank, size;

  PetscFunctionBegin;
  if (PetscDefined(USE_DEBUG)) {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    PetscCall(PetscSFCreate(comm, &dummy));
    PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
    PetscAssertPointer(sfs, 3);
    for (i = 0; i < nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
      PetscCheckSameComm(dummy, 1, sfs[i], 3);
    }
    PetscValidLogicalCollectiveEnum(dummy, rootMode, 4);
    if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
    PetscAssertPointer(newsf, 6);
    PetscCall(PetscSFDestroy(&dummy));
  }
  /* Concatenating zero SFs yields an empty SF */
  if (!nsfs) {
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Calculate leaf array offsets (prefix sums of the inputs' local leaf counts) */
  PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
  leafArrayOffsets[0] = 0;
  for (s = 0; s < nsfs; s++) {
    PetscInt nl;

    PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
    leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* Calculate number of roots */
  switch (rootMode) {
  case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
    /* all inputs address the same root space; debug mode verifies they agree on its local size */
    PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
    if (PetscDefined(USE_DEBUG)) {
      for (s = 1; s < nsfs; s++) {
        PetscInt nr;

        PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
      }
    }
  } break;
  case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
    /* Calculate also global layout in this case */
    PetscInt    *nls;
    PetscLayout *lts;
    PetscInt   **inds;
    PetscInt     j;
    PetscInt     rootOffset = 0;

    PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
    PetscCall(PetscLayoutCreate(comm, &glayout));
    /* build the joint layout by accumulating the inputs' local/global root counts */
    glayout->bs = 1;
    glayout->n  = 0;
    glayout->N  = 0;
    for (s = 0; s < nsfs; s++) {
      PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
      glayout->n += lts[s]->n;
      glayout->N += lts[s]->N;
    }
    PetscCall(PetscLayoutSetUp(glayout));
    PetscCall(PetscMalloc1(nLeaves, &gremote));
    for (s = 0, j = 0; s < nsfs; s++) {
      /* shift each input's global root indices by the total root count of the preceding inputs */
      for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
      rootOffset += lts[s]->N;
      PetscCall(PetscLayoutDestroy(&lts[s]));
      PetscCall(PetscFree(inds[s]));
    }
    PetscCall(PetscFree3(lts, nls, inds));
    nRoots = glayout->N;
  } break;
  case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
    /* nRoots calculated later in this case */
    break;
  default:
    SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
  }

  /* leafOffsets = NULL is allowed only if every input uses contiguous leaf storage (ilocal = NULL) */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s = 0; s < nsfs; s++) {
      const PetscInt *ilocal;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL;
  if (!all_ilocal_null) {
    PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
    for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; /* -1 marks unset entries */
    for (s = 0; s < nsfs; s++) {
      const PetscInt *ilocal;
      PetscInt       *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
      PetscInt        i, nleaves_l;

      /* each input's leaf indices are shifted by the user-provided offset for that input */
      PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
      for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots */
  if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
    PetscInt rootOffset = 0;

    PetscCall(PetscMalloc1(nLeaves, &iremote_new));
    for (i = 0; i < nLeaves; i++) {
      iremote_new[i].rank  = -1;
      iremote_new[i].index = -1;
    }
    for (s = 0; s < nsfs; s++) {
      PetscInt           i, nl, nr;
      PetscSF            tmp_sf;
      const PetscSFNode *iremote;
      PetscSFNode       *tmp_rootdata;
      PetscSFNode       *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]); /* this input's slice of iremote_new */

      PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
      PetscCall(PetscSFCreate(comm, &tmp_sf));
      /* create helper SF with contiguous leaves */
      PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
      PetscCall(PetscSFSetUp(tmp_sf));
      PetscCall(PetscMalloc1(nr, &tmp_rootdata));
      if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
        /* local mode: roots of successive inputs are renumbered contiguously on each process */
        for (i = 0; i < nr; i++) {
          tmp_rootdata[i].index = i + rootOffset;
          tmp_rootdata[i].rank  = rank;
        }
        rootOffset += nr;
      } else {
        /* shared mode: every input addresses the same root space, so no offset is applied */
        for (i = 0; i < nr; i++) {
          tmp_rootdata[i].index = i;
          tmp_rootdata[i].rank  = rank;
        }
      }
      /* broadcast each root's new (rank, index) to its leaves, writing directly into iremote_new */
      PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_SF_NODE, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
      PetscCall(PetscSFDestroy(&tmp_sf));
      PetscCall(PetscFree(tmp_rootdata));
    }
    if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above

    /* Build the new SF */
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
  } else {
    /* Build the new SF */
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
  }
  PetscCall(PetscSFSetUp(*newsf));
  PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
  PetscCall(PetscLayoutDestroy(&glayout));
  PetscCall(PetscFree(gremote));
  PetscCall(PetscFree(leafArrayOffsets));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2628 
2629 /*@
2630   PetscSFRegisterPersistent - Register root and leaf data as memory regions that will be used for repeated `PetscSF` communications.
2631 
2632   Collective
2633 
2634   Input Parameters:
2635 + sf       - star forest
2636 . unit     - the data type contained within `rootdata` and `leafdata`
2637 . rootdata - root data that will be used for multiple `PetscSF` communications
2638 - leafdata - leaf data that will be used for multiple `PetscSF` communications
2639 
2640   Level: advanced
2641 
2642   Notes:
2643   Implementations of `PetscSF` can make optimizations
2644   for repeated communication using the same memory regions, but these optimizations
2645   can be unsound if `rootdata` or `leafdata` is deallocated and the `PetscSF` is not informed.
2646   The intended pattern is
2647 
2648 .vb
2649   PetscMalloc2(nroots, &rootdata, nleaves, &leafdata);
2650 
2651   PetscSFRegisterPersistent(sf, unit, rootdata, leafdata);
2652   // repeated use of rootdata and leafdata will now be optimized
2653 
2654   PetscSFBcastBegin(sf, unit, rootdata, leafdata, MPI_REPLACE);
2655   PetscSFBcastEnd(sf, unit, rootdata, leafdata, MPI_REPLACE);
2656   // ...
2657   PetscSFReduceBegin(sf, unit, leafdata, rootdata, MPI_SUM);
2658   PetscSFReduceEnd(sf, unit, leafdata, rootdata, MPI_SUM);
2659   // ... (other communications)
2660 
2661   // rootdata and leafdata must be deregistered before freeing
2662   // skipping this can lead to undefined behavior including
2663   // deadlocks
2664   PetscSFDeregisterPersistent(sf, unit, rootdata, leafdata);
2665 
2666   // it is now safe to free rootdata and leafdata
2667   PetscFree2(rootdata, leafdata);
2668 .ve
2669 
2670   If you do not register `rootdata` and `leafdata` it will not cause an error,
2671   but optimizations that reduce the setup time for each communication cannot be
2672   made.  Currently, the only implementation of `PetscSF` that benefits from
2673   `PetscSFRegisterPersistent()` is `PETSCSFWINDOW`.  For the default
2674   `PETSCSFBASIC` there is no benefit to using `PetscSFRegisterPersistent()`.
2675 
2676 .seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFDeregisterPersistent()`
2677 @*/
PetscErrorCode PetscSFRegisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* optional per-type hook: PetscTryMethod is a no-op for implementations that do not provide it,
     so registering is always safe even when no optimization results */
  PetscTryMethod(sf, "PetscSFRegisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2685 
2686 /*@
2687   PetscSFDeregisterPersistent - Signal that repeated usage of `rootdata` and `leafdata` for `PetscSF` communication has concluded.
2688 
2689   Collective
2690 
2691   Input Parameters:
2692 + sf       - star forest
2693 . unit     - the data type contained within the `rootdata` and `leafdata`
2694 . rootdata - root data that was previously registered with `PetscSFRegisterPersistent()`
2695 - leafdata - leaf data that was previously registered with `PetscSFRegisterPersistent()`
2696 
2697   Level: advanced
2698 
2699   Note:
2700   See `PetscSFRegisterPersistent()` for when and how to use this function.
2701 
2702 .seealso: [](sec_petscsf), `PetscSF`, `PETSCSFWINDOW`, `PetscSFRegisterPersistent()`
2703 @*/
PetscErrorCode PetscSFDeregisterPersistent(PetscSF sf, MPI_Datatype unit, const void *rootdata, const void *leafdata)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* optional per-type hook mirroring PetscSFRegisterPersistent(); a no-op when the
     implementation does not provide it */
  PetscTryMethod(sf, "PetscSFDeregisterPersistent_C", (PetscSF, MPI_Datatype, const void *, const void *), (sf, unit, rootdata, leafdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2711 
2712 PETSC_INTERN PetscErrorCode PetscSFGetDatatypeSize_Internal(MPI_Comm comm, MPI_Datatype unit, MPI_Aint *size)
2713 {
2714   MPI_Aint lb, lb_true, bytes, bytes_true;
2715 
2716   PetscFunctionBegin;
2717   PetscCallMPI(MPI_Type_get_extent(unit, &lb, &bytes));
2718   PetscCallMPI(MPI_Type_get_true_extent(unit, &lb_true, &bytes_true));
2719   PetscCheck(lb == 0 && lb_true == 0, comm, PETSC_ERR_SUP, "No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
2720   *size = bytes;
2721   PetscFunctionReturn(PETSC_SUCCESS);
2722 }
2723