xref: /petsc/src/vec/is/sf/interface/sf.c (revision 456dbbcd9876bdf1526b93d27a05ea9aadb1fa98)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petsc/private/hashmapi.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <cuda_runtime.h>
8   #include <petscdevice_cuda.h>
9 #endif
10 
11 #if defined(PETSC_HAVE_HIP)
12   #include <hip/hip_runtime.h>
13 #endif
14 
15 #if defined(PETSC_CLANG_STATIC_ANALYZER)
16 extern void PetscSFCheckGraphSet(PetscSF, int);
17 #else
18   #if defined(PETSC_USE_DEBUG)
19     #define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME)
20   #else
21     #define PetscSFCheckGraphSet(sf, arg) \
22       do { \
23       } while (0)
24   #endif
25 #endif
26 
27 const char *const PetscSFDuplicateOptions[]     = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
28 const char *const PetscSFConcatenateRootModes[] = {"local", "shared", "global", "PetscSFConcatenateRootMode", "PETSCSF_CONCATENATE_ROOTMODE_", NULL};
29 
30 /*@
31   PetscSFCreate - create a star forest communication context
32 
33   Collective
34 
35   Input Parameter:
36 . comm - communicator on which the star forest will operate
37 
38   Output Parameter:
39 . sf - new star forest context
40 
41   Options Database Keys:
42 + -sf_type basic                 - Use MPI persistent Isend/Irecv for communication (Default)
43 . -sf_type window                - Use MPI-3 one-sided window for communication
44 . -sf_type neighbor              - Use MPI-3 neighborhood collectives for communication
45 - -sf_neighbor_persistent <bool> - If true, use MPI-4 persistent neighborhood collectives for communication (used along with -sf_type neighbor)
46 
47   Level: intermediate
48 
49   Note:
50   When one knows the communication graph matches one of the predefined patterns, such as those of `MPI_Alltoall()`, `MPI_Allgatherv()`,
51   or `MPI_Gatherv()`, one can create a `PetscSF` and then set its graph with `PetscSFSetGraphWithPattern()`. These special
52   `SF`s are optimized and have better performance than the general `SF`s.
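
  Example Usage:
  A minimal sketch (the one-root, one-leaf graph is purely illustrative); every process creates one root and
  one leaf, and the leaf references root 0 on process 0:
.vb
  PetscSF     sf;
  PetscSFNode remote;

  PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
  remote.rank  = 0;
  remote.index = 0;
  PetscCall(PetscSFSetGraph(sf, 1, 1, NULL, PETSC_COPY_VALUES, &remote, PETSC_COPY_VALUES));
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscSFDestroy(&sf));
.ve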
53 
54 .seealso: `PetscSF`, `PetscSFSetType`, `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
55 @*/
56 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf)
57 {
58   PetscSF b;
59 
60   PetscFunctionBegin;
61   PetscAssertPointer(sf, 2);
62   PetscCall(PetscSFInitializePackage());
63 
64   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
65 
66   b->nroots    = -1;
67   b->nleaves   = -1;
68   b->minleaf   = PETSC_MAX_INT;
69   b->maxleaf   = PETSC_MIN_INT;
70   b->nranks    = -1;
71   b->rankorder = PETSC_TRUE;
72   b->ingroup   = MPI_GROUP_NULL;
73   b->outgroup  = MPI_GROUP_NULL;
74   b->graphset  = PETSC_FALSE;
75 #if defined(PETSC_HAVE_DEVICE)
76   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
77   b->use_stream_aware_mpi = PETSC_FALSE;
78   b->unknown_input_stream = PETSC_FALSE;
79   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda */
80   b->backend = PETSCSF_BACKEND_KOKKOS;
81   #elif defined(PETSC_HAVE_CUDA)
82   b->backend = PETSCSF_BACKEND_CUDA;
83   #elif defined(PETSC_HAVE_HIP)
84   b->backend = PETSCSF_BACKEND_HIP;
85   #endif
86 
87   #if defined(PETSC_HAVE_NVSHMEM)
88   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
89   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
90   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
91   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
92   #endif
93 #endif
94   b->vscat.from_n = -1;
95   b->vscat.to_n   = -1;
96   b->vscat.unit   = MPIU_SCALAR;
97   *sf             = b;
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 /*@
102   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
103 
104   Collective
105 
106   Input Parameter:
107 . sf - star forest
108 
109   Level: advanced
110 
111 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
112 @*/
113 PetscErrorCode PetscSFReset(PetscSF sf)
114 {
115   PetscFunctionBegin;
116   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
117   PetscTryTypeMethod(sf, Reset);
118   PetscCall(PetscSFDestroy(&sf->rankssf));
119 
120   sf->nroots   = -1;
121   sf->nleaves  = -1;
122   sf->minleaf  = PETSC_MAX_INT;
123   sf->maxleaf  = PETSC_MIN_INT;
124   sf->mine     = NULL;
125   sf->remote   = NULL;
126   sf->graphset = PETSC_FALSE;
127   PetscCall(PetscFree(sf->mine_alloc));
128   PetscCall(PetscFree(sf->remote_alloc));
129   sf->nranks = -1;
130   PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
131   sf->degreeknown = PETSC_FALSE;
132   PetscCall(PetscFree(sf->degree));
133   if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
134   if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
135 
136   if (sf->multi) sf->multi->multi = NULL;
137   PetscCall(PetscSFDestroy(&sf->multi));
138 
139   PetscCall(PetscLayoutDestroy(&sf->map));
140 
141 #if defined(PETSC_HAVE_DEVICE)
142   for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
143 #endif
144 
145   sf->setupcalled = PETSC_FALSE;
146   PetscFunctionReturn(PETSC_SUCCESS);
147 }
148 
149 /*@C
150   PetscSFSetType - Set the `PetscSF` communication implementation
151 
152   Collective
153 
154   Input Parameters:
155 + sf   - the `PetscSF` context
156 - type - a known method
157 .vb
158     PETSCSFWINDOW - MPI-2/3 one-sided
159     PETSCSFBASIC - basic implementation using MPI-1 two-sided
160 .ve
161 
162   Options Database Key:
163 . -sf_type <type> - Sets the method; for example `basic` or `window`. Use -help for a list of available methods
164 
165   Level: intermediate
166 
167   Notes:
168   See `PetscSFType` for possible values
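
  Example Usage:
  For example, to explicitly select the one-sided (window) implementation (a sketch, assuming `sf` was created with `PetscSFCreate()`):
.vb
  PetscCall(PetscSFSetType(sf, PETSCSFWINDOW));
.ve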
169 
170 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`
171 @*/
172 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type)
173 {
174   PetscBool match;
175   PetscErrorCode (*r)(PetscSF);
176 
177   PetscFunctionBegin;
178   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
179   PetscAssertPointer(type, 2);
180 
181   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
182   if (match) PetscFunctionReturn(PETSC_SUCCESS);
183 
184   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
185   PetscCheck(r, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
186   /* Destroy the previous PetscSF implementation context */
187   PetscTryTypeMethod(sf, Destroy);
188   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
189   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
190   PetscCall((*r)(sf));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
194 /*@C
195   PetscSFGetType - Get the `PetscSF` communication implementation
196 
197   Not Collective
198 
199   Input Parameter:
200 . sf - the `PetscSF` context
201 
202   Output Parameter:
203 . type - the `PetscSF` type name
204 
205   Level: intermediate
206 
207 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetType()`, `PetscSFCreate()`
208 @*/
209 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
210 {
211   PetscFunctionBegin;
212   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
213   PetscAssertPointer(type, 2);
214   *type = ((PetscObject)sf)->type_name;
215   PetscFunctionReturn(PETSC_SUCCESS);
216 }
217 
218 /*@C
219   PetscSFDestroy - destroy a star forest
220 
221   Collective
222 
223   Input Parameter:
224 . sf - address of star forest
225 
226   Level: intermediate
227 
228 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFReset()`
229 @*/
230 PetscErrorCode PetscSFDestroy(PetscSF *sf)
231 {
232   PetscFunctionBegin;
233   if (!*sf) PetscFunctionReturn(PETSC_SUCCESS);
234   PetscValidHeaderSpecific(*sf, PETSCSF_CLASSID, 1);
235   if (--((PetscObject)*sf)->refct > 0) {
236     *sf = NULL;
237     PetscFunctionReturn(PETSC_SUCCESS);
238   }
239   PetscCall(PetscSFReset(*sf));
240   PetscTryTypeMethod(*sf, Destroy);
241   PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
242   if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
243 #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
244   if ((*sf)->use_stream_aware_mpi) {
245     PetscCallMPI(MPIX_Stream_free(&(*sf)->mpi_stream));
246     PetscCallMPI(MPI_Comm_free(&(*sf)->stream_comm));
247   }
248 #endif
249   PetscCall(PetscHeaderDestroy(sf));
250   PetscFunctionReturn(PETSC_SUCCESS);
251 }
252 
253 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
254 {
255   PetscInt           i, nleaves;
256   PetscMPIInt        size;
257   const PetscInt    *ilocal;
258   const PetscSFNode *iremote;
259 
260   PetscFunctionBegin;
261   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(PETSC_SUCCESS);
262   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
263   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
264   for (i = 0; i < nleaves; i++) {
265     const PetscInt rank   = iremote[i].rank;
266     const PetscInt remote = iremote[i].index;
267     const PetscInt leaf   = ilocal ? ilocal[i] : i;
268     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
269     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
270     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
271   }
272   PetscFunctionReturn(PETSC_SUCCESS);
273 }
274 
275 /*@
276   PetscSFSetUp - set up communication structures for a `PetscSF`, after this is done it may be used to perform communication
277 
278   Collective
279 
280   Input Parameter:
281 . sf - star forest communication object
282 
283   Level: beginner
284 
285 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetFromOptions()`, `PetscSFSetType()`
286 @*/
287 PetscErrorCode PetscSFSetUp(PetscSF sf)
288 {
289   PetscFunctionBegin;
290   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
291   PetscSFCheckGraphSet(sf, 1);
292   if (sf->setupcalled) PetscFunctionReturn(PETSC_SUCCESS);
293   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
294   PetscCall(PetscSFCheckGraphValid_Private(sf));
295   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
296   PetscTryTypeMethod(sf, SetUp);
297 #if defined(PETSC_HAVE_CUDA)
298   if (sf->backend == PETSCSF_BACKEND_CUDA) {
299     sf->ops->Malloc = PetscSFMalloc_CUDA;
300     sf->ops->Free   = PetscSFFree_CUDA;
301   }
302 #endif
303 #if defined(PETSC_HAVE_HIP)
304   if (sf->backend == PETSCSF_BACKEND_HIP) {
305     sf->ops->Malloc = PetscSFMalloc_HIP;
306     sf->ops->Free   = PetscSFFree_HIP;
307   }
308 #endif
309 
310 #if defined(PETSC_HAVE_KOKKOS)
311   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
312     sf->ops->Malloc = PetscSFMalloc_Kokkos;
313     sf->ops->Free   = PetscSFFree_Kokkos;
314   }
315 #endif
316   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
317   sf->setupcalled = PETSC_TRUE;
318   PetscFunctionReturn(PETSC_SUCCESS);
319 }
320 
321 /*@
322   PetscSFSetFromOptions - set `PetscSF` options using the options database
323 
324   Logically Collective
325 
326   Input Parameter:
327 . sf - star forest
328 
329   Options Database Keys:
330 + -sf_type                        - implementation type, see `PetscSFSetType()`
331 . -sf_rank_order                  - sort composite points for gathers and scatters in rank order; gathers are non-deterministic otherwise
332 . -sf_use_default_stream          - Assume callers of `PetscSF` computed the input root/leafdata with the default CUDA stream. `PetscSF` will also
333                                     use the default stream to process data, so no stream synchronization is needed between `PetscSF` and its caller (default: true).
334                                     If true, this option only works with `-use_gpu_aware_mpi 1`.
335 . -sf_use_stream_aware_mpi        - Assume the underlying MPI is CUDA-stream aware and `PetscSF` won't sync streams for send/recv buffers passed to MPI (default: false).
336                                     If true, this option only works with `-use_gpu_aware_mpi 1`.
337 - -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently `PetscSF` has these backends: cuda, hip and kokkos.
338                                     On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with the default being kokkos. On other devices,
339                                     kokkos is the only available backend.
341 
342   Level: intermediate
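
  Example Usage:
  A command-line sketch (the executable name ./app is hypothetical) selecting the neighbor implementation
  and the kokkos backend at run time:
.vb
  mpiexec -n 4 ./app -sf_type neighbor -sf_backend kokkos
.ve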
343 
344 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFSetType()`
345 @*/
346 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
347 {
348   PetscSFType deft;
349   char        type[256];
350   PetscBool   flg;
351 
352   PetscFunctionBegin;
353   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
354   PetscObjectOptionsBegin((PetscObject)sf);
355   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
356   PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
357   PetscCall(PetscSFSetType(sf, flg ? type : deft));
358   PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
359 #if defined(PETSC_HAVE_DEVICE)
360   {
361     char      backendstr[32] = {0};
362     PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
363     /* Change the defaults set in PetscSFCreate() with command line options */
364     PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitrary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
365     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
366     PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
367     PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
368     PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
369     PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
370   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
371     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
372     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
373     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
374     else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
375   #elif defined(PETSC_HAVE_KOKKOS)
376     PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
377   #endif
378 
379   #if defined(PETSC_HAVE_CUDA) && defined(PETSC_HAVE_MPIX_STREAM)
380     if (sf->use_stream_aware_mpi) {
381       MPI_Info info;
382 
383       PetscCallMPI(MPI_Info_create(&info));
384       PetscCallMPI(MPI_Info_set(info, "type", "cudaStream_t"));
385       PetscCallMPI(MPIX_Info_set_hex(info, "value", &PetscDefaultCudaStream, sizeof(PetscDefaultCudaStream)));
386       PetscCallMPI(MPIX_Stream_create(info, &sf->mpi_stream));
387       PetscCallMPI(MPI_Info_free(&info));
388       PetscCallMPI(MPIX_Stream_comm_create(PetscObjectComm((PetscObject)sf), sf->mpi_stream, &sf->stream_comm));
389     }
390   #endif
391   }
392 #endif
393   PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
394   PetscOptionsEnd();
395   PetscFunctionReturn(PETSC_SUCCESS);
396 }
397 
398 /*@
399   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
400 
401   Logically Collective
402 
403   Input Parameters:
404 + sf  - star forest
405 - flg - `PETSC_TRUE` to sort, `PETSC_FALSE` to skip sorting (lower setup cost, but non-deterministic)
406 
407   Level: advanced
408 
409 .seealso: `PetscSF`, `PetscSFType`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
410 @*/
411 PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg)
412 {
413   PetscFunctionBegin;
414   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
415   PetscValidLogicalCollectiveBool(sf, flg, 2);
416   PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
417   sf->rankorder = flg;
418   PetscFunctionReturn(PETSC_SUCCESS);
419 }
420 
421 /*@C
422   PetscSFSetGraph - Set a parallel star forest
423 
424   Collective
425 
426   Input Parameters:
427 + sf         - star forest
428 . nroots     - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
429 . nleaves    - number of leaf vertices on the current process, each of these references a root on any process
430 . ilocal     - locations of leaves in leafdata buffers, pass `NULL` for contiguous storage (locations must be >= 0, enforced
431 during setup in debug mode)
432 . localmode  - copy mode for `ilocal`
433 . iremote    - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
434 during setup in debug mode)
435 - remotemode - copy mode for `iremote`
436 
437   Level: intermediate
438 
439   Notes:
440   Leaf indices in `ilocal` must be unique; otherwise an error occurs.
441 
442   Input arrays `ilocal` and `iremote` follow the `PetscCopyMode` semantics.
443   In particular, if `localmode` or `remotemode` is `PETSC_OWN_POINTER` or `PETSC_USE_POINTER`,
444   PETSc might modify the respective array;
445   if `PETSC_USE_POINTER`, the user must delete the array after `PetscSFDestroy()`.
446   Only with `PETSC_COPY_VALUES` is the respective array guaranteed to stay intact, so a const array can be passed (but a cast to non-const is needed).
447 
448   Fortran Notes:
449   In Fortran you must use `PETSC_COPY_VALUES` for `localmode` and `remotemode`.
450 
451   Developer Notes:
452   We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
453   This also makes it easy to compare the leaf sets of two `PetscSF`s.
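
  Example Usage:
  A minimal sketch (the ring graph is purely illustrative, and `sf` is assumed to have been created with `PetscSFCreate()`);
  each process owns one root and one leaf, and the leaf references root 0 on the next process (wrapping around):
.vb
  PetscMPIInt rank, size;
  PetscSFNode remote;

  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  remote.rank  = (rank + 1) % size;
  remote.index = 0;
  PetscCall(PetscSFSetGraph(sf, 1, 1, NULL, PETSC_COPY_VALUES, &remote, PETSC_COPY_VALUES));
.ve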
454 
455 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
456 @*/
457 PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode)
458 {
459   PetscBool unique, contiguous;
460 
461   PetscFunctionBegin;
462   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
463   if (nleaves > 0 && ilocal) PetscAssertPointer(ilocal, 4);
464   if (nleaves > 0) PetscAssertPointer(iremote, 6);
465   PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
466   PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
467   /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
468    * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
469   PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
470   PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);
471 
472   if (sf->nroots >= 0) { /* Reset only if graph already set */
473     PetscCall(PetscSFReset(sf));
474   }
475 
476   PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));
477 
478   sf->nroots  = nroots;
479   sf->nleaves = nleaves;
480 
481   if (localmode == PETSC_COPY_VALUES && ilocal) {
482     PetscInt *tlocal = NULL;
483 
484     PetscCall(PetscMalloc1(nleaves, &tlocal));
485     PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
486     ilocal = tlocal;
487   }
488   if (remotemode == PETSC_COPY_VALUES) {
489     PetscSFNode *tremote = NULL;
490 
491     PetscCall(PetscMalloc1(nleaves, &tremote));
492     PetscCall(PetscArraycpy(tremote, iremote, nleaves));
493     iremote = tremote;
494   }
495 
496   if (nleaves && ilocal) {
497     PetscSFNode work;
498 
499     PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
500     PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
501     unique = PetscNot(unique);
502     PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
503     sf->minleaf = ilocal[0];
504     sf->maxleaf = ilocal[nleaves - 1];
505     contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
506   } else {
507     sf->minleaf = 0;
508     sf->maxleaf = nleaves - 1;
509     unique      = PETSC_TRUE;
510     contiguous  = PETSC_TRUE;
511   }
512 
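  /* When the leaves are exactly [0, nleaves), represent them implicitly: drop ilocal so that sf->mine becomes NULL (contiguous storage) */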
513   if (contiguous) {
514     if (localmode == PETSC_USE_POINTER) {
515       ilocal = NULL;
516     } else {
517       PetscCall(PetscFree(ilocal));
518     }
519   }
520   sf->mine = ilocal;
521   if (localmode == PETSC_USE_POINTER) {
522     sf->mine_alloc = NULL;
523   } else {
524     sf->mine_alloc = ilocal;
525   }
526   sf->remote = iremote;
527   if (remotemode == PETSC_USE_POINTER) {
528     sf->remote_alloc = NULL;
529   } else {
530     sf->remote_alloc = iremote;
531   }
532   PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
533   sf->graphset = PETSC_TRUE;
534   PetscFunctionReturn(PETSC_SUCCESS);
535 }
536 
537 /*@
538   PetscSFSetGraphWithPattern - Sets the graph of a `PetscSF` with a specific pattern
539 
540   Collective
541 
542   Input Parameters:
543 + sf      - The `PetscSF`
544 . map     - Layout of roots over all processes (insignificant when pattern is `PETSCSF_PATTERN_ALLTOALL`)
545 - pattern - One of `PETSCSF_PATTERN_ALLGATHER`, `PETSCSF_PATTERN_GATHER`, `PETSCSF_PATTERN_ALLTOALL`
546 
547   Level: intermediate
548 
549   Notes:
550   It is easier to explain `PetscSFPattern` using vectors. Suppose we have an MPI vector `x` and its `PetscLayout` is `map`.
551   `n` and `N` are the local and global sizes of `x` respectively.
552 
553   With `PETSCSF_PATTERN_ALLGATHER`, the routine creates a graph such that doing `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it copies `x` to
554   sequential vectors `y` on all MPI processes.
555 
556   With `PETSCSF_PATTERN_GATHER`, the routine creates a graph such that doing `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on it copies `x` to a
557   sequential vector `y` on rank 0.
558 
559   In the above cases, entries of `x` are roots and entries of `y` are leaves.
560 
561   With `PETSCSF_PATTERN_ALLTOALL`, `map` is insignificant. Suppose NP is the size of `sf`'s communicator. The routine
562   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
563   of rank j, where 0 <= i,j < NP. It is a kind of `MPI_Alltoall()` with sendcount/recvcount equal to 1. This does
564   not mean one cannot send multiple items; one just needs to create an MPI datatype for the multiple data
565   items with `MPI_Type_contiguous()` and use that as the <unit> argument in SF routines.
566 
567   In this case, roots and leaves are symmetric.
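
  Example Usage:
  A minimal sketch (the local size of 2 is illustrative, and `sf` is assumed to have been created with `PetscSFCreate()`)
  building an allgather `PetscSF` from a layout:
.vb
  PetscLayout map;

  PetscCall(PetscLayoutCreate(PETSC_COMM_WORLD, &map));
  PetscCall(PetscLayoutSetLocalSize(map, 2));
  PetscCall(PetscLayoutSetUp(map));
  PetscCall(PetscSFSetGraphWithPattern(sf, map, PETSCSF_PATTERN_ALLGATHER));
  PetscCall(PetscLayoutDestroy(&map));
.ve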
568 
569 .seealso: `PetscSF`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
570  @*/
571 PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern)
572 {
573   MPI_Comm    comm;
574   PetscInt    n, N, res[2];
575   PetscMPIInt rank, size;
576   PetscSFType type;
577 
578   PetscFunctionBegin;
579   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
580   if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscAssertPointer(map, 2);
581   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
582   PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
583   PetscCallMPI(MPI_Comm_rank(comm, &rank));
584   PetscCallMPI(MPI_Comm_size(comm, &size));
585 
586   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
587     type = PETSCSFALLTOALL;
588     PetscCall(PetscLayoutCreate(comm, &sf->map));
589     PetscCall(PetscLayoutSetLocalSize(sf->map, size));
590     PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
591     PetscCall(PetscLayoutSetUp(sf->map));
592   } else {
593     PetscCall(PetscLayoutGetLocalSize(map, &n));
594     PetscCall(PetscLayoutGetSize(map, &N));
595     res[0] = n;
596     res[1] = -n;
597     /* Check if n is the same on all ranks so that we can optimize it */
598     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
599     if (res[0] == -res[1]) { /* same n */
600       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
601     } else {
602       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
603     }
604     PetscCall(PetscLayoutReference(map, &sf->map));
605   }
606   PetscCall(PetscSFSetType(sf, type));
607 
608   sf->pattern = pattern;
609   sf->mine    = NULL; /* Contiguous */
610 
611   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
612      Also set other easy stuff.
613    */
614   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
615     sf->nleaves = N;
616     sf->nroots  = n;
617     sf->nranks  = size;
618     sf->minleaf = 0;
619     sf->maxleaf = N - 1;
620   } else if (pattern == PETSCSF_PATTERN_GATHER) {
621     sf->nleaves = rank ? 0 : N;
622     sf->nroots  = n;
623     sf->nranks  = rank ? 0 : size;
624     sf->minleaf = 0;
625     sf->maxleaf = rank ? -1 : N - 1;
626   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
627     sf->nleaves = size;
628     sf->nroots  = size;
629     sf->nranks  = size;
630     sf->minleaf = 0;
631     sf->maxleaf = size - 1;
632   }
633   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
634   sf->graphset = PETSC_TRUE;
635   PetscFunctionReturn(PETSC_SUCCESS);
636 }
637 
638 /*@
639   PetscSFCreateInverseSF - given a `PetscSF` in which all vertices have degree 1, creates the inverse map
640 
641   Collective
642 
643   Input Parameter:
644 . sf - star forest to invert
645 
646   Output Parameter:
647 . isf - inverse of `sf`
648 
649   Level: advanced
650 
651   Notes:
652   All roots must have degree 1.
653 
654   The local space may be a permutation, but cannot be sparse.
655 
656 .seealso: `PetscSF`, `PetscSFType`, `PetscSFSetGraph()`
657 @*/
658 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf)
659 {
660   PetscMPIInt     rank;
661   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
662   const PetscInt *ilocal;
663   PetscSFNode    *roots, *leaves;
664 
665   PetscFunctionBegin;
666   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
667   PetscSFCheckGraphSet(sf, 1);
668   PetscAssertPointer(isf, 2);
669 
670   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
671   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
672 
673   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
674   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
675   for (i = 0; i < maxlocal; i++) {
676     leaves[i].rank  = rank;
677     leaves[i].index = i;
678   }
679   for (i = 0; i < nroots; i++) {
680     roots[i].rank  = -1;
681     roots[i].index = -1;
682   }
683   PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
684   PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
685 
686   /* Check whether our leaves are sparse */
687   for (i = 0, count = 0; i < nroots; i++)
688     if (roots[i].rank >= 0) count++;
689   if (count == nroots) newilocal = NULL;
690   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
691     for (i = 0, count = 0; i < nroots; i++) {
692       if (roots[i].rank >= 0) {
693         newilocal[count]   = i;
694         roots[count].rank  = roots[i].rank;
695         roots[count].index = roots[i].index;
696         count++;
697       }
698     }
699   }
700 
701   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
702   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
703   PetscCall(PetscFree2(roots, leaves));
704   PetscFunctionReturn(PETSC_SUCCESS);
705 }
706 
707 /*@
708   PetscSFDuplicate - duplicate a `PetscSF`, optionally preserving rank connectivity and graph
709 
710   Collective
711 
712   Input Parameters:
713 + sf  - communication object to duplicate
714 - opt - `PETSCSF_DUPLICATE_CONFONLY`, `PETSCSF_DUPLICATE_RANKS`, or `PETSCSF_DUPLICATE_GRAPH` (see `PetscSFDuplicateOption`)
715 
716   Output Parameter:
717 . newsf - new communication object
718 
719   Level: beginner
720 
721 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
722 @*/
723 PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf)
724 {
725   PetscSFType  type;
726   MPI_Datatype dtype = MPIU_SCALAR;
727 
728   PetscFunctionBegin;
729   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
730   PetscValidLogicalCollectiveEnum(sf, opt, 2);
731   PetscAssertPointer(newsf, 3);
732   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
733   PetscCall(PetscSFGetType(sf, &type));
734   if (type) PetscCall(PetscSFSetType(*newsf, type));
735   (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
736   if (opt == PETSCSF_DUPLICATE_GRAPH) {
737     PetscSFCheckGraphSet(sf, 1);
738     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
739       PetscInt           nroots, nleaves;
740       const PetscInt    *ilocal;
741       const PetscSFNode *iremote;
742       PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
743       PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
744     } else {
745       PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
746     }
747   }
748   /* Since oldtype is committed, so is newtype, according to MPI */
749   if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
750   (*newsf)->vscat.bs     = sf->vscat.bs;
751   (*newsf)->vscat.unit   = dtype;
752   (*newsf)->vscat.to_n   = sf->vscat.to_n;
753   (*newsf)->vscat.from_n = sf->vscat.from_n;
754   /* Do not copy lsf. Build it on demand since it is rarely used */
755 
756 #if defined(PETSC_HAVE_DEVICE)
757   (*newsf)->backend              = sf->backend;
758   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
759   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
760   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
761 #endif
762   PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
763   /* Don't do PetscSFSetUp() since the new sf's graph might not have been set. */
764   PetscFunctionReturn(PETSC_SUCCESS);
765 }
766 
767 /*@C
768   PetscSFGetGraph - Get the graph specifying a parallel star forest
769 
770   Not Collective
771 
772   Input Parameter:
773 . sf - star forest
774 
775   Output Parameters:
776 + nroots  - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
777 . nleaves - number of leaf vertices on the current process, each of these references a root on any process
778 . ilocal  - locations of leaves in leafdata buffers (if returned value is `NULL`, it means leaves are in contiguous storage)
779 - iremote - remote locations of root vertices for each leaf on the current process
780 
781   Level: intermediate
782 
783   Notes:
784   We do not currently require that the graph be set, thus `nroots` is returned as -1 if it has not been set yet
785 
786   The returned `ilocal` and `iremote` might contain values in a different order than the input ones in `PetscSFSetGraph()`
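
  Example Usage:
  A sketch (illustrative, assuming `sf` has its graph set) that walks the graph and handles the contiguous (`ilocal` == `NULL`) case:
.vb
  PetscInt           nroots, nleaves;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
  for (PetscInt i = 0; i < nleaves; i++) {
    const PetscInt leaf = ilocal ? ilocal[i] : i; /* location of this leaf in leafdata buffers */
    PetscCall(PetscPrintf(PETSC_COMM_SELF, "leaf %" PetscInt_FMT " <- (%" PetscInt_FMT ", %" PetscInt_FMT ")\n", leaf, iremote[i].rank, iremote[i].index));
  }
.ve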
787 
788   Fortran Notes:
789   The returned `iremote` array is a copy and must be deallocated after use. Consequently, if you
790   want to update the graph, you must call `PetscSFSetGraph()` after modifying the `iremote` array.
791 
792   To check for a `NULL` `ilocal` use
793 $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
794 
795 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
796 @*/
797 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
798 {
799   PetscFunctionBegin;
800   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
801   if (sf->ops->GetGraph) {
802     PetscCall(sf->ops->GetGraph(sf, nroots, nleaves, ilocal, iremote));
803   } else {
804     if (nroots) *nroots = sf->nroots;
805     if (nleaves) *nleaves = sf->nleaves;
806     if (ilocal) *ilocal = sf->mine;
807     if (iremote) *iremote = sf->remote;
808   }
809   PetscFunctionReturn(PETSC_SUCCESS);
810 }
811 
812 /*@
813   PetscSFGetLeafRange - Get the active leaf ranges
814 
815   Not Collective
816 
817   Input Parameter:
818 . sf - star forest
819 
820   Output Parameters:
821 + minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
822 - maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.
823 
824   Level: developer
825 
826 .seealso: `PetscSF`, `PetscSFType`, `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
827 @*/
828 PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf)
829 {
830   PetscFunctionBegin;
831   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
832   PetscSFCheckGraphSet(sf, 1);
833   if (minleaf) *minleaf = sf->minleaf;
834   if (maxleaf) *maxleaf = sf->maxleaf;
835   PetscFunctionReturn(PETSC_SUCCESS);
836 }
837 
838 /*@C
839   PetscSFViewFromOptions - View a `PetscSF` based on arguments in the options database
840 
841   Collective
842 
843   Input Parameters:
844 + A    - the star forest
845 . obj  - Optional object that provides the prefix for the option names
846 - name - command line option
847 
848   Level: intermediate
849 
850   Note:
851   See `PetscObjectViewFromOptions()` for possible `PetscViewer` and `PetscViewerFormat`
852 
853 .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
854 @*/
855 PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[])
856 {
857   PetscFunctionBegin;
858   PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
859   PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
860   PetscFunctionReturn(PETSC_SUCCESS);
861 }
862 
863 /*@C
864   PetscSFView - view a star forest
865 
866   Collective
867 
868   Input Parameters:
869 + sf     - star forest
870 - viewer - viewer to display graph, for example `PETSC_VIEWER_STDOUT_WORLD`
871 
872   Level: beginner
873 
874 .seealso: `PetscSF`, `PetscViewer`, `PetscSFCreate()`, `PetscSFSetGraph()`
875 @*/
876 PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer)
877 {
878   PetscBool         iascii;
879   PetscViewerFormat format;
880 
881   PetscFunctionBegin;
882   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
883   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
884   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
885   PetscCheckSameComm(sf, 1, viewer, 2);
886   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
887   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
888   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
889     PetscMPIInt rank;
890     PetscInt    ii, i, j;
891 
892     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
893     PetscCall(PetscViewerASCIIPushTab(viewer));
894     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
895       if (!sf->graphset) {
896         PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
897         PetscCall(PetscViewerASCIIPopTab(viewer));
898         PetscFunctionReturn(PETSC_SUCCESS);
899       }
900       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
901       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
902       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
903       for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
904       PetscCall(PetscViewerFlush(viewer));
905       PetscCall(PetscViewerGetFormat(viewer, &format));
906       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
907         PetscMPIInt *tmpranks, *perm;
908         PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
909         PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
910         for (i = 0; i < sf->nranks; i++) perm[i] = i;
911         PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
912         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
913         for (ii = 0; ii < sf->nranks; ii++) {
914           i = perm[ii];
915           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
916           for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
917         }
918         PetscCall(PetscFree2(tmpranks, perm));
919       }
920       PetscCall(PetscViewerFlush(viewer));
921       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
922     }
923     PetscCall(PetscViewerASCIIPopTab(viewer));
924   }
925   PetscTryTypeMethod(sf, View, viewer);
926   PetscFunctionReturn(PETSC_SUCCESS);
927 }
928 
929 /*@C
930   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
931 
932   Not Collective
933 
934   Input Parameter:
935 . sf - star forest
936 
937   Output Parameters:
938 + nranks  - number of ranks referenced by local part
939 . ranks   - [`nranks`] array of ranks
940 . roffset - [`nranks`+1] offset in `rmine`/`rremote` for each rank
941 . rmine   - [`roffset`[`nranks`]] concatenated array holding local indices referencing each remote rank
942 - rremote - [`roffset`[`nranks`]] concatenated array holding remote indices referenced for each remote rank
943 
944   Level: developer
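
  Example Usage:
  A sketch (illustrative, assuming `PetscSFSetUp()` has been called on `sf`) printing, for each root rank, the local leaf
  locations that reference it:
.vb
  PetscInt           nranks;
  const PetscMPIInt *ranks;
  const PetscInt    *roffset, *rmine, *rremote;

  PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, &roffset, &rmine, &rremote));
  for (PetscInt r = 0; r < nranks; r++) {
    for (PetscInt j = roffset[r]; j < roffset[r + 1]; j++) PetscCall(PetscPrintf(PETSC_COMM_SELF, "local leaf %" PetscInt_FMT " references root %" PetscInt_FMT " on rank %d\n", rmine[j], rremote[j], ranks[r]));
  }
.ve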
945 
946 .seealso: `PetscSF`, `PetscSFGetLeafRanks()`
947 @*/
948 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote)
949 {
950   PetscFunctionBegin;
951   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
952   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
953   if (sf->ops->GetRootRanks) {
954     PetscUseTypeMethod(sf, GetRootRanks, nranks, ranks, roffset, rmine, rremote);
955   } else {
956     /* The generic implementation */
957     if (nranks) *nranks = sf->nranks;
958     if (ranks) *ranks = sf->ranks;
959     if (roffset) *roffset = sf->roffset;
960     if (rmine) *rmine = sf->rmine;
961     if (rremote) *rremote = sf->rremote;
962   }
963   PetscFunctionReturn(PETSC_SUCCESS);
964 }
965 
966 /*@C
967   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
968 
969   Not Collective
970 
971   Input Parameter:
972 . sf - star forest
973 
974   Output Parameters:
975 + niranks  - number of leaf ranks referencing roots on this process
976 . iranks   - [`niranks`] array of ranks
977 . ioffset  - [`niranks`+1] offset in `irootloc` for each rank
978 - irootloc - [`ioffset`[`niranks`]] concatenated array holding local indices of roots referenced by each leaf rank
979 
980   Level: developer
981 
982 .seealso: `PetscSF`, `PetscSFGetRootRanks()`
983 @*/
984 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc)
985 {
986   PetscFunctionBegin;
987   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
988   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
989   if (sf->ops->GetLeafRanks) {
990     PetscUseTypeMethod(sf, GetLeafRanks, niranks, iranks, ioffset, irootloc);
991   } else {
992     PetscSFType type;
993     PetscCall(PetscSFGetType(sf, &type));
994     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
995   }
996   PetscFunctionReturn(PETSC_SUCCESS);
997 }
998 
999 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list)
1000 {
1001   PetscInt i;
1002   for (i = 0; i < n; i++) {
1003     if (needle == list[i]) return PETSC_TRUE;
1004   }
1005   return PETSC_FALSE;
1006 }
1007 
1008 /*@C
1009   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by `PetscSF` implementations.
1010 
1011   Collective
1012 
1013   Input Parameters:
1014 + sf     - `PetscSF` to set up; `PetscSFSetGraph()` must have been called
1015 - dgroup - `MPI_Group` of ranks to be distinguished (e.g., for self or shared memory exchange)
1016 
1017   Level: developer
1018 
1019 .seealso: `PetscSF`, `PetscSFGetRootRanks()`
1020 @*/
1021 PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup)
1022 {
1023   PetscHMapI    table;
1024   PetscHashIter pos;
1025   PetscMPIInt   size, groupsize, *groupranks;
1026   PetscInt     *rcount, *ranks;
1027   PetscInt      i, irank = -1, orank = -1;
1028 
1029   PetscFunctionBegin;
1030   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1031   PetscSFCheckGraphSet(sf, 1);
1032   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
1033   PetscCall(PetscHMapICreateWithSize(10, &table));
1034   for (i = 0; i < sf->nleaves; i++) {
1035     /* Log 1-based rank */
1036     PetscCall(PetscHMapISetWithMode(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
1037   }
1038   PetscCall(PetscHMapIGetSize(table, &sf->nranks));
1039   PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
1040   PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
1041   PetscHashIterBegin(table, pos);
1042   for (i = 0; i < sf->nranks; i++) {
1043     PetscHashIterGetKey(table, pos, ranks[i]);
1044     PetscHashIterGetVal(table, pos, rcount[i]);
1045     PetscHashIterNext(table, pos);
1046     ranks[i]--; /* Convert back to 0-based */
1047   }
1048   PetscCall(PetscHMapIDestroy(&table));
1049 
1050   /* We expect that dgroup is reliably "small" while nranks could be large */
1051   {
1052     MPI_Group    group = MPI_GROUP_NULL;
1053     PetscMPIInt *dgroupranks;
1054     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1055     PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
1056     PetscCall(PetscMalloc1(groupsize, &dgroupranks));
1057     PetscCall(PetscMalloc1(groupsize, &groupranks));
1058     for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
1059     if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
1060     PetscCallMPI(MPI_Group_free(&group));
1061     PetscCall(PetscFree(dgroupranks));
1062   }
1063 
1064   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1065   for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
1066     for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
1067       if (InList(ranks[i], groupsize, groupranks)) break;
1068     }
1069     for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1070       if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
1071     }
1072     if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1073       PetscInt tmprank, tmpcount;
1074 
1075       tmprank             = ranks[i];
1076       tmpcount            = rcount[i];
1077       ranks[i]            = ranks[sf->ndranks];
1078       rcount[i]           = rcount[sf->ndranks];
1079       ranks[sf->ndranks]  = tmprank;
1080       rcount[sf->ndranks] = tmpcount;
1081       sf->ndranks++;
1082     }
1083   }
1084   PetscCall(PetscFree(groupranks));
1085   PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
1086   if (rcount) PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
1087   sf->roffset[0] = 0;
1088   for (i = 0; i < sf->nranks; i++) {
1089     PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
1090     sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
1091     rcount[i]          = 0;
1092   }
1093   for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
1094     /* short circuit */
1095     if (orank != sf->remote[i].rank) {
1096       /* Search for index of iremote[i].rank in sf->ranks */
1097       PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
1098       if (irank < 0) {
1099         PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
1100         if (irank >= 0) irank += sf->ndranks;
1101       }
1102       orank = sf->remote[i].rank;
1103     }
1104     PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
1105     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1106     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1107     rcount[irank]++;
1108   }
1109   PetscCall(PetscFree2(rcount, ranks));
1110   PetscFunctionReturn(PETSC_SUCCESS);
1111 }
1112 
1113 /*@C
1114   PetscSFGetGroups - gets incoming and outgoing process groups
1115 
1116   Collective
1117 
1118   Input Parameter:
1119 . sf - star forest
1120 
1121   Output Parameters:
1122 + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1123 - outgoing - group of destination processes for outgoing edges (roots that I reference)
1124 
1125   Level: developer
1126 
1127 .seealso: `PetscSF`, `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1128 @*/
1129 PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing)
1130 {
1131   MPI_Group group = MPI_GROUP_NULL;
1132 
1133   PetscFunctionBegin;
1134   PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
1135   if (sf->ingroup == MPI_GROUP_NULL) {
1136     PetscInt        i;
1137     const PetscInt *indegree;
1138     PetscMPIInt     rank, *outranks, *inranks;
1139     PetscSFNode    *remote;
1140     PetscSF         bgcount;
1141 
1142     /* Compute the number of incoming ranks */
1143     PetscCall(PetscMalloc1(sf->nranks, &remote));
1144     for (i = 0; i < sf->nranks; i++) {
1145       remote[i].rank  = sf->ranks[i];
1146       remote[i].index = 0;
1147     }
1148     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
1149     PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1150     PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
1151     PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
1152     /* Enumerate the incoming ranks */
1153     PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
1154     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1155     for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
1156     PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
1157     PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
1158     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1159     PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup));
1160     PetscCallMPI(MPI_Group_free(&group));
1161     PetscCall(PetscFree2(inranks, outranks));
1162     PetscCall(PetscSFDestroy(&bgcount));
1163   }
1164   *incoming = sf->ingroup;
1165 
1166   if (sf->outgroup == MPI_GROUP_NULL) {
1167     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
1168     PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
1169     PetscCallMPI(MPI_Group_free(&group));
1170   }
1171   *outgoing = sf->outgroup;
1172   PetscFunctionReturn(PETSC_SUCCESS);
1173 }
1174 
1175 /*@
1176   PetscSFGetRanksSF - gets the `PetscSF` to perform communications with root ranks
1177 
1178   Collective
1179 
1180   Input Parameter:
1181 . sf - star forest
1182 
1183   Output Parameter:
1184 . rsf - the star forest with a single root per process to perform communications
1185 
1186   Level: developer
1187 
1188 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetRootRanks()`
1189 @*/
1190 PetscErrorCode PetscSFGetRanksSF(PetscSF sf, PetscSF *rsf)
1191 {
1192   PetscFunctionBegin;
1193   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1194   PetscAssertPointer(rsf, 2);
1195   if (!sf->rankssf) {
1196     PetscSFNode       *rremotes;
1197     const PetscMPIInt *ranks;
1198     PetscInt           nranks;
1199 
1200     PetscCall(PetscSFGetRootRanks(sf, &nranks, &ranks, NULL, NULL, NULL));
1201     PetscCall(PetscMalloc1(nranks, &rremotes));
1202     for (PetscInt i = 0; i < nranks; i++) {
1203       rremotes[i].rank  = ranks[i];
1204       rremotes[i].index = 0;
1205     }
1206     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &sf->rankssf));
1207     PetscCall(PetscSFSetGraph(sf->rankssf, 1, nranks, NULL, PETSC_OWN_POINTER, rremotes, PETSC_OWN_POINTER));
1208   }
1209   *rsf = sf->rankssf;
1210   PetscFunctionReturn(PETSC_SUCCESS);
1211 }
1212 
1213 /*@
1214   PetscSFGetMultiSF - gets the inner `PetscSF` implementing gathers and scatters
1215 
1216   Collective
1217 
1218   Input Parameter:
1219 . sf - star forest that may contain roots with 0 or with more than 1 vertex
1220 
1221   Output Parameter:
1222 . multi - star forest with split roots, such that each root has degree exactly 1
1223 
1224   Level: developer
1225 
1226   Note:
1227   In most cases, users should use `PetscSFGatherBegin()` and `PetscSFScatterBegin()` instead of manipulating multi
1228   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1229   edge, it is a candidate for future optimization that might involve its removal.
1230 
1231 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1232 @*/
1233 PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi)
1234 {
1235   PetscFunctionBegin;
1236   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1237   PetscAssertPointer(multi, 2);
1238   if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1239     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1240     *multi           = sf->multi;
1241     sf->multi->multi = sf->multi;
1242     PetscFunctionReturn(PETSC_SUCCESS);
1243   }
1244   if (!sf->multi) {
1245     const PetscInt *indegree;
1246     PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
1247     PetscSFNode    *remote;
1248     maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
1249     PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
1250     PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
1251     PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
1252     inoffset[0] = 0;
1253     for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
1254     for (i = 0; i < maxlocal; i++) outones[i] = 1;
1255     PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1256     PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
1257     for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1258     if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
1259       for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
1260     }
1261     PetscCall(PetscMalloc1(sf->nleaves, &remote));
1262     for (i = 0; i < sf->nleaves; i++) {
1263       remote[i].rank  = sf->remote[i].rank;
1264       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1265     }
1266     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
1267     sf->multi->multi = sf->multi;
1268     PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
1269     if (sf->rankorder) { /* Sort the ranks */
1270       PetscMPIInt  rank;
1271       PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
1272       PetscSFNode *newremote;
1273       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
1274       for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
1275       PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
1276       for (i = 0; i < maxlocal; i++) outranks[i] = rank;
1277       PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1278       PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
1279       /* Sort the incoming ranks at each vertex, build the inverse map */
1280       for (i = 0; i < sf->nroots; i++) {
1281         PetscInt j;
1282         for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
1283         PetscCall(PetscSortIntWithArray(indegree[i], PetscSafePointerPlusOffset(inranks, inoffset[i]), tmpoffset));
1284         for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1285       }
1286       PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1287       PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
1288       PetscCall(PetscMalloc1(sf->nleaves, &newremote));
1289       for (i = 0; i < sf->nleaves; i++) {
1290         newremote[i].rank  = sf->remote[i].rank;
1291         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1292       }
1293       PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
1294       PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
1295     }
1296     PetscCall(PetscFree3(inoffset, outones, outoffset));
1297   }
1298   *multi = sf->multi;
1299   PetscFunctionReturn(PETSC_SUCCESS);
1300 }
1301 
1302 /*@C
1303   PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots of a `PetscSF`, does not remap indices
1304 
1305   Collective
1306 
1307   Input Parameters:
1308 + sf        - original star forest
1309 . nselected - number of selected roots on this process
1310 - selected  - indices of the selected roots on this process
1311 
1312   Output Parameter:
1313 . esf - new star forest
1314 
1315   Level: advanced
1316 
1317   Note:
1318   To use the new `PetscSF`, it may be necessary to know the indices of the leaves that are still participating. This can
1319   be done by calling `PetscSFGetGraph()`.
1320 
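  Example Usage:
  A minimal sketch (the names `nsel` and `sel` are placeholders for the caller's selection of roots), assuming the graph of `sf` has already been set\:
.vb
  PetscSF         esf;
  PetscInt        nleaves;
  const PetscInt *ilocal;

  PetscCall(PetscSFCreateEmbeddedRootSF(sf, nsel, sel, &esf));
  PetscCall(PetscSFGetGraph(esf, NULL, &nleaves, &ilocal, NULL)); /* leaves still participating in esf */
  /* ... communicate on esf ... */
  PetscCall(PetscSFDestroy(&esf));
.ve
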
1321 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1322 @*/
1323 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf)
1324 {
1325   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1326   const PetscInt    *ilocal;
1327   signed char       *rootdata, *leafdata, *leafmem;
1328   const PetscSFNode *iremote;
1329   PetscSFNode       *new_iremote;
1330   MPI_Comm           comm;
1331 
1332   PetscFunctionBegin;
1333   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1334   PetscSFCheckGraphSet(sf, 1);
1335   if (nselected) PetscAssertPointer(selected, 3);
1336   PetscAssertPointer(esf, 4);
1337 
1338   PetscCall(PetscSFSetUp(sf));
1339   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
1340   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1341   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1342 
1343   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1344     PetscBool dups;
1345     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
1346     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1347     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root index %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1348   }
1349 
1350   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1351   else {
1352     /* A generic version of creating embedded sf */
1353     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1354     maxlocal = maxleaf - minleaf + 1;
1355     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
1356     leafdata = PetscSafePointerPlusOffset(leafmem, -minleaf);
1357     /* Tag selected roots and bcast to leaves */
1358     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1359     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1360     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1361 
1362     /* Build esf with leaves that are still connected */
1363     esf_nleaves = 0;
1364     for (i = 0; i < nleaves; i++) {
1365       j = ilocal ? ilocal[i] : i;
1366       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1367          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1368       */
1369       esf_nleaves += (leafdata[j] ? 1 : 0);
1370     }
1371     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
1372     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1373     for (i = n = 0; i < nleaves; i++) {
1374       j = ilocal ? ilocal[i] : i;
1375       if (leafdata[j]) {
1376         new_ilocal[n]        = j;
1377         new_iremote[n].rank  = iremote[i].rank;
1378         new_iremote[n].index = iremote[i].index;
1379         ++n;
1380       }
1381     }
1382     PetscCall(PetscSFCreate(comm, esf));
1383     PetscCall(PetscSFSetFromOptions(*esf));
1384     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1385     PetscCall(PetscFree2(rootdata, leafmem));
1386   }
1387   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
1388   PetscFunctionReturn(PETSC_SUCCESS);
1389 }
1390 
1391 /*@C
1392   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves of a `PetscSF`, does not remap indices
1393 
1394   Collective
1395 
1396   Input Parameters:
1397 + sf        - original star forest
1398 . nselected - number of selected leaves on this process
1399 - selected  - indices of the selected leaves on this process
1400 
1401   Output Parameter:
1402 . newsf - new star forest
1403 
1404   Level: advanced
1405 
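  Example Usage:
  A minimal sketch (the names `nsel` and `sel` are placeholders for the caller's selection of leaves), assuming the graph of `sf` has already been set\:
.vb
  PetscSF lsf;

  PetscCall(PetscSFCreateEmbeddedLeafSF(sf, nsel, sel, &lsf));
  /* ... communicate on lsf, then ... */
  PetscCall(PetscSFDestroy(&lsf));
.ve
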
1406 .seealso: `PetscSF`, `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1407 @*/
1408 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
1409 {
1410   const PetscSFNode *iremote;
1411   PetscSFNode       *new_iremote;
1412   const PetscInt    *ilocal;
1413   PetscInt           i, nroots, *leaves, *new_ilocal;
1414   MPI_Comm           comm;
1415 
1416   PetscFunctionBegin;
1417   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1418   PetscSFCheckGraphSet(sf, 1);
1419   if (nselected) PetscAssertPointer(selected, 3);
1420   PetscAssertPointer(newsf, 4);
1421 
1422   /* Sort selected[], remove duplicates, and put the result in leaves[] */
1423   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1424   PetscCall(PetscMalloc1(nselected, &leaves));
1425   PetscCall(PetscArraycpy(leaves, selected, nselected));
1426   PetscCall(PetscSortRemoveDupsInt(&nselected, leaves));
1427   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1428 
1429   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1430   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1431   else {
1432     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1433     PetscCall(PetscMalloc1(nselected, &new_ilocal));
1434     PetscCall(PetscMalloc1(nselected, &new_iremote));
1435     for (i = 0; i < nselected; ++i) {
1436       const PetscInt l     = leaves[i];
1437       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1438       new_iremote[i].rank  = iremote[l].rank;
1439       new_iremote[i].index = iremote[l].index;
1440     }
1441     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1442     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1443   }
1444   PetscCall(PetscFree(leaves));
1445   PetscFunctionReturn(PETSC_SUCCESS);
1446 }
1447 
1448 /*@C
1449   PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to `PetscSFBcastEnd()`
1450 
1451   Collective
1452 
1453   Input Parameters:
1454 + sf       - star forest on which to communicate
1455 . unit     - data type associated with each node
1456 . rootdata - buffer to broadcast
1457 - op       - operation to use for reduction
1458 
1459   Output Parameter:
1460 . leafdata - buffer to be reduced with values from each leaf's respective root
1461 
1462   Level: intermediate
1463 
1464   Note:
1465   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1466   are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1467   use `PetscSFBcastWithMemTypeBegin()` instead.
1468 
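  Example Usage:
  A minimal sketch broadcasting one `PetscInt` per root to all leaves (assuming `sf` has `nroots` roots, `nleaves` leaves, and contiguous leaf storage)\:
.vb
  PetscInt *rootdata, *leafdata;

  PetscCall(PetscMalloc2(nroots, &rootdata, nleaves, &leafdata));
  /* ... fill rootdata ... */
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
  /* every leaf now holds a copy of the value at its root */
  PetscCall(PetscFree2(rootdata, leafdata));
.ve
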
1469 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1470 @*/
1471 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1472 {
1473   PetscMemType rootmtype, leafmtype;
1474 
1475   PetscFunctionBegin;
1476   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1477   PetscCall(PetscSFSetUp(sf));
1478   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1479   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1480   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1481   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1482   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1483   PetscFunctionReturn(PETSC_SUCCESS);
1484 }
1485 
1486 /*@C
1487   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call
1488   to `PetscSFBcastEnd()`
1489 
1490   Collective
1491 
1492   Input Parameters:
1493 + sf        - star forest on which to communicate
1494 . unit      - data type associated with each node
1495 . rootmtype - memory type of rootdata
1496 . rootdata  - buffer to broadcast
1497 . leafmtype - memory type of leafdata
1498 - op        - operation to use for reduction
1499 
1500   Output Parameter:
1501 . leafdata - buffer to be reduced with values from each leaf's respective root
1502 
1503   Level: intermediate
1504 
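  Example Usage:
  A minimal sketch, assuming `rootdata` and `leafdata` are buffers already known to reside in host memory\:
.vb
  PetscCall(PetscSFBcastWithMemTypeBegin(sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, rootdata, PETSC_MEMTYPE_HOST, leafdata, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, rootdata, leafdata, MPI_REPLACE));
.ve
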
1505 .seealso: `PetscSF`, `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1506 @*/
1507 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
1508 {
1509   PetscFunctionBegin;
1510   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1511   PetscCall(PetscSFSetUp(sf));
1512   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1513   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1514   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1515   PetscFunctionReturn(PETSC_SUCCESS);
1516 }
1517 
1518 /*@C
1519   PetscSFBcastEnd - end a broadcast and reduce operation started with `PetscSFBcastBegin()` or `PetscSFBcastWithMemTypeBegin()`
1520 
1521   Collective
1522 
1523   Input Parameters:
1524 + sf       - star forest
1525 . unit     - data type
1526 . rootdata - buffer to broadcast
1527 - op       - operation to use for reduction
1528 
1529   Output Parameter:
1530 . leafdata - buffer to be reduced with values from each leaf's respective root
1531 
1532   Level: intermediate
1533 
1534 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1535 @*/
1536 PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op)
1537 {
1538   PetscFunctionBegin;
1539   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1540   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
1541   PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
1542   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
1543   PetscFunctionReturn(PETSC_SUCCESS);
1544 }
1545 
1546 /*@C
1547   PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to `PetscSFReduceEnd()`
1548 
1549   Collective
1550 
1551   Input Parameters:
1552 + sf       - star forest
1553 . unit     - data type
1554 . leafdata - values to reduce
1555 - op       - reduction operation
1556 
1557   Output Parameter:
1558 . rootdata - result of reduction of values from all leaves of each root
1559 
1560   Level: intermediate
1561 
1562   Note:
1563   When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1564   are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1565   use `PetscSFReduceWithMemTypeBegin()` instead.
1566 
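  Example Usage:
  A minimal sketch summing one `PetscInt` contribution per leaf into its root (assuming `sf` has `nroots` roots, `nleaves` leaves, and contiguous leaf storage)\:
.vb
  PetscInt *rootdata, *leafdata;

  PetscCall(PetscCalloc2(nroots, &rootdata, nleaves, &leafdata));
  /* ... fill leafdata ... */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, leafdata, rootdata, MPI_SUM));
  /* rootdata[r] now holds the sum of the contributions of all leaves of root r */
  PetscCall(PetscFree2(rootdata, leafdata));
.ve
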
1567 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`, `PetscSFReduceEnd()`
1568 @*/
1569 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1570 {
1571   PetscMemType rootmtype, leafmtype;
1572 
1573   PetscFunctionBegin;
1574   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1575   PetscCall(PetscSFSetUp(sf));
1576   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1577   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1578   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1579   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1580   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1581   PetscFunctionReturn(PETSC_SUCCESS);
1582 }
1583 
1584 /*@C
1585   PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to `PetscSFReduceEnd()`
1586 
1587   Collective
1588 
1589   Input Parameters:
1590 + sf        - star forest
1591 . unit      - data type
1592 . leafmtype - memory type of leafdata
1593 . leafdata  - values to reduce
1594 . rootmtype - memory type of rootdata
1595 - op        - reduction operation
1596 
1597   Output Parameter:
1598 . rootdata - result of reduction of values from all leaves of each root
1599 
1600   Level: intermediate
1601 
1602 .seealso: `PetscSF`, `PetscSFBcastBegin()`, `PetscSFReduceBegin()`, `PetscSFReduceEnd()`
1603 @*/
1604 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
1605 {
1606   PetscFunctionBegin;
1607   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1608   PetscCall(PetscSFSetUp(sf));
1609   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1610   PetscCall(sf->ops->ReduceBegin(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1611   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1612   PetscFunctionReturn(PETSC_SUCCESS);
1613 }
1614 
1615 /*@C
1616   PetscSFReduceEnd - end a reduction operation started with `PetscSFReduceBegin()` or `PetscSFReduceWithMemTypeBegin()`
1617 
1618   Collective
1619 
1620   Input Parameters:
1621 + sf       - star forest
1622 . unit     - data type
1623 . leafdata - values to reduce
1624 - op       - reduction operation
1625 
1626   Output Parameter:
1627 . rootdata - result of reduction of values from all leaves of each root
1628 
1629   Level: intermediate
1630 
1631 .seealso: `PetscSF`, `PetscSFSetGraph()`, `PetscSFBcastEnd()`, `PetscSFReduceBegin()`, `PetscSFReduceWithMemTypeBegin()`
1632 @*/
1633 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op)
1634 {
1635   PetscFunctionBegin;
1636   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1637   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1638   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1639   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1640   PetscFunctionReturn(PETSC_SUCCESS);
1641 }
1642 
1643 /*@C
1644   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value,
1645   to be completed with `PetscSFFetchAndOpEnd()`
1646 
1647   Collective
1648 
1649   Input Parameters:
1650 + sf       - star forest
1651 . unit     - data type
1652 . leafdata - leaf values to use in reduction
1653 - op       - operation to use for reduction
1654 
1655   Output Parameters:
1656 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1657 - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1658 
1659   Level: advanced
1660 
1661   Note:
1662   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1663   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1664   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1665   integers.
1666 
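  Example Usage:
  A minimal sketch of the classic use case, assigning each leaf a distinct slot at its root; it assumes contiguous leaf storage and that `rootdata` was initialized to the starting offset of each root\:
.vb
  PetscInt *leafones, *leafoffsets;

  PetscCall(PetscMalloc2(nleaves, &leafones, nleaves, &leafoffsets));
  for (PetscInt i = 0; i < nleaves; i++) leafones[i] = 1;
  PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, rootdata, leafones, leafoffsets, MPI_SUM));
  PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, rootdata, leafones, leafoffsets, MPI_SUM));
  /* leafoffsets[i] is the root value seen just before this leaf's 1 was added */
  PetscCall(PetscFree2(leafones, leafoffsets));
.ve
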
1667 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1668 @*/
1669 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1670 {
1671   PetscMemType rootmtype, leafmtype, leafupdatemtype;
1672 
1673   PetscFunctionBegin;
1674   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1675   PetscCall(PetscSFSetUp(sf));
1676   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1677   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1678   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1679   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
1680   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1681   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1682   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1683   PetscFunctionReturn(PETSC_SUCCESS);
1684 }
1685 
1686 /*@C
1687   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by
1688   applying operation using my leaf value, to be completed with `PetscSFFetchAndOpEnd()`
1689 
1690   Collective
1691 
1692   Input Parameters:
1693 + sf              - star forest
1694 . unit            - data type
1695 . rootmtype       - memory type of rootdata
1696 . leafmtype       - memory type of leafdata
1697 . leafdata        - leaf values to use in reduction
1698 . leafupdatemtype - memory type of leafupdate
1699 - op              - operation to use for reduction
1700 
1701   Output Parameters:
1702 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1703 - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1704 
1705   Level: advanced
1706 
1707   Note:
1708   See `PetscSFFetchAndOpBegin()` for more details.
1709 
1710 .seealso: `PetscSF`, `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpEnd()`
1711 @*/
1712 PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op)
1713 {
1714   PetscFunctionBegin;
1715   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1716   PetscCall(PetscSFSetUp(sf));
1717   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1718   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1719   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1720   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1721   PetscFunctionReturn(PETSC_SUCCESS);
1722 }
1723 
1724 /*@C
1725   PetscSFFetchAndOpEnd - end operation started in matching call to `PetscSFFetchAndOpBegin()` or `PetscSFFetchAndOpWithMemTypeBegin()`
1726   to fetch values from roots and update atomically by applying operation using my leaf value
1727 
1728   Collective
1729 
1730   Input Parameters:
1731 + sf       - star forest
1732 . unit     - data type
1733 . leafdata - leaf values to use in reduction
1734 - op       - operation to use for reduction
1735 
1736   Output Parameters:
1737 + rootdata   - root values to be updated, input state is seen by first process to perform an update
1738 - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1739 
1740   Level: advanced
1741 
1742 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`, `PetscSFFetchAndOpBegin()`, `PetscSFFetchAndOpWithMemTypeBegin()`
1743 @*/
1744 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op)
1745 {
1746   PetscFunctionBegin;
1747   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1748   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1749   PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
1750   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
1751   PetscFunctionReturn(PETSC_SUCCESS);
1752 }
1753 
1754 /*@C
1755   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with `PetscSFComputeDegreeEnd()`
1756 
1757   Collective
1758 
1759   Input Parameter:
1760 . sf - star forest
1761 
1762   Output Parameter:
1763 . degree - degree of each root vertex
1764 
1765   Level: advanced
1766 
1767   Note:
1768   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1769 
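  Example Usage:
  A minimal sketch, assuming the graph of `sf` has already been set\:
.vb
  const PetscInt *degree;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  /* degree[r] is the number of leaves connected to root r; do not PetscFree() it */
.ve
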
1770 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
1771 @*/
1772 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree)
1773 {
1774   PetscFunctionBegin;
1775   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1776   PetscSFCheckGraphSet(sf, 1);
1777   PetscAssertPointer(degree, 2);
1778   if (!sf->degreeknown) {
1779     PetscInt i, nroots = sf->nroots, maxlocal;
1780     PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1781     maxlocal = sf->maxleaf - sf->minleaf + 1;
1782     PetscCall(PetscMalloc1(nroots, &sf->degree));
1783     PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1784     for (i = 0; i < nroots; i++) sf->degree[i] = 0;
1785     for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
1786     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1787   }
1788   *degree = NULL;
1789   PetscFunctionReturn(PETSC_SUCCESS);
1790 }
1791 
1792 /*@C
1793   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with `PetscSFComputeDegreeBegin()`
1794 
1795   Collective
1796 
1797   Input Parameter:
1798 . sf - star forest
1799 
1800   Output Parameter:
1801 . degree - degree of each root vertex
1802 
1803   Level: developer
1804 
1805   Note:
1806   The returned array is owned by `PetscSF` and automatically freed by `PetscSFDestroy()`. Hence there is no need to call `PetscFree()` on it.
1807 
1808 .seealso: `PetscSF`, `PetscSFGatherBegin()`, `PetscSFComputeDegreeBegin()`
1809 @*/
1810 PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree)
1811 {
1812   PetscFunctionBegin;
1813   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1814   PetscSFCheckGraphSet(sf, 1);
1815   PetscAssertPointer(degree, 2);
1816   if (!sf->degreeknown) {
1817     PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
1818     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
1819     PetscCall(PetscFree(sf->degreetmp));
1820     sf->degreeknown = PETSC_TRUE;
1821   }
1822   *degree = sf->degree;
1823   PetscFunctionReturn(PETSC_SUCCESS);
1824 }
1825 
1826 /*@C
1827   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-`PetscSF` returned by `PetscSFGetMultiSF()`).
1828   Each multi-root is assigned the index of its corresponding original root.
1829 
1830   Collective
1831 
1832   Input Parameters:
1833 + sf     - star forest
1834 - degree - degree of each root vertex, computed with `PetscSFComputeDegreeBegin()`/`PetscSFComputeDegreeEnd()`
1835 
1836   Output Parameters:
1837 + nMultiRoots             - (optional) number of multi-roots (roots of multi-`PetscSF`)
1838 - multiRootsOrigNumbering - original indices of multi-roots; length of this array is `nMultiRoots`
1839 
1840   Level: developer
1841 
1842   Note:
1843   The returned array `multiRootsOrigNumbering` is newly allocated and should be destroyed with `PetscFree()` when no longer needed.
1844 
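  Example Usage:
  A minimal sketch mapping each multi-root back to its original root, assuming the graph of `sf` has already been set\:
.vb
  const PetscInt *degree;
  PetscInt        nmroots, *morig;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  PetscCall(PetscSFComputeMultiRootOriginalNumbering(sf, degree, &nmroots, &morig));
  /* morig[k] is the original root index of multi-root k */
  PetscCall(PetscFree(morig));
.ve
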
1845 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1846 @*/
1847 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1848 {
1849   PetscSF  msf;
1850   PetscInt i, j, k, nroots, nmroots;
1851 
1852   PetscFunctionBegin;
1853   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1854   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1855   if (nroots) PetscAssertPointer(degree, 2);
1856   if (nMultiRoots) PetscAssertPointer(nMultiRoots, 3);
1857   PetscAssertPointer(multiRootsOrigNumbering, 4);
1858   PetscCall(PetscSFGetMultiSF(sf, &msf));
1859   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1860   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1861   for (i = 0, j = 0, k = 0; i < nroots; i++) {
1862     if (!degree[i]) continue;
1863     for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1864   }
1865   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
1866   if (nMultiRoots) *nMultiRoots = nmroots;
1867   PetscFunctionReturn(PETSC_SUCCESS);
1868 }
1869 
1870 /*@C
1871   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with `PetscSFGatherEnd()`
1872 
1873   Collective
1874 
1875   Input Parameters:
1876 + sf       - star forest
1877 . unit     - data type
1878 - leafdata - leaf data to gather to roots
1879 
1880   Output Parameter:
1881 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1882 
1883   Level: intermediate
1884 
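  Example Usage:
  A minimal sketch gathering one `PetscInt` from every leaf into its root's slots (assuming `sf` has `nroots` roots, contiguous leaf storage, and a filled `leafdata` array); the multi-root buffer needs one entry per incoming leaf, i.e. the sum of the root degrees\:
.vb
  const PetscInt *degree;
  PetscInt        i, nmulti = 0, *multirootdata;

  PetscCall(PetscSFComputeDegreeBegin(sf, &degree));
  PetscCall(PetscSFComputeDegreeEnd(sf, &degree));
  for (i = 0; i < nroots; i++) nmulti += degree[i];
  PetscCall(PetscMalloc1(nmulti, &multirootdata));
  PetscCall(PetscSFGatherBegin(sf, MPIU_INT, leafdata, multirootdata));
  PetscCall(PetscSFGatherEnd(sf, MPIU_INT, leafdata, multirootdata));
  PetscCall(PetscFree(multirootdata));
.ve
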
1885 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1886 @*/
1887 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1888 {
1889   PetscSF multi = NULL;
1890 
1891   PetscFunctionBegin;
1892   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1893   PetscCall(PetscSFSetUp(sf));
1894   PetscCall(PetscSFGetMultiSF(sf, &multi));
1895   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1896   PetscFunctionReturn(PETSC_SUCCESS);
1897 }
1898 
1899 /*@C
1900   PetscSFGatherEnd - ends pointwise gather operation that was started with `PetscSFGatherBegin()`
1901 
1902   Collective
1903 
1904   Input Parameters:
1905 + sf       - star forest
1906 . unit     - data type
1907 - leafdata - leaf data to gather to roots
1908 
1909   Output Parameter:
1910 . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1911 
1912   Level: intermediate
1913 
1914 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1915 @*/
1916 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata)
1917 {
1918   PetscSF multi = NULL;
1919 
1920   PetscFunctionBegin;
1921   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1922   PetscCall(PetscSFGetMultiSF(sf, &multi));
1923   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1924   PetscFunctionReturn(PETSC_SUCCESS);
1925 }
1926 
1927 /*@C
1928   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with `PetscSFScatterEnd()`
1929 
1930   Collective
1931 
1932   Input Parameters:
1933 + sf            - star forest
1934 . unit          - data type
1935 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1936 
1937   Output Parameter:
1938 . leafdata - leaf data to be updated with personal data from each respective root
1939 
1940   Level: intermediate
1941 
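  Example Usage:
  A minimal sketch sending one distinct `PetscInt` from each multi-root slot to its leaf, assuming `multirootdata` is laid out as in the `PetscSFGatherBegin()` example and `leafdata` has one entry per leaf\:
.vb
  PetscCall(PetscSFScatterBegin(sf, MPIU_INT, multirootdata, leafdata));
  PetscCall(PetscSFScatterEnd(sf, MPIU_INT, multirootdata, leafdata));
.ve
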
1942 .seealso: `PetscSF`, `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
1943 @*/
1944 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1945 {
1946   PetscSF multi = NULL;
1947 
1948   PetscFunctionBegin;
1949   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1950   PetscCall(PetscSFSetUp(sf));
1951   PetscCall(PetscSFGetMultiSF(sf, &multi));
1952   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1953   PetscFunctionReturn(PETSC_SUCCESS);
1954 }
1955 
1956 /*@C
1957   PetscSFScatterEnd - ends pointwise scatter operation that was started with `PetscSFScatterBegin()`
1958 
1959   Collective
1960 
1961   Input Parameters:
1962 + sf            - star forest
1963 . unit          - data type
1964 - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1965 
1966   Output Parameter:
1967 . leafdata - leaf data to be updated with personal data from each respective root
1968 
1969   Level: intermediate
1970 
1971 .seealso: `PetscSF`, `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
1972 @*/
1973 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata)
1974 {
1975   PetscSF multi = NULL;
1976 
1977   PetscFunctionBegin;
1978   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1979   PetscCall(PetscSFGetMultiSF(sf, &multi));
1980   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1981   PetscFunctionReturn(PETSC_SUCCESS);
1982 }
1983 
1984 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1985 {
1986   PetscInt        i, n, nleaves;
1987   const PetscInt *ilocal = NULL;
1988   PetscHSetI      seen;
1989 
1990   PetscFunctionBegin;
1991   if (PetscDefined(USE_DEBUG)) {
1992     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
1993     PetscCall(PetscHSetICreate(&seen));
1994     for (i = 0; i < nleaves; i++) {
1995       const PetscInt leaf = ilocal ? ilocal[i] : i;
1996       PetscCall(PetscHSetIAdd(seen, leaf));
1997     }
1998     PetscCall(PetscHSetIGetSize(seen, &n));
1999     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
2000     PetscCall(PetscHSetIDestroy(&seen));
2001   }
2002   PetscFunctionReturn(PETSC_SUCCESS);
2003 }
2004 
2005 /*@
2006   PetscSFCompose - Compose a new `PetscSF` by putting the second `PetscSF` under the first one in a top (roots) down (leaves) view
2007 
2008   Input Parameters:
2009 + sfA - The first `PetscSF`
2010 - sfB - The second `PetscSF`
2011 
2012   Output Parameter:
2013 . sfBA - The composite `PetscSF`
2014 
2015   Level: developer
2016 
2017   Notes:
2018   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2019   forests, i.e. the same leaf is not connected with different roots.
2020 
2021   `sfA`'s leaf space and `sfB`'s root space might be partially overlapped. The composition builds
2022   a graph with `sfA`'s roots and `sfB`'s leaves only when there is a path between them. Unconnected
2023   nodes (roots or leaves) are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a
2024   `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfA`, then a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on `sfB`, on connected nodes.
2025 
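  Example Usage:
  A minimal sketch, assuming `sfA` and `sfB` live on the same communicator and `sfA`'s leaf space feeds `sfB`'s root space\:
.vb
  PetscSF sfBA;

  PetscCall(PetscSFCompose(sfA, sfB, &sfBA));
  /* a bcast on sfBA is equivalent to a bcast on sfA followed by a bcast on sfB (on connected nodes) */
  PetscCall(PetscSFDestroy(&sfBA));
.ve
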
2026 .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2027 @*/
2028 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2029 {
2030   const PetscSFNode *remotePointsA, *remotePointsB;
2031   PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
2032   const PetscInt    *localPointsA, *localPointsB;
2033   PetscInt          *localPointsBA;
2034   PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
2035   PetscBool          denseB;
2036 
2037   PetscFunctionBegin;
2038   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
2039   PetscSFCheckGraphSet(sfA, 1);
2040   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
2041   PetscSFCheckGraphSet(sfB, 2);
2042   PetscCheckSameComm(sfA, 1, sfB, 2);
2043   PetscAssertPointer(sfBA, 3);
2044   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2045   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2046 
2047   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2048   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2049   /* Make sure that PetscSFBcast{Begin, End}(sfB, ...) works with root data of size
2050      numRootsB; otherwise, garbage will be broadcasted.
2051      Example (comm size = 1):
2052      sfA: 0 <- (0, 0)
2053      sfB: 100 <- (0, 0)
2054           101 <- (0, 1)
2055      Here, we have remotePointsA = [(0, 0)], but for remotePointsA to be a valid target
2056      of sfB, it has to be recast as [(0, 0), (-1, -1)] so that points 100 and 101 would
2057      receive (0, 0) and (-1, -1), respectively, when PetscSFBcast(sfB, ...) is called on
2058      remotePointsA; if not recast, point 101 would receive a garbage value.             */
2059   PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
2060   for (i = 0; i < numRootsB; i++) {
2061     reorderedRemotePointsA[i].rank  = -1;
2062     reorderedRemotePointsA[i].index = -1;
2063   }
2064   for (i = 0; i < numLeavesA; i++) {
2065     PetscInt localp = localPointsA ? localPointsA[i] : i;
2066 
2067     if (localp >= numRootsB) continue;
2068     reorderedRemotePointsA[localp] = remotePointsA[i];
2069   }
2070   remotePointsA = reorderedRemotePointsA;
2071   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2072   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
2073   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2074     leafdataB[i].rank  = -1;
2075     leafdataB[i].index = -1;
2076   }
2077   PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2078   PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, PetscSafePointerPlusOffset(leafdataB, -minleaf), MPI_REPLACE));
2079   PetscCall(PetscFree(reorderedRemotePointsA));
2080 
2081   denseB = (PetscBool)!localPointsB;
2082   for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2083     if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
2084     else numLeavesBA++;
2085   }
2086   if (denseB) {
2087     localPointsBA  = NULL;
2088     remotePointsBA = leafdataB;
2089   } else {
2090     PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
2091     PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
2092     for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
2093       const PetscInt l = localPointsB ? localPointsB[i] : i;
2094 
2095       if (leafdataB[l - minleaf].rank == -1) continue;
2096       remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
2097       localPointsBA[numLeavesBA]  = l;
2098       numLeavesBA++;
2099     }
2100     PetscCall(PetscFree(leafdataB));
2101   }
2102   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2103   PetscCall(PetscSFSetFromOptions(*sfBA));
2104   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2105   PetscFunctionReturn(PETSC_SUCCESS);
2106 }
2107 
2108 /*@
2109   PetscSFComposeInverse - Compose a new `PetscSF` by putting the inverse of the second `PetscSF` under the first one
2110 
2111   Input Parameters:
2112 + sfA - The first `PetscSF`
2113 - sfB - The second `PetscSF`
2114 
2115   Output Parameter:
2116 . sfBA - The composite `PetscSF`.
2117 
2118   Level: developer
2119 
2120   Notes:
2121   Currently, the two `PetscSF`s must be defined on congruent communicators and they must be true star
2122   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
2123   second `PetscSF` must have a degree of 1, i.e., no roots have more than one leaf connected.
2124 
2125   `sfA`'s leaf space and `sfB`'s leaf space might be partially overlapped. The composition builds
2126   a graph with `sfA`'s roots and `sfB`'s roots only when there is a path between them. Unconnected
2127   roots are not in `sfBA`. Doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()` on the new `PetscSF` is equivalent to doing a `PetscSFBcastBegin()`/`PetscSFBcastEnd()`
2128   on `sfA`, then
2129   a `PetscSFReduceBegin()`/`PetscSFReduceEnd()` on `sfB`, on connected roots.
2130 
2131 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2132 @*/
2133 PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
2134 {
2135   const PetscSFNode *remotePointsA, *remotePointsB;
2136   PetscSFNode       *remotePointsBA;
2137   const PetscInt    *localPointsA, *localPointsB;
2138   PetscSFNode       *reorderedRemotePointsA = NULL;
2139   PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
2140   MPI_Op             op;
2141 #if defined(PETSC_USE_64BIT_INDICES)
2142   PetscBool iswin;
2143 #endif
2144 
2145   PetscFunctionBegin;
2146   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
2147   PetscSFCheckGraphSet(sfA, 1);
2148   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
2149   PetscSFCheckGraphSet(sfB, 2);
2150   PetscCheckSameComm(sfA, 1, sfB, 2);
2151   PetscAssertPointer(sfBA, 3);
2152   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2153   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2154 
2155   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2156   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2157 
2158   /* TODO: Check roots of sfB have degree of 1 */
2159   /* Once we implement it, we can replace the MPI_MAXLOC
2160      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2161      We use MPI_MAXLOC only to have a deterministic output from this routine if
2162      the root condition is not met.
2163    */
2164   op = MPI_MAXLOC;
2165 #if defined(PETSC_USE_64BIT_INDICES)
2166   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2167   PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
2168   if (iswin) op = MPI_REPLACE;
2169 #endif
2170 
2171   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2172   PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
2173   for (i = 0; i < maxleaf - minleaf + 1; i++) {
2174     reorderedRemotePointsA[i].rank  = -1;
2175     reorderedRemotePointsA[i].index = -1;
2176   }
2177   if (localPointsA) {
2178     for (i = 0; i < numLeavesA; i++) {
2179       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2180       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2181     }
2182   } else {
2183     for (i = 0; i < numLeavesA; i++) {
2184       if (i > maxleaf || i < minleaf) continue;
2185       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2186     }
2187   }
2188 
2189   PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
2190   PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
2191   for (i = 0; i < numRootsB; i++) {
2192     remotePointsBA[i].rank  = -1;
2193     remotePointsBA[i].index = -1;
2194   }
2195 
2196   PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2197   PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, PetscSafePointerPlusOffset(reorderedRemotePointsA, -minleaf), remotePointsBA, op));
2198   PetscCall(PetscFree(reorderedRemotePointsA));
2199   for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
2200     if (remotePointsBA[i].rank == -1) continue;
2201     remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
2202     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2203     localPointsBA[numLeavesBA]        = i;
2204     numLeavesBA++;
2205   }
2206   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
2207   PetscCall(PetscSFSetFromOptions(*sfBA));
2208   PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
2209   PetscFunctionReturn(PETSC_SUCCESS);
2210 }
2211 
2212 /*
2213   PetscSFCreateLocalSF_Private - Creates a local `PetscSF` that only has intra-process edges of the global `PetscSF`
2214 
2215   Input Parameter:
2216 . sf - The global `PetscSF`
2217 
2218   Output Parameter:
2219 . out - The local `PetscSF`
2220 
2221 .seealso: `PetscSF`, `PetscSFCreate()`
2222  */
2223 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out)
2224 {
2225   MPI_Comm           comm;
2226   PetscMPIInt        myrank;
2227   const PetscInt    *ilocal;
2228   const PetscSFNode *iremote;
2229   PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
2230   PetscSFNode       *liremote;
2231   PetscSF            lsf;
2232 
2233   PetscFunctionBegin;
2234   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2235   if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
2236   else {
2237     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2238     PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
2239     PetscCallMPI(MPI_Comm_rank(comm, &myrank));
2240 
2241     /* Find out local edges and build a local SF */
2242     PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
2243     for (i = lnleaves = 0; i < nleaves; i++) {
2244       if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
2245     }
2246     PetscCall(PetscMalloc1(lnleaves, &lilocal));
2247     PetscCall(PetscMalloc1(lnleaves, &liremote));
2248 
2249     for (i = j = 0; i < nleaves; i++) {
2250       if (iremote[i].rank == (PetscInt)myrank) {
2251         lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2252         liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
2253         liremote[j].index = iremote[i].index;
2254         j++;
2255       }
2256     }
2257     PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
2258     PetscCall(PetscSFSetFromOptions(lsf));
2259     PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
2260     PetscCall(PetscSFSetUp(lsf));
2261     *out = lsf;
2262   }
2263   PetscFunctionReturn(PETSC_SUCCESS);
2264 }
2265 
2266 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2267 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata)
2268 {
2269   PetscMemType rootmtype, leafmtype;
2270 
2271   PetscFunctionBegin;
2272   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2273   PetscCall(PetscSFSetUp(sf));
2274   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2275   PetscCall(PetscGetMemType(rootdata, &rootmtype));
2276   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2277   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2278   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2279   PetscFunctionReturn(PETSC_SUCCESS);
2280 }
2281 
2282 /*@
2283   PetscSFConcatenate - concatenate multiple `PetscSF` into one
2284 
2285   Input Parameters:
2286 + comm        - the communicator
2287 . nsfs        - the number of input `PetscSF`
2288 . sfs         - the array of input `PetscSF`
2289 . rootMode    - the root mode specifying how roots are handled
2290 - leafOffsets - the array of local leaf offsets, one for each input `PetscSF`, or `NULL` for contiguous storage
2291 
2292   Output Parameter:
2293 . newsf - The resulting `PetscSF`
2294 
2295   Level: advanced
2296 
2297   Notes:
2298   The communicator of all `PetscSF`s in `sfs` must be `comm`.
2299 
2300   Leaves are always concatenated locally, keeping them ordered by the input `PetscSF` index and original local order.
2301 
2302   The offsets in `leafOffsets` are added to the original leaf indices.
2303 
2304   If all input SFs use contiguous leaf storage (`ilocal` = `NULL`), `leafOffsets` can be passed as `NULL` as well.
2305   In this case, `NULL` is also passed as `ilocal` to the resulting `PetscSF`.
2306 
2307   If any input `PetscSF` has non-null `ilocal`, `leafOffsets` is needed to distinguish leaves from different input `PetscSF`s.
2308   In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2309 
2310   All root modes retain the essential connectivity condition.
2311   If two leaves of the same input `PetscSF` are connected (sharing the same root), they are also connected in the output `PetscSF`.
2312   Parameter `rootMode` controls how the input root spaces are combined.
2313   For `PETSCSF_CONCATENATE_ROOTMODE_SHARED`, the root space is considered the same for each input `PetscSF` (checked in debug mode)
2314   and is also the same in the output `PetscSF`.
2315   For `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, the input root spaces are taken as separate and joined.
2316   `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` joins the root spaces locally;
2317   roots of sfs[0], sfs[1], sfs[2], ... are joined on each rank separately, ordered by input `PetscSF` and original local index, and renumbered contiguously.
2318   `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL` joins the root spaces globally;
2319   roots of sfs[0], sfs[1], sfs[2], ... are joined globally, ordered by input `PetscSF` index and original global index, and renumbered contiguously;
2320   the original root ranks are ignored.
2321   For both `PETSCSF_CONCATENATE_ROOTMODE_LOCAL` and `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`,
2322   the output `PetscSF`'s root layout is such that the local number of roots is a sum of the input `PetscSF`'s local numbers of roots on each rank
2323   to keep the load balancing.
2324   However, for `PETSCSF_CONCATENATE_ROOTMODE_GLOBAL`, roots can move to different ranks.
2325 
2326   Example:
2327   We can use src/vec/is/sf/tests/ex18.c to compare the root modes. By running
2328 .vb
2329   make -C $PETSC_DIR/src/vec/is/sf/tests ex18
2330   for m in {local,global,shared}; do
2331     mpirun -n 2 $PETSC_DIR/src/vec/is/sf/tests/ex18 -nsfs 2 -n 2 -root_mode $m -sf_view
2332   done
2333 .ve
2334   we generate two identical `PetscSF`s sf_0 and sf_1,
2335 .vb
2336   PetscSF Object: sf_0 2 MPI processes
2337     type: basic
2338     rank #leaves #roots
2339     [ 0]       4      2
2340     [ 1]       4      2
2341     leaves      roots       roots in global numbering
2342     ( 0,  0) <- ( 0,  0)  =   0
2343     ( 0,  1) <- ( 0,  1)  =   1
2344     ( 0,  2) <- ( 1,  0)  =   2
2345     ( 0,  3) <- ( 1,  1)  =   3
2346     ( 1,  0) <- ( 0,  0)  =   0
2347     ( 1,  1) <- ( 0,  1)  =   1
2348     ( 1,  2) <- ( 1,  0)  =   2
2349     ( 1,  3) <- ( 1,  1)  =   3
2350 .ve
2351   and pass them to `PetscSFConcatenate()` along with different choices of `rootMode`, yielding different result_sf\:
2352 .vb
2353   rootMode = local:
2354   PetscSF Object: result_sf 2 MPI processes
2355     type: basic
2356     rank #leaves #roots
2357     [ 0]       8      4
2358     [ 1]       8      4
2359     leaves      roots       roots in global numbering
2360     ( 0,  0) <- ( 0,  0)  =   0
2361     ( 0,  1) <- ( 0,  1)  =   1
2362     ( 0,  2) <- ( 1,  0)  =   4
2363     ( 0,  3) <- ( 1,  1)  =   5
2364     ( 0,  4) <- ( 0,  2)  =   2
2365     ( 0,  5) <- ( 0,  3)  =   3
2366     ( 0,  6) <- ( 1,  2)  =   6
2367     ( 0,  7) <- ( 1,  3)  =   7
2368     ( 1,  0) <- ( 0,  0)  =   0
2369     ( 1,  1) <- ( 0,  1)  =   1
2370     ( 1,  2) <- ( 1,  0)  =   4
2371     ( 1,  3) <- ( 1,  1)  =   5
2372     ( 1,  4) <- ( 0,  2)  =   2
2373     ( 1,  5) <- ( 0,  3)  =   3
2374     ( 1,  6) <- ( 1,  2)  =   6
2375     ( 1,  7) <- ( 1,  3)  =   7
2376 
2377   rootMode = global:
2378   PetscSF Object: result_sf 2 MPI processes
2379     type: basic
2380     rank #leaves #roots
2381     [ 0]       8      4
2382     [ 1]       8      4
2383     leaves      roots       roots in global numbering
2384     ( 0,  0) <- ( 0,  0)  =   0
2385     ( 0,  1) <- ( 0,  1)  =   1
2386     ( 0,  2) <- ( 0,  2)  =   2
2387     ( 0,  3) <- ( 0,  3)  =   3
2388     ( 0,  4) <- ( 1,  0)  =   4
2389     ( 0,  5) <- ( 1,  1)  =   5
2390     ( 0,  6) <- ( 1,  2)  =   6
2391     ( 0,  7) <- ( 1,  3)  =   7
2392     ( 1,  0) <- ( 0,  0)  =   0
2393     ( 1,  1) <- ( 0,  1)  =   1
2394     ( 1,  2) <- ( 0,  2)  =   2
2395     ( 1,  3) <- ( 0,  3)  =   3
2396     ( 1,  4) <- ( 1,  0)  =   4
2397     ( 1,  5) <- ( 1,  1)  =   5
2398     ( 1,  6) <- ( 1,  2)  =   6
2399     ( 1,  7) <- ( 1,  3)  =   7
2400 
2401   rootMode = shared:
2402   PetscSF Object: result_sf 2 MPI processes
2403     type: basic
2404     rank #leaves #roots
2405     [ 0]       8      2
2406     [ 1]       8      2
2407     leaves      roots       roots in global numbering
2408     ( 0,  0) <- ( 0,  0)  =   0
2409     ( 0,  1) <- ( 0,  1)  =   1
2410     ( 0,  2) <- ( 1,  0)  =   2
2411     ( 0,  3) <- ( 1,  1)  =   3
2412     ( 0,  4) <- ( 0,  0)  =   0
2413     ( 0,  5) <- ( 0,  1)  =   1
2414     ( 0,  6) <- ( 1,  0)  =   2
2415     ( 0,  7) <- ( 1,  1)  =   3
2416     ( 1,  0) <- ( 0,  0)  =   0
2417     ( 1,  1) <- ( 0,  1)  =   1
2418     ( 1,  2) <- ( 1,  0)  =   2
2419     ( 1,  3) <- ( 1,  1)  =   3
2420     ( 1,  4) <- ( 0,  0)  =   0
2421     ( 1,  5) <- ( 0,  1)  =   1
2422     ( 1,  6) <- ( 1,  0)  =   2
2423     ( 1,  7) <- ( 1,  1)  =   3
2424 .ve
2425 
2426 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFConcatenateRootMode`
2427 @*/
2428 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscSFConcatenateRootMode rootMode, PetscInt leafOffsets[], PetscSF *newsf)
2429 {
2430   PetscInt     i, s, nLeaves, nRoots;
2431   PetscInt    *leafArrayOffsets;
2432   PetscInt    *ilocal_new;
2433   PetscSFNode *iremote_new;
2434   PetscBool    all_ilocal_null = PETSC_FALSE;
2435   PetscLayout  glayout         = NULL;
2436   PetscInt    *gremote         = NULL;
2437   PetscMPIInt  rank, size;
2438 
2439   PetscFunctionBegin;
2440   if (PetscDefined(USE_DEBUG)) {
2441     PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2442 
2443     PetscCall(PetscSFCreate(comm, &dummy));
2444     PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
2445     PetscAssertPointer(sfs, 3);
2446     for (i = 0; i < nsfs; i++) {
2447       PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
2448       PetscCheckSameComm(dummy, 1, sfs[i], 3);
2449     }
2450     PetscValidLogicalCollectiveEnum(dummy, rootMode, 4);
2451     if (leafOffsets) PetscAssertPointer(leafOffsets, 5);
2452     PetscAssertPointer(newsf, 6);
2453     PetscCall(PetscSFDestroy(&dummy));
2454   }
2455   if (!nsfs) {
2456     PetscCall(PetscSFCreate(comm, newsf));
2457     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2458     PetscFunctionReturn(PETSC_SUCCESS);
2459   }
2460   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2461   PetscCallMPI(MPI_Comm_size(comm, &size));
2462 
2463   /* Calculate leaf array offsets */
2464   PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2465   leafArrayOffsets[0] = 0;
2466   for (s = 0; s < nsfs; s++) {
2467     PetscInt nl;
2468 
2469     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2470     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2471   }
2472   nLeaves = leafArrayOffsets[nsfs];
2473 
2474   /* Calculate number of roots */
2475   switch (rootMode) {
2476   case PETSCSF_CONCATENATE_ROOTMODE_SHARED: {
2477     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2478     if (PetscDefined(USE_DEBUG)) {
2479       for (s = 1; s < nsfs; s++) {
2480         PetscInt nr;
2481 
2482         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2483         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "rootMode = %s but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", PetscSFConcatenateRootModes[rootMode], s, nr, nRoots);
2484       }
2485     }
2486   } break;
2487   case PETSCSF_CONCATENATE_ROOTMODE_GLOBAL: {
2488     /* Calculate also global layout in this case */
2489     PetscInt    *nls;
2490     PetscLayout *lts;
2491     PetscInt   **inds;
2492     PetscInt     j;
2493     PetscInt     rootOffset = 0;
2494 
2495     PetscCall(PetscCalloc3(nsfs, &lts, nsfs, &nls, nsfs, &inds));
2496     PetscCall(PetscLayoutCreate(comm, &glayout));
2497     glayout->bs = 1;
2498     glayout->n  = 0;
2499     glayout->N  = 0;
2500     for (s = 0; s < nsfs; s++) {
2501       PetscCall(PetscSFGetGraphLayout(sfs[s], &lts[s], &nls[s], NULL, &inds[s]));
2502       glayout->n += lts[s]->n;
2503       glayout->N += lts[s]->N;
2504     }
2505     PetscCall(PetscLayoutSetUp(glayout));
2506     PetscCall(PetscMalloc1(nLeaves, &gremote));
2507     for (s = 0, j = 0; s < nsfs; s++) {
2508       for (i = 0; i < nls[s]; i++, j++) gremote[j] = inds[s][i] + rootOffset;
2509       rootOffset += lts[s]->N;
2510       PetscCall(PetscLayoutDestroy(&lts[s]));
2511       PetscCall(PetscFree(inds[s]));
2512     }
2513     PetscCall(PetscFree3(lts, nls, inds));
2514     nRoots = glayout->N;
2515   } break;
2516   case PETSCSF_CONCATENATE_ROOTMODE_LOCAL:
2517     /* nRoots calculated later in this case */
2518     break;
2519   default:
2520     SETERRQ(comm, PETSC_ERR_ARG_WRONG, "Invalid PetscSFConcatenateRootMode %d", rootMode);
2521   }
2522 
2523   if (!leafOffsets) {
2524     all_ilocal_null = PETSC_TRUE;
2525     for (s = 0; s < nsfs; s++) {
2526       const PetscInt *ilocal;
2527 
2528       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2529       if (ilocal) {
2530         all_ilocal_null = PETSC_FALSE;
2531         break;
2532       }
2533     }
2534     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2535   }
2536 
2537   /* Renumber and concatenate local leaves */
2538   ilocal_new = NULL;
2539   if (!all_ilocal_null) {
2540     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2541     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1;
2542     for (s = 0; s < nsfs; s++) {
2543       const PetscInt *ilocal;
2544       PetscInt       *ilocal_l = PetscSafePointerPlusOffset(ilocal_new, leafArrayOffsets[s]);
2545       PetscInt        i, nleaves_l;
2546 
2547       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2548       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2549     }
2550   }
2551 
2552   /* Renumber and concatenate remote roots */
2553   if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL || rootMode == PETSCSF_CONCATENATE_ROOTMODE_SHARED) {
2554     PetscInt rootOffset = 0;
2555 
2556     PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2557     for (i = 0; i < nLeaves; i++) {
2558       iremote_new[i].rank  = -1;
2559       iremote_new[i].index = -1;
2560     }
2561     for (s = 0; s < nsfs; s++) {
2562       PetscInt           i, nl, nr;
2563       PetscSF            tmp_sf;
2564       const PetscSFNode *iremote;
2565       PetscSFNode       *tmp_rootdata;
2566       PetscSFNode       *tmp_leafdata = PetscSafePointerPlusOffset(iremote_new, leafArrayOffsets[s]);
2567 
2568       PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
2569       PetscCall(PetscSFCreate(comm, &tmp_sf));
2570       /* create helper SF with contiguous leaves */
2571       PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
2572       PetscCall(PetscSFSetUp(tmp_sf));
2573       PetscCall(PetscMalloc1(nr, &tmp_rootdata));
2574       if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) {
2575         for (i = 0; i < nr; i++) {
2576           tmp_rootdata[i].index = i + rootOffset;
2577           tmp_rootdata[i].rank  = (PetscInt)rank;
2578         }
2579         rootOffset += nr;
2580       } else {
2581         for (i = 0; i < nr; i++) {
2582           tmp_rootdata[i].index = i;
2583           tmp_rootdata[i].rank  = (PetscInt)rank;
2584         }
2585       }
2586       PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2587       PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2588       PetscCall(PetscSFDestroy(&tmp_sf));
2589       PetscCall(PetscFree(tmp_rootdata));
2590     }
2591     if (rootMode == PETSCSF_CONCATENATE_ROOTMODE_LOCAL) nRoots = rootOffset; // else nRoots already calculated above
2592 
2593     /* Build the new SF */
2594     PetscCall(PetscSFCreate(comm, newsf));
2595     PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
2596   } else {
2597     /* Build the new SF */
2598     PetscCall(PetscSFCreate(comm, newsf));
2599     PetscCall(PetscSFSetGraphLayout(*newsf, glayout, nLeaves, ilocal_new, PETSC_OWN_POINTER, gremote));
2600   }
2601   PetscCall(PetscSFSetUp(*newsf));
2602   PetscCall(PetscSFViewFromOptions(*newsf, NULL, "-sf_concat_view"));
2603   PetscCall(PetscLayoutDestroy(&glayout));
2604   PetscCall(PetscFree(gremote));
2605   PetscCall(PetscFree(leafArrayOffsets));
2606   PetscFunctionReturn(PETSC_SUCCESS);
2607 }
2608