xref: /petsc/src/vec/is/sf/interface/sf.c (revision a69119a591a03a9d906b29c0a4e9802e4d7c9795)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petscctable.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7 #include <cuda_runtime.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11 #include <hip/hip_runtime.h>
12 #endif
13 
/* Under the clang static analyzer, expose PetscSFCheckGraphSet() as a real prototype
   so the analyzer can reason about it instead of expanding the macro */
#if defined(PETSC_CLANG_STATIC_ANALYZER)
void PetscSFCheckGraphSet(PetscSF, int);
#else
#if defined(PETSC_USE_DEBUG)
/* Debug builds: raise an error if the graph of SF argument number <arg> has not been set yet */
#define PetscSFCheckGraphSet(sf, arg) PetscCheck((sf)->graphset, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()", (arg), #sf, PETSC_FUNCTION_NAME);
#else
/* Optimized builds: the check compiles away to nothing */
#define PetscSFCheckGraphSet(sf, arg) \
  do { \
  } while (0)
#endif
#endif

/* Strings parallel to the PetscSFDuplicateOption enum, used by options/enum printing machinery */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY", "RANKS", "GRAPH", "PetscSFDuplicateOption", "PETSCSF_DUPLICATE_", NULL};
27 
28 /*@
29    PetscSFCreate - create a star forest communication context
30 
31    Collective
32 
33    Input Parameter:
34 .  comm - communicator on which the star forest will operate
35 
36    Output Parameter:
37 .  sf - new star forest context
38 
39    Options Database Keys:
40 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
41 .  -sf_type window    -Use MPI-3 one-sided window for communication
42 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
43 
44    Level: intermediate
45 
46    Notes:
47    When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
48    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
49    SFs are optimized and they have better performance than general SFs.
50 
51 .seealso: `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
52 @*/
53 PetscErrorCode PetscSFCreate(MPI_Comm comm, PetscSF *sf) {
54   PetscSF b;
55 
56   PetscFunctionBegin;
57   PetscValidPointer(sf, 2);
58   PetscCall(PetscSFInitializePackage());
59 
60   PetscCall(PetscHeaderCreate(b, PETSCSF_CLASSID, "PetscSF", "Star Forest", "PetscSF", comm, PetscSFDestroy, PetscSFView));
61 
62   b->nroots    = -1;
63   b->nleaves   = -1;
64   b->minleaf   = PETSC_MAX_INT;
65   b->maxleaf   = PETSC_MIN_INT;
66   b->nranks    = -1;
67   b->rankorder = PETSC_TRUE;
68   b->ingroup   = MPI_GROUP_NULL;
69   b->outgroup  = MPI_GROUP_NULL;
70   b->graphset  = PETSC_FALSE;
71 #if defined(PETSC_HAVE_DEVICE)
72   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
73   b->use_stream_aware_mpi = PETSC_FALSE;
74   b->unknown_input_stream = PETSC_FALSE;
75 #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
76   b->backend = PETSCSF_BACKEND_KOKKOS;
77 #elif defined(PETSC_HAVE_CUDA)
78   b->backend = PETSCSF_BACKEND_CUDA;
79 #elif defined(PETSC_HAVE_HIP)
80   b->backend = PETSCSF_BACKEND_HIP;
81 #endif
82 
83 #if defined(PETSC_HAVE_NVSHMEM)
84   b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
85   b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
86   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem", &b->use_nvshmem, NULL));
87   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_nvshmem_get", &b->use_nvshmem_get, NULL));
88 #endif
89 #endif
90   b->vscat.from_n = -1;
91   b->vscat.to_n   = -1;
92   b->vscat.unit   = MPIU_SCALAR;
93   *sf             = b;
94   PetscFunctionReturn(0);
95 }
96 
/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Parameter:
.  sf - star forest

   Level: advanced

.seealso: `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFReset(PetscSF sf) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Let the implementation release its own state first */
  PetscTryTypeMethod(sf, Reset);
  /* Return the graph description to the "unset" sentinels used by PetscSFCreate() */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine/remote may alias mine_alloc/remote_alloc (owned storage); free the owned copies */
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks, sf->roffset, sf->rmine, sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
  /* Break the back-reference before destroying the multi-SF to avoid touching freed memory */
  if (sf->multi) sf->multi->multi = NULL;
  PetscCall(PetscSFDestroy(&sf->multi));
  PetscCall(PetscLayoutDestroy(&sf->map));

#if defined(PETSC_HAVE_DEVICE)
  /* Free device-side mirrors of the leaf index arrays */
  for (PetscInt i = 0; i < 2; i++) PetscCall(PetscSFFree(sf, PETSC_MEMTYPE_DEVICE, sf->rmine_d[i]));
#endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
139 
140 /*@C
141    PetscSFSetType - Set the PetscSF communication implementation
142 
143    Collective on PetscSF
144 
145    Input Parameters:
146 +  sf - the PetscSF context
147 -  type - a known method
148 
149    Options Database Key:
150 .  -sf_type <type> - Sets the method; use -help for a list
151    of available methods (for instance, window, basic, neighbor)
152 
153    Notes:
154    See "include/petscsf.h" for available methods (for instance)
155 +    PETSCSFWINDOW - MPI-2/3 one-sided
156 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
157 
158   Level: intermediate
159 
160 .seealso: `PetscSFType`, `PetscSFCreate()`
161 @*/
162 PetscErrorCode PetscSFSetType(PetscSF sf, PetscSFType type) {
163   PetscBool match;
164   PetscErrorCode (*r)(PetscSF);
165 
166   PetscFunctionBegin;
167   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
168   PetscValidCharPointer(type, 2);
169 
170   PetscCall(PetscObjectTypeCompare((PetscObject)sf, type, &match));
171   if (match) PetscFunctionReturn(0);
172 
173   PetscCall(PetscFunctionListFind(PetscSFList, type, &r));
174   PetscCheck(r, PETSC_COMM_SELF, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unable to find requested PetscSF type %s", type);
175   /* Destroy the previous PetscSF implementation context */
176   PetscTryTypeMethod(sf, Destroy);
177   PetscCall(PetscMemzero(sf->ops, sizeof(*sf->ops)));
178   PetscCall(PetscObjectChangeTypeName((PetscObject)sf, type));
179   PetscCall((*r)(sf));
180   PetscFunctionReturn(0);
181 }
182 
/*@C
  PetscSFGetType - Get the PetscSF communication implementation

  Not Collective

  Input Parameter:
. sf  - the PetscSF context

  Output Parameter:
. type - the PetscSF type name

  Level: intermediate

.seealso: `PetscSFSetType()`, `PetscSFCreate()`
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(type, 2);
  /* Returns the stored type name; NULL if PetscSFSetType() has not been called yet */
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}
205 
/*@C
   PetscSFDestroy - destroy star forest

   Collective

   Input Parameter:
.  sf - address of star forest

   Level: intermediate

.seealso: `PetscSFCreate()`, `PetscSFReset()`
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf) {
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf), PETSCSF_CLASSID, 1);
  /* Drop one reference; only the last reference actually frees the object */
  if (--((PetscObject)(*sf))->refct > 0) {
    *sf = NULL;
    PetscFunctionReturn(0);
  }
  PetscCall(PetscSFReset(*sf));
  PetscTryTypeMethod((*sf), Destroy);
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* vscat.unit was duplicated (see PetscSFDuplicate()) only when bs > 1, so free it only then */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(0);
}
233 
234 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) {
235   PetscInt           i, nleaves;
236   PetscMPIInt        size;
237   const PetscInt    *ilocal;
238   const PetscSFNode *iremote;
239 
240   PetscFunctionBegin;
241   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
242   PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, &iremote));
243   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
244   for (i = 0; i < nleaves; i++) {
245     const PetscInt rank   = iremote[i].rank;
246     const PetscInt remote = iremote[i].index;
247     const PetscInt leaf   = ilocal ? ilocal[i] : i;
248     PetscCheck(rank >= 0 && rank < size, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)", rank, i, size);
249     PetscCheck(remote >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0", remote, i);
250     PetscCheck(leaf >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0", leaf, i);
251   }
252   PetscFunctionReturn(0);
253 }
254 
255 /*@
256    PetscSFSetUp - set up communication structures
257 
258    Collective
259 
260    Input Parameter:
261 .  sf - star forest communication object
262 
263    Level: beginner
264 
265 .seealso: `PetscSFSetFromOptions()`, `PetscSFSetType()`
266 @*/
267 PetscErrorCode PetscSFSetUp(PetscSF sf) {
268   PetscFunctionBegin;
269   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
270   PetscSFCheckGraphSet(sf, 1);
271   if (sf->setupcalled) PetscFunctionReturn(0);
272   PetscCall(PetscLogEventBegin(PETSCSF_SetUp, sf, 0, 0, 0));
273   PetscCall(PetscSFCheckGraphValid_Private(sf));
274   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf, PETSCSFBASIC)); /* Zero all sf->ops */
275   PetscTryTypeMethod(sf, SetUp);
276 #if defined(PETSC_HAVE_CUDA)
277   if (sf->backend == PETSCSF_BACKEND_CUDA) {
278     sf->ops->Malloc = PetscSFMalloc_CUDA;
279     sf->ops->Free   = PetscSFFree_CUDA;
280   }
281 #endif
282 #if defined(PETSC_HAVE_HIP)
283   if (sf->backend == PETSCSF_BACKEND_HIP) {
284     sf->ops->Malloc = PetscSFMalloc_HIP;
285     sf->ops->Free   = PetscSFFree_HIP;
286   }
287 #endif
288 
289 #
290 #if defined(PETSC_HAVE_KOKKOS)
291   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
292     sf->ops->Malloc = PetscSFMalloc_Kokkos;
293     sf->ops->Free   = PetscSFFree_Kokkos;
294   }
295 #endif
296   PetscCall(PetscLogEventEnd(PETSCSF_SetUp, sf, 0, 0, 0));
297   sf->setupcalled = PETSC_TRUE;
298   PetscFunctionReturn(0);
299 }
300 
/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Parameter:
.  sf - star forest

   Options Database Keys:
+  -sf_type               - implementation type, see PetscSFSetType()
.  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
.  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
                            use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
                            If true, this option only works with -use_gpu_aware_mpi 1.
.  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                               If true, this option only works with -use_gpu_aware_mpi 1.

-  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
                              On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                              the only available is kokkos.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf) {
  PetscSFType deft;
  char        type[256];
  PetscBool   flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscObjectOptionsBegin((PetscObject)sf);
  /* Default to the current type if one is set, otherwise basic */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  PetscCall(PetscOptionsFList("-sf_type", "PetscSF implementation type", "PetscSFSetType", PetscSFList, deft, type, sizeof(type), &flg));
  PetscCall(PetscSFSetType(sf, flg ? type : deft));
  PetscCall(PetscOptionsBool("-sf_rank_order", "sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise", "PetscSFSetRankOrder", sf->rankorder, &sf->rankorder, NULL));
#if defined(PETSC_HAVE_DEVICE)
  {
    char      backendstr[32] = {0};
    PetscBool isCuda = PETSC_FALSE, isHip = PETSC_FALSE, isKokkos = PETSC_FALSE, set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    PetscCall(PetscOptionsBool("-sf_unknown_input_stream", "SF root/leafdata is computed on arbitary streams unknown to SF", "PetscSFSetFromOptions", sf->unknown_input_stream, &sf->unknown_input_stream, NULL));
    PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi", "Assume the underlying MPI is cuda-stream aware", "PetscSFSetFromOptions", sf->use_stream_aware_mpi, &sf->use_stream_aware_mpi, NULL));
    PetscCall(PetscOptionsString("-sf_backend", "Select the device backend SF uses", "PetscSFSetFromOptions", NULL, backendstr, sizeof(backendstr), &set));
    /* Case-insensitive match of the requested backend name */
    PetscCall(PetscStrcasecmp("cuda", backendstr, &isCuda));
    PetscCall(PetscStrcasecmp("kokkos", backendstr, &isKokkos));
    PetscCall(PetscStrcasecmp("hip", backendstr, &isHip));
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else PetscCheck(!set, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
#elif defined(PETSC_HAVE_KOKKOS)
    /* Kokkos-only builds accept only "kokkos" (or no option at all) */
    PetscCheck(!set || isKokkos, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-sf_backend %s is not supported. You can only choose kokkos", backendstr);
#endif
  }
#endif
  PetscTryTypeMethod(sf, SetFromOptions, PetscOptionsObject);
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}
361 
/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Parameters:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf, PetscBool flg) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveBool(sf, flg, 2);
  /* The multi-SF is built lazily; once it exists the ordering can no longer change */
  PetscCheck(!sf->multi, PetscObjectComm((PetscObject)sf), PETSC_ERR_ARG_WRONGSTATE, "Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}
383 
/*@C
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Parameters:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   Leaf indices in ilocal must be unique, otherwise an error occurs.

   Input arrays ilocal and iremote follow the PetscCopyMode semantics.
   In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
   PETSc might modify the respective array;
   if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
   Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

   Fortran Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

   Developer Notes:
   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
   This also allows to compare leaf sets of two SFs easily.

.seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf, PetscInt nroots, PetscInt nleaves, PetscInt *ilocal, PetscCopyMode localmode, PetscSFNode *iremote, PetscCopyMode remotemode) {
  PetscBool unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal, 4);
  if (nleaves > 0) PetscValidPointer(iremote, 6);
  PetscCheck(nroots >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nroots %" PetscInt_FMT ", cannot be negative", nroots);
  PetscCheck(nleaves >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nleaves %" PetscInt_FMT ", cannot be negative", nleaves);
  /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
   * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
  PetscCheck((int)localmode >= PETSC_COPY_VALUES && localmode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong localmode %d", localmode);
  PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Wrong remotemode %d", remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph, sf, 0, 0, 0));

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* For PETSC_COPY_VALUES, take private copies so the caller's arrays stay intact;
     from here on ilocal/iremote point at memory this SF may modify and/or own */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves, &tlocal));
    PetscCall(PetscArraycpy(tlocal, ilocal, nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves, &tremote));
    PetscCall(PetscArraycpy(tremote, iremote, nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode work;

    /* Co-sort iremote by leaf index so ilocal is ascending; this finds min/max leaves,
       detects duplicates, and lets leaf sets of two SFs be compared easily */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    unique = PetscNot(unique); /* PetscSortedCheckDupsInt() returns "has duplicates" */
    PetscCheck(sf->allow_multi_leaves || unique, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves - 1];
    contiguous  = (PetscBool)(unique && ilocal[0] == 0 && ilocal[nleaves - 1] == nleaves - 1);
  } else {
    /* NULL ilocal already means contiguous leaves [0, nleaves) */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* Contiguous leaves are stored as ilocal == NULL; drop any explicit array */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  /* mine/remote are the working pointers; *_alloc records what this SF owns and must free */
  sf->mine = ilocal;
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc = NULL;
  } else {
    sf->mine_alloc = ilocal;
  }
  sf->remote = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc = NULL;
  } else {
    sf->remote_alloc = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph, sf, 0, 0, 0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
498 
/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf, PetscLayout map, PetscSFPattern pattern) {
  MPI_Comm    comm;
  PetscInt    n, N, res[2];
  PetscMPIInt rank, size;
  PetscSFType type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map, 2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL, comm, PETSC_ERR_ARG_OUTOFRANGE, "Unsupported PetscSFPattern %d", pattern);
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* Alltoall ignores the user's map; build an internal layout with <size> roots per rank */
    type = PETSCSFALLTOALL;
    PetscCall(PetscLayoutCreate(comm, &sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map, size));
    PetscCall(PetscLayoutSetSize(sf->map, ((PetscInt)size) * size));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map, &n));
    PetscCall(PetscLayoutGetSize(map, &N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, res, 2, MPIU_INT, MPI_MAX, comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map, &sf->map));
  }
  PetscCall(PetscSFSetType(sf, type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves = N;
    sf->nroots  = n;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 receives leaves in a gather */
    sf->nleaves = rank ? 0 : N;
    sf->nroots  = n;
    sf->nranks  = rank ? 0 : size;
    sf->minleaf = 0;
    sf->maxleaf = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves = size;
    sf->nroots  = size;
    sf->nranks  = size;
    sf->minleaf = 0;
    sf->maxleaf = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
596 
597 /*@
598    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
599 
600    Collective
601 
602    Input Parameter:
603 .  sf - star forest to invert
604 
605    Output Parameter:
606 .  isf - inverse of sf
607 
608    Level: advanced
609 
610    Notes:
611    All roots must have degree 1.
612 
613    The local space may be a permutation, but cannot be sparse.
614 
615 .seealso: `PetscSFSetGraph()`
616 @*/
617 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf, PetscSF *isf) {
618   PetscMPIInt     rank;
619   PetscInt        i, nroots, nleaves, maxlocal, count, *newilocal;
620   const PetscInt *ilocal;
621   PetscSFNode    *roots, *leaves;
622 
623   PetscFunctionBegin;
624   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
625   PetscSFCheckGraphSet(sf, 1);
626   PetscValidPointer(isf, 2);
627 
628   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, NULL));
629   maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
630 
631   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
632   PetscCall(PetscMalloc2(nroots, &roots, maxlocal, &leaves));
633   for (i = 0; i < maxlocal; i++) {
634     leaves[i].rank  = rank;
635     leaves[i].index = i;
636   }
637   for (i = 0; i < nroots; i++) {
638     roots[i].rank  = -1;
639     roots[i].index = -1;
640   }
641   PetscCall(PetscSFReduceBegin(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
642   PetscCall(PetscSFReduceEnd(sf, MPIU_2INT, leaves, roots, MPI_REPLACE));
643 
644   /* Check whether our leaves are sparse */
645   for (i = 0, count = 0; i < nroots; i++)
646     if (roots[i].rank >= 0) count++;
647   if (count == nroots) newilocal = NULL;
648   else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ PetscCall(PetscMalloc1(count, &newilocal));
649     for (i = 0, count = 0; i < nroots; i++) {
650       if (roots[i].rank >= 0) {
651         newilocal[count]   = i;
652         roots[count].rank  = roots[i].rank;
653         roots[count].index = roots[i].index;
654         count++;
655       }
656     }
657   }
658 
659   PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, isf));
660   PetscCall(PetscSFSetGraph(*isf, maxlocal, count, newilocal, PETSC_OWN_POINTER, roots, PETSC_COPY_VALUES));
661   PetscCall(PetscFree2(roots, leaves));
662   PetscFunctionReturn(0);
663 }
664 
/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Parameters:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Parameter:
.  newsf - new communication object

   Level: beginner

.seealso: `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf, PetscSFDuplicateOption opt, PetscSF *newsf) {
  PetscSFType  type;
  MPI_Datatype dtype = MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidLogicalCollectiveEnum(sf, opt, 2);
  PetscValidPointer(newsf, 3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf), newsf));
  PetscCall(PetscSFGetType(sf, &type));
  if (type) PetscCall(PetscSFSetType(*newsf, type));
  (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf, 1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt           nroots, nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
      /* PETSC_COPY_VALUES guarantees the source arrays stay intact, so the const cast is safe */
      PetscCall(PetscSFSetGraph(*newsf, nroots, nleaves, (PetscInt *)ilocal, PETSC_COPY_VALUES, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
    } else {
      PetscCall(PetscSFSetGraphWithPattern(*newsf, sf->map, sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit, &dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  /* Carry the device configuration over to the duplicate */
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream = sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  PetscTryTypeMethod(sf, Duplicate, opt, *newsf);
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}
723 
724 /*@C
725    PetscSFGetGraph - Get the graph specifying a parallel star forest
726 
727    Not Collective
728 
729    Input Parameter:
730 .  sf - star forest
731 
732    Output Parameters:
733 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
734 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
735 .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
736 -  iremote - remote locations of root vertices for each leaf on the current process
737 
738    Notes:
739      We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
740 
741      The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
742      see its manpage for details.
743 
744    Fortran Notes:
745      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
746      want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
747 
748      To check for a NULL ilocal use
749 $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
750 
751    Level: intermediate
752 
753 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
754 @*/
755 PetscErrorCode PetscSFGetGraph(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) {
756   PetscFunctionBegin;
757   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
758   if (sf->ops->GetGraph) {
759     PetscCall((sf->ops->GetGraph)(sf, nroots, nleaves, ilocal, iremote));
760   } else {
761     if (nroots) *nroots = sf->nroots;
762     if (nleaves) *nleaves = sf->nleaves;
763     if (ilocal) *ilocal = sf->mine;
764     if (iremote) *iremote = sf->remote;
765   }
766   PetscFunctionReturn(0);
767 }
768 
/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf, PetscInt *minleaf, PetscInt *maxleaf) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  /* min/max were computed when the graph was set; either output may be NULL */
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}
793 
794 /*@C
795    PetscSFViewFromOptions - View from Options
796 
797    Collective on PetscSF
798 
799    Input Parameters:
800 +  A - the star forest
801 .  obj - Optional object
802 -  name - command line option
803 
   Level: intermediate

.seealso: `PetscSF`, `PetscSFView()`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
806 @*/
PetscErrorCode PetscSFViewFromOptions(PetscSF A, PetscObject obj, const char name[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, PETSCSF_CLASSID, 1);
  /* Thin wrapper: delegate entirely to the generic PetscObject options-driven viewing,
     which consults the options database entry given by name */
  PetscCall(PetscObjectViewFromOptions((PetscObject)A, obj, name));
  PetscFunctionReturn(0);
}
813 
814 /*@C
815    PetscSFView - view a star forest
816 
817    Collective
818 
819    Input Parameters:
820 +  sf - star forest
821 -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
822 
823    Level: beginner
824 
825 .seealso: `PetscSFCreate()`, `PetscSFSetGraph()`
826 @*/
PetscErrorCode PetscSFView(PetscSF sf, PetscViewer viewer) {
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Default to stdout on the SF's communicator when no viewer is supplied */
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf), &viewer));
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  PetscCheckSameComm(sf, 1, viewer, 2);
  /* Setting up materializes the rank/offset arrays printed below; only possible once the graph is set */
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  /* MATLAB-format ASCII output is left entirely to the type-specific View method below */
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii, i, j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf, viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    /* Only general (explicitly set) graphs are printed edge-by-edge; pattern SFs have no stored edges */
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(0);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      /* Synchronized printing keeps per-rank output ordered by rank */
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n", rank, sf->nroots, sf->nleaves, sf->nranks));
      /* One line per leaf: local leaf index <- (owner rank, root index) */
      for (i = 0; i < sf->nleaves; i++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", rank, sf->mine ? sf->mine[i] : i, sf->remote[i].rank, sf->remote[i].index));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer, &format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks, *perm;
        /* Sort a copy of the rank list (with permutation) so edges are reported in ascending rank order */
        PetscCall(PetscMalloc2(sf->nranks, &tmpranks, sf->nranks, &perm));
        PetscCall(PetscArraycpy(tmpranks, sf->ranks, sf->nranks));
        for (i = 0; i < sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks, tmpranks, perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Roots referenced by my leaves, by rank\n", rank));
        for (ii = 0; ii < sf->nranks; ii++) {
          i = perm[ii];
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] %d: %" PetscInt_FMT " edges\n", rank, sf->ranks[i], sf->roffset[i + 1] - sf->roffset[i]));
          /* roffset[i]..roffset[i+1] delimits this rank's slice of the concatenated rmine/rremote arrays */
          for (j = sf->roffset[i]; j < sf->roffset[i + 1]; j++) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n", rank, sf->rmine[j], sf->rremote[j]));
        }
        PetscCall(PetscFree2(tmpranks, perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  /* Give the implementation a chance to append type-specific output (no-op if not provided) */
  PetscTryTypeMethod(sf, View, viewer);
  PetscFunctionReturn(0);
}
878 
879 /*@C
880    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
881 
882    Not Collective
883 
884    Input Parameter:
885 .  sf - star forest
886 
887    Output Parameters:
888 +  nranks - number of ranks referenced by local part
889 .  ranks - array of ranks
890 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
891 .  rmine - concatenated array holding local indices referencing each remote rank
892 -  rremote - concatenated array holding remote indices referenced for each remote rank
893 
894    Level: developer
895 
896 .seealso: `PetscSFGetLeafRanks()`
897 @*/
898 PetscErrorCode PetscSFGetRootRanks(PetscSF sf, PetscInt *nranks, const PetscMPIInt **ranks, const PetscInt **roffset, const PetscInt **rmine, const PetscInt **rremote) {
899   PetscFunctionBegin;
900   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
901   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
902   if (sf->ops->GetRootRanks) {
903     PetscCall((sf->ops->GetRootRanks)(sf, nranks, ranks, roffset, rmine, rremote));
904   } else {
905     /* The generic implementation */
906     if (nranks) *nranks = sf->nranks;
907     if (ranks) *ranks = sf->ranks;
908     if (roffset) *roffset = sf->roffset;
909     if (rmine) *rmine = sf->rmine;
910     if (rremote) *rremote = sf->rremote;
911   }
912   PetscFunctionReturn(0);
913 }
914 
915 /*@C
916    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
917 
918    Not Collective
919 
920    Input Parameter:
921 .  sf - star forest
922 
923    Output Parameters:
924 +  niranks - number of leaf ranks referencing roots on this process
925 .  iranks - array of ranks
926 .  ioffset - offset in irootloc for each rank (length niranks+1)
927 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
928 
929    Level: developer
930 
931 .seealso: `PetscSFGetRootRanks()`
932 @*/
933 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf, PetscInt *niranks, const PetscMPIInt **iranks, const PetscInt **ioffset, const PetscInt **irootloc) {
934   PetscFunctionBegin;
935   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
936   PetscCheck(sf->setupcalled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUp() before obtaining ranks");
937   if (sf->ops->GetLeafRanks) {
938     PetscCall((sf->ops->GetLeafRanks)(sf, niranks, iranks, ioffset, irootloc));
939   } else {
940     PetscSFType type;
941     PetscCall(PetscSFGetType(sf, &type));
942     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
943   }
944   PetscFunctionReturn(0);
945 }
946 
947 static PetscBool InList(PetscMPIInt needle, PetscMPIInt n, const PetscMPIInt *list) {
948   PetscInt i;
949   for (i = 0; i < n; i++) {
950     if (needle == list[i]) return PETSC_TRUE;
951   }
952   return PETSC_FALSE;
953 }
954 
955 /*@C
956    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
957 
958    Collective
959 
960    Input Parameters:
961 +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
962 -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
963 
964    Level: developer
965 
966 .seealso: `PetscSFGetRootRanks()`
967 @*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf, MPI_Group dgroup) {
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size, groupsize, *groupranks;
  PetscInt          *rcount, *ranks;
  PetscInt           i, irank = -1, orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf), &size));
  /* Count how many of my leaves reference each remote rank, hashing on the rank */
  PetscCall(PetscTableCreate(10, size, &table));
  for (i = 0; i < sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    PetscCall(PetscTableAdd(table, sf->remote[i].rank + 1, 1, ADD_VALUES));
  }
  PetscCall(PetscTableGetCount(table, &sf->nranks));
  /* rmine/rremote are sized by nleaves: each leaf lands in exactly one rank's slice */
  PetscCall(PetscMalloc4(sf->nranks, &sf->ranks, sf->nranks + 1, &sf->roffset, sf->nleaves, &sf->rmine, sf->nleaves, &sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks, &rcount, sf->nranks, &ranks));
  PetscCall(PetscTableGetHeadPosition(table, &pos));
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscTableGetNext(table, &pos, &ranks[i], &rcount[i]));
    ranks[i]--; /* Convert back to 0-based */
  }
  PetscCall(PetscTableDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group    group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate dgroup's member ranks into ranks of the SF's communicator */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_size(dgroup, &groupsize));
    PetscCall(PetscMalloc1(groupsize, &dgroupranks));
    PetscCall(PetscMalloc1(groupsize, &groupranks));
    for (i = 0; i < groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup, groupsize, dgroupranks, group, groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks = 0, i = sf->nranks; sf->ndranks < i;) {
    for (i--; sf->ndranks < i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i], groupsize, groupranks)) break;
    }
    for (; sf->ndranks <= i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks], groupsize, groupranks)) break;
    }
    if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank, tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition separately so both halves are ascending in rank */
  PetscCall(PetscSortIntWithArray(sf->ndranks, ranks, rcount));
  PetscCall(PetscSortIntWithArray(sf->nranks - sf->ndranks, ranks + sf->ndranks, rcount + sf->ndranks));
  /* Prefix-sum the per-rank counts into roffset, then reuse rcount as a fill cursor per rank */
  sf->roffset[0] = 0;
  for (i = 0; i < sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i], sf->ranks + i));
    sf->roffset[i + 1] = sf->roffset[i] + rcount[i];
    rcount[i]          = 0;
  }
  /* Bucket every leaf into its owning rank's slice of rmine/rremote */
  for (i = 0, irank = -1, orank = -1; i < sf->nleaves; i++) {
    /* short circuit: consecutive leaves often share the same remote rank */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks (distinguished partition first) */
      PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->ndranks, sf->ranks, &irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(sf->remote[i].rank, sf->nranks - sf->ndranks, sf->ranks + sf->ndranks, &irank));
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheck(irank >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not find rank %" PetscInt_FMT " in array", sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount, ranks));
  PetscFunctionReturn(0);
}
1056 
1057 /*@C
1058    PetscSFGetGroups - gets incoming and outgoing process groups
1059 
1060    Collective
1061 
1062    Input Parameter:
1063 .  sf - star forest
1064 
1065    Output Parameters:
1066 +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1067 -  outgoing - group of destination processes for outgoing edges (roots that I reference)
1068 
1069    Level: developer
1070 
1071 .seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
1072 @*/
PetscErrorCode PetscSFGetGroups(PetscSF sf, MPI_Group *incoming, MPI_Group *outgoing) {
  MPI_Group group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheck(sf->nranks >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFSetUpRanks() before obtaining groups");
  /* Groups are computed lazily on first request and cached on the SF */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt        i;
    const PetscInt *indegree;
    PetscMPIInt     rank, *outranks, *inranks;
    PetscSFNode    *remote;
    PetscSF         bgcount;

    /* Compute the number of incoming ranks: build an SF with one root here and
       one leaf per rank I reference, then the root's degree is my incoming count */
    PetscCall(PetscMalloc1(sf->nranks, &remote));
    for (i = 0; i < sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, &bgcount));
    PetscCall(PetscSFSetGraph(bgcount, 1, sf->nranks, NULL, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount, &indegree));
    /* Enumerate the incoming ranks: each leaf sends its own rank to the root via gather */
    PetscCall(PetscMalloc2(indegree[0], &inranks, sf->nranks, &outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
    for (i = 0; i < sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount, MPI_INT, outranks, inranks));
    PetscCall(PetscSFGatherEnd(bgcount, MPI_INT, outranks, inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, indegree[0], inranks, &sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks, outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  /* Outgoing group is simply the ranks my leaves reference, already known from setup */
  if (sf->outgroup == MPI_GROUP_NULL) {
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf), &group));
    PetscCallMPI(MPI_Group_incl(group, sf->nranks, sf->ranks, &sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1117 
1118 /*@
1119    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1120 
1121    Collective
1122 
1123    Input Parameter:
1124 .  sf - star forest that may contain roots with 0 or with more than 1 vertex
1125 
1126    Output Parameter:
1127 .  multi - star forest with split roots, such that each root has degree exactly 1
1128 
1129    Level: developer
1130 
1131    Notes:
1132 
1133    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1134    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1135    edge, it is a candidate for future optimization that might involve its removal.
1136 
1137 .seealso: `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
1138 @*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf, PetscSF *multi) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscValidPointer(multi, 2);
  if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    *multi           = sf->multi;
    sf->multi->multi = sf->multi; /* the multi-SF is its own multi-SF */
    PetscFunctionReturn(0);
  }
  /* Built lazily on first request and cached in sf->multi */
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt        i, *inoffset, *outones, *outoffset, maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf + 1; /* TODO: We should use PetscSFGetLeafRange() */
    PetscCall(PetscSFComputeDegreeBegin(sf, &indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf, &indegree));
    /* inoffset = prefix sum of root degrees: each root i owns split roots inoffset[i]..inoffset[i+1]-1 */
    PetscCall(PetscMalloc3(sf->nroots + 1, &inoffset, maxlocal, &outones, maxlocal, &outoffset));
    inoffset[0] = 0;
    for (i = 0; i < sf->nroots; i++) inoffset[i + 1] = inoffset[i] + indegree[i];
    for (i = 0; i < maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands each leaf a distinct slot (in outoffset) within its root's range */
    PetscCall(PetscSFFetchAndOpBegin(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf, MPIU_INT, inoffset, outones, outoffset, MPI_SUM));
    for (i = 0; i < sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) {                               /* Check that the expected number of increments occurred */
      for (i = 0; i < sf->nroots; i++) PetscCheck(inoffset[i] + indegree[i] == inoffset[i + 1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect result after PetscSFFetchAndOp");
    }
    /* Each leaf now targets its private split root on the same remote rank */
    PetscCall(PetscMalloc1(sf->nleaves, &remote));
    for (i = 0; i < sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, &sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, remote, PETSC_OWN_POINTER));
    if (sf->rankorder) { /* Sort the ranks */
      PetscMPIInt  rank;
      PetscInt    *inranks, *newoffset, *outranks, *newoutoffset, *tmpoffset, maxdegree;
      PetscSFNode *newremote;
      /* Gather the sending rank of every split root, so slots can be reordered by rank */
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
      for (i = 0, maxdegree = 0; i < sf->nroots; i++) maxdegree = PetscMax(maxdegree, indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots, &inranks, sf->multi->nroots, &newoffset, maxlocal, &outranks, maxlocal, &newoutoffset, maxdegree, &tmpoffset));
      for (i = 0; i < maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi, MPIU_INT, outranks, inranks, MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i = 0; i < sf->nroots; i++) {
        PetscInt j;
        for (j = 0; j < indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i], inranks + inoffset[i], tmpoffset));
        for (j = 0; j < indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its new (rank-ordered) slot and rebuild the multi graph with it */
      PetscCall(PetscSFBcastBegin(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi, MPIU_INT, newoffset, newoutoffset, MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves, &newremote));
      for (i = 0; i < sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi, inoffset[sf->nroots], sf->nleaves, sf->mine, PETSC_COPY_VALUES, newremote, PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks, newoffset, outranks, newoutoffset, tmpoffset));
    }
    PetscCall(PetscFree3(inoffset, outones, outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1206 
1207 /*@C
1208    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1209 
1210    Collective
1211 
1212    Input Parameters:
1213 +  sf - original star forest
1214 .  nselected  - number of selected roots on this process
1215 -  selected   - indices of the selected roots on this process
1216 
1217    Output Parameter:
1218 .  esf - new star forest
1219 
1220    Level: advanced
1221 
1222    Note:
1223    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1224    be done by calling PetscSFGetGraph().
1225 
1226 .seealso: `PetscSFSetGraph()`, `PetscSFGetGraph()`
1227 @*/
1228 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *esf) {
1229   PetscInt           i, j, n, nroots, nleaves, esf_nleaves, *new_ilocal, minleaf, maxleaf, maxlocal;
1230   const PetscInt    *ilocal;
1231   signed char       *rootdata, *leafdata, *leafmem;
1232   const PetscSFNode *iremote;
1233   PetscSFNode       *new_iremote;
1234   MPI_Comm           comm;
1235 
1236   PetscFunctionBegin;
1237   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1238   PetscSFCheckGraphSet(sf, 1);
1239   if (nselected) PetscValidIntPointer(selected, 3);
1240   PetscValidPointer(esf, 4);
1241 
1242   PetscCall(PetscSFSetUp(sf));
1243   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF, sf, 0, 0, 0));
1244   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1245   PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
1246 
1247   if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or  out of range indices */
1248     PetscBool dups;
1249     PetscCall(PetscCheckDupsInt(nselected, selected, &dups));
1250     PetscCheck(!dups, comm, PETSC_ERR_ARG_WRONG, "selected[] has dups");
1251     for (i = 0; i < nselected; i++) PetscCheck(selected[i] >= 0 && selected[i] < nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")", selected[i], nroots);
1252   }
1253 
1254   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf, CreateEmbeddedRootSF, nselected, selected, esf);
1255   else {
1256     /* A generic version of creating embedded sf */
1257     PetscCall(PetscSFGetLeafRange(sf, &minleaf, &maxleaf));
1258     maxlocal = maxleaf - minleaf + 1;
1259     PetscCall(PetscCalloc2(nroots, &rootdata, maxlocal, &leafmem));
1260     leafdata = leafmem - minleaf;
1261     /* Tag selected roots and bcast to leaves */
1262     for (i = 0; i < nselected; i++) rootdata[selected[i]] = 1;
1263     PetscCall(PetscSFBcastBegin(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1264     PetscCall(PetscSFBcastEnd(sf, MPI_SIGNED_CHAR, rootdata, leafdata, MPI_REPLACE));
1265 
1266     /* Build esf with leaves that are still connected */
1267     esf_nleaves = 0;
1268     for (i = 0; i < nleaves; i++) {
1269       j = ilocal ? ilocal[i] : i;
1270       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1271          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1272       */
1273       esf_nleaves += (leafdata[j] ? 1 : 0);
1274     }
1275     PetscCall(PetscMalloc1(esf_nleaves, &new_ilocal));
1276     PetscCall(PetscMalloc1(esf_nleaves, &new_iremote));
1277     for (i = n = 0; i < nleaves; i++) {
1278       j = ilocal ? ilocal[i] : i;
1279       if (leafdata[j]) {
1280         new_ilocal[n]        = j;
1281         new_iremote[n].rank  = iremote[i].rank;
1282         new_iremote[n].index = iremote[i].index;
1283         ++n;
1284       }
1285     }
1286     PetscCall(PetscSFCreate(comm, esf));
1287     PetscCall(PetscSFSetFromOptions(*esf));
1288     PetscCall(PetscSFSetGraph(*esf, nroots, esf_nleaves, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1289     PetscCall(PetscFree2(rootdata, leafmem));
1290   }
1291   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF, sf, 0, 0, 0));
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 /*@C
1296   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1297 
1298   Collective
1299 
1300   Input Parameters:
1301 + sf - original star forest
1302 . nselected  - number of selected leaves on this process
1303 - selected   - indices of the selected leaves on this process
1304 
1305   Output Parameter:
1306 .  newsf - new star forest
1307 
1308   Level: advanced
1309 
1310 .seealso: `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1311 @*/
1312 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) {
1313   const PetscSFNode *iremote;
1314   PetscSFNode       *new_iremote;
1315   const PetscInt    *ilocal;
1316   PetscInt           i, nroots, *leaves, *new_ilocal;
1317   MPI_Comm           comm;
1318 
1319   PetscFunctionBegin;
1320   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1321   PetscSFCheckGraphSet(sf, 1);
1322   if (nselected) PetscValidIntPointer(selected, 3);
1323   PetscValidPointer(newsf, 4);
1324 
1325   /* Uniq selected[] and put results in leaves[] */
1326   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1327   PetscCall(PetscMalloc1(nselected, &leaves));
1328   PetscCall(PetscArraycpy(leaves, selected, nselected));
1329   PetscCall(PetscSortedRemoveDupsInt(&nselected, leaves));
1330   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected - 1] >= sf->nleaves), comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", leaves[0], leaves[nselected - 1], sf->nleaves);
1331 
1332   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1333   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf, CreateEmbeddedLeafSF, nselected, leaves, newsf);
1334   else {
1335     PetscCall(PetscSFGetGraph(sf, &nroots, NULL, &ilocal, &iremote));
1336     PetscCall(PetscMalloc1(nselected, &new_ilocal));
1337     PetscCall(PetscMalloc1(nselected, &new_iremote));
1338     for (i = 0; i < nselected; ++i) {
1339       const PetscInt l     = leaves[i];
1340       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1341       new_iremote[i].rank  = iremote[l].rank;
1342       new_iremote[i].index = iremote[l].index;
1343     }
1344     PetscCall(PetscSFDuplicate(sf, PETSCSF_DUPLICATE_CONFONLY, newsf));
1345     PetscCall(PetscSFSetGraph(*newsf, nroots, nselected, new_ilocal, PETSC_OWN_POINTER, new_iremote, PETSC_OWN_POINTER));
1346   }
1347   PetscCall(PetscFree(leaves));
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 /*@C
1352    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1353 
1354    Collective on PetscSF
1355 
1356    Input Parameters:
1357 +  sf - star forest on which to communicate
1358 .  unit - data type associated with each node
1359 .  rootdata - buffer to broadcast
1360 -  op - operation to use for reduction
1361 
1362    Output Parameter:
1363 .  leafdata - buffer to be reduced with values from each leaf's respective root
1364 
1365    Level: intermediate
1366 
1367    Notes:
1368     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
    are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, you should
1370     use PetscSFBcastWithMemTypeBegin() instead.
1371 .seealso: `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1372 @*/
1373 PetscErrorCode PetscSFBcastBegin(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) {
1374   PetscMemType rootmtype, leafmtype;
1375 
1376   PetscFunctionBegin;
1377   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1378   PetscCall(PetscSFSetUp(sf));
1379   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
1380   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1381   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1382   PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
1383   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 /*@C
1388    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()
1389 
1390    Collective on PetscSF
1391 
1392    Input Parameters:
1393 +  sf - star forest on which to communicate
1394 .  unit - data type associated with each node
1395 .  rootmtype - memory type of rootdata
1396 .  rootdata - buffer to broadcast
1397 .  leafmtype - memory type of leafdata
1398 -  op - operation to use for reduction
1399 
1400    Output Parameter:
1401 .  leafdata - buffer to be reduced with values from each leaf's respective root
1402 
1403    Level: intermediate
1404 
1405 .seealso: `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1406 @*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
  /* Caller supplied the memory types, so skip the PetscGetMemType() detection done by PetscSFBcastBegin() */
  PetscUseTypeMethod(sf, BcastBegin, unit, rootmtype, rootdata, leafmtype, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}
1416 
1417 /*@C
1418    PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()
1419 
1420    Collective
1421 
1422    Input Parameters:
1423 +  sf - star forest
1424 .  unit - data type
1425 .  rootdata - buffer to broadcast
1426 -  op - operation to use for reduction
1427 
1428    Output Parameter:
1429 .  leafdata - buffer to be reduced with values from each leaf's respective root
1430 
1431    Level: intermediate
1432 
1433 .seealso: `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1434 @*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata, MPI_Op op) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd, sf, 0, 0, 0));
  /* Complete the communication started by PetscSFBcastBegin()/PetscSFBcastWithMemTypeBegin() */
  PetscUseTypeMethod(sf, BcastEnd, unit, rootdata, leafdata, op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}
1443 
1444 /*@C
1445    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1446 
1447    Collective
1448 
1449    Input Parameters:
1450 +  sf - star forest
1451 .  unit - data type
1452 .  leafdata - values to reduce
1453 -  op - reduction operation
1454 
1455    Output Parameter:
1456 .  rootdata - result of reduction of values from all leaves of each root
1457 
1458    Level: intermediate
1459 
1460    Notes:
1461     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
    are host pointers or device pointers, which may incur a noticeable cost. If you already know the memory types, you should
1463     use PetscSFReduceWithMemTypeBegin() instead.
1464 
1465 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`
1466 @*/
1467 PetscErrorCode PetscSFReduceBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) {
1468   PetscMemType rootmtype, leafmtype;
1469 
1470   PetscFunctionBegin;
1471   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1472   PetscCall(PetscSFSetUp(sf));
1473   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1474   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1475   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1476   PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1477   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 /*@C
1482    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1483 
1484    Collective
1485 
1486    Input Parameters:
1487 +  sf - star forest
1488 .  unit - data type
1489 .  leafmtype - memory type of leafdata
1490 .  leafdata - values to reduce
1491 .  rootmtype - memory type of rootdata
1492 -  op - reduction operation
1493 
1494    Output Parameter:
1495 .  rootdata - result of reduction of values from all leaves of each root
1496 
1497    Level: intermediate
1498 
1499 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceBegin()`
1500 @*/
1501 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) {
1502   PetscFunctionBegin;
1503   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1504   PetscCall(PetscSFSetUp(sf));
1505   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1506   PetscCall((sf->ops->ReduceBegin)(sf, unit, leafmtype, leafdata, rootmtype, rootdata, op));
1507   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin, sf, 0, 0, 0));
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 /*@C
1512    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1513 
1514    Collective
1515 
1516    Input Parameters:
1517 +  sf - star forest
1518 .  unit - data type
1519 .  leafdata - values to reduce
1520 -  op - reduction operation
1521 
1522    Output Parameter:
1523 .  rootdata - result of reduction of values from all leaves of each root
1524 
1525    Level: intermediate
1526 
1527 .seealso: `PetscSFSetGraph()`, `PetscSFBcastEnd()`
1528 @*/
1529 PetscErrorCode PetscSFReduceEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *rootdata, MPI_Op op) {
1530   PetscFunctionBegin;
1531   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1532   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1533   PetscUseTypeMethod(sf, ReduceEnd, unit, leafdata, rootdata, op);
1534   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd, sf, 0, 0, 0));
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 /*@C
1539    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1540 
1541    Collective
1542 
1543    Input Parameters:
1544 +  sf - star forest
1545 .  unit - data type
1546 .  leafdata - leaf values to use in reduction
1547 -  op - operation to use for reduction
1548 
1549    Output Parameters:
1550 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1551 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1552 
1553    Level: advanced
1554 
1555    Note:
1556    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1557    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1558    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1559    integers.
1560 
1561 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1562 @*/
1563 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) {
1564   PetscMemType rootmtype, leafmtype, leafupdatemtype;
1565 
1566   PetscFunctionBegin;
1567   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1568   PetscCall(PetscSFSetUp(sf));
1569   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1570   PetscCall(PetscGetMemType(rootdata, &rootmtype));
1571   PetscCall(PetscGetMemType(leafdata, &leafmtype));
1572   PetscCall(PetscGetMemType(leafupdate, &leafupdatemtype));
1573   PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
1574   PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
1575   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
1576   PetscFunctionReturn(0);
1577 }
1578 
/*@C
   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootmtype - memory type of rootdata
.  leafmtype - memory type of leafdata
.  leafdata - leaf values to use in reduction
.  leafupdatemtype - memory type of leafupdate
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note: See PetscSFFetchAndOpBegin() for more details.

.seealso: `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, void *rootdata, PetscMemType leafmtype, const void *leafdata, PetscMemType leafupdatemtype, void *leafupdate, MPI_Op op) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  /* Implementations require leafdata and leafupdate to live in the same memory space */
  PetscCheck(leafmtype == leafupdatemtype, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf, FetchAndOpBegin, unit, rootmtype, rootdata, leafmtype, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}
1613 
/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf, MPI_Datatype unit, void *rootdata, const void *leafdata, void *leafupdate, MPI_Op op) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  /* Complete the communication started by PetscSFFetchAndOpBegin(); fills leafupdate and updates rootdata */
  PetscUseTypeMethod(sf, FetchAndOpEnd, unit, rootdata, leafdata, leafupdate, op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd, sf, 0, 0, 0));
  PetscFunctionReturn(0);
}
1641 
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
   The result is cached on the SF; only the first Begin/End pair performs communication.

.seealso: `PetscSFGatherBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf, const PetscInt **degree) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscValidPointer(degree, 2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree doubles as the "a Begin is pending" flag; it is only freed at destroy time */
    PetscCheck(!sf->degree, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf - sf->minleaf + 1; /* size of the dense local leaf range */
    PetscCall(PetscMalloc1(nroots, &sf->degree));
    PetscCall(PetscMalloc1(PetscMax(maxlocal, 1), &sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i = 0; i < nroots; i++) sf->degree[i] = 0;
    for (i = 0; i < maxlocal; i++) sf->degreetmp[i] = 1;
    /* Summing a 1 from every leaf into its root gives the root's degree; the pointer is offset by
       minleaf so the reduce can index degreetmp by leaf numbers in [minleaf, maxleaf] */
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
  }
  *degree = NULL; /* result only becomes available in PetscSFComputeDegreeEnd() */
  PetscFunctionReturn(0);
}
1678 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: `PetscSFComputeDegreeBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf, const PetscInt **degree) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sf, 1);
  PetscValidPointer(degree, 2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated (non-NULL even for zero local leaves) by a matching Begin call */
    PetscCheck(sf->degreetmp, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Same minleaf offset as in PetscSFComputeDegreeBegin() */
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, sf->degreetmp - sf->minleaf, sf->degree, MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    sf->degreeknown = PETSC_TRUE; /* cache: later Begin/End pairs skip communication */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1711 
1712 /*@C
1713    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1714    Each multi-root is assigned index of the corresponding original root.
1715 
1716    Collective
1717 
1718    Input Parameters:
1719 +  sf - star forest
1720 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1721 
1722    Output Parameters:
1723 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1724 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1725 
1726    Level: developer
1727 
1728    Notes:
1729    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1730 
1731 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1732 @*/
1733 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) {
1734   PetscSF  msf;
1735   PetscInt i, j, k, nroots, nmroots;
1736 
1737   PetscFunctionBegin;
1738   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1739   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1740   if (nroots) PetscValidIntPointer(degree, 2);
1741   if (nMultiRoots) PetscValidIntPointer(nMultiRoots, 3);
1742   PetscValidPointer(multiRootsOrigNumbering, 4);
1743   PetscCall(PetscSFGetMultiSF(sf, &msf));
1744   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1745   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1746   for (i = 0, j = 0, k = 0; i < nroots; i++) {
1747     if (!degree[i]) continue;
1748     for (j = 0; j < degree[i]; j++, k++) (*multiRootsOrigNumbering)[k] = i;
1749   }
1750   PetscCheck(k == nmroots, PETSC_COMM_SELF, PETSC_ERR_PLIB, "sanity check fail");
1751   if (nMultiRoots) *nMultiRoots = nmroots;
1752   PetscFunctionReturn(0);
1753 }
1754 
1755 /*@C
1756    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1757 
1758    Collective
1759 
1760    Input Parameters:
1761 +  sf - star forest
1762 .  unit - data type
1763 -  leafdata - leaf data to gather to roots
1764 
1765    Output Parameter:
1766 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1767 
1768    Level: intermediate
1769 
1770 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1771 @*/
1772 PetscErrorCode PetscSFGatherBegin(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) {
1773   PetscSF multi = NULL;
1774 
1775   PetscFunctionBegin;
1776   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1777   PetscCall(PetscSFSetUp(sf));
1778   PetscCall(PetscSFGetMultiSF(sf, &multi));
1779   PetscCall(PetscSFReduceBegin(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1780   PetscFunctionReturn(0);
1781 }
1782 
1783 /*@C
1784    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1785 
1786    Collective
1787 
1788    Input Parameters:
1789 +  sf - star forest
1790 .  unit - data type
1791 -  leafdata - leaf data to gather to roots
1792 
1793    Output Parameter:
1794 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1795 
1796    Level: intermediate
1797 
1798 .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1799 @*/
1800 PetscErrorCode PetscSFGatherEnd(PetscSF sf, MPI_Datatype unit, const void *leafdata, void *multirootdata) {
1801   PetscSF multi = NULL;
1802 
1803   PetscFunctionBegin;
1804   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1805   PetscCall(PetscSFGetMultiSF(sf, &multi));
1806   PetscCall(PetscSFReduceEnd(multi, unit, leafdata, multirootdata, MPI_REPLACE));
1807   PetscFunctionReturn(0);
1808 }
1809 
1810 /*@C
1811    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1812 
1813    Collective
1814 
1815    Input Parameters:
1816 +  sf - star forest
1817 .  unit - data type
1818 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1819 
1820    Output Parameter:
1821 .  leafdata - leaf data to be update with personal data from each respective root
1822 
1823    Level: intermediate
1824 
1825 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1826 @*/
1827 PetscErrorCode PetscSFScatterBegin(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) {
1828   PetscSF multi = NULL;
1829 
1830   PetscFunctionBegin;
1831   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1832   PetscCall(PetscSFSetUp(sf));
1833   PetscCall(PetscSFGetMultiSF(sf, &multi));
1834   PetscCall(PetscSFBcastBegin(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 /*@C
1839    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1840 
1841    Collective
1842 
1843    Input Parameters:
1844 +  sf - star forest
1845 .  unit - data type
1846 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1847 
1848    Output Parameter:
1849 .  leafdata - leaf data to be update with personal data from each respective root
1850 
1851    Level: intermediate
1852 
1853 .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1854 @*/
1855 PetscErrorCode PetscSFScatterEnd(PetscSF sf, MPI_Datatype unit, const void *multirootdata, void *leafdata) {
1856   PetscSF multi = NULL;
1857 
1858   PetscFunctionBegin;
1859   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
1860   PetscCall(PetscSFGetMultiSF(sf, &multi));
1861   PetscCall(PetscSFBcastEnd(multi, unit, multirootdata, leafdata, MPI_REPLACE));
1862   PetscFunctionReturn(0);
1863 }
1864 
1865 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) {
1866   PetscInt        i, n, nleaves;
1867   const PetscInt *ilocal = NULL;
1868   PetscHSetI      seen;
1869 
1870   PetscFunctionBegin;
1871   if (PetscDefined(USE_DEBUG)) {
1872     PetscCall(PetscSFGetGraph(sf, NULL, &nleaves, &ilocal, NULL));
1873     PetscCall(PetscHSetICreate(&seen));
1874     for (i = 0; i < nleaves; i++) {
1875       const PetscInt leaf = ilocal ? ilocal[i] : i;
1876       PetscCall(PetscHSetIAdd(seen, leaf));
1877     }
1878     PetscCall(PetscHSetIGetSize(seen, &n));
1879     PetscCheck(n == nleaves, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Provided leaves have repeated values: all leaves must be unique");
1880     PetscCall(PetscHSetIDestroy(&seen));
1881   }
1882   PetscFunctionReturn(0);
1883 }
1884 
/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) {
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA = NULL, *reorderedRemotePointsA = NULL, *leafdataB;
  const PetscInt    *localPointsA, *localPointsB;
  PetscInt          *localPointsBA;
  PetscInt           i, numRootsA, numLeavesA, numRootsB, numLeavesB, minleaf, maxleaf, numLeavesBA;
  PetscBool          denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscValidPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
  /* If sfA stores leaves non-contiguously, rearrange its remote roots into an array indexed by
     sfB's root numbering; slots with no incoming edge from sfA are marked (-1,-1) */
  if (localPointsA) {
    PetscCall(PetscMalloc1(numRootsB, &reorderedRemotePointsA));
    for (i = 0; i < numRootsB; i++) {
      reorderedRemotePointsA[i].rank  = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i = 0; i < numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf outside sfB's root space: no path */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &leafdataB));
  /* Push sfA's root locations down through sfB; the pointer offset lets the leaf buffer be
     indexed by global leaf numbers in [minleaf, maxleaf] */
  PetscCall(PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, leafdataB - minleaf, MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* denseB: every leaf of sfB received a root of sfA, and sfB's leaves were contiguous, so the
     composite SF can use ilocal == NULL and reuse leafdataB as its remote array */
  denseB = (PetscBool)!localPointsB;
  for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i] - minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership passes to the new SF below */
  } else {
    /* Compact: keep only leaves that actually have a path back to a root of sfA */
    PetscCall(PetscMalloc1(numLeavesBA, &localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA, &remotePointsBA));
    for (i = 0, numLeavesBA = 0; i < numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l - minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l - minleaf];
      localPointsBA[numLeavesBA]  = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}
1972 
/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA, PetscSF sfB, PetscSF *sfBA) {
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA, *localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt           i, numRootsA, numLeavesA, numLeavesBA, numRootsB, numLeavesB, minleaf, maxleaf, *localPointsBA;
  MPI_Op             op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscCheckSameComm(sfA, 1, sfB, 2);
  PetscValidPointer(sfBA, 3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not meet.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB, PETSCSFWINDOW, &iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  /* Arrange sfA's remote roots into a buffer indexed by sfB's (dense) leaf range; slots with no
     edge from sfA stay (-1,-1) and are filtered out after the reduce */
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1, &reorderedRemotePointsA));
  for (i = 0; i < maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank  = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i = 0; i < numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* sfA leaf outside sfB's leaf range: no path */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    for (i = 0; i < numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue; /* contiguous sfA leaf outside sfB's leaf range */
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  PetscCall(PetscMalloc1(numRootsB, &localPointsBA));
  PetscCall(PetscMalloc1(numRootsB, &remotePointsBA));
  for (i = 0; i < numRootsB; i++) {
    remotePointsBA[i].rank  = -1;
    remotePointsBA[i].index = -1;
  }

  /* Pull sfA's root locations up from sfB's leaves to sfB's roots; the minleaf offset matches the
     indexing of reorderedRemotePointsA above */
  PetscCall(PetscSFReduceBegin(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
  PetscCall(PetscSFReduceEnd(sfB, MPIU_2INT, reorderedRemotePointsA - minleaf, remotePointsBA, op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* Compact in place: keep only sfB roots that received a root of sfA */
  for (i = 0, numLeavesBA = 0; i < numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank  = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA]        = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA), sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA, numRootsA, numLeavesBA, localPointsBA, PETSC_OWN_POINTER, remotePointsBA, PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}
2074 
/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF (on PETSC_COMM_SELF); caller is responsible for destroying it
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf, PetscSF *out) {
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscInt           i, j, nroots, nleaves, lnleaves, *lilocal;
  PetscSFNode       *liremote;
  PetscSF            lsf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
  /* Let the implementation build the local SF if it provides a specialized method */
  if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf, CreateLocalSF, out);
  else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
    PetscCallMPI(MPI_Comm_rank(comm, &myrank));

    /* Find out local edges and build a local SF */
    PetscCall(PetscSFGetGraph(sf, &nroots, &nleaves, &ilocal, &iremote));
    /* First pass: count edges whose remote root lives on this rank */
    for (i = lnleaves = 0; i < nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) lnleaves++;
    }
    PetscCall(PetscMalloc1(lnleaves, &lilocal));
    PetscCall(PetscMalloc1(lnleaves, &liremote));

    /* Second pass: copy the intra-process edges, remapping the root's rank to 0 */
    for (i = j = 0; i < nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j]        = ilocal ? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0;                      /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
    PetscCall(PetscSFSetFromOptions(lsf));
    PetscCall(PetscSFSetGraph(lsf, nroots, lnleaves, lilocal, PETSC_OWN_POINTER, liremote, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(lsf));
    *out = lsf;
  }
  PetscFunctionReturn(0);
}
2125 
2126 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2127 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf, MPI_Datatype unit, const void *rootdata, void *leafdata) {
2128   PetscMemType rootmtype, leafmtype;
2129 
2130   PetscFunctionBegin;
2131   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
2132   PetscCall(PetscSFSetUp(sf));
2133   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin, sf, 0, 0, 0));
2134   PetscCall(PetscGetMemType(rootdata, &rootmtype));
2135   PetscCall(PetscGetMemType(leafdata, &leafmtype));
2136   PetscUseTypeMethod(sf, BcastToZero, unit, rootmtype, rootdata, leafmtype, leafdata);
2137   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin, sf, 0, 0, 0));
2138   PetscFunctionReturn(0);
2139 }
2140 
2141 /*@
2142   PetscSFConcatenate - concatenate multiple SFs into one
2143 
2144   Input Parameters:
2145 + comm - the communicator
2146 . nsfs - the number of input PetscSF
2147 . sfs  - the array of input PetscSF
2148 . shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE)
2149 - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage
2150 
2151   Output Parameters:
2152 . newsf - The resulting PetscSF
2153 
2154   Level: developer
2155 
2156   Notes:
2157   The communicator of all SFs in sfs must be comm.
2158 
2159   The offsets in leafOffsets are added to the original leaf indices.
2160 
2161   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2162   In this case, NULL is also passed as ilocal to the resulting SF.
2163 
2164   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2165   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2166 
2167 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2168 @*/
2169 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf) {
2170   PetscInt     i, s, nLeaves, nRoots;
2171   PetscInt    *leafArrayOffsets; /* prefix sums of the input SFs' leaf counts: where each SF's leaves land in the concatenated arrays */
2172   PetscInt    *ilocal_new;       /* concatenated leaf indices for the result; stays NULL when all inputs use contiguous leaf storage */
2173   PetscSFNode *iremote_new;      /* concatenated remote (rank, root index) specifications for the result */
2174   PetscInt    *rootOffsets;      /* prefix sums of the input SFs' root counts; all zero when shareRoots (no root renumbering) */
2175   PetscBool    all_ilocal_null = PETSC_FALSE;
2176   PetscMPIInt  rank;
2177 
2178   PetscFunctionBegin;
     /* Argument validation: the collective-consistency macros need a PetscObject on comm,
        so create a throwaway SF purely to anchor them, then destroy it. */
2179   {
2180     PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2181 
2182     PetscCall(PetscSFCreate(comm, &dummy));
2183     PetscValidLogicalCollectiveInt(dummy, nsfs, 2);
2184     PetscValidPointer(sfs, 3);
2185     for (i = 0; i < nsfs; i++) {
2186       PetscValidHeaderSpecific(sfs[i], PETSCSF_CLASSID, 3);
2187       PetscCheckSameComm(dummy, 1, sfs[i], 3);
2188     }
2189     PetscValidLogicalCollectiveBool(dummy, shareRoots, 4);
2190     if (leafOffsets) PetscValidIntPointer(leafOffsets, 5);
2191     PetscValidPointer(newsf, 6);
2192     PetscCall(PetscSFDestroy(&dummy));
2193   }
     /* Concatenating zero SFs yields an empty SF on comm */
2194   if (!nsfs) {
2195     PetscCall(PetscSFCreate(comm, newsf));
2196     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2197     PetscFunctionReturn(0);
2198   }
2199   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2200 
     /* Determine the root space of the result.
        shareRoots: all inputs address the same roots, so the result has sfs[0]'s root count
        and rootOffsets stays all-zero (calloc). Otherwise: root spaces are stacked, and
        rootOffsets[s] is the shift applied to sfs[s]'s root indices. */
2201   PetscCall(PetscCalloc1(nsfs + 1, &rootOffsets));
2202   if (shareRoots) {
2203     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2204     if (PetscDefined(USE_DEBUG)) {
       /* debug-only: shared roots require every input to have the same local root count */
2205       for (s = 1; s < nsfs; s++) {
2206         PetscInt nr;
2207 
2208         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2209         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
2210       }
2211     }
2212   } else {
2213     for (s = 0; s < nsfs; s++) {
2214       PetscInt nr;
2215 
2216       PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2217       rootOffsets[s + 1] = rootOffsets[s] + nr;
2218     }
2219     nRoots = rootOffsets[nsfs];
2220   }
2221 
2222   /* Calculate leaf array offsets and automatic root offsets */
2223   PetscCall(PetscMalloc1(nsfs + 1, &leafArrayOffsets));
2224   leafArrayOffsets[0] = 0;
2225   for (s = 0; s < nsfs; s++) {
2226     PetscInt nl;
2227 
2228     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2229     leafArrayOffsets[s + 1] = leafArrayOffsets[s] + nl;
2230   }
2231   nLeaves = leafArrayOffsets[nsfs];
2232 
     /* leafOffsets == NULL is only legal when every input uses contiguous leaf storage
        (ilocal == NULL); then the result can use contiguous storage too */
2233   if (!leafOffsets) {
2234     all_ilocal_null = PETSC_TRUE;
2235     for (s = 0; s < nsfs; s++) {
2236       const PetscInt *ilocal;
2237 
2238       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2239       if (ilocal) {
2240         all_ilocal_null = PETSC_FALSE;
2241         break;
2242       }
2243     }
2244     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2245   }
2246 
2247   /* Renumber and concatenate local leaves */
2248   ilocal_new = NULL;
2249   if (!all_ilocal_null) {
     /* leafOffsets is non-NULL here: reaching this point with leafOffsets == NULL would
        have failed the PetscCheck above */
2250     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2251     for (i = 0; i < nLeaves; i++) ilocal_new[i] = -1; /* sentinel; real values filled per input SF below */
2252     for (s = 0; s < nsfs; s++) {
2253       const PetscInt *ilocal;
2254       PetscInt       *ilocal_l = &ilocal_new[leafArrayOffsets[s]]; /* slice of ilocal_new owned by sfs[s] */
2255       PetscInt        i, nleaves_l;
2256 
2257       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
       /* ilocal == NULL means leaf j has index j; shift by the user-supplied per-SF offset */
2258       for (i = 0; i < nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2259     }
2260   }
2261 
2262   /* Renumber and concatenate remote roots */
2263   PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2264   for (i = 0; i < nLeaves; i++) {
2265     iremote_new[i].rank  = -1;
2266     iremote_new[i].index = -1;
2267   }
     /* For each input SF, have every root broadcast its new identity (owning rank, shifted
        root index) to its leaves; the leaf buffer is aimed directly at sfs[s]'s slice of
        iremote_new, so the result's remote entries are filled in place. MPIU_2INT carries
        one PetscSFNode (a pair of PetscInts) per graph edge. */
2268   for (s = 0; s < nsfs; s++) {
2269     PetscInt           i, nl, nr;
2270     PetscSF            tmp_sf;
2271     const PetscSFNode *iremote;
2272     PetscSFNode       *tmp_rootdata;
2273     PetscSFNode       *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];
2274 
2275     PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
2276     PetscCall(PetscSFCreate(comm, &tmp_sf));
2277     /* create helper SF with contiguous leaves */
2278     PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode *)iremote, PETSC_COPY_VALUES));
2279     PetscCall(PetscSFSetUp(tmp_sf));
2280     PetscCall(PetscMalloc1(nr, &tmp_rootdata));
2281     for (i = 0; i < nr; i++) {
2282       tmp_rootdata[i].index = i + rootOffsets[s]; /* root's index in the concatenated root space */
2283       tmp_rootdata[i].rank  = (PetscInt)rank;
2284     }
2285     PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2286     PetscCall(PetscSFBcastEnd(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2287     PetscCall(PetscSFDestroy(&tmp_sf));
2288     PetscCall(PetscFree(tmp_rootdata));
2289   }
2290 
2291   /* Build the new SF */
2292   PetscCall(PetscSFCreate(comm, newsf));
     /* ownership of ilocal_new and iremote_new transfers to *newsf (PETSC_OWN_POINTER) */
2293   PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
2294   PetscCall(PetscSFSetUp(*newsf));
2295   PetscCall(PetscFree(rootOffsets));
2296   PetscCall(PetscFree(leafArrayOffsets));
2297   PetscFunctionReturn(0);
2298 }
2298 }
2299