xref: /petsc/src/vec/is/sf/interface/sf.c (revision f97672e55eacc8688507b9471cd7ec2664d7f203)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petscctable.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <cuda_runtime.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 #endif
13 
14 #if defined(PETSC_CLANG_STATIC_ANALYZER)
15 void PetscSFCheckGraphSet(PetscSF,int);
16 #else
17 #if defined(PETSC_USE_DEBUG)
18 #  define PetscSFCheckGraphSet(sf,arg) do {                          \
19     if (PetscUnlikely(!(sf)->graphset))                              \
20       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
21   } while (0)
22 #else
23 #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
24 #endif
25 #endif
26 
27 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
28 
29 /*@
30    PetscSFCreate - create a star forest communication context
31 
32    Collective
33 
34    Input Parameter:
35 .  comm - communicator on which the star forest will operate
36 
37    Output Parameter:
38 .  sf - new star forest context
39 
40    Options Database Keys:
41 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
42 .  -sf_type window    -Use MPI-3 one-sided window for communication
43 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
44 
45    Level: intermediate
46 
47    Notes:
48    When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
49    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
50    SFs are optimized and they have better performance than general SFs.
51 
52 .seealso: `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
53 @*/
54 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
55 {
56   PetscSF        b;
57 
58   PetscFunctionBegin;
59   PetscValidPointer(sf,2);
60   PetscCall(PetscSFInitializePackage());
61 
62   PetscCall(PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView));
63 
64   b->nroots    = -1;
65   b->nleaves   = -1;
66   b->minleaf   = PETSC_MAX_INT;
67   b->maxleaf   = PETSC_MIN_INT;
68   b->nranks    = -1;
69   b->rankorder = PETSC_TRUE;
70   b->ingroup   = MPI_GROUP_NULL;
71   b->outgroup  = MPI_GROUP_NULL;
72   b->graphset  = PETSC_FALSE;
73 #if defined(PETSC_HAVE_DEVICE)
74   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
75   b->use_stream_aware_mpi = PETSC_FALSE;
76   b->unknown_input_stream= PETSC_FALSE;
77   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
78     b->backend = PETSCSF_BACKEND_KOKKOS;
79   #elif defined(PETSC_HAVE_CUDA)
80     b->backend = PETSCSF_BACKEND_CUDA;
81   #elif defined(PETSC_HAVE_HIP)
82     b->backend = PETSCSF_BACKEND_HIP;
83   #endif
84 
85   #if defined(PETSC_HAVE_NVSHMEM)
86     b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
87     b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
88     PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL));
89     PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL));
90   #endif
91 #endif
92   b->vscat.from_n = -1;
93   b->vscat.to_n   = -1;
94   b->vscat.unit   = MPIU_SCALAR;
95  *sf = b;
96   PetscFunctionReturn(0);
97 }
98 
/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Parameter:
.  sf - star forest

   Level: advanced

.seealso: `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation release its own data before the generic fields go away */
  if (sf->ops->Reset) PetscCall((*sf->ops->Reset)(sf));
  /* Return the graph description to its PetscSFCreate() defaults */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine_alloc/remote_alloc are the owned copies of mine/remote (NULL when the user retained ownership) */
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup  != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
  /* Break the multi-SF's back-reference to us before destroying it, so destruction does not recurse */
  if (sf->multi) sf->multi->multi = NULL;
  PetscCall(PetscSFDestroy(&sf->multi));
  PetscCall(PetscLayoutDestroy(&sf->map));

 #if defined(PETSC_HAVE_DEVICE)
  /* Free both device-side copies of the rmine arrays */
  for (PetscInt i=0; i<2; i++) PetscCall(PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]));
 #endif

  /* A subsequent PetscSFSetGraph()/PetscSFSetUp() will rebuild everything */
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
142 
143 /*@C
144    PetscSFSetType - Set the PetscSF communication implementation
145 
146    Collective on PetscSF
147 
148    Input Parameters:
149 +  sf - the PetscSF context
150 -  type - a known method
151 
152    Options Database Key:
153 .  -sf_type <type> - Sets the method; use -help for a list
154    of available methods (for instance, window, basic, neighbor)
155 
156    Notes:
157    See "include/petscsf.h" for available methods (for instance)
158 +    PETSCSFWINDOW - MPI-2/3 one-sided
159 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
160 
161   Level: intermediate
162 
163 .seealso: `PetscSFType`, `PetscSFCreate()`
164 @*/
165 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
166 {
167   PetscBool      match;
168   PetscErrorCode (*r)(PetscSF);
169 
170   PetscFunctionBegin;
171   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
172   PetscValidCharPointer(type,2);
173 
174   PetscCall(PetscObjectTypeCompare((PetscObject)sf,type,&match));
175   if (match) PetscFunctionReturn(0);
176 
177   PetscCall(PetscFunctionListFind(PetscSFList,type,&r));
178   PetscCheck(r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
179   /* Destroy the previous PetscSF implementation context */
180   if (sf->ops->Destroy) PetscCall((*(sf)->ops->Destroy)(sf));
181   PetscCall(PetscMemzero(sf->ops,sizeof(*sf->ops)));
182   PetscCall(PetscObjectChangeTypeName((PetscObject)sf,type));
183   PetscCall((*r)(sf));
184   PetscFunctionReturn(0);
185 }
186 
187 /*@C
188   PetscSFGetType - Get the PetscSF communication implementation
189 
190   Not Collective
191 
192   Input Parameter:
193 . sf  - the PetscSF context
194 
195   Output Parameter:
196 . type - the PetscSF type name
197 
198   Level: intermediate
199 
200 .seealso: `PetscSFSetType()`, `PetscSFCreate()`
201 @*/
202 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
203 {
204   PetscFunctionBegin;
205   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
206   PetscValidPointer(type,2);
207   *type = ((PetscObject)sf)->type_name;
208   PetscFunctionReturn(0);
209 }
210 
/*@C
   PetscSFDestroy - destroy star forest

   Collective

   Input Parameter:
.  sf - address of star forest

   Level: intermediate

.seealso: `PetscSFCreate()`, `PetscSFReset()`
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscFunctionBegin;
  /* Destroying a NULL SF is a no-op, matching other PETSc XXXDestroy() routines */
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the final reference performs the actual teardown */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  PetscCall(PetscSFReset(*sf));
  if ((*sf)->ops->Destroy) PetscCall((*(*sf)->ops->Destroy)(*sf));
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* bs > 1 indicates vscat.unit is a derived datatype (see PetscSFDuplicate()), which we own and must free */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(0);
}
236 
237 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
238 {
239   PetscInt           i, nleaves;
240   PetscMPIInt        size;
241   const PetscInt    *ilocal;
242   const PetscSFNode *iremote;
243 
244   PetscFunctionBegin;
245   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
246   PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote));
247   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size));
248   for (i = 0; i < nleaves; i++) {
249     const PetscInt rank = iremote[i].rank;
250     const PetscInt remote = iremote[i].index;
251     const PetscInt leaf = ilocal ? ilocal[i] : i;
252     PetscCheck(rank >= 0 && rank < size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size);
253     PetscCheck(remote >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i);
254     PetscCheck(leaf >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i);
255   }
256   PetscFunctionReturn(0);
257 }
258 
259 /*@
260    PetscSFSetUp - set up communication structures
261 
262    Collective
263 
264    Input Parameter:
265 .  sf - star forest communication object
266 
267    Level: beginner
268 
269 .seealso: `PetscSFSetFromOptions()`, `PetscSFSetType()`
270 @*/
271 PetscErrorCode PetscSFSetUp(PetscSF sf)
272 {
273   PetscFunctionBegin;
274   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
275   PetscSFCheckGraphSet(sf,1);
276   if (sf->setupcalled) PetscFunctionReturn(0);
277   PetscCall(PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0));
278   PetscCall(PetscSFCheckGraphValid_Private(sf));
279   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf,PETSCSFBASIC)); /* Zero all sf->ops */
280   if (sf->ops->SetUp) PetscCall((*sf->ops->SetUp)(sf));
281 #if defined(PETSC_HAVE_CUDA)
282   if (sf->backend == PETSCSF_BACKEND_CUDA) {
283     sf->ops->Malloc = PetscSFMalloc_CUDA;
284     sf->ops->Free   = PetscSFFree_CUDA;
285   }
286 #endif
287 #if defined(PETSC_HAVE_HIP)
288   if (sf->backend == PETSCSF_BACKEND_HIP) {
289     sf->ops->Malloc = PetscSFMalloc_HIP;
290     sf->ops->Free   = PetscSFFree_HIP;
291   }
292 #endif
293 
294 #
295 #if defined(PETSC_HAVE_KOKKOS)
296   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
297     sf->ops->Malloc = PetscSFMalloc_Kokkos;
298     sf->ops->Free   = PetscSFFree_Kokkos;
299   }
300 #endif
301   PetscCall(PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0));
302   sf->setupcalled = PETSC_TRUE;
303   PetscFunctionReturn(0);
304 }
305 
306 /*@
307    PetscSFSetFromOptions - set PetscSF options using the options database
308 
309    Logically Collective
310 
311    Input Parameter:
312 .  sf - star forest
313 
314    Options Database Keys:
315 +  -sf_type               - implementation type, see PetscSFSetType()
316 .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
317 .  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
318                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
319                             If true, this option only works with -use_gpu_aware_mpi 1.
320 .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
321                                If true, this option only works with -use_gpu_aware_mpi 1.
322 
323 -  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
324                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
325                               the only available is kokkos.
326 
327    Level: intermediate
328 @*/
329 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
330 {
331   PetscSFType    deft;
332   char           type[256];
333   PetscBool      flg;
334 
335   PetscFunctionBegin;
336   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
337   PetscObjectOptionsBegin((PetscObject)sf);
338   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
339   PetscCall(PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg));
340   PetscCall(PetscSFSetType(sf,flg ? type : deft));
341   PetscCall(PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL));
342  #if defined(PETSC_HAVE_DEVICE)
343   {
344     char        backendstr[32] = {0};
345     PetscBool   isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
346     /* Change the defaults set in PetscSFCreate() with command line options */
347     PetscCall(PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL));
348     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL));
349     PetscCall(PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set));
350     PetscCall(PetscStrcasecmp("cuda",backendstr,&isCuda));
351     PetscCall(PetscStrcasecmp("kokkos",backendstr,&isKokkos));
352     PetscCall(PetscStrcasecmp("hip",backendstr,&isHip));
353   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
354     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
355     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
356     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
357     else PetscCheck(!set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
358   #elif defined(PETSC_HAVE_KOKKOS)
359     PetscCheck(!set || isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
360    #endif
361   }
362  #endif
363   if (sf->ops->SetFromOptions) PetscCall((*sf->ops->SetFromOptions)(PetscOptionsObject,sf));
364   PetscOptionsEnd();
365   PetscFunctionReturn(0);
366 }
367 
368 /*@
369    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
370 
371    Logically Collective
372 
373    Input Parameters:
374 +  sf - star forest
375 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
376 
377    Level: advanced
378 
379 .seealso: `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
380 @*/
381 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
382 {
383   PetscFunctionBegin;
384   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
385   PetscValidLogicalCollectiveBool(sf,flg,2);
386   PetscCheck(!sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
387   sf->rankorder = flg;
388   PetscFunctionReturn(0);
389 }
390 
/*@C
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Parameters:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   Leaf indices in ilocal must be unique, otherwise an error occurs.

   Input arrays ilocal and iremote follow the PetscCopyMode semantics.
   In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
   PETSc might modify the respective array;
   if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
   Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

   Fortran Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

   Developer Notes:
   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
   This also allows to compare leaf sets of two SFs easily.

.seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscBool       unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  PetscCheck(nroots  >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots);
  PetscCheck(nleaves >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves);
  /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
   * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
  PetscCheck((int)localmode  >= PETSC_COPY_VALUES && localmode  <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong localmode %d",localmode);
  PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong remotemode %d",remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0));

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES we work on private copies from here on, so the user's
     arrays are never touched by the sort below */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves,&tlocal));
    PetscCall(PetscArraycpy(tlocal,ilocal,nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves,&tremote));
    PetscCall(PetscArraycpy(tremote,iremote,nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode   work;

    /* Sort leaves (permuting iremote alongside) to detect duplicates/contiguity
       and to read off minleaf/maxleaf from the ends */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    unique = PetscNot(unique); /* PetscSortedCheckDupsInt() returns "has dups", invert to "unique" */
    PetscCheck(sf->allow_multi_leaves || unique,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves-1];
    contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1);
  } else {
    /* NULL ilocal means leaves occupy slots 0..nleaves-1 */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* A contiguous leaf set is canonically represented by ilocal == NULL, so drop the array */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  /* mine/remote are the working pointers; *_alloc record ownership (NULL when the user keeps it) */
  sf->mine            = ilocal;
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc    = NULL;
  } else {
    sf->mine_alloc    = ilocal;
  }
  sf->remote          = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc  = NULL;
  } else {
    sf->remote_alloc  = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
506 
/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* map is ignored for ALLTOALL, so only validate it for the other patterns */
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern);
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* Build a layout with `size` roots per rank (size*size globally) */
    PetscCall(PetscLayoutCreate(comm,&sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map,size));
    PetscCall(PetscLayoutSetSize(sf->map,((PetscInt)size)*size));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map,&n));
    PetscCall(PetscLayoutGetSize(map,&N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it.
       MPI_MAX on (n,-n) yields (max n, -min n); they negate each other iff all n are equal. */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map,&sf->map));
  }
  PetscCall(PetscSFSetType(sf,type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 receives leaves in a gather */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
605 
/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Parameter:
.  sf - star forest to invert

   Output Parameter:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL));
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
  PetscCall(PetscMalloc2(nroots,&roots,maxlocal,&leaves));
  /* Each leaf slot advertises its own (rank, slot) identity ... */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* ... while roots start as (-1,-1) so roots never written to are detectable below */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's identity to its root; with degree <= 1 each root receives at most one value */
  PetscCall(PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* dense: contiguous leaves in the inverse SF */
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    PetscCall(PetscMalloc1(count,&newilocal));
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* The inverse SF has maxlocal roots (this rank's former leaf space) and count leaves */
  PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf));
  PetscCall(PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots,leaves));
  PetscFunctionReturn(0);
}
674 
/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Parameters:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Parameter:
.  newsf - new communication object

   Level: beginner

.seealso: `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  MPI_Datatype   dtype=MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf));
  PetscCall(PetscSFGetType(sf,&type));
  if (type) PetscCall(PetscSFSetType(*newsf,type));
  (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graph: deep-copy ilocal/iremote into the new SF */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
      PetscCall(PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES));
    } else {
      /* Patterned graph: recreate it from the stored layout and pattern */
      PetscCall(PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit,&dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream= sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  if (sf->ops->Duplicate) PetscCall((*sf->ops->Duplicate)(sf,opt,*newsf));
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}
734 
735 /*@C
736    PetscSFGetGraph - Get the graph specifying a parallel star forest
737 
738    Not Collective
739 
740    Input Parameter:
741 .  sf - star forest
742 
743    Output Parameters:
744 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
745 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
746 .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
747 -  iremote - remote locations of root vertices for each leaf on the current process
748 
749    Notes:
750      We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
751 
752      The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
753      see its manpage for details.
754 
755    Fortran Notes:
756      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
757      want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
758 
759      To check for a NULL ilocal use
760 $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
761 
762    Level: intermediate
763 
764 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
765 @*/
766 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
767 {
768   PetscFunctionBegin;
769   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
770   if (sf->ops->GetGraph) {
771     PetscCall((sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote));
772   } else {
773     if (nroots) *nroots = sf->nroots;
774     if (nleaves) *nleaves = sf->nleaves;
775     if (ilocal) *ilocal = sf->mine;
776     if (iremote) *iremote = sf->remote;
777   }
778   PetscFunctionReturn(0);
779 }
780 
781 /*@
782    PetscSFGetLeafRange - Get the active leaf ranges
783 
784    Not Collective
785 
786    Input Parameter:
787 .  sf - star forest
788 
789    Output Parameters:
790 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
791 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
792 
793    Level: developer
794 
795 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
796 @*/
797 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
798 {
799   PetscFunctionBegin;
800   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
801   PetscSFCheckGraphSet(sf,1);
802   if (minleaf) *minleaf = sf->minleaf;
803   if (maxleaf) *maxleaf = sf->maxleaf;
804   PetscFunctionReturn(0);
805 }
806 
807 /*@C
808    PetscSFViewFromOptions - View from Options
809 
810    Collective on PetscSF
811 
812    Input Parameters:
813 +  A - the star forest
814 .  obj - Optional object
815 -  name - command line option
816 
817    Level: intermediate
818 .seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
819 @*/
820 PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
821 {
822   PetscFunctionBegin;
823   PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
824   PetscCall(PetscObjectViewFromOptions((PetscObject)A,obj,name));
825   PetscFunctionReturn(0);
826 }
827 
/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is supplied */
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer));
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Set up the rank/offset data structures so the detailed view below can print them */
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) { /* direct ->format access; viewerimpl.h is included above */
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* Only a general (explicitly set) graph can be printed edge by edge */
      if (!sf->graphset) {
        PetscCall(PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(0);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
      /* Synchronized printing so each rank's output appears in rank order */
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks));
      /* One line per leaf edge: local index <- (remote rank, remote root index).
         A NULL sf->mine means leaves are stored contiguously, so the local index is i itself. */
      for (i=0; i<sf->nleaves; i++) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer,&format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* Detailed view: edges grouped by remote rank, printed in increasing rank order.
           Sort a copy of sf->ranks and keep a permutation so roffset[] still indexes the original order. */
        PetscMPIInt *tmpranks,*perm;
        PetscCall(PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm));
        PetscCall(PetscArraycpy(tmpranks,sf->ranks,sf->nranks));
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank));
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii]; /* index into the unsorted sf->ranks/roffset arrays */
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]));
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]));
          }
        }
        PetscCall(PetscFree2(tmpranks,perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  /* Let the implementation (basic/window/neighbor) append type-specific information */
  if (sf->ops->View) PetscCall((*sf->ops->View)(sf,viewer));
  PetscFunctionReturn(0);
}
897 
898 /*@C
899    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
900 
901    Not Collective
902 
903    Input Parameter:
904 .  sf - star forest
905 
906    Output Parameters:
907 +  nranks - number of ranks referenced by local part
908 .  ranks - array of ranks
909 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
910 .  rmine - concatenated array holding local indices referencing each remote rank
911 -  rremote - concatenated array holding remote indices referenced for each remote rank
912 
913    Level: developer
914 
915 .seealso: `PetscSFGetLeafRanks()`
916 @*/
917 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
918 {
919   PetscFunctionBegin;
920   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
921   PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
922   if (sf->ops->GetRootRanks) {
923     PetscCall((sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote));
924   } else {
925     /* The generic implementation */
926     if (nranks)  *nranks  = sf->nranks;
927     if (ranks)   *ranks   = sf->ranks;
928     if (roffset) *roffset = sf->roffset;
929     if (rmine)   *rmine   = sf->rmine;
930     if (rremote) *rremote = sf->rremote;
931   }
932   PetscFunctionReturn(0);
933 }
934 
935 /*@C
936    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
937 
938    Not Collective
939 
940    Input Parameter:
941 .  sf - star forest
942 
943    Output Parameters:
944 +  niranks - number of leaf ranks referencing roots on this process
945 .  iranks - array of ranks
946 .  ioffset - offset in irootloc for each rank (length niranks+1)
947 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
948 
949    Level: developer
950 
951 .seealso: `PetscSFGetRootRanks()`
952 @*/
953 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
954 {
955   PetscFunctionBegin;
956   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
957   PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
958   if (sf->ops->GetLeafRanks) {
959     PetscCall((sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc));
960   } else {
961     PetscSFType type;
962     PetscCall(PetscSFGetType(sf,&type));
963     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
964   }
965   PetscFunctionReturn(0);
966 }
967 
968 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
969   PetscInt i;
970   for (i=0; i<n; i++) {
971     if (needle == list[i]) return PETSC_TRUE;
972   }
973   return PETSC_FALSE;
974 }
975 
/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Parameters:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Notes:
   Fills sf->nranks/ranks/roffset/rmine/rremote by bucketing the leaf edges by
   remote rank. Ranks that belong to dgroup are placed first in sf->ranks (the
   first sf->ndranks entries); both partitions are sorted in increasing order.

   Level: developer

.seealso: `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size));
  /* Count leaves per remote rank with a hash table (keys must be nonzero, hence the +1 below) */
  PetscCall(PetscTableCreate(10,size,&table));
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    PetscCall(PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES));
  }
  PetscCall(PetscTableGetCount(table,&sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks));
  /* Drain the table into parallel arrays: ranks[] (remote rank) and rcount[] (leaves per rank) */
  PetscCall(PetscTableGetHeadPosition(table,&pos));
  for (i=0; i<sf->nranks; i++) {
    PetscCall(PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]));
    ranks[i]--;             /* Convert back to 0-based */
  }
  PetscCall(PetscTableDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate dgroup's members into rank numbers of sf's communicator */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_size(dgroup,&groupsize));
    PetscCall(PetscMalloc1(groupsize,&dgroupranks));
    PetscCall(PetscMalloc1(groupsize,&groupranks));
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  /* Two-pointer partition (Hoare-style): sf->ndranks advances past distinguished entries
     from the front, i retreats past non-distinguished entries from the back, and
     out-of-place pairs are swapped (rcount[] is kept in step with ranks[]). */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition independently so PetscFindMPIInt() (binary search) works below */
  PetscCall(PetscSortIntWithArray(sf->ndranks,ranks,rcount));
  PetscCall(PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks));
  /* Prefix-sum the counts into roffset[] and reset rcount[] for reuse as per-rank cursors */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i],sf->ranks+i));
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Bucket every leaf edge into rmine[]/rremote[] by its remote rank */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    /* orank caches the previous leaf's rank so consecutive leaves to the same rank skip the search */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      /* Try the distinguished partition first, then the non-distinguished one */
      PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank));
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheck(irank >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount,ranks));
  PetscFunctionReturn(0);
}
1078 
/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Notes:
   The groups are built lazily on first call and cached in sf; subsequent calls
   return the same cached groups.

   Level: developer

.seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheck(sf->nranks >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    /* Build an auxiliary SF with one root per process and one leaf per remote rank;
       the root's degree is then the number of ranks whose leaves reference my roots. */
    PetscCall(PetscMalloc1(sf->nranks,&remote));
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount));
    PetscCall(PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount,&indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount,&indegree));
    /* Enumerate the incoming ranks */
    /* Each process gathers, at its single root, the rank numbers of all leaves
       pointing to it; inranks then lists exactly the incoming ranks. */
    PetscCall(PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks));
    PetscCall(PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks,outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing ranks are already known locally: they are exactly sf->ranks */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1140 
/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Parameter:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Parameter:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi));
    *multi = sf->multi;
    sf->multi->multi = sf->multi; /* the multi-SF is its own multi-SF */
    PetscFunctionReturn(0);
  }
  if (!sf->multi) { /* Build and cache the multi-SF on first request */
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    PetscCall(PetscSFComputeDegreeBegin(sf,&indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf,&indegree));
    PetscCall(PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset));
    /* inoffset[] = prefix sum of root degrees: each original root r owns the
       multi-root index range [inoffset[r], inoffset[r+1]) */
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands each leaf a distinct slot (outoffset) within its root's range */
    PetscCall(PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM));
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        PetscCheck(inoffset[i] + indegree[i] == inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    /* Each leaf now points at its private multi-root slot instead of the shared root */
    PetscCall(PetscMalloc1(sf->nleaves,&remote));
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER));
    if (sf->rankorder) {        /* Sort the ranks */
      /* Reassign multi-root slots so that, within each root's range, incoming
         edges are ordered by sending rank (deterministic gather ordering). */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset));
      /* Collect, at each multi-root slot, the rank of the leaf that claimed it */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset));
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its new (rank-ordered) slot and rebuild the graph with it */
      PetscCall(PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves,&newremote));
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset));
    }
    PetscCall(PetscFree3(inoffset,outones,outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1232 
1233 /*@C
1234    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1235 
1236    Collective
1237 
1238    Input Parameters:
1239 +  sf - original star forest
1240 .  nselected  - number of selected roots on this process
1241 -  selected   - indices of the selected roots on this process
1242 
1243    Output Parameter:
1244 .  esf - new star forest
1245 
1246    Level: advanced
1247 
1248    Note:
1249    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1250    be done by calling PetscSFGetGraph().
1251 
1252 .seealso: `PetscSFSetGraph()`, `PetscSFGetGraph()`
1253 @*/
1254 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1255 {
1256   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1257   const PetscInt    *ilocal;
1258   signed char       *rootdata,*leafdata,*leafmem;
1259   const PetscSFNode *iremote;
1260   PetscSFNode       *new_iremote;
1261   MPI_Comm          comm;
1262 
1263   PetscFunctionBegin;
1264   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1265   PetscSFCheckGraphSet(sf,1);
1266   if (nselected) PetscValidIntPointer(selected,3);
1267   PetscValidPointer(esf,4);
1268 
1269   PetscCall(PetscSFSetUp(sf));
1270   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0));
1271   PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
1272   PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
1273 
1274   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1275     PetscBool dups;
1276     PetscCall(PetscCheckDupsInt(nselected,selected,&dups));
1277     PetscCheck(!dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1278     for (i=0; i<nselected; i++)
1279       PetscCheck(selected[i] >= 0 && selected[i] < nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots);
1280   }
1281 
1282   if (sf->ops->CreateEmbeddedRootSF) {
1283     PetscCall((*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf));
1284   } else {
1285     /* A generic version of creating embedded sf */
1286     PetscCall(PetscSFGetLeafRange(sf,&minleaf,&maxleaf));
1287     maxlocal = maxleaf - minleaf + 1;
1288     PetscCall(PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem));
1289     leafdata = leafmem - minleaf;
1290     /* Tag selected roots and bcast to leaves */
1291     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1292     PetscCall(PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE));
1293     PetscCall(PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE));
1294 
1295     /* Build esf with leaves that are still connected */
1296     esf_nleaves = 0;
1297     for (i=0; i<nleaves; i++) {
1298       j = ilocal ? ilocal[i] : i;
1299       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1300          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1301       */
1302       esf_nleaves += (leafdata[j] ? 1 : 0);
1303     }
1304     PetscCall(PetscMalloc1(esf_nleaves,&new_ilocal));
1305     PetscCall(PetscMalloc1(esf_nleaves,&new_iremote));
1306     for (i=n=0; i<nleaves; i++) {
1307       j = ilocal ? ilocal[i] : i;
1308       if (leafdata[j]) {
1309         new_ilocal[n]        = j;
1310         new_iremote[n].rank  = iremote[i].rank;
1311         new_iremote[n].index = iremote[i].index;
1312         ++n;
1313       }
1314     }
1315     PetscCall(PetscSFCreate(comm,esf));
1316     PetscCall(PetscSFSetFromOptions(*esf));
1317     PetscCall(PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER));
1318     PetscCall(PetscFree2(rootdata,leafmem));
1319   }
1320   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0));
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 /*@C
1325   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1326 
1327   Collective
1328 
1329   Input Parameters:
1330 + sf - original star forest
1331 . nselected  - number of selected leaves on this process
1332 - selected   - indices of the selected leaves on this process
1333 
1334   Output Parameter:
1335 .  newsf - new star forest
1336 
1337   Level: advanced
1338 
1339 .seealso: `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1340 @*/
1341 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1342 {
1343   const PetscSFNode *iremote;
1344   PetscSFNode       *new_iremote;
1345   const PetscInt    *ilocal;
1346   PetscInt          i,nroots,*leaves,*new_ilocal;
1347   MPI_Comm          comm;
1348 
1349   PetscFunctionBegin;
1350   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1351   PetscSFCheckGraphSet(sf,1);
1352   if (nselected) PetscValidIntPointer(selected,3);
1353   PetscValidPointer(newsf,4);
1354 
1355   /* Uniq selected[] and put results in leaves[] */
1356   PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
1357   PetscCall(PetscMalloc1(nselected,&leaves));
1358   PetscCall(PetscArraycpy(leaves,selected,nselected));
1359   PetscCall(PetscSortedRemoveDupsInt(&nselected,leaves));
1360   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves);
1361 
1362   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1363   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1364     PetscCall((*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf));
1365   } else {
1366     PetscCall(PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote));
1367     PetscCall(PetscMalloc1(nselected,&new_ilocal));
1368     PetscCall(PetscMalloc1(nselected,&new_iremote));
1369     for (i=0; i<nselected; ++i) {
1370       const PetscInt l     = leaves[i];
1371       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1372       new_iremote[i].rank  = iremote[l].rank;
1373       new_iremote[i].index = iremote[l].index;
1374     }
1375     PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf));
1376     PetscCall(PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER));
1377   }
1378   PetscCall(PetscFree(leaves));
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 /*@C
1383    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1384 
1385    Collective on PetscSF
1386 
1387    Input Parameters:
1388 +  sf - star forest on which to communicate
1389 .  unit - data type associated with each node
1390 .  rootdata - buffer to broadcast
1391 -  op - operation to use for reduction
1392 
1393    Output Parameter:
1394 .  leafdata - buffer to be reduced with values from each leaf's respective root
1395 
1396    Level: intermediate
1397 
1398    Notes:
1399     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1400     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1401     use PetscSFBcastWithMemTypeBegin() instead.
1402 .seealso: `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1403 @*/
1404 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1405 {
1406   PetscMemType   rootmtype,leafmtype;
1407 
1408   PetscFunctionBegin;
1409   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1410   PetscCall(PetscSFSetUp(sf));
1411   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
1412   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1413   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1414   PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op));
1415   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 /*@C
1420    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()
1421 
1422    Collective on PetscSF
1423 
1424    Input Parameters:
1425 +  sf - star forest on which to communicate
1426 .  unit - data type associated with each node
1427 .  rootmtype - memory type of rootdata
1428 .  rootdata - buffer to broadcast
1429 .  leafmtype - memory type of leafdata
1430 -  op - operation to use for reduction
1431 
1432    Output Parameter:
1433 .  leafdata - buffer to be reduced with values from each leaf's respective root
1434 
1435    Level: intermediate
1436 
1437 .seealso: `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
1438 @*/
1439 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1440 {
1441   PetscFunctionBegin;
1442   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1443   PetscCall(PetscSFSetUp(sf));
1444   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
1445   PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op));
1446   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
1447   PetscFunctionReturn(0);
1448 }
1449 
1450 /*@C
1451    PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()
1452 
1453    Collective
1454 
1455    Input Parameters:
1456 +  sf - star forest
1457 .  unit - data type
1458 .  rootdata - buffer to broadcast
1459 -  op - operation to use for reduction
1460 
1461    Output Parameter:
1462 .  leafdata - buffer to be reduced with values from each leaf's respective root
1463 
1464    Level: intermediate
1465 
1466 .seealso: `PetscSFSetGraph()`, `PetscSFReduceEnd()`
1467 @*/
1468 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1469 {
1470   PetscFunctionBegin;
1471   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1472   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0));
1473   PetscCall((*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op));
1474   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0));
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 /*@C
1479    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1480 
1481    Collective
1482 
1483    Input Parameters:
1484 +  sf - star forest
1485 .  unit - data type
1486 .  leafdata - values to reduce
1487 -  op - reduction operation
1488 
1489    Output Parameter:
1490 .  rootdata - result of reduction of values from all leaves of each root
1491 
1492    Level: intermediate
1493 
1494    Notes:
1495     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1496     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1497     use PetscSFReduceWithMemTypeBegin() instead.
1498 
1499 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`
1500 @*/
1501 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1502 {
1503   PetscMemType   rootmtype,leafmtype;
1504 
1505   PetscFunctionBegin;
1506   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1507   PetscCall(PetscSFSetUp(sf));
1508   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0));
1509   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1510   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1511   PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op));
1512   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0));
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 /*@C
1517    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1518 
1519    Collective
1520 
1521    Input Parameters:
1522 +  sf - star forest
1523 .  unit - data type
1524 .  leafmtype - memory type of leafdata
1525 .  leafdata - values to reduce
1526 .  rootmtype - memory type of rootdata
1527 -  op - reduction operation
1528 
1529    Output Parameter:
1530 .  rootdata - result of reduction of values from all leaves of each root
1531 
1532    Level: intermediate
1533 
1534 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceBegin()`
1535 @*/
1536 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1537 {
1538   PetscFunctionBegin;
1539   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1540   PetscCall(PetscSFSetUp(sf));
1541   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0));
1542   PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op));
1543   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0));
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 /*@C
1548    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1549 
1550    Collective
1551 
1552    Input Parameters:
1553 +  sf - star forest
1554 .  unit - data type
1555 .  leafdata - values to reduce
1556 -  op - reduction operation
1557 
1558    Output Parameter:
1559 .  rootdata - result of reduction of values from all leaves of each root
1560 
1561    Level: intermediate
1562 
1563 .seealso: `PetscSFSetGraph()`, `PetscSFBcastEnd()`
1564 @*/
1565 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1566 {
1567   PetscFunctionBegin;
1568   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1569   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0));
1570   PetscCall((*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op));
1571   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0));
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 /*@C
1576    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1577 
1578    Collective
1579 
1580    Input Parameters:
1581 +  sf - star forest
1582 .  unit - data type
1583 .  leafdata - leaf values to use in reduction
1584 -  op - operation to use for reduction
1585 
1586    Output Parameters:
1587 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1588 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1589 
1590    Level: advanced
1591 
1592    Note:
1593    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1594    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1595    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1596    integers.
1597 
1598 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1599 @*/
1600 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1601 {
1602   PetscMemType   rootmtype,leafmtype,leafupdatemtype;
1603 
1604   PetscFunctionBegin;
1605   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1606   PetscCall(PetscSFSetUp(sf));
1607   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0));
1608   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1609   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1610   PetscCall(PetscGetMemType(leafupdate,&leafupdatemtype));
1611   PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1612   PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op));
1613   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0));
1614   PetscFunctionReturn(0);
1615 }
1616 
/*@C
   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootmtype - memory type of rootdata
.  leafmtype - memory type of leafdata
.  leafdata - leaf values to use in reduction
.  leafupdatemtype - memory type of leafupdate
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   See PetscSFFetchAndOpBegin() for more details.

.seealso: `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  /* The implementations assume leafdata and leafupdate live in the same memory space */
  PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op));
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}
1652 
/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  /* Dispatch to the type-specific implementation; arguments were validated in PetscSFFetchAndOpBegin() */
  PetscCall((*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op));
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  PetscFunctionReturn(0);
}
1681 
1682 /*@C
1683    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1684 
1685    Collective
1686 
1687    Input Parameter:
1688 .  sf - star forest
1689 
1690    Output Parameter:
1691 .  degree - degree of each root vertex
1692 
1693    Level: advanced
1694 
1695    Notes:
1696    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1697 
1698 .seealso: `PetscSFGatherBegin()`
1699 @*/
1700 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1701 {
1702   PetscFunctionBegin;
1703   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1704   PetscSFCheckGraphSet(sf,1);
1705   PetscValidPointer(degree,2);
1706   if (!sf->degreeknown) {
1707     PetscInt i, nroots = sf->nroots, maxlocal;
1708     PetscCheck(!sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1709     maxlocal = sf->maxleaf-sf->minleaf+1;
1710     PetscCall(PetscMalloc1(nroots,&sf->degree));
1711     PetscCall(PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1712     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1713     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1714     PetscCall(PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
1715   }
1716   *degree = NULL;
1717   PetscFunctionReturn(0);
1718 }
1719 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: `PetscSFComputeDegreeBegin()`, `PetscSFGatherBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscCheck(sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Finish the MPI_SUM reduction of the all-ones leaf array started in PetscSFComputeDegreeBegin() */
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    /* Cache the result; subsequent Begin/End pairs return it without recomputing */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1753 
1754 /*@C
1755    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1756    Each multi-root is assigned index of the corresponding original root.
1757 
1758    Collective
1759 
1760    Input Parameters:
1761 +  sf - star forest
1762 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1763 
1764    Output Parameters:
1765 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1766 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1767 
1768    Level: developer
1769 
1770    Notes:
1771    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1772 
1773 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1774 @*/
1775 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1776 {
1777   PetscSF             msf;
1778   PetscInt            i, j, k, nroots, nmroots;
1779 
1780   PetscFunctionBegin;
1781   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1782   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1783   if (nroots) PetscValidIntPointer(degree,2);
1784   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1785   PetscValidPointer(multiRootsOrigNumbering,4);
1786   PetscCall(PetscSFGetMultiSF(sf,&msf));
1787   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1788   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1789   for (i=0,j=0,k=0; i<nroots; i++) {
1790     if (!degree[i]) continue;
1791     for (j=0; j<degree[i]; j++,k++) {
1792       (*multiRootsOrigNumbering)[k] = i;
1793     }
1794   }
1795   PetscCheck(k == nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1796   if (nMultiRoots) *nMultiRoots = nmroots;
1797   PetscFunctionReturn(0);
1798 }
1799 
1800 /*@C
1801    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1802 
1803    Collective
1804 
1805    Input Parameters:
1806 +  sf - star forest
1807 .  unit - data type
1808 -  leafdata - leaf data to gather to roots
1809 
1810    Output Parameter:
1811 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1812 
1813    Level: intermediate
1814 
1815 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1816 @*/
1817 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1818 {
1819   PetscSF        multi = NULL;
1820 
1821   PetscFunctionBegin;
1822   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1823   PetscCall(PetscSFSetUp(sf));
1824   PetscCall(PetscSFGetMultiSF(sf,&multi));
1825   PetscCall(PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE));
1826   PetscFunctionReturn(0);
1827 }
1828 
1829 /*@C
1830    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1831 
1832    Collective
1833 
1834    Input Parameters:
1835 +  sf - star forest
1836 .  unit - data type
1837 -  leafdata - leaf data to gather to roots
1838 
1839    Output Parameter:
1840 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1841 
1842    Level: intermediate
1843 
1844 .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1845 @*/
1846 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1847 {
1848   PetscSF        multi = NULL;
1849 
1850   PetscFunctionBegin;
1851   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1852   PetscCall(PetscSFGetMultiSF(sf,&multi));
1853   PetscCall(PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE));
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 /*@C
1858    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1859 
1860    Collective
1861 
1862    Input Parameters:
1863 +  sf - star forest
1864 .  unit - data type
1865 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1866 
1867    Output Parameter:
1868 .  leafdata - leaf data to be update with personal data from each respective root
1869 
1870    Level: intermediate
1871 
1872 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1873 @*/
1874 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1875 {
1876   PetscSF        multi = NULL;
1877 
1878   PetscFunctionBegin;
1879   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1880   PetscCall(PetscSFSetUp(sf));
1881   PetscCall(PetscSFGetMultiSF(sf,&multi));
1882   PetscCall(PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE));
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 /*@C
1887    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1888 
1889    Collective
1890 
1891    Input Parameters:
1892 +  sf - star forest
1893 .  unit - data type
1894 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1895 
1896    Output Parameter:
1897 .  leafdata - leaf data to be update with personal data from each respective root
1898 
1899    Level: intermediate
1900 
1901 .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1902 @*/
1903 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1904 {
1905   PetscSF        multi = NULL;
1906 
1907   PetscFunctionBegin;
1908   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1909   PetscCall(PetscSFGetMultiSF(sf,&multi));
1910   PetscCall(PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE));
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1915 {
1916   PetscInt        i, n, nleaves;
1917   const PetscInt *ilocal = NULL;
1918   PetscHSetI      seen;
1919 
1920   PetscFunctionBegin;
1921   if (PetscDefined(USE_DEBUG)) {
1922     PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL));
1923     PetscCall(PetscHSetICreate(&seen));
1924     for (i = 0; i < nleaves; i++) {
1925       const PetscInt leaf = ilocal ? ilocal[i] : i;
1926       PetscCall(PetscHSetIAdd(seen,leaf));
1927     }
1928     PetscCall(PetscHSetIGetSize(seen,&n));
1929     PetscCheck(n == nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1930     PetscCall(PetscHSetIDestroy(&seen));
1931   }
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 /*@
1936   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1937 
1938   Input Parameters:
1939 + sfA - The first PetscSF
1940 - sfB - The second PetscSF
1941 
1942   Output Parameters:
1943 . sfBA - The composite SF
1944 
1945   Level: developer
1946 
1947   Notes:
1948   Currently, the two SFs must be defined on congruent communicators and they must be true star
1949   forests, i.e. the same leaf is not connected with different roots.
1950 
1951   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
1952   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1953   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1954   Bcast on sfA, then a Bcast on sfB, on connected nodes.
1955 
1956 .seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
1957 @*/
1958 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1959 {
1960   const PetscSFNode *remotePointsA,*remotePointsB;
1961   PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1962   const PetscInt    *localPointsA,*localPointsB;
1963   PetscInt          *localPointsBA;
1964   PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1965   PetscBool         denseB;
1966 
1967   PetscFunctionBegin;
1968   PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
1969   PetscSFCheckGraphSet(sfA,1);
1970   PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
1971   PetscSFCheckGraphSet(sfB,2);
1972   PetscCheckSameComm(sfA,1,sfB,2);
1973   PetscValidPointer(sfBA,3);
1974   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
1975   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
1976 
1977   PetscCall(PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA));
1978   PetscCall(PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB));
1979   if (localPointsA) {
1980     PetscCall(PetscMalloc1(numRootsB,&reorderedRemotePointsA));
1981     for (i=0; i<numRootsB; i++) {
1982       reorderedRemotePointsA[i].rank = -1;
1983       reorderedRemotePointsA[i].index = -1;
1984     }
1985     for (i=0; i<numLeavesA; i++) {
1986       if (localPointsA[i] >= numRootsB) continue;
1987       reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1988     }
1989     remotePointsA = reorderedRemotePointsA;
1990   }
1991   PetscCall(PetscSFGetLeafRange(sfB,&minleaf,&maxleaf));
1992   PetscCall(PetscMalloc1(maxleaf-minleaf+1,&leafdataB));
1993   PetscCall(PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
1994   PetscCall(PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
1995   PetscCall(PetscFree(reorderedRemotePointsA));
1996 
1997   denseB = (PetscBool)!localPointsB;
1998   for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1999     if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
2000     else numLeavesBA++;
2001   }
2002   if (denseB) {
2003     localPointsBA  = NULL;
2004     remotePointsBA = leafdataB;
2005   } else {
2006     PetscCall(PetscMalloc1(numLeavesBA,&localPointsBA));
2007     PetscCall(PetscMalloc1(numLeavesBA,&remotePointsBA));
2008     for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
2009       const PetscInt l = localPointsB ? localPointsB[i] : i;
2010 
2011       if (leafdataB[l-minleaf].rank == -1) continue;
2012       remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
2013       localPointsBA[numLeavesBA] = l;
2014       numLeavesBA++;
2015     }
2016     PetscCall(PetscFree(leafdataB));
2017   }
2018   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
2019   PetscCall(PetscSFSetFromOptions(*sfBA));
2020   PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 /*@
2025   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
2026 
2027   Input Parameters:
2028 + sfA - The first PetscSF
2029 - sfB - The second PetscSF
2030 
2031   Output Parameters:
2032 . sfBA - The composite SF.
2033 
2034   Level: developer
2035 
2036   Notes:
2037   Currently, the two SFs must be defined on congruent communicators and they must be true star
2038   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
2039   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.
2040 
2041   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
2042   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2043   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2044   a Reduce on sfB, on connected roots.
2045 
2046 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
2047 @*/
2048 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
2049 {
2050   const PetscSFNode *remotePointsA,*remotePointsB;
2051   PetscSFNode       *remotePointsBA;
2052   const PetscInt    *localPointsA,*localPointsB;
2053   PetscSFNode       *reorderedRemotePointsA = NULL;
2054   PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
2055   MPI_Op            op;
2056 #if defined(PETSC_USE_64BIT_INDICES)
2057   PetscBool         iswin;
2058 #endif
2059 
2060   PetscFunctionBegin;
2061   PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
2062   PetscSFCheckGraphSet(sfA,1);
2063   PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
2064   PetscSFCheckGraphSet(sfB,2);
2065   PetscCheckSameComm(sfA,1,sfB,2);
2066   PetscValidPointer(sfBA,3);
2067   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2068   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2069 
2070   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2071   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2072 
2073   /* TODO: Check roots of sfB have degree of 1 */
2074   /* Once we implement it, we can replace the MPI_MAXLOC
2075      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2076      We use MPI_MAXLOC only to have a deterministic output from this routine if
2077      the root condition is not meet.
2078    */
2079   op = MPI_MAXLOC;
2080 #if defined(PETSC_USE_64BIT_INDICES)
2081   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2082   PetscCall(PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin));
2083   if (iswin) op = MPI_REPLACE;
2084 #endif
2085 
2086   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2087   PetscCall(PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA));
2088   for (i=0; i<maxleaf - minleaf + 1; i++) {
2089     reorderedRemotePointsA[i].rank = -1;
2090     reorderedRemotePointsA[i].index = -1;
2091   }
2092   if (localPointsA) {
2093     for (i=0; i<numLeavesA; i++) {
2094       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2095       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2096     }
2097   } else {
2098     for (i=0; i<numLeavesA; i++) {
2099       if (i > maxleaf || i < minleaf) continue;
2100       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2101     }
2102   }
2103 
2104   PetscCall(PetscMalloc1(numRootsB,&localPointsBA));
2105   PetscCall(PetscMalloc1(numRootsB,&remotePointsBA));
2106   for (i=0; i<numRootsB; i++) {
2107     remotePointsBA[i].rank = -1;
2108     remotePointsBA[i].index = -1;
2109   }
2110 
2111   PetscCall(PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
2112   PetscCall(PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
2113   PetscCall(PetscFree(reorderedRemotePointsA));
2114   for (i=0,numLeavesBA=0; i<numRootsB; i++) {
2115     if (remotePointsBA[i].rank == -1) continue;
2116     remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2117     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2118     localPointsBA[numLeavesBA] = i;
2119     numLeavesBA++;
2120   }
2121   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
2122   PetscCall(PetscSFSetFromOptions(*sfBA));
2123   PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
2124   PetscFunctionReturn(0);
2125 }
2126 
2127 /*
2128   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
2129 
2130   Input Parameters:
2131 . sf - The global PetscSF
2132 
2133   Output Parameters:
2134 . out - The local PetscSF
2135  */
2136 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2137 {
2138   MPI_Comm           comm;
2139   PetscMPIInt        myrank;
2140   const PetscInt     *ilocal;
2141   const PetscSFNode  *iremote;
2142   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
2143   PetscSFNode        *liremote;
2144   PetscSF            lsf;
2145 
2146   PetscFunctionBegin;
2147   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2148   if (sf->ops->CreateLocalSF) {
2149     PetscCall((*sf->ops->CreateLocalSF)(sf,out));
2150   } else {
2151     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2152     PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
2153     PetscCallMPI(MPI_Comm_rank(comm,&myrank));
2154 
2155     /* Find out local edges and build a local SF */
2156     PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
2157     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2158     PetscCall(PetscMalloc1(lnleaves,&lilocal));
2159     PetscCall(PetscMalloc1(lnleaves,&liremote));
2160 
2161     for (i=j=0; i<nleaves; i++) {
2162       if (iremote[i].rank == (PetscInt)myrank) {
2163         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2164         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2165         liremote[j].index = iremote[i].index;
2166         j++;
2167       }
2168     }
2169     PetscCall(PetscSFCreate(PETSC_COMM_SELF,&lsf));
2170     PetscCall(PetscSFSetFromOptions(lsf));
2171     PetscCall(PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER));
2172     PetscCall(PetscSFSetUp(lsf));
2173     *out = lsf;
2174   }
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2179 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2180 {
2181   PetscMemType       rootmtype,leafmtype;
2182 
2183   PetscFunctionBegin;
2184   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2185   PetscCall(PetscSFSetUp(sf));
2186   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
2187   PetscCall(PetscGetMemType(rootdata,&rootmtype));
2188   PetscCall(PetscGetMemType(leafdata,&leafmtype));
2189   if (sf->ops->BcastToZero) {
2190     PetscCall((*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata));
2191   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2192   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 /*@
2197   PetscSFConcatenate - concatenate multiple SFs into one
2198 
2199   Input Parameters:
2200 + comm - the communicator
2201 . nsfs - the number of input PetscSF
2202 . sfs  - the array of input PetscSF
2203 . shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE)
2204 - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage
2205 
2206   Output Parameters:
2207 . newsf - The resulting PetscSF
2208 
2209   Level: developer
2210 
2211   Notes:
2212   The communicator of all SFs in sfs must be comm.
2213 
2214   The offsets in leafOffsets are added to the original leaf indices.
2215 
2216   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2217   In this case, NULL is also passed as ilocal to the resulting SF.
2218 
2219   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2220   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2221 
2222 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2223 @*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt            i, s, nLeaves, nRoots;
  PetscInt           *leafArrayOffsets; /* prefix sums of input leaf counts; offset of each input's slice in the concatenated leaf/iremote arrays */
  PetscInt           *ilocal_new;       /* concatenated, re-offset leaf indices of the result (stays NULL when all inputs are contiguous) */
  PetscSFNode        *iremote_new;      /* concatenated, re-offset remote roots of the result */
  PetscInt           *rootOffsets;      /* prefix sums of input root counts (all zero in the shareRoots case, thanks to calloc below) */
  PetscBool           all_ilocal_null = PETSC_FALSE;
  PetscMPIInt         rank;

  PetscFunctionBegin;
  /* Argument validation: the collective validation macros need a PetscObject living on comm,
     so create a throwaway SF purely for that purpose and destroy it right after */
  {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    PetscCall(PetscSFCreate(comm,&dummy));
    PetscValidLogicalCollectiveInt(dummy,nsfs,2);
    PetscValidPointer(sfs,3);
    for (i=0; i<nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i],PETSCSF_CLASSID,3);
      PetscCheckSameComm(dummy,1,sfs[i],3);
    }
    PetscValidLogicalCollectiveBool(dummy,shareRoots,4);
    if (leafOffsets) PetscValidIntPointer(leafOffsets,5);
    PetscValidPointer(newsf,6);
    PetscCall(PetscSFDestroy(&dummy));
  }
  /* Trivial case: concatenating zero SFs yields an empty SF */
  if (!nsfs) {
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscFunctionReturn(0);
  }
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* Determine the root space of the result:
     - shareRoots: every input addresses the same root space, so take its size from sfs[0]
       (in debug builds, verify the remaining inputs agree on the local number of roots);
     - otherwise: stack the root spaces end to end, recording each input's offset so its
       remote root indices can be shifted below. */
  PetscCall(PetscCalloc1(nsfs+1, &rootOffsets));
  if (shareRoots) {
    PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
    if (PetscDefined(USE_DEBUG)) {
      for (s=1; s<nsfs; s++) {
        PetscInt nr;

        PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
      }
    }
  } else {
    for (s=0; s<nsfs; s++) {
      PetscInt nr;

      PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
      rootOffsets[s+1] = rootOffsets[s] + nr;
    }
    nRoots = rootOffsets[nsfs];
  }

  /* Calculate leaf array offsets and automatic root offsets */
  PetscCall(PetscMalloc1(nsfs+1,&leafArrayOffsets));
  leafArrayOffsets[0] = 0;
  for (s=0; s<nsfs; s++) {
    PetscInt        nl;

    PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
    leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* leafOffsets may only be omitted when every input uses contiguous leaf storage
     (ilocal = NULL); in that case the result keeps ilocal = NULL as well */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s=0; s<nsfs; s++) {
      const PetscInt *ilocal;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves: each input's leaf index (explicit ilocal[i], or i
     for contiguous storage) is shifted by the caller-provided leafOffsets[s].
     Note: this branch is only reached when leafOffsets was provided (see check above). */
  ilocal_new = NULL;
  if (!all_ilocal_null) {
    PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
    for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1; /* sentinel; every slot is overwritten below */
    for (s = 0; s<nsfs; s++) {
      const PetscInt   *ilocal;
      PetscInt         *ilocal_l = &ilocal_new[leafArrayOffsets[s]]; /* this input's slice of the result */
      PetscInt          i, nleaves_l;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
      for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots.
     Trick: for each input, build a helper SF with the same remote roots but contiguous leaves,
     then broadcast each root's new identity (owner rank, root index shifted by rootOffsets[s])
     to its leaves; the leaf buffer is aimed directly at this input's slice of iremote_new,
     so the broadcast fills the concatenated iremote array in place. */
  PetscCall(PetscMalloc1(nLeaves, &iremote_new));
  for (i = 0; i < nLeaves; i++) { /* sentinel values; every slot is overwritten by the broadcasts */
    iremote_new[i].rank   = -1;
    iremote_new[i].index  = -1;
  }
  for (s = 0; s<nsfs; s++) {
    PetscInt            i, nl, nr;
    PetscSF             tmp_sf;
    const PetscSFNode  *iremote;
    PetscSFNode        *tmp_rootdata;
    PetscSFNode        *tmp_leafdata = &iremote_new[leafArrayOffsets[s]]; /* broadcast lands in this input's slice */

    PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
    PetscCall(PetscSFCreate(comm, &tmp_sf));
    /* create helper SF with contiguous leaves */
    PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES));
    PetscCall(PetscSFSetUp(tmp_sf));
    PetscCall(PetscMalloc1(nr, &tmp_rootdata));
    for (i = 0; i < nr; i++) {
      tmp_rootdata[i].index = i + rootOffsets[s]; /* rootOffsets[s] is 0 when shareRoots */
      tmp_rootdata[i].rank  = (PetscInt) rank;
    }
    /* MPIU_2INT matches the two-PetscInt layout of PetscSFNode */
    PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(  tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFDestroy(&tmp_sf));
    PetscCall(PetscFree(tmp_rootdata));
  }

  /* Build the new SF */
  PetscCall(PetscSFCreate(comm, newsf));
  PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*newsf));
  PetscCall(PetscFree(rootOffsets));
  PetscCall(PetscFree(leafArrayOffsets));
  PetscFunctionReturn(0);
}
2355