xref: /petsc/src/vec/is/sf/interface/sf.c (revision dbbe0bcd3f3a8fbab5a45420dc06f8387e5764c6)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petscctable.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <cuda_runtime.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 #endif
13 
#if defined(PETSC_CLANG_STATIC_ANALYZER)
void PetscSFCheckGraphSet(PetscSF,int);
#else
#if defined(PETSC_USE_DEBUG)
/* No trailing semicolon here: call sites supply their own. A trailing semicolon would
   make the debug expansion produce two statements while the non-debug do{}while(0)
   variant produces one, so "if (c) PetscSFCheckGraphSet(sf,1); else ..." would compile
   differently (or fail) depending on the build configuration. */
#  define PetscSFCheckGraphSet(sf,arg) PetscCheck((sf)->graphset,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
#endif
23 
24 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
25 
26 /*@
27    PetscSFCreate - create a star forest communication context
28 
29    Collective
30 
31    Input Parameter:
32 .  comm - communicator on which the star forest will operate
33 
34    Output Parameter:
35 .  sf - new star forest context
36 
37    Options Database Keys:
38 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
39 .  -sf_type window    -Use MPI-3 one-sided window for communication
40 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
41 
42    Level: intermediate
43 
44    Notes:
   When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv,
46    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
47    SFs are optimized and they have better performance than general SFs.
48 
49 .seealso: `PetscSFSetGraph()`, `PetscSFSetGraphWithPattern()`, `PetscSFDestroy()`
50 @*/
51 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
52 {
53   PetscSF        b;
54 
55   PetscFunctionBegin;
56   PetscValidPointer(sf,2);
57   PetscCall(PetscSFInitializePackage());
58 
59   PetscCall(PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView));
60 
61   b->nroots    = -1;
62   b->nleaves   = -1;
63   b->minleaf   = PETSC_MAX_INT;
64   b->maxleaf   = PETSC_MIN_INT;
65   b->nranks    = -1;
66   b->rankorder = PETSC_TRUE;
67   b->ingroup   = MPI_GROUP_NULL;
68   b->outgroup  = MPI_GROUP_NULL;
69   b->graphset  = PETSC_FALSE;
70 #if defined(PETSC_HAVE_DEVICE)
71   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
72   b->use_stream_aware_mpi = PETSC_FALSE;
73   b->unknown_input_stream= PETSC_FALSE;
74   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
75     b->backend = PETSCSF_BACKEND_KOKKOS;
76   #elif defined(PETSC_HAVE_CUDA)
77     b->backend = PETSCSF_BACKEND_CUDA;
78   #elif defined(PETSC_HAVE_HIP)
79     b->backend = PETSCSF_BACKEND_HIP;
80   #endif
81 
82   #if defined(PETSC_HAVE_NVSHMEM)
83     b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
84     b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
85     PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL));
86     PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL));
87   #endif
88 #endif
89   b->vscat.from_n = -1;
90   b->vscat.to_n   = -1;
91   b->vscat.unit   = MPIU_SCALAR;
92  *sf = b;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@
97    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
98 
99    Collective
100 
101    Input Parameter:
102 .  sf - star forest
103 
104    Level: advanced
105 
106 .seealso: `PetscSFCreate()`, `PetscSFSetGraph()`, `PetscSFDestroy()`
107 @*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation release its type-specific data first */
  PetscTryTypeMethod(sf,Reset);
  /* Return graph metadata to the just-created state (mirrors PetscSFCreate()) */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine/remote may alias these owned arrays (see PetscSFSetGraph()); only the _alloc copies are freed */
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup  != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
  /* Clear the multi-SF's back-pointer before destroying it to avoid re-entering this SF */
  if (sf->multi) sf->multi->multi = NULL;
  PetscCall(PetscSFDestroy(&sf->multi));
  PetscCall(PetscLayoutDestroy(&sf->map));

 #if defined(PETSC_HAVE_DEVICE)
  /* Device-side mirrors of rmine for both host/device leaf indices */
  for (PetscInt i=0; i<2; i++) PetscCall(PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]));
 #endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
139 
140 /*@C
141    PetscSFSetType - Set the PetscSF communication implementation
142 
143    Collective on PetscSF
144 
145    Input Parameters:
146 +  sf - the PetscSF context
147 -  type - a known method
148 
149    Options Database Key:
150 .  -sf_type <type> - Sets the method; use -help for a list
151    of available methods (for instance, window, basic, neighbor)
152 
153    Notes:
154    See "include/petscsf.h" for available methods (for instance)
155 +    PETSCSFWINDOW - MPI-2/3 one-sided
156 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
157 
158   Level: intermediate
159 
160 .seealso: `PetscSFType`, `PetscSFCreate()`
161 @*/
162 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
163 {
164   PetscBool      match;
165   PetscErrorCode (*r)(PetscSF);
166 
167   PetscFunctionBegin;
168   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
169   PetscValidCharPointer(type,2);
170 
171   PetscCall(PetscObjectTypeCompare((PetscObject)sf,type,&match));
172   if (match) PetscFunctionReturn(0);
173 
174   PetscCall(PetscFunctionListFind(PetscSFList,type,&r));
175   PetscCheck(r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
176   /* Destroy the previous PetscSF implementation context */
177   PetscTryTypeMethod(sf,Destroy);
178   PetscCall(PetscMemzero(sf->ops,sizeof(*sf->ops)));
179   PetscCall(PetscObjectChangeTypeName((PetscObject)sf,type));
180   PetscCall((*r)(sf));
181   PetscFunctionReturn(0);
182 }
183 
184 /*@C
185   PetscSFGetType - Get the PetscSF communication implementation
186 
187   Not Collective
188 
189   Input Parameter:
190 . sf  - the PetscSF context
191 
192   Output Parameter:
193 . type - the PetscSF type name
194 
195   Level: intermediate
196 
197 .seealso: `PetscSFSetType()`, `PetscSFCreate()`
198 @*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  /* Borrowed pointer to the type name; NULL if no type has been set yet
     (callers such as PetscSFDuplicate() check for that) */
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}
207 
208 /*@C
209    PetscSFDestroy - destroy star forest
210 
211    Collective
212 
213    Input Parameter:
214 .  sf - address of star forest
215 
216    Level: intermediate
217 
218 .seealso: `PetscSFCreate()`, `PetscSFReset()`
219 @*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually destroys the object; earlier releases just NULL the caller's pointer */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  PetscCall(PetscSFReset(*sf));
  /* Implementation-specific teardown, if the type provides one */
  PetscTryTypeMethod((*sf),Destroy);
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* The unit is a duplicated datatype only when bs > 1 (see PetscSFDuplicate()); the bs == 1
     default MPIU_SCALAR must not be freed */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(0);
}
233 
234 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
235 {
236   PetscInt           i, nleaves;
237   PetscMPIInt        size;
238   const PetscInt    *ilocal;
239   const PetscSFNode *iremote;
240 
241   PetscFunctionBegin;
242   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
243   PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote));
244   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size));
245   for (i = 0; i < nleaves; i++) {
246     const PetscInt rank = iremote[i].rank;
247     const PetscInt remote = iremote[i].index;
248     const PetscInt leaf = ilocal ? ilocal[i] : i;
249     PetscCheck(rank >= 0 && rank < size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size);
250     PetscCheck(remote >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i);
251     PetscCheck(leaf >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i);
252   }
253   PetscFunctionReturn(0);
254 }
255 
256 /*@
257    PetscSFSetUp - set up communication structures
258 
259    Collective
260 
261    Input Parameter:
262 .  sf - star forest communication object
263 
264    Level: beginner
265 
266 .seealso: `PetscSFSetFromOptions()`, `PetscSFSetType()`
267 @*/
268 PetscErrorCode PetscSFSetUp(PetscSF sf)
269 {
270   PetscFunctionBegin;
271   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
272   PetscSFCheckGraphSet(sf,1);
273   if (sf->setupcalled) PetscFunctionReturn(0);
274   PetscCall(PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0));
275   PetscCall(PetscSFCheckGraphValid_Private(sf));
276   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf,PETSCSFBASIC)); /* Zero all sf->ops */
277   PetscTryTypeMethod(sf,SetUp);
278 #if defined(PETSC_HAVE_CUDA)
279   if (sf->backend == PETSCSF_BACKEND_CUDA) {
280     sf->ops->Malloc = PetscSFMalloc_CUDA;
281     sf->ops->Free   = PetscSFFree_CUDA;
282   }
283 #endif
284 #if defined(PETSC_HAVE_HIP)
285   if (sf->backend == PETSCSF_BACKEND_HIP) {
286     sf->ops->Malloc = PetscSFMalloc_HIP;
287     sf->ops->Free   = PetscSFFree_HIP;
288   }
289 #endif
290 
291 #
292 #if defined(PETSC_HAVE_KOKKOS)
293   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
294     sf->ops->Malloc = PetscSFMalloc_Kokkos;
295     sf->ops->Free   = PetscSFFree_Kokkos;
296   }
297 #endif
298   PetscCall(PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0));
299   sf->setupcalled = PETSC_TRUE;
300   PetscFunctionReturn(0);
301 }
302 
303 /*@
304    PetscSFSetFromOptions - set PetscSF options using the options database
305 
306    Logically Collective
307 
308    Input Parameter:
309 .  sf - star forest
310 
311    Options Database Keys:
312 +  -sf_type               - implementation type, see PetscSFSetType()
313 .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
314 .  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
315                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
316                             If true, this option only works with -use_gpu_aware_mpi 1.
317 .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
318                                If true, this option only works with -use_gpu_aware_mpi 1.
319 
320 -  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
321                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                              the only available backend is kokkos.
323 
324    Level: intermediate
325 @*/
326 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
327 {
328   PetscSFType    deft;
329   char           type[256];
330   PetscBool      flg;
331 
332   PetscFunctionBegin;
333   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
334   PetscObjectOptionsBegin((PetscObject)sf);
335   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
336   PetscCall(PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg));
337   PetscCall(PetscSFSetType(sf,flg ? type : deft));
338   PetscCall(PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL));
339  #if defined(PETSC_HAVE_DEVICE)
340   {
341     char        backendstr[32] = {0};
342     PetscBool   isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
343     /* Change the defaults set in PetscSFCreate() with command line options */
344     PetscCall(PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL));
345     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL));
346     PetscCall(PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set));
347     PetscCall(PetscStrcasecmp("cuda",backendstr,&isCuda));
348     PetscCall(PetscStrcasecmp("kokkos",backendstr,&isKokkos));
349     PetscCall(PetscStrcasecmp("hip",backendstr,&isHip));
350   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
351     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
352     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
353     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
354     else PetscCheck(!set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
355   #elif defined(PETSC_HAVE_KOKKOS)
356     PetscCheck(!set || isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
357    #endif
358   }
359  #endif
360   PetscTryTypeMethod(sf,SetFromOptions,PetscOptionsObject);
361   PetscOptionsEnd();
362   PetscFunctionReturn(0);
363 }
364 
365 /*@
366    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
367 
368    Logically Collective
369 
370    Input Parameters:
371 +  sf - star forest
372 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
373 
374    Level: advanced
375 
376 .seealso: `PetscSFGatherBegin()`, `PetscSFScatterBegin()`
377 @*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* Once the multi-SF has been built (by gather/scatter operations), the ordering can no longer change */
  PetscCheck(!sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}
387 
388 /*@C
389    PetscSFSetGraph - Set a parallel star forest
390 
391    Collective
392 
393    Input Parameters:
394 +  sf - star forest
395 .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
396 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
397 .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
398 during setup in debug mode)
399 .  localmode - copy mode for ilocal
400 .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
401 during setup in debug mode)
402 -  remotemode - copy mode for iremote
403 
404    Level: intermediate
405 
406    Notes:
407    Leaf indices in ilocal must be unique, otherwise an error occurs.
408 
409    Input arrays ilocal and iremote follow the PetscCopyMode semantics.
410    In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
411    PETSc might modify the respective array;
412    if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
413    Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).
414 
415    Fortran Notes:
416    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.
417 
418    Developer Notes:
419    We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
420    This also allows to compare leaf sets of two SFs easily.
421 
422 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFGetGraph()`
423 @*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscBool       unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  PetscCheck(nroots  >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots);
  PetscCheck(nleaves >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves);
  /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
   * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
  PetscCheck((int)localmode  >= PETSC_COPY_VALUES && localmode  <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong localmode %d",localmode);
  PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong remotemode %d",remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0));

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES, switch to private copies so the caller's arrays stay intact;
     from here on ilocal/iremote refer to data the SF may sort and eventually own */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves,&tlocal));
    PetscCall(PetscArraycpy(tlocal,ilocal,nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves,&tremote));
    PetscCall(PetscArraycpy(tremote,iremote,nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode   work;

    /* Sort leaves (permuting iremote alongside) to find the leaf range, duplicates, and contiguity */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    /* PetscSortedCheckDupsInt() reports whether duplicates exist; invert to get uniqueness */
    unique = PetscNot(unique);
    PetscCheck(sf->allow_multi_leaves || unique,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves-1];
    contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1);
  } else {
    /* NULL ilocal means leaves are implicitly 0..nleaves-1 */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* Contiguous leaves are stored implicitly as ilocal == NULL; discard the explicit array */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  sf->mine            = ilocal;
  /* The *_alloc fields hold the arrays the SF owns and must free; under PETSC_USE_POINTER
     the caller retains ownership, so they stay NULL */
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc    = NULL;
  } else {
    sf->mine_alloc    = ilocal;
  }
  sf->remote          = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc  = NULL;
  } else {
    sf->remote_alloc  = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
503 
504 /*@
505   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
506 
507   Collective
508 
509   Input Parameters:
510 + sf      - The PetscSF
511 . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
512 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
513 
514   Notes:
515   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
516   n and N are local and global sizes of x respectively.
517 
518   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
519   sequential vectors y on all ranks.
520 
521   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
522   sequential vector y on rank 0.
523 
524   In above cases, entries of x are roots and entries of y are leaves.
525 
526   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
527   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
528   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
530   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
531 
532   In this case, roots and leaves are symmetric.
533 
534   Level: intermediate
535  @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* map is insignificant for ALLTOALL, so only validate it for the other patterns */
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern);
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* Build an internal layout with "size" roots per rank; the cast makes the
       size*size product happen in PetscInt rather than int arithmetic */
    PetscCall(PetscLayoutCreate(comm,&sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map,size));
    PetscCall(PetscLayoutSetSize(sf->map,((PetscInt)size)*size));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map,&n));
    PetscCall(PetscLayoutGetSize(map,&N));
    /* Single MPI_MAX reduction computes both max(n) and max(-n) = -min(n) */
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map,&sf->map));
  }
  PetscCall(PetscSFSetType(sf,type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather; other ranks get an empty leaf range */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
602 
603 /*@
604    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
605 
606    Collective
607 
608    Input Parameter:
609 .  sf - star forest to invert
610 
611    Output Parameter:
612 .  isf - inverse of sf
613 
614    Level: advanced
615 
616    Notes:
617    All roots must have degree 1.
618 
619    The local space may be a permutation, but cannot be sparse.
620 
621 .seealso: `PetscSFSetGraph()`
622 @*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL));
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
  PetscCall(PetscMalloc2(nroots,&roots,maxlocal,&leaves));
  /* Each leaf slot records its own (rank, index) identity */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Initialize roots to (-1,-1) so roots that receive nothing can be recognized afterwards */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's identity to its root; since all roots have degree 1 there is at most
     one writer per root, so MPI_REPLACE is well-defined */
  PetscCall(PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* dense: contiguous leaves, no explicit ilocal needed */
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    PetscCall(PetscMalloc1(count,&newilocal));
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* newilocal ownership transfers to the new SF (PETSC_OWN_POINTER); roots is copied since we free it below */
  PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf));
  PetscCall(PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots,leaves));
  PetscFunctionReturn(0);
}
671 
672 /*@
673    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
674 
675    Collective
676 
677    Input Parameters:
678 +  sf - communication object to duplicate
679 -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
680 
681    Output Parameter:
682 .  newsf - new communication object
683 
684    Level: beginner
685 
686 .seealso: `PetscSFCreate()`, `PetscSFSetType()`, `PetscSFSetGraph()`
687 @*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  MPI_Datatype   dtype=MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf));
  PetscCall(PetscSFGetType(sf,&type));
  if (type) PetscCall(PetscSFSetType(*newsf,type));
  (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Dup this flag earlier since PetscSFSetGraph() below checks on this flag */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graphs are deep-copied; the casts merely drop const for the PETSC_COPY_VALUES path,
         which does not modify the arrays */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
      PetscCall(PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES));
    } else {
      /* Patterned graphs are cheap to rebuild from the stored layout and pattern */
      PetscCall(PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit,&dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream= sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  PetscTryTypeMethod(sf,Duplicate,opt,*newsf);
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}
731 
732 /*@C
733    PetscSFGetGraph - Get the graph specifying a parallel star forest
734 
735    Not Collective
736 
737    Input Parameter:
738 .  sf - star forest
739 
740    Output Parameters:
741 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
742 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
743 .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
744 -  iremote - remote locations of root vertices for each leaf on the current process
745 
746    Notes:
747      We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
748 
749      The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
750      see its manpage for details.
751 
752    Fortran Notes:
753      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
754      want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
755 
756      To check for a NULL ilocal use
757 $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
758 
759    Level: intermediate
760 
761 .seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`
762 @*/
763 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
764 {
765   PetscFunctionBegin;
766   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
767   if (sf->ops->GetGraph) {
768     PetscCall((sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote));
769   } else {
770     if (nroots) *nroots = sf->nroots;
771     if (nleaves) *nleaves = sf->nleaves;
772     if (ilocal) *ilocal = sf->mine;
773     if (iremote) *iremote = sf->remote;
774   }
775   PetscFunctionReturn(0);
776 }
777 
/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: `PetscSFCreate()`, `PetscSFView()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);           /* the range is cached when the graph is set, so a graph must exist */
  if (minleaf) *minleaf = sf->minleaf;  /* either output pointer may be NULL if the caller does not need it */
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}
803 
/*@C
   PetscSFViewFromOptions - View from Options

   Collective on PetscSF

   Input Parameters:
+  A - the star forest
.  obj - Optional object that provides the options prefix used for the options database query
-  name - command line option

   Level: intermediate

.seealso: `PetscSF`, `PetscSFView`, `PetscObjectViewFromOptions()`, `PetscSFCreate()`
@*/
PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  /* Delegate to the generic object machinery, which parses the option value into a viewer and format */
  PetscCall(PetscObjectViewFromOptions((PetscObject)A,obj,name));
  PetscFunctionReturn(0);
}
824 
/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: `PetscSFCreate()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is supplied */
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer));
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Setting up builds the per-rank tables (ranks/roffset/rmine/rremote) printed below */
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        /* Nothing more to show for a graphless SF; pop the tab pushed above before the early return */
        PetscCall(PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(0);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
      /* Synchronized printing keeps per-process output ordered by rank */
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks));
      for (i=0; i<sf->nleaves; i++) {
        /* When sf->mine is NULL the leaves are contiguous, so leaf i lives at local index i */
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer,&format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Print edges grouped by remote rank in sorted order; sort a copy so sf->ranks is untouched */
        PetscCall(PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm));
        PetscCall(PetscArraycpy(tmpranks,sf->ranks,sf->nranks));
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank));
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          /* Edges to sf->ranks[i] occupy the slice [roffset[i],roffset[i+1]) of rmine/rremote */
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]));
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]));
          }
        }
        PetscCall(PetscFree2(tmpranks,perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  /* Give the implementation a chance to print type-specific information */
  PetscTryTypeMethod(sf,View,viewer);
  PetscFunctionReturn(0);
}
894 
895 /*@C
896    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
897 
898    Not Collective
899 
900    Input Parameter:
901 .  sf - star forest
902 
903    Output Parameters:
904 +  nranks - number of ranks referenced by local part
905 .  ranks - array of ranks
906 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
907 .  rmine - concatenated array holding local indices referencing each remote rank
908 -  rremote - concatenated array holding remote indices referenced for each remote rank
909 
910    Level: developer
911 
912 .seealso: `PetscSFGetLeafRanks()`
913 @*/
914 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
915 {
916   PetscFunctionBegin;
917   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
918   PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
919   if (sf->ops->GetRootRanks) {
920     PetscCall((sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote));
921   } else {
922     /* The generic implementation */
923     if (nranks)  *nranks  = sf->nranks;
924     if (ranks)   *ranks   = sf->ranks;
925     if (roffset) *roffset = sf->roffset;
926     if (rmine)   *rmine   = sf->rmine;
927     if (rremote) *rremote = sf->rremote;
928   }
929   PetscFunctionReturn(0);
930 }
931 
932 /*@C
933    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
934 
935    Not Collective
936 
937    Input Parameter:
938 .  sf - star forest
939 
940    Output Parameters:
941 +  niranks - number of leaf ranks referencing roots on this process
942 .  iranks - array of ranks
943 .  ioffset - offset in irootloc for each rank (length niranks+1)
944 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
945 
946    Level: developer
947 
948 .seealso: `PetscSFGetRootRanks()`
949 @*/
950 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
951 {
952   PetscFunctionBegin;
953   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
954   PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
955   if (sf->ops->GetLeafRanks) {
956     PetscCall((sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc));
957   } else {
958     PetscSFType type;
959     PetscCall(PetscSFGetType(sf,&type));
960     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
961   }
962   PetscFunctionReturn(0);
963 }
964 
965 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
966   PetscInt i;
967   for (i=0; i<n; i++) {
968     if (needle == list[i]) return PETSC_TRUE;
969   }
970   return PETSC_FALSE;
971 }
972 
/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Parameters:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Notes:
   Fills sf->nranks/ranks/roffset/rmine/rremote: the set of remote ranks referenced by local
   leaves, partitioned so the sf->ndranks distinguished ranks come first, with the leaf edges
   for each rank stored in CSR style at [roffset[i],roffset[i+1]) of rmine/rremote.

   Level: developer

.seealso: `PetscSFGetRootRanks()`
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size));
  /* Count the number of leaf edges to each remote rank using a hash table */
  PetscCall(PetscTableCreate(10,size,&table));
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    PetscCall(PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES));
  }
  PetscCall(PetscTableGetCount(table,&sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks));
  PetscCall(PetscTableGetHeadPosition(table,&pos));
  for (i=0; i<sf->nranks; i++) {
    PetscCall(PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]));
    ranks[i]--;             /* Convert back to 0-based */
  }
  PetscCall(PetscTableDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's members into rank numbers on the SF communicator */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_size(dgroup,&groupsize));
    PetscCall(PetscMalloc1(groupsize,&dgroupranks));
    PetscCall(PetscMalloc1(groupsize,&groupranks));
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition so PetscFindMPIInt() below can binary-search it */
  PetscCall(PetscSortIntWithArray(sf->ndranks,ranks,rcount));
  PetscCall(PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks));
  /* Build CSR offsets: edges to sf->ranks[i] will live in [roffset[i],roffset[i+1]) */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i],sf->ranks+i));
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;   /* reset so rcount[] can serve as a per-rank fill cursor below */
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: consecutive leaves usually target the same rank, so reuse the last lookup */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks (distinguished part first) */
      PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank));
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheck(irank >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount,ranks));
  PetscFunctionReturn(0);
}
1075 
/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Notes:
   Both groups are computed lazily on first request and cached on the SF; the caller must not
   free them.

   Level: developer

.seealso: `PetscSFGetWindow()`, `PetscSFRestoreWindow()`
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheck(sf->nranks >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one root per process and
       one leaf per outgoing neighbor; its root degree is the number of incoming neighbors */
    PetscCall(PetscMalloc1(sf->nranks,&remote));
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount));
    PetscCall(PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount,&indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount,&indegree));
    /* Enumerate the incoming ranks: each outgoing neighbor gathers its own rank id to us */
    PetscCall(PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks));
    PetscCall(PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks,outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing neighbors are exactly sf->ranks, already computed by PetscSFSetUpRanks() */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1137 
/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Parameter:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Parameter:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: `PetscSFSetGraph()`, `PetscSFGatherBegin()`, `PetscSFScatterBegin()`, `PetscSFComputeMultiRootOriginalNumbering()`
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi));
    *multi = sf->multi;
    sf->multi->multi = sf->multi;  /* the multi-SF is its own multi-SF */
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    PetscCall(PetscSFComputeDegreeBegin(sf,&indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf,&indegree));
    /* inoffset[i] is the start of root i's block of multi-roots: a prefix sum of the degrees */
    PetscCall(PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset));
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Each leaf fetches its root's running offset and adds 1, so every leaf of a given root
       receives a distinct multi-root index within that root's block */
    PetscCall(PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM));
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        PetscCheck(inoffset[i] + indegree[i] == inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    PetscCall(PetscMalloc1(sf->nleaves,&remote));
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER));
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset));
      /* Each leaf reports its owning rank to its multi-root */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset));
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its rank-ordered multi-root index and rebuild the graph with it */
      PetscCall(PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves,&newremote));
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset));
    }
    PetscCall(PetscFree3(inoffset,outones,outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1229 
1230 /*@C
1231    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1232 
1233    Collective
1234 
1235    Input Parameters:
1236 +  sf - original star forest
1237 .  nselected  - number of selected roots on this process
1238 -  selected   - indices of the selected roots on this process
1239 
1240    Output Parameter:
1241 .  esf - new star forest
1242 
1243    Level: advanced
1244 
1245    Note:
1246    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1247    be done by calling PetscSFGetGraph().
1248 
1249 .seealso: `PetscSFSetGraph()`, `PetscSFGetGraph()`
1250 @*/
1251 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1252 {
1253   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1254   const PetscInt    *ilocal;
1255   signed char       *rootdata,*leafdata,*leafmem;
1256   const PetscSFNode *iremote;
1257   PetscSFNode       *new_iremote;
1258   MPI_Comm          comm;
1259 
1260   PetscFunctionBegin;
1261   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1262   PetscSFCheckGraphSet(sf,1);
1263   if (nselected) PetscValidIntPointer(selected,3);
1264   PetscValidPointer(esf,4);
1265 
1266   PetscCall(PetscSFSetUp(sf));
1267   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0));
1268   PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
1269   PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
1270 
1271   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1272     PetscBool dups;
1273     PetscCall(PetscCheckDupsInt(nselected,selected,&dups));
1274     PetscCheck(!dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1275     for (i=0; i<nselected; i++)
1276       PetscCheck(selected[i] >= 0 && selected[i] < nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots);
1277   }
1278 
1279   if (sf->ops->CreateEmbeddedRootSF) PetscUseTypeMethod(sf,CreateEmbeddedRootSF ,nselected,selected,esf);
1280   else {
1281     /* A generic version of creating embedded sf */
1282     PetscCall(PetscSFGetLeafRange(sf,&minleaf,&maxleaf));
1283     maxlocal = maxleaf - minleaf + 1;
1284     PetscCall(PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem));
1285     leafdata = leafmem - minleaf;
1286     /* Tag selected roots and bcast to leaves */
1287     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1288     PetscCall(PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE));
1289     PetscCall(PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE));
1290 
1291     /* Build esf with leaves that are still connected */
1292     esf_nleaves = 0;
1293     for (i=0; i<nleaves; i++) {
1294       j = ilocal ? ilocal[i] : i;
1295       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1296          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1297       */
1298       esf_nleaves += (leafdata[j] ? 1 : 0);
1299     }
1300     PetscCall(PetscMalloc1(esf_nleaves,&new_ilocal));
1301     PetscCall(PetscMalloc1(esf_nleaves,&new_iremote));
1302     for (i=n=0; i<nleaves; i++) {
1303       j = ilocal ? ilocal[i] : i;
1304       if (leafdata[j]) {
1305         new_ilocal[n]        = j;
1306         new_iremote[n].rank  = iremote[i].rank;
1307         new_iremote[n].index = iremote[i].index;
1308         ++n;
1309       }
1310     }
1311     PetscCall(PetscSFCreate(comm,esf));
1312     PetscCall(PetscSFSetFromOptions(*esf));
1313     PetscCall(PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER));
1314     PetscCall(PetscFree2(rootdata,leafmem));
1315   }
1316   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0));
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 /*@C
1321   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1322 
1323   Collective
1324 
1325   Input Parameters:
1326 + sf - original star forest
1327 . nselected  - number of selected leaves on this process
1328 - selected   - indices of the selected leaves on this process
1329 
1330   Output Parameter:
1331 .  newsf - new star forest
1332 
1333   Level: advanced
1334 
1335 .seealso: `PetscSFCreateEmbeddedRootSF()`, `PetscSFSetGraph()`, `PetscSFGetGraph()`
1336 @*/
1337 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1338 {
1339   const PetscSFNode *iremote;
1340   PetscSFNode       *new_iremote;
1341   const PetscInt    *ilocal;
1342   PetscInt          i,nroots,*leaves,*new_ilocal;
1343   MPI_Comm          comm;
1344 
1345   PetscFunctionBegin;
1346   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1347   PetscSFCheckGraphSet(sf,1);
1348   if (nselected) PetscValidIntPointer(selected,3);
1349   PetscValidPointer(newsf,4);
1350 
1351   /* Uniq selected[] and put results in leaves[] */
1352   PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
1353   PetscCall(PetscMalloc1(nselected,&leaves));
1354   PetscCall(PetscArraycpy(leaves,selected,nselected));
1355   PetscCall(PetscSortedRemoveDupsInt(&nselected,leaves));
1356   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves);
1357 
1358   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1359   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) PetscUseTypeMethod(sf,CreateEmbeddedLeafSF ,nselected,leaves,newsf);
1360   else {
1361     PetscCall(PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote));
1362     PetscCall(PetscMalloc1(nselected,&new_ilocal));
1363     PetscCall(PetscMalloc1(nselected,&new_iremote));
1364     for (i=0; i<nselected; ++i) {
1365       const PetscInt l     = leaves[i];
1366       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1367       new_iremote[i].rank  = iremote[l].rank;
1368       new_iremote[i].index = iremote[l].index;
1369     }
1370     PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf));
1371     PetscCall(PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER));
1372   }
1373   PetscCall(PetscFree(leaves));
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 /*@C
1378    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1379 
1380    Collective on PetscSF
1381 
1382    Input Parameters:
1383 +  sf - star forest on which to communicate
1384 .  unit - data type associated with each node
1385 .  rootdata - buffer to broadcast
1386 -  op - operation to use for reduction
1387 
1388    Output Parameter:
1389 .  leafdata - buffer to be reduced with values from each leaf's respective root
1390 
1391    Level: intermediate
1392 
1393    Notes:
1394     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1395     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1396     use PetscSFBcastWithMemTypeBegin() instead.
1397 .seealso: `PetscSFBcastEnd()`, `PetscSFBcastWithMemTypeBegin()`
1398 @*/
1399 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1400 {
1401   PetscMemType   rootmtype,leafmtype;
1402 
1403   PetscFunctionBegin;
1404   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1405   PetscCall(PetscSFSetUp(sf));
1406   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
1407   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1408   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1409   PetscUseTypeMethod(sf,BcastBegin ,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1410   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
1411   PetscFunctionReturn(0);
1412 }
1413 
/*@C
   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()

   Collective on PetscSF

   Input Parameters:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootmtype - memory type of rootdata
.  rootdata - buffer to broadcast
.  leafmtype - memory type of leafdata
-  op - operation to use for reduction

   Output Parameter:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: `PetscSFBcastEnd()`, `PetscSFBcastBegin()`
@*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
  /* Caller-supplied memory types skip the cost of probing the pointers with PetscGetMemType() */
  PetscUseTypeMethod(sf,BcastBegin ,unit,rootmtype,rootdata,leafmtype,leafdata,op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}
1444 
/*@C
   PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Parameter:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: `PetscSFSetGraph()`, `PetscSFReduceEnd()`
@*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0));
  /* Waits for the communication started by the matching PetscSFBcastBegin() to complete */
  PetscUseTypeMethod(sf,BcastEnd ,unit,rootdata,leafdata,op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0));
  PetscFunctionReturn(0);
}
1472 
1473 /*@C
1474    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1475 
1476    Collective
1477 
1478    Input Parameters:
1479 +  sf - star forest
1480 .  unit - data type
1481 .  leafdata - values to reduce
1482 -  op - reduction operation
1483 
1484    Output Parameter:
1485 .  rootdata - result of reduction of values from all leaves of each root
1486 
1487    Level: intermediate
1488 
1489    Notes:
1490     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1491     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1492     use PetscSFReduceWithMemTypeBegin() instead.
1493 
1494 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceWithMemTypeBegin()`
1495 @*/
1496 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1497 {
1498   PetscMemType   rootmtype,leafmtype;
1499 
1500   PetscFunctionBegin;
1501   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1502   PetscCall(PetscSFSetUp(sf));
1503   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0));
1504   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1505   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1506   PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op));
1507   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0));
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 /*@C
1512    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1513 
1514    Collective
1515 
1516    Input Parameters:
1517 +  sf - star forest
1518 .  unit - data type
1519 .  leafmtype - memory type of leafdata
1520 .  leafdata - values to reduce
1521 .  rootmtype - memory type of rootdata
1522 -  op - reduction operation
1523 
1524    Output Parameter:
1525 .  rootdata - result of reduction of values from all leaves of each root
1526 
1527    Level: intermediate
1528 
1529 .seealso: `PetscSFBcastBegin()`, `PetscSFReduceBegin()`
1530 @*/
1531 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1532 {
1533   PetscFunctionBegin;
1534   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1535   PetscCall(PetscSFSetUp(sf));
1536   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0));
1537   PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op));
1538   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0));
1539   PetscFunctionReturn(0);
1540 }
1541 
/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: `PetscSFSetGraph()`, `PetscSFBcastEnd()`
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* NOTE(review): logging suppressed when used inside a VecScatter (vscat.logging) — confirm against vscat code */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0));
  /* Dispatch to the implementation's ReduceEnd; errors if the type lacks it */
  PetscUseTypeMethod(sf,ReduceEnd ,unit,leafdata,rootdata,op);
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0));
  PetscFunctionReturn(0);
}
1569 
1570 /*@C
1571    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1572 
1573    Collective
1574 
1575    Input Parameters:
1576 +  sf - star forest
1577 .  unit - data type
1578 .  leafdata - leaf values to use in reduction
1579 -  op - operation to use for reduction
1580 
1581    Output Parameters:
1582 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1583 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1584 
1585    Level: advanced
1586 
1587    Note:
1588    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1589    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1590    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1591    integers.
1592 
1593 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
1594 @*/
1595 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1596 {
1597   PetscMemType   rootmtype,leafmtype,leafupdatemtype;
1598 
1599   PetscFunctionBegin;
1600   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1601   PetscCall(PetscSFSetUp(sf));
1602   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0));
1603   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1604   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1605   PetscCall(PetscGetMemType(leafupdate,&leafupdatemtype));
1606   PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1607   PetscUseTypeMethod(sf,FetchAndOpBegin ,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1608   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0));
1609   PetscFunctionReturn(0);
1610 }
1611 
/*@C
   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootmtype - memory type of rootdata
.  leafmtype - memory type of leafdata
.  leafdata - leaf values to use in reduction
.  leafupdatemtype - memory type of leafupdate
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note: See PetscSFFetchAndOpBegin() for more details.

.seealso: `PetscSFFetchAndOpBegin()`, `PetscSFComputeDegreeBegin()`, `PetscSFReduceBegin()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  /* Implementations only support leafdata and leafupdate residing in the same memory space */
  PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  PetscUseTypeMethod(sf,FetchAndOpBegin ,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}
1647 
/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFReduceEnd()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  /* Dispatch to the implementation's FetchAndOpEnd; errors if the type lacks it */
  PetscUseTypeMethod(sf,FetchAndOpEnd ,unit,rootdata,leafdata,leafupdate,op);
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  PetscFunctionReturn(0);
}
1676 
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
   The result is not available until the matching PetscSFComputeDegreeEnd() returns; this routine sets *degree to NULL.

.seealso: `PetscSFGatherBegin()`, `PetscSFComputeDegreeEnd()`
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    PetscCheck(!sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    PetscCall(PetscMalloc1(nroots,&sf->degree));
    PetscCall(PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Reducing a 1 from every leaf with MPI_SUM counts the leaves attached to each root.
       degreetmp is indexed relative to minleaf, so shift the pointer to be indexable by leaf id. */
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
  }
  *degree = NULL; /* only valid after PetscSFComputeDegreeEnd() */
  PetscFunctionReturn(0);
}
1714 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: `PetscSFComputeDegreeBegin()`
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    PetscCheck(sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Same shifted pointer as in Begin so the buffer is indexable by leaf id */
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    sf->degreeknown = PETSC_TRUE; /* cache the result; subsequent Begin/End pairs return it directly */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1748 
1749 /*@C
1750    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1751    Each multi-root is assigned index of the corresponding original root.
1752 
1753    Collective
1754 
1755    Input Parameters:
1756 +  sf - star forest
1757 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1758 
1759    Output Parameters:
1760 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1761 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1762 
1763    Level: developer
1764 
1765    Notes:
1766    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1767 
1768 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFComputeDegreeEnd()`, `PetscSFGetMultiSF()`
1769 @*/
1770 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1771 {
1772   PetscSF             msf;
1773   PetscInt            i, j, k, nroots, nmroots;
1774 
1775   PetscFunctionBegin;
1776   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1777   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1778   if (nroots) PetscValidIntPointer(degree,2);
1779   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1780   PetscValidPointer(multiRootsOrigNumbering,4);
1781   PetscCall(PetscSFGetMultiSF(sf,&msf));
1782   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1783   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1784   for (i=0,j=0,k=0; i<nroots; i++) {
1785     if (!degree[i]) continue;
1786     for (j=0; j<degree[i]; j++,k++) {
1787       (*multiRootsOrigNumbering)[k] = i;
1788     }
1789   }
1790   PetscCheck(k == nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1791   if (nMultiRoots) *nMultiRoots = nmroots;
1792   PetscFunctionReturn(0);
1793 }
1794 
1795 /*@C
1796    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1797 
1798    Collective
1799 
1800    Input Parameters:
1801 +  sf - star forest
1802 .  unit - data type
1803 -  leafdata - leaf data to gather to roots
1804 
1805    Output Parameter:
1806 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1807 
1808    Level: intermediate
1809 
1810 .seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterBegin()`
1811 @*/
1812 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1813 {
1814   PetscSF        multi = NULL;
1815 
1816   PetscFunctionBegin;
1817   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1818   PetscCall(PetscSFSetUp(sf));
1819   PetscCall(PetscSFGetMultiSF(sf,&multi));
1820   PetscCall(PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE));
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 /*@C
1825    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1826 
1827    Collective
1828 
1829    Input Parameters:
1830 +  sf - star forest
1831 .  unit - data type
1832 -  leafdata - leaf data to gather to roots
1833 
1834    Output Parameter:
1835 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1836 
1837    Level: intermediate
1838 
1839 .seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterEnd()`
1840 @*/
1841 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1842 {
1843   PetscSF        multi = NULL;
1844 
1845   PetscFunctionBegin;
1846   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1847   PetscCall(PetscSFGetMultiSF(sf,&multi));
1848   PetscCall(PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE));
1849   PetscFunctionReturn(0);
1850 }
1851 
/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: `PetscSFComputeDegreeBegin()`, `PetscSFScatterEnd()`
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  /* A scatter is a broadcast with MPI_REPLACE over the multi-SF (one multi-root slot per leaf) */
  PetscCall(PetscSFGetMultiSF(sf,&multi));
  PetscCall(PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE));
  PetscFunctionReturn(0);
}
1880 
/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: `PetscSFComputeDegreeEnd()`, `PetscSFScatterBegin()`
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Finish the MPI_REPLACE broadcast over the multi-SF started in PetscSFScatterBegin() */
  PetscCall(PetscSFGetMultiSF(sf,&multi));
  PetscCall(PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE));
  PetscFunctionReturn(0);
}
1908 
1909 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1910 {
1911   PetscInt        i, n, nleaves;
1912   const PetscInt *ilocal = NULL;
1913   PetscHSetI      seen;
1914 
1915   PetscFunctionBegin;
1916   if (PetscDefined(USE_DEBUG)) {
1917     PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL));
1918     PetscCall(PetscHSetICreate(&seen));
1919     for (i = 0; i < nleaves; i++) {
1920       const PetscInt leaf = ilocal ? ilocal[i] : i;
1921       PetscCall(PetscHSetIAdd(seen,leaf));
1922     }
1923     PetscCall(PetscHSetIGetSize(seen,&n));
1924     PetscCheck(n == nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1925     PetscCall(PetscHSetIDestroy(&seen));
1926   }
1927   PetscFunctionReturn(0);
1928 }
1929 
/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: `PetscSF`, `PetscSFComposeInverse()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA));
  PetscCall(PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB));
  /* If sfA's leaves are not contiguous, rearrange remotePointsA so that entry r holds the sfA root
     connected to local point r (treated here as a root of sfB); (-1,-1) marks "no connection". */
  if (localPointsA) {
    PetscCall(PetscMalloc1(numRootsB,&reorderedRemotePointsA));
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf outside sfB's root space cannot be composed */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Broadcast each sfB root's sfA-root info to sfB's leaves; leafdataB is indexed by leaf-minleaf,
     so shift the pointer to make it indexable by the leaf id itself */
  PetscCall(PetscSFGetLeafRange(sfB,&minleaf,&maxleaf));
  PetscCall(PetscMalloc1(maxleaf-minleaf+1,&leafdataB));
  PetscCall(PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* The composite SF can reuse contiguous leaf storage (ilocal=NULL) only if sfB's leaves are
     contiguous AND every leaf received a valid sfA root (rank != -1) */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership transfers to the new SF below */
  } else {
    /* Compact: keep only leaves that are actually connected to some sfA root */
    PetscCall(PetscMalloc1(numLeavesBA,&localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA,&remotePointsBA));
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}
2018 
/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`, `PetscSFCreateInverseSF()`
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not meet.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  /* Spread sfA's root info over sfB's leaf range, indexed by (leaf id - minleaf);
     (-1,-1) marks leaves of sfB with no corresponding sfA leaf */
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA));
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* Contiguous sfA leaf storage: leaf id is just i */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  PetscCall(PetscMalloc1(numRootsB,&localPointsBA));
  PetscCall(PetscMalloc1(numRootsB,&remotePointsBA));
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Push each sfB leaf's sfA-root info up to sfB's roots (inverting sfB); the shifted pointer
     makes reorderedRemotePointsA indexable by leaf id */
  PetscCall(PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
  PetscCall(PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* Compact in place: drop sfB roots that received nothing (rank still -1); safe because
     numLeavesBA <= i at every step */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}
2121 
/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF

  Note:
  The caller owns the returned SF and is responsible for destroying it.
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt     *ilocal;
  const PetscSFNode  *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode        *liremote;
  PetscSF            lsf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation build the local SF itself if it knows a faster way */
  if (sf->ops->CreateLocalSF) PetscUseTypeMethod(sf,CreateLocalSF ,out);
  else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
    PetscCallMPI(MPI_Comm_rank(comm,&myrank));

    /* Find out local edges and build a local SF */
    PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
    /* First pass: count edges whose remote root lives on this rank */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    PetscCall(PetscMalloc1(lnleaves,&lilocal));
    PetscCall(PetscMalloc1(lnleaves,&liremote));

    /* Second pass: copy the intra-process edges, re-homing them onto rank 0 of PETSC_COMM_SELF */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    PetscCall(PetscSFCreate(PETSC_COMM_SELF,&lsf));
    PetscCall(PetscSFSetFromOptions(lsf));
    PetscCall(PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(lsf));
    *out = lsf;
  }
  PetscFunctionReturn(0);
}
2171 
/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0

   Input Parameters:
+  sf - star forest
.  unit - data type
-  rootdata - root values

   Output Parameter:
.  leafdata - leaf values (only written on rank 0)

   Note: the whole operation (begin+end in one call) is logged under the PETSCSF_BcastBegin event.
*/
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscMemType       rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
  /* Autodetect host vs device location of the buffers */
  PetscCall(PetscGetMemType(rootdata,&rootmtype));
  PetscCall(PetscGetMemType(leafdata,&leafmtype));
  /* Dispatch to the implementation's BcastToZero; errors if the type lacks it */
  PetscUseTypeMethod(sf,BcastToZero ,unit,rootmtype,rootdata,leafmtype,leafdata);
  PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}
2187 
2188 /*@
2189   PetscSFConcatenate - concatenate multiple SFs into one
2190 
2191   Input Parameters:
2192 + comm - the communicator
2193 . nsfs - the number of input PetscSF
2194 . sfs  - the array of input PetscSF
2195 . shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE)
2196 - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage
2197 
2198   Output Parameters:
2199 . newsf - The resulting PetscSF
2200 
2201   Level: developer
2202 
2203   Notes:
2204   The communicator of all SFs in sfs must be comm.
2205 
2206   The offsets in leafOffsets are added to the original leaf indices.
2207 
2208   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2209   In this case, NULL is also passed as ilocal to the resulting SF.
2210 
2211   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2212   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2213 
2214 .seealso: `PetscSF`, `PetscSFCompose()`, `PetscSFGetGraph()`, `PetscSFSetGraph()`
2215 @*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt            i, s, nLeaves, nRoots;
  PetscInt           *leafArrayOffsets; /* prefix sums of per-SF leaf counts; offsets into the concatenated leaf arrays */
  PetscInt           *ilocal_new;       /* concatenated, renumbered leaf indices (stays NULL when all inputs have contiguous leaves) */
  PetscSFNode        *iremote_new;      /* concatenated remote (rank, root index) pairs for the result SF */
  PetscInt           *rootOffsets;      /* prefix sums of per-SF root counts (all zero when shareRoots) */
  PetscBool           all_ilocal_null = PETSC_FALSE;
  PetscMPIInt         rank;

  PetscFunctionBegin;
  {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    PetscCall(PetscSFCreate(comm,&dummy));
    PetscValidLogicalCollectiveInt(dummy,nsfs,2);
    PetscValidPointer(sfs,3);
    for (i=0; i<nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i],PETSCSF_CLASSID,3);
      PetscCheckSameComm(dummy,1,sfs[i],3);
    }
    PetscValidLogicalCollectiveBool(dummy,shareRoots,4);
    if (leafOffsets) PetscValidIntPointer(leafOffsets,5);
    PetscValidPointer(newsf,6);
    PetscCall(PetscSFDestroy(&dummy));
  }
  /* Trivial case: concatenating zero SFs yields an empty SF */
  if (!nsfs) {
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscFunctionReturn(0);
  }
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* Compute root offsets. With shareRoots, all SFs address the same root space,
     so no offsetting is needed (rootOffsets stays all-zero from PetscCalloc1);
     in debug mode, verify the root counts actually agree across the inputs. */
  PetscCall(PetscCalloc1(nsfs+1, &rootOffsets));
  if (shareRoots) {
    PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
    if (PetscDefined(USE_DEBUG)) {
      for (s=1; s<nsfs; s++) {
        PetscInt nr;

        PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
      }
    }
  } else {
    /* Roots are stacked: sfs[s]'s root space begins at rootOffsets[s] in the result */
    for (s=0; s<nsfs; s++) {
      PetscInt nr;

      PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
      rootOffsets[s+1] = rootOffsets[s] + nr;
    }
    nRoots = rootOffsets[nsfs];
  }

  /* Calculate leaf array offsets and automatic root offsets */
  PetscCall(PetscMalloc1(nsfs+1,&leafArrayOffsets));
  leafArrayOffsets[0] = 0;
  for (s=0; s<nsfs; s++) {
    PetscInt        nl;

    PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
    leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* leafOffsets may be omitted only if every input SF has contiguous leaves
     (ilocal == NULL); otherwise we would not know how to renumber the leaves */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s=0; s<nsfs; s++) {
      const PetscInt *ilocal;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL;
  if (!all_ilocal_null) {
    /* Shift each SF's leaf indices by the caller-provided leafOffsets[s];
       -1 fill is a defensive sentinel (every entry is overwritten below) */
    PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
    for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1;
    for (s = 0; s<nsfs; s++) {
      const PetscInt   *ilocal;
      PetscInt         *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
      PetscInt          i, nleaves_l;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
      /* NULL ilocal means contiguous leaves 0..nleaves_l-1 */
      for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots.
     For each input SF, build a helper SF with the same roots/iremote but
     contiguous leaves, then broadcast each root's new identity
     (this rank, old root index + rootOffsets[s]) to its leaves; the result
     lands directly in the corresponding segment of iremote_new. */
  PetscCall(PetscMalloc1(nLeaves, &iremote_new));
  for (i = 0; i < nLeaves; i++) {
    /* defensive sentinels; the broadcasts below overwrite every entry */
    iremote_new[i].rank   = -1;
    iremote_new[i].index  = -1;
  }
  for (s = 0; s<nsfs; s++) {
    PetscInt            i, nl, nr;
    PetscSF             tmp_sf;
    const PetscSFNode  *iremote;
    PetscSFNode        *tmp_rootdata;
    PetscSFNode        *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];

    PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
    PetscCall(PetscSFCreate(comm, &tmp_sf));
    /* create helper SF with contiguous leaves */
    PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES));
    PetscCall(PetscSFSetUp(tmp_sf));
    PetscCall(PetscMalloc1(nr, &tmp_rootdata));
    for (i = 0; i < nr; i++) {
      tmp_rootdata[i].index = i + rootOffsets[s];
      tmp_rootdata[i].rank  = (PetscInt) rank;
    }
    /* MPIU_2INT matches the two-PetscInt layout of PetscSFNode, so the
       (rank,index) pairs travel as one unit — presumably; confirmed by
       identical usage elsewhere in PETSc's SF code */
    PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(  tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFDestroy(&tmp_sf));
    PetscCall(PetscFree(tmp_rootdata));
  }

  /* Build the new SF */
  PetscCall(PetscSFCreate(comm, newsf));
  /* ownership of ilocal_new/iremote_new transfers to *newsf */
  PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*newsf));
  PetscCall(PetscFree(rootOffsets));
  PetscCall(PetscFree(leafArrayOffsets));
  PetscFunctionReturn(0);
}
2347