xref: /petsc/src/vec/is/sf/interface/sf.c (revision 6aad120caa16b1027d343a5f30f73d01448e4dc0)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petscctable.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <cuda_runtime.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 #endif
13 
14 #if defined(PETSC_CLANG_STATIC_ANALYZER)
15 void PetscSFCheckGraphSet(PetscSF,int);
16 #else
17 #if defined(PETSC_USE_DEBUG)
18 #  define PetscSFCheckGraphSet(sf,arg) do {                          \
19     if (PetscUnlikely(!(sf)->graphset))                              \
20       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
21   } while (0)
22 #else
23 #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
24 #endif
25 #endif
26 
27 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
28 
29 /*@
30    PetscSFCreate - create a star forest communication context
31 
32    Collective
33 
34    Input Parameter:
35 .  comm - communicator on which the star forest will operate
36 
37    Output Parameter:
38 .  sf - new star forest context
39 
40    Options Database Keys:
41 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
42 .  -sf_type window    -Use MPI-3 one-sided window for communication
43 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
44 
45    Level: intermediate
46 
47    Notes:
   When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv,
49    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
50    SFs are optimized and they have better performance than general SFs.
51 
52 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
53 @*/
54 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
55 {
56   PetscSF        b;
57 
58   PetscFunctionBegin;
59   PetscValidPointer(sf,2);
60   PetscCall(PetscSFInitializePackage());
61 
62   PetscCall(PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView));
63 
64   b->nroots    = -1;
65   b->nleaves   = -1;
66   b->minleaf   = PETSC_MAX_INT;
67   b->maxleaf   = PETSC_MIN_INT;
68   b->nranks    = -1;
69   b->rankorder = PETSC_TRUE;
70   b->ingroup   = MPI_GROUP_NULL;
71   b->outgroup  = MPI_GROUP_NULL;
72   b->graphset  = PETSC_FALSE;
73 #if defined(PETSC_HAVE_DEVICE)
74   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
75   b->use_stream_aware_mpi = PETSC_FALSE;
76   b->unknown_input_stream= PETSC_FALSE;
77   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
78     b->backend = PETSCSF_BACKEND_KOKKOS;
79   #elif defined(PETSC_HAVE_CUDA)
80     b->backend = PETSCSF_BACKEND_CUDA;
81   #elif defined(PETSC_HAVE_HIP)
82     b->backend = PETSCSF_BACKEND_HIP;
83   #endif
84 
85   #if defined(PETSC_HAVE_NVSHMEM)
86     b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
87     b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
88     PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL));
89     PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL));
90   #endif
91 #endif
92   b->vscat.from_n = -1;
93   b->vscat.to_n   = -1;
94   b->vscat.unit   = MPIU_SCALAR;
95  *sf = b;
96   PetscFunctionReturn(0);
97 }
98 
99 /*@
100    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
101 
102    Collective
103 
104    Input Parameter:
105 .  sf - star forest
106 
107    Level: advanced
108 
109 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
110 @*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the type-specific implementation release its own data first */
  if (sf->ops->Reset) PetscCall((*sf->ops->Reset)(sf));
  /* Return the graph description to the same sentinel state set by PetscSFCreate() */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine_alloc/remote_alloc are the owned copies of the leaf/root arrays (NULL when the user retained ownership) */
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  /* Per-rank communication tables built during setup */
  PetscCall(PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup  != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
  /* Break the back-reference before destroying the multi-SF to avoid recursing into ourselves */
  if (sf->multi) sf->multi->multi = NULL;
  PetscCall(PetscSFDestroy(&sf->multi));
  PetscCall(PetscLayoutDestroy(&sf->map));

 #if defined(PETSC_HAVE_DEVICE)
  /* Device mirrors of the leaf indices (one slot per memory type variant) */
  for (PetscInt i=0; i<2; i++) PetscCall(PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]));
 #endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
142 
143 /*@C
144    PetscSFSetType - Set the PetscSF communication implementation
145 
146    Collective on PetscSF
147 
148    Input Parameters:
149 +  sf - the PetscSF context
150 -  type - a known method
151 
152    Options Database Key:
153 .  -sf_type <type> - Sets the method; use -help for a list
154    of available methods (for instance, window, basic, neighbor)
155 
156    Notes:
157    See "include/petscsf.h" for available methods (for instance)
158 +    PETSCSFWINDOW - MPI-2/3 one-sided
159 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
160 
161   Level: intermediate
162 
163 .seealso: PetscSFType, PetscSFCreate()
164 @*/
165 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
166 {
167   PetscBool      match;
168   PetscErrorCode (*r)(PetscSF);
169 
170   PetscFunctionBegin;
171   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
172   PetscValidCharPointer(type,2);
173 
174   PetscCall(PetscObjectTypeCompare((PetscObject)sf,type,&match));
175   if (match) PetscFunctionReturn(0);
176 
177   PetscCall(PetscFunctionListFind(PetscSFList,type,&r));
178   PetscCheck(r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
179   /* Destroy the previous PetscSF implementation context */
180   if (sf->ops->Destroy) PetscCall((*(sf)->ops->Destroy)(sf));
181   PetscCall(PetscMemzero(sf->ops,sizeof(*sf->ops)));
182   PetscCall(PetscObjectChangeTypeName((PetscObject)sf,type));
183   PetscCall((*r)(sf));
184   PetscFunctionReturn(0);
185 }
186 
187 /*@C
188   PetscSFGetType - Get the PetscSF communication implementation
189 
190   Not Collective
191 
192   Input Parameter:
193 . sf  - the PetscSF context
194 
195   Output Parameter:
196 . type - the PetscSF type name
197 
198   Level: intermediate
199 
200 .seealso: PetscSFSetType(), PetscSFCreate()
201 @*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  /* The name is NULL until PetscSFSetType() has been called (possibly implicitly by PetscSFSetUp()) */
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}
210 
211 /*@C
212    PetscSFDestroy - destroy star forest
213 
214    Collective
215 
216    Input Parameter:
217 .  sf - address of star forest
218 
219    Level: intermediate
220 
221 .seealso: PetscSFCreate(), PetscSFReset()
222 @*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Drop a reference; only the last reference performs the actual destruction */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  /* Release the graph and all derived data, then the implementation's own context */
  PetscCall(PetscSFReset(*sf));
  if ((*sf)->ops->Destroy) PetscCall((*(*sf)->ops->Destroy)(*sf));
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* bs > 1 means a derived MPI datatype was created (see PetscSFDuplicate()) and must be freed */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(0);
}
236 
237 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
238 {
239   PetscInt           i, nleaves;
240   PetscMPIInt        size;
241   const PetscInt    *ilocal;
242   const PetscSFNode *iremote;
243 
244   PetscFunctionBegin;
245   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
246   PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote));
247   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size));
248   for (i = 0; i < nleaves; i++) {
249     const PetscInt rank = iremote[i].rank;
250     const PetscInt remote = iremote[i].index;
251     const PetscInt leaf = ilocal ? ilocal[i] : i;
252     PetscCheckFalse(rank < 0 || rank >= size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size);
253     PetscCheckFalse(remote < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i);
254     PetscCheckFalse(leaf < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i);
255   }
256   PetscFunctionReturn(0);
257 }
258 
259 /*@
260    PetscSFSetUp - set up communication structures
261 
262    Collective
263 
264    Input Parameter:
265 .  sf - star forest communication object
266 
267    Level: beginner
268 
269 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
270 @*/
271 PetscErrorCode PetscSFSetUp(PetscSF sf)
272 {
273   PetscFunctionBegin;
274   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
275   PetscSFCheckGraphSet(sf,1);
276   if (sf->setupcalled) PetscFunctionReturn(0);
277   PetscCall(PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0));
278   PetscCall(PetscSFCheckGraphValid_Private(sf));
279   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf,PETSCSFBASIC)); /* Zero all sf->ops */
280   if (sf->ops->SetUp) PetscCall((*sf->ops->SetUp)(sf));
281 #if defined(PETSC_HAVE_CUDA)
282   if (sf->backend == PETSCSF_BACKEND_CUDA) {
283     sf->ops->Malloc = PetscSFMalloc_CUDA;
284     sf->ops->Free   = PetscSFFree_CUDA;
285   }
286 #endif
287 #if defined(PETSC_HAVE_HIP)
288   if (sf->backend == PETSCSF_BACKEND_HIP) {
289     sf->ops->Malloc = PetscSFMalloc_HIP;
290     sf->ops->Free   = PetscSFFree_HIP;
291   }
292 #endif
293 
294 #
295 #if defined(PETSC_HAVE_KOKKOS)
296   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
297     sf->ops->Malloc = PetscSFMalloc_Kokkos;
298     sf->ops->Free   = PetscSFFree_Kokkos;
299   }
300 #endif
301   PetscCall(PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0));
302   sf->setupcalled = PETSC_TRUE;
303   PetscFunctionReturn(0);
304 }
305 
306 /*@
307    PetscSFSetFromOptions - set PetscSF options using the options database
308 
309    Logically Collective
310 
311    Input Parameter:
312 .  sf - star forest
313 
314    Options Database Keys:
315 +  -sf_type               - implementation type, see PetscSFSetType()
316 .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
.  -sf_unknown_input_stream - Assume the input root/leafdata was computed on streams unknown to SF, so that SF
                              performs the needed synchronization before processing the data (default: false).
                              This option is only meaningful on device builds.
320 .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
321                                If true, this option only works with -use_gpu_aware_mpi 1.
322 
323 -  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
324                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
325                               the only available is kokkos.
326 
327    Level: intermediate
328 @*/
329 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
330 {
331   PetscSFType    deft;
332   char           type[256];
333   PetscErrorCode ierr;
334   PetscBool      flg;
335 
336   PetscFunctionBegin;
337   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
338   ierr = PetscObjectOptionsBegin((PetscObject)sf);PetscCall(ierr);
339   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
340   PetscCall(PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg));
341   PetscCall(PetscSFSetType(sf,flg ? type : deft));
342   PetscCall(PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL));
343  #if defined(PETSC_HAVE_DEVICE)
344   {
345     char        backendstr[32] = {0};
346     PetscBool   isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
347     /* Change the defaults set in PetscSFCreate() with command line options */
348     PetscCall(PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL));
349     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL));
350     PetscCall(PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set));
351     PetscCall(PetscStrcasecmp("cuda",backendstr,&isCuda));
352     PetscCall(PetscStrcasecmp("kokkos",backendstr,&isKokkos));
353     PetscCall(PetscStrcasecmp("hip",backendstr,&isHip));
354   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
355     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
356     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
357     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
358     else PetscCheck(!set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
359   #elif defined(PETSC_HAVE_KOKKOS)
360     PetscCheckFalse(set && !isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
361    #endif
362   }
363  #endif
364   if (sf->ops->SetFromOptions) PetscCall((*sf->ops->SetFromOptions)(PetscOptionsObject,sf));
365   ierr = PetscOptionsEnd();PetscCall(ierr);
366   PetscFunctionReturn(0);
367 }
368 
369 /*@
370    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
371 
372    Logically Collective
373 
374    Input Parameters:
375 +  sf - star forest
376 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
377 
378    Level: advanced
379 
380 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
381 @*/
382 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
383 {
384   PetscFunctionBegin;
385   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
386   PetscValidLogicalCollectiveBool(sf,flg,2);
387   PetscCheck(!sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
388   sf->rankorder = flg;
389   PetscFunctionReturn(0);
390 }
391 
392 /*@C
393    PetscSFSetGraph - Set a parallel star forest
394 
395    Collective
396 
397    Input Parameters:
398 +  sf - star forest
399 .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
400 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
401 .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
402 during setup in debug mode)
403 .  localmode - copy mode for ilocal
404 .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
405 during setup in debug mode)
406 -  remotemode - copy mode for iremote
407 
408    Level: intermediate
409 
410    Notes:
411    Leaf indices in ilocal must be unique, otherwise an error occurs.
412 
413    Input arrays ilocal and iremote follow the PetscCopyMode semantics.
414    In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
415    PETSc might modify the respective array;
416    if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
417    Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).
418 
419    Fortran Notes:
420    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.
421 
422    Developer Notes:
423    We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
   This also makes it easy to compare leaf sets of two SFs.
425 
426 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
427 @*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscBool       unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  PetscCheckFalse(nroots  < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots);
  PetscCheckFalse(nleaves < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves);
  PetscCheck(localmode  >= PETSC_COPY_VALUES && localmode  <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong localmode %d",localmode);
  PetscCheck(remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong remotemode %d",remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0));

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES, work on private copies so the caller's arrays are left intact;
     from here on, ilocal/iremote point at memory this SF may modify (sort) and possibly own */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves,&tlocal));
    PetscCall(PetscArraycpy(tlocal,ilocal,nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves,&tremote));
    PetscCall(PetscArraycpy(tremote,iremote,nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode   work;

    /* Sort leaves (keeping iremote paired) to find min/max, detect duplicates, and detect contiguity.
       Note this mutates the arrays also for PETSC_OWN_POINTER/PETSC_USE_POINTER, as documented above. */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    unique = PetscNot(unique);
    PetscCheck(sf->allow_multi_leaves || unique,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves-1];
    contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1);
  } else {
    /* NULL ilocal means leaves are implicitly 0..nleaves-1 */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* A contiguous 0..nleaves-1 leaf set is represented by a NULL ilocal; drop the explicit array
     (freeing it only if we own it) */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  /* mine/remote are the working pointers; *_alloc record ownership (NULL when caller keeps ownership) */
  sf->mine            = ilocal;
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc    = NULL;
  } else {
    sf->mine_alloc    = ilocal;
  }
  sf->remote          = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc  = NULL;
  } else {
    sf->remote_alloc  = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
505 
506 /*@
507   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
508 
509   Collective
510 
511   Input Parameters:
512 + sf      - The PetscSF
513 . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
514 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
515 
516   Notes:
517   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
518   n and N are local and global sizes of x respectively.
519 
520   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
521   sequential vectors y on all ranks.
522 
523   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
524   sequential vector y on rank 0.
525 
526   In above cases, entries of x are roots and entries of y are leaves.
527 
528   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
529   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
530   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
532   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
533 
534   In this case, roots and leaves are symmetric.
535 
536   Level: intermediate
537  @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* map is ignored for ALLTOALL, so only validate it otherwise */
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern);
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* Every rank has `size` roots and `size` leaves; build a uniform layout ourselves */
    type = PETSCSFALLTOALL;
    PetscCall(PetscLayoutCreate(comm,&sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map,size));
    PetscCall(PetscLayoutSetSize(sf->map,((PetscInt)size)*size));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map,&n));
    PetscCall(PetscLayoutGetSize(map,&N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it:
       after MPI_MAX, res[0] = max(n) and -res[1] = min(n); equality means uniform n */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map,&sf->map));
  }
  PetscCall(PetscSFSetType(sf,type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 receives data, hence it alone has leaves */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
604 
605 /*@
606    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
607 
608    Collective
609 
610    Input Parameter:
611 .  sf - star forest to invert
612 
613    Output Parameter:
614 .  isf - inverse of sf
615 
616    Level: advanced
617 
618    Notes:
619    All roots must have degree 1.
620 
621    The local space may be a permutation, but cannot be sparse.
622 
623 .seealso: PetscSFSetGraph()
624 @*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL));
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
  PetscCall(PetscMalloc2(nroots,&roots,maxlocal,&leaves));
  /* Every local slot records its own (rank, index) identity ... */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* ... while roots start as the sentinel (-1,-1), meaning "no leaf points here" */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's identity to its root; since all roots have degree 1 (see manpage),
     MPI_REPLACE leaves each root holding the identity of its unique leaf */
  PetscCall(PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    PetscCall(PetscMalloc1(count,&newilocal));
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* newilocal ownership passes to the new SF (PETSC_OWN_POINTER); roots is copied, so both
     temporaries can be freed here */
  PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf));
  PetscCall(PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots,leaves));
  PetscFunctionReturn(0);
}
673 
674 /*@
675    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
676 
677    Collective
678 
679    Input Parameters:
680 +  sf - communication object to duplicate
681 -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
682 
683    Output Parameter:
684 .  newsf - new communication object
685 
686    Level: beginner
687 
688 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
689 @*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  MPI_Datatype   dtype=MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf));
  PetscCall(PetscSFGetType(sf,&type));
  if (type) PetscCall(PetscSFSetType(*newsf,type));
  (*newsf)->allow_multi_leaves = sf->allow_multi_leaves; /* Duplicate this flag earlier since PetscSFSetGraph() below checks on this flag */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graph: copy the explicit leaf/root arrays */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
      PetscCall(PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES));
    } else {
      /* Patterned graph: rebuild from the layout and pattern */
      PetscCall(PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit,&dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream= sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  /* Let the implementation copy its own context */
  if (sf->ops->Duplicate) PetscCall((*sf->ops->Duplicate)(sf,opt,*newsf));
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}
733 
734 /*@C
735    PetscSFGetGraph - Get the graph specifying a parallel star forest
736 
737    Not Collective
738 
739    Input Parameter:
740 .  sf - star forest
741 
742    Output Parameters:
743 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
744 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
745 .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
746 -  iremote - remote locations of root vertices for each leaf on the current process
747 
748    Notes:
749      We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
750 
751      The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
752      see its manpage for details.
753 
754    Fortran Notes:
755      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
756      want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
757 
758      To check for a NULL ilocal use
759 $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
760 
761    Level: intermediate
762 
763 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
764 @*/
765 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
766 {
767   PetscFunctionBegin;
768   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
769   if (sf->ops->GetGraph) {
770     PetscCall((sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote));
771   } else {
772     if (nroots) *nroots = sf->nroots;
773     if (nleaves) *nleaves = sf->nleaves;
774     if (ilocal) *ilocal = sf->mine;
775     if (iremote) *iremote = sf->remote;
776   }
777   PetscFunctionReturn(0);
778 }
779 
780 /*@
781    PetscSFGetLeafRange - Get the active leaf ranges
782 
783    Not Collective
784 
785    Input Parameter:
786 .  sf - star forest
787 
788    Output Parameters:
789 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
790 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
791 
792    Level: developer
793 
794 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
795 @*/
796 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
797 {
798   PetscFunctionBegin;
799   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
800   PetscSFCheckGraphSet(sf,1);
801   if (minleaf) *minleaf = sf->minleaf;
802   if (maxleaf) *maxleaf = sf->maxleaf;
803   PetscFunctionReturn(0);
804 }
805 
806 /*@C
807    PetscSFViewFromOptions - View from Options
808 
809    Collective on PetscSF
810 
811    Input Parameters:
812 +  A - the star forest
813 .  obj - Optional object
814 -  name - command line option
815 
816    Level: intermediate
817 .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
818 @*/
819 PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
820 {
821   PetscFunctionBegin;
822   PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
823   PetscCall(PetscObjectViewFromOptions((PetscObject)A,obj,name));
824   PetscFunctionReturn(0);
825 }
826 
/*@C
   PetscSFView - view a star forest

   Collective

   Input Parameters:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is supplied */
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer));
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Setting up builds the per-rank tables (sf->ranks, sf->roffset, ...) printed below */
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        /* Nothing to print; pop the tab pushed above before the early return */
        PetscCall(PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(0);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks));
      /* One line per leaf edge: local leaf index <- (remote rank, remote root index) */
      for (i=0; i<sf->nleaves; i++) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer,&format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list so the detailed output is ordered by rank;
           perm[] maps the sorted position back to the original slot in sf->ranks */
        PetscCall(PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm));
        PetscCall(PetscArraycpy(tmpranks,sf->ranks,sf->nranks));
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank));
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]));
          /* Edges to rank i live in the slice [roffset[i], roffset[i+1]) of rmine/rremote */
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]));
          }
        }
        PetscCall(PetscFree2(tmpranks,perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  /* Give the concrete implementation a chance to print type-specific information */
  if (sf->ops->View) PetscCall((*sf->ops->View)(sf,viewer));
  PetscFunctionReturn(0);
}
896 
897 /*@C
898    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
899 
900    Not Collective
901 
902    Input Parameter:
903 .  sf - star forest
904 
905    Output Parameters:
906 +  nranks - number of ranks referenced by local part
907 .  ranks - array of ranks
908 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
909 .  rmine - concatenated array holding local indices referencing each remote rank
910 -  rremote - concatenated array holding remote indices referenced for each remote rank
911 
912    Level: developer
913 
914 .seealso: PetscSFGetLeafRanks()
915 @*/
916 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
917 {
918   PetscFunctionBegin;
919   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
920   PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
921   if (sf->ops->GetRootRanks) {
922     PetscCall((sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote));
923   } else {
924     /* The generic implementation */
925     if (nranks)  *nranks  = sf->nranks;
926     if (ranks)   *ranks   = sf->ranks;
927     if (roffset) *roffset = sf->roffset;
928     if (rmine)   *rmine   = sf->rmine;
929     if (rremote) *rremote = sf->rremote;
930   }
931   PetscFunctionReturn(0);
932 }
933 
934 /*@C
935    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
936 
937    Not Collective
938 
939    Input Parameter:
940 .  sf - star forest
941 
942    Output Parameters:
943 +  niranks - number of leaf ranks referencing roots on this process
944 .  iranks - array of ranks
945 .  ioffset - offset in irootloc for each rank (length niranks+1)
946 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
947 
948    Level: developer
949 
950 .seealso: PetscSFGetRootRanks()
951 @*/
952 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
953 {
954   PetscFunctionBegin;
955   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
956   PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
957   if (sf->ops->GetLeafRanks) {
958     PetscCall((sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc));
959   } else {
960     PetscSFType type;
961     PetscCall(PetscSFGetType(sf,&type));
962     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
963   }
964   PetscFunctionReturn(0);
965 }
966 
967 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
968   PetscInt i;
969   for (i=0; i<n; i++) {
970     if (needle == list[i]) return PETSC_TRUE;
971   }
972   return PETSC_FALSE;
973 }
974 
/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Parameters:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size));
  /* Count, per remote rank, how many of my leaves reference it */
  PetscCall(PetscTableCreate(10,size,&table));
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    PetscCall(PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES));
  }
  PetscCall(PetscTableGetCount(table,&sf->nranks));
  PetscCall(PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks));
  PetscCall(PetscTableGetHeadPosition(table,&pos));
  for (i=0; i<sf->nranks; i++) {
    PetscCall(PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]));
    ranks[i]--;             /* Convert back to 0-based */
  }
  PetscCall(PetscTableDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    /* Translate the distinguished group's members into rank numbers on the SF's communicator */
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_size(dgroup,&groupsize));
    PetscCall(PetscMalloc1(groupsize,&dgroupranks));
    PetscCall(PetscMalloc1(groupsize,&groupranks));
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished.
     Two-pointer sweep: i moves backward over the tail, sf->ndranks forward over the head,
     swapping misplaced pairs until the pointers meet. */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition by rank so PetscFindMPIInt() below can binary-search it */
  PetscCall(PetscSortIntWithArray(sf->ndranks,ranks,rcount));
  PetscCall(PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks));
  /* Convert counts into CSR-style offsets; rcount[] is reset and reused as a fill cursor below */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i],sf->ranks+i));
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Bucket every leaf edge into the slice of rmine/rremote belonging to its remote rank */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: reuse the previous lookup while consecutive leaves target the same rank */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks: distinguished part first, then the rest */
      PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank));
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheckFalse(irank < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount,ranks));
  PetscFunctionReturn(0);
}
1077 
/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameters:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheckFalse(sf->nranks < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  /* Both groups are computed lazily and cached on the SF */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build an auxiliary SF with one root per process
       and a leaf per referenced root rank, so the degree of my single root equals the number
       of processes whose leaves reference my roots */
    PetscCall(PetscMalloc1(sf->nranks,&remote));
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount));
    PetscCall(PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount,&indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount,&indegree));
    /* Enumerate the incoming ranks: every leaf sends its own rank, gathered at the root */
    PetscCall(PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks));
    PetscCall(PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks,outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* The outgoing group is simply the sorted list of root ranks my leaves reference */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1139 
/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Parameter:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Parameter:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi));
    *multi = sf->multi;
    sf->multi->multi = sf->multi; /* the multi-SF is its own multi-SF */
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    PetscCall(PetscSFComputeDegreeBegin(sf,&indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf,&indegree));
    /* inoffset[] is the prefix sum of root degrees: root i owns multi-root slots
       [inoffset[i], inoffset[i+1]) */
    PetscCall(PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset));
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Each leaf fetches its root's current offset and atomically adds 1, so every edge
       receives a distinct multi-root slot in outoffset[] */
    PetscCall(PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM));
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        PetscCheckFalse(inoffset[i] + indegree[i] != inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    /* Rewrite each leaf's remote index to point at its private multi-root slot */
    PetscCall(PetscMalloc1(sf->nleaves,&remote));
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER));
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset));
      /* Gather the owning leaf rank of every multi-root slot */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset));
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its new (rank-ordered) slot, then rebuild the graph with those slots */
      PetscCall(PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves,&newremote));
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset));
    }
    PetscCall(PetscFree3(inoffset,outones,outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1231 
1232 /*@C
1233    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1234 
1235    Collective
1236 
1237    Input Parameters:
1238 +  sf - original star forest
1239 .  nselected  - number of selected roots on this process
1240 -  selected   - indices of the selected roots on this process
1241 
1242    Output Parameter:
1243 .  esf - new star forest
1244 
1245    Level: advanced
1246 
1247    Note:
1248    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1249    be done by calling PetscSFGetGraph().
1250 
1251 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1252 @*/
1253 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1254 {
1255   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1256   const PetscInt    *ilocal;
1257   signed char       *rootdata,*leafdata,*leafmem;
1258   const PetscSFNode *iremote;
1259   PetscSFNode       *new_iremote;
1260   MPI_Comm          comm;
1261 
1262   PetscFunctionBegin;
1263   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1264   PetscSFCheckGraphSet(sf,1);
1265   if (nselected) PetscValidIntPointer(selected,3);
1266   PetscValidPointer(esf,4);
1267 
1268   PetscCall(PetscSFSetUp(sf));
1269   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0));
1270   PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
1271   PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
1272 
1273   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1274     PetscBool dups;
1275     PetscCall(PetscCheckDupsInt(nselected,selected,&dups));
1276     PetscCheck(!dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1277     for (i=0; i<nselected; i++)
1278       PetscCheckFalse(selected[i] < 0 || selected[i] >= nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots);
1279   }
1280 
1281   if (sf->ops->CreateEmbeddedRootSF) {
1282     PetscCall((*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf));
1283   } else {
1284     /* A generic version of creating embedded sf */
1285     PetscCall(PetscSFGetLeafRange(sf,&minleaf,&maxleaf));
1286     maxlocal = maxleaf - minleaf + 1;
1287     PetscCall(PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem));
1288     leafdata = leafmem - minleaf;
1289     /* Tag selected roots and bcast to leaves */
1290     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1291     PetscCall(PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE));
1292     PetscCall(PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE));
1293 
1294     /* Build esf with leaves that are still connected */
1295     esf_nleaves = 0;
1296     for (i=0; i<nleaves; i++) {
1297       j = ilocal ? ilocal[i] : i;
1298       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1299          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1300       */
1301       esf_nleaves += (leafdata[j] ? 1 : 0);
1302     }
1303     PetscCall(PetscMalloc1(esf_nleaves,&new_ilocal));
1304     PetscCall(PetscMalloc1(esf_nleaves,&new_iremote));
1305     for (i=n=0; i<nleaves; i++) {
1306       j = ilocal ? ilocal[i] : i;
1307       if (leafdata[j]) {
1308         new_ilocal[n]        = j;
1309         new_iremote[n].rank  = iremote[i].rank;
1310         new_iremote[n].index = iremote[i].index;
1311         ++n;
1312       }
1313     }
1314     PetscCall(PetscSFCreate(comm,esf));
1315     PetscCall(PetscSFSetFromOptions(*esf));
1316     PetscCall(PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER));
1317     PetscCall(PetscFree2(rootdata,leafmem));
1318   }
1319   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0));
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 /*@C
1324   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1325 
1326   Collective
1327 
1328   Input Parameters:
1329 + sf - original star forest
1330 . nselected  - number of selected leaves on this process
1331 - selected   - indices of the selected leaves on this process
1332 
1333   Output Parameter:
1334 .  newsf - new star forest
1335 
1336   Level: advanced
1337 
1338 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
1339 @*/
1340 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1341 {
1342   const PetscSFNode *iremote;
1343   PetscSFNode       *new_iremote;
1344   const PetscInt    *ilocal;
1345   PetscInt          i,nroots,*leaves,*new_ilocal;
1346   MPI_Comm          comm;
1347 
1348   PetscFunctionBegin;
1349   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1350   PetscSFCheckGraphSet(sf,1);
1351   if (nselected) PetscValidIntPointer(selected,3);
1352   PetscValidPointer(newsf,4);
1353 
1354   /* Uniq selected[] and put results in leaves[] */
1355   PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
1356   PetscCall(PetscMalloc1(nselected,&leaves));
1357   PetscCall(PetscArraycpy(leaves,selected,nselected));
1358   PetscCall(PetscSortedRemoveDupsInt(&nselected,leaves));
1359   PetscCheckFalse(nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves);
1360 
1361   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1362   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1363     PetscCall((*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf));
1364   } else {
1365     PetscCall(PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote));
1366     PetscCall(PetscMalloc1(nselected,&new_ilocal));
1367     PetscCall(PetscMalloc1(nselected,&new_iremote));
1368     for (i=0; i<nselected; ++i) {
1369       const PetscInt l     = leaves[i];
1370       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1371       new_iremote[i].rank  = iremote[l].rank;
1372       new_iremote[i].index = iremote[l].index;
1373     }
1374     PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf));
1375     PetscCall(PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER));
1376   }
1377   PetscCall(PetscFree(leaves));
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 /*@C
1382    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1383 
1384    Collective on PetscSF
1385 
1386    Input Parameters:
1387 +  sf - star forest on which to communicate
1388 .  unit - data type associated with each node
1389 .  rootdata - buffer to broadcast
1390 -  op - operation to use for reduction
1391 
1392    Output Parameter:
1393 .  leafdata - buffer to be reduced with values from each leaf's respective root
1394 
1395    Level: intermediate
1396 
1397    Notes:
1398     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1399     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1400     use PetscSFBcastWithMemTypeBegin() instead.
1401 .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
1402 @*/
1403 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1404 {
1405   PetscMemType   rootmtype,leafmtype;
1406 
1407   PetscFunctionBegin;
1408   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1409   PetscCall(PetscSFSetUp(sf));
1410   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
1411   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1412   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1413   PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op));
1414   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
1415   PetscFunctionReturn(0);
1416 }
1417 
/*@C
   PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()

   Collective on PetscSF

   Input Parameters:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootmtype - memory type of rootdata
.  rootdata - buffer to broadcast
.  leafmtype - memory type of leafdata
-  op - operation to use for reduction

   Output Parameter:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
@*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
  /* Caller supplied the memory types, so no PetscGetMemType() detection is needed here */
  PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op));
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}
1448 
/*@C
   PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Parameter:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFReduceEnd()
@*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0));
  /* Completes the communication initiated by the matching PetscSFBcastBegin() call */
  PetscCall((*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op));
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0));
  PetscFunctionReturn(0);
}
1476 
1477 /*@C
1478    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1479 
1480    Collective
1481 
1482    Input Parameters:
1483 +  sf - star forest
1484 .  unit - data type
1485 .  leafdata - values to reduce
1486 -  op - reduction operation
1487 
1488    Output Parameter:
1489 .  rootdata - result of reduction of values from all leaves of each root
1490 
1491    Level: intermediate
1492 
1493    Notes:
1494     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1495     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1496     use PetscSFReduceWithMemTypeBegin() instead.
1497 
1498 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1499 @*/
1500 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1501 {
1502   PetscMemType   rootmtype,leafmtype;
1503 
1504   PetscFunctionBegin;
1505   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1506   PetscCall(PetscSFSetUp(sf));
1507   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0));
1508   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1509   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1510   PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op));
1511   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0));
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 /*@C
1516    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1517 
1518    Collective
1519 
1520    Input Parameters:
1521 +  sf - star forest
1522 .  unit - data type
1523 .  leafmtype - memory type of leafdata
1524 .  leafdata - values to reduce
1525 .  rootmtype - memory type of rootdata
1526 -  op - reduction operation
1527 
1528    Output Parameter:
1529 .  rootdata - result of reduction of values from all leaves of each root
1530 
1531    Level: intermediate
1532 
1533 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1534 @*/
1535 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1536 {
1537   PetscFunctionBegin;
1538   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1539   PetscCall(PetscSFSetUp(sf));
1540   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0));
1541   PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op));
1542   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0));
1543   PetscFunctionReturn(0);
1544 }
1545 
/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Parameter:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd(), PetscSFReduceBegin()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Skip SF-level event logging when this SF backs a VecScatter; NOTE(review): presumably the VecScatter layer logs its own events */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0));
  /* Complete the communication started by the matching PetscSFReduceBegin()/PetscSFReduceWithMemTypeBegin() */
  PetscCall((*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op));
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0));
  PetscFunctionReturn(0);
}
1573 
/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  /* Detect host/device location of each buffer; callers can avoid these queries with PetscSFFetchAndOpWithMemTypeBegin() */
  PetscCall(PetscGetMemType(rootdata,&rootmtype));
  PetscCall(PetscGetMemType(leafdata,&leafmtype));
  PetscCall(PetscGetMemType(leafupdate,&leafupdatemtype));
  /* The implementations only handle leafdata and leafupdate residing in the same kind of memory */
  PetscCheckFalse(leafmtype != leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op));
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}
1615 
/*@C
   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootmtype - memory type of rootdata
.  leafmtype - memory type of leafdata
.  leafdata - leaf values to use in reduction
.  leafupdatemtype - memory type of leafupdate
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note: See PetscSFFetchAndOpBegin() for more details.

.seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  /* Unlike PetscSFFetchAndOpBegin(), the caller supplies the memory types, so no PetscGetMemType() queries are needed.
     The implementations only handle leafdata and leafupdate residing in the same kind of memory. */
  PetscCheckFalse(leafmtype != leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op));
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}
1651 
/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  /* Complete the communication started by PetscSFFetchAndOpBegin()/PetscSFFetchAndOpWithMemTypeBegin() */
  PetscCall((*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op));
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  PetscFunctionReturn(0);
}
1680 
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

   The degree array is only valid after the matching PetscSFComputeDegreeEnd(); this routine sets *degree to NULL.

.seealso: PetscSFGatherBegin(), PetscSFComputeDegreeEnd()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree non-NULL means a Begin is already in flight (it is freed only at destroy, set only here) */
    PetscCheck(!sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    PetscCall(PetscMalloc1(nroots,&sf->degree));
    PetscCall(PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Summing a 1 from every leaf into its root yields the root's degree. degreetmp covers the leaf range
       [minleaf,maxleaf], so shift the pointer by minleaf to present a buffer addressable from leaf index 0. */
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
  }
  /* The result is not available until PetscSFComputeDegreeEnd(), which returns sf->degree */
  *degree = NULL;
  PetscFunctionReturn(0);
}
1718 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscCheck(sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Finish the MPI_SUM reduction of ones started in PetscSFComputeDegreeBegin(); the shift by minleaf
       matches the shifted buffer passed there */
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    sf->degreeknown = PETSC_TRUE; /* cache the result; later Begin/End pairs become no-ops */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1752 
1753 /*@C
1754    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1755    Each multi-root is assigned index of the corresponding original root.
1756 
1757    Collective
1758 
1759    Input Parameters:
1760 +  sf - star forest
1761 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1762 
1763    Output Parameters:
1764 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1765 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1766 
1767    Level: developer
1768 
1769    Notes:
1770    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1771 
1772 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1773 @*/
1774 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1775 {
1776   PetscSF             msf;
1777   PetscInt            i, j, k, nroots, nmroots;
1778 
1779   PetscFunctionBegin;
1780   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1781   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1782   if (nroots) PetscValidIntPointer(degree,2);
1783   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1784   PetscValidPointer(multiRootsOrigNumbering,4);
1785   PetscCall(PetscSFGetMultiSF(sf,&msf));
1786   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1787   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1788   for (i=0,j=0,k=0; i<nroots; i++) {
1789     if (!degree[i]) continue;
1790     for (j=0; j<degree[i]; j++,k++) {
1791       (*multiRootsOrigNumbering)[k] = i;
1792     }
1793   }
1794   PetscCheckFalse(k != nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1795   if (nMultiRoots) *nMultiRoots = nmroots;
1796   PetscFunctionReturn(0);
1797 }
1798 
/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Parameter:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  /* A gather is a reduce over the multi-SF: each multi-root has one slot per connected leaf
     (space per root equals its degree), so MPI_REPLACE deposits every leaf value without combining */
  PetscCall(PetscSFGetMultiSF(sf,&multi));
  PetscCall(PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE));
  PetscFunctionReturn(0);
}
1827 
/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Parameter:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Finish the reduce over the multi-SF started by PetscSFGatherBegin() */
  PetscCall(PetscSFGetMultiSF(sf,&multi));
  PetscCall(PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE));
  PetscFunctionReturn(0);
}
1855 
/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  /* A scatter is a broadcast over the multi-SF: each leaf receives its personal slot of its root's block */
  PetscCall(PetscSFGetMultiSF(sf,&multi));
  PetscCall(PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE));
  PetscFunctionReturn(0);
}
1884 
/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Parameter:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Finish the broadcast over the multi-SF started by PetscSFScatterBegin() */
  PetscCall(PetscSFGetMultiSF(sf,&multi));
  PetscCall(PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE));
  PetscFunctionReturn(0);
}
1912 
1913 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1914 {
1915   PetscInt        i, n, nleaves;
1916   const PetscInt *ilocal = NULL;
1917   PetscHSetI      seen;
1918 
1919   PetscFunctionBegin;
1920   if (PetscDefined(USE_DEBUG)) {
1921     PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL));
1922     PetscCall(PetscHSetICreate(&seen));
1923     for (i = 0; i < nleaves; i++) {
1924       const PetscInt leaf = ilocal ? ilocal[i] : i;
1925       PetscCall(PetscHSetIAdd(seen,leaf));
1926     }
1927     PetscCall(PetscHSetIGetSize(seen,&n));
1928     PetscCheckFalse(n != nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1929     PetscCall(PetscHSetIDestroy(&seen));
1930   }
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 /*@
1935   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1936 
1937   Input Parameters:
1938 + sfA - The first PetscSF
1939 - sfB - The second PetscSF
1940 
1941   Output Parameters:
1942 . sfBA - The composite SF
1943 
1944   Level: developer
1945 
1946   Notes:
1947   Currently, the two SFs must be defined on congruent communicators and they must be true star
1948   forests, i.e. the same leaf is not connected with different roots.
1949 
1950   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
1951   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1952   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1953   Bcast on sfA, then a Bcast on sfB, on connected nodes.
1954 
1955 .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1956 @*/
1957 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1958 {
1959   const PetscSFNode *remotePointsA,*remotePointsB;
1960   PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1961   const PetscInt    *localPointsA,*localPointsB;
1962   PetscInt          *localPointsBA;
1963   PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1964   PetscBool         denseB;
1965 
1966   PetscFunctionBegin;
1967   PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
1968   PetscSFCheckGraphSet(sfA,1);
1969   PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
1970   PetscSFCheckGraphSet(sfB,2);
1971   PetscCheckSameComm(sfA,1,sfB,2);
1972   PetscValidPointer(sfBA,3);
1973   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
1974   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
1975 
1976   PetscCall(PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA));
1977   PetscCall(PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB));
1978   if (localPointsA) {
1979     PetscCall(PetscMalloc1(numRootsB,&reorderedRemotePointsA));
1980     for (i=0; i<numRootsB; i++) {
1981       reorderedRemotePointsA[i].rank = -1;
1982       reorderedRemotePointsA[i].index = -1;
1983     }
1984     for (i=0; i<numLeavesA; i++) {
1985       if (localPointsA[i] >= numRootsB) continue;
1986       reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1987     }
1988     remotePointsA = reorderedRemotePointsA;
1989   }
1990   PetscCall(PetscSFGetLeafRange(sfB,&minleaf,&maxleaf));
1991   PetscCall(PetscMalloc1(maxleaf-minleaf+1,&leafdataB));
1992   PetscCall(PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
1993   PetscCall(PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
1994   PetscCall(PetscFree(reorderedRemotePointsA));
1995 
1996   denseB = (PetscBool)!localPointsB;
1997   for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1998     if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
1999     else numLeavesBA++;
2000   }
2001   if (denseB) {
2002     localPointsBA  = NULL;
2003     remotePointsBA = leafdataB;
2004   } else {
2005     PetscCall(PetscMalloc1(numLeavesBA,&localPointsBA));
2006     PetscCall(PetscMalloc1(numLeavesBA,&remotePointsBA));
2007     for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
2008       const PetscInt l = localPointsB ? localPointsB[i] : i;
2009 
2010       if (leafdataB[l-minleaf].rank == -1) continue;
2011       remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
2012       localPointsBA[numLeavesBA] = l;
2013       numLeavesBA++;
2014     }
2015     PetscCall(PetscFree(leafdataB));
2016   }
2017   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
2018   PetscCall(PetscSFSetFromOptions(*sfBA));
2019   PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 /*@
2024   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
2025 
2026   Input Parameters:
2027 + sfA - The first PetscSF
2028 - sfB - The second PetscSF
2029 
2030   Output Parameters:
2031 . sfBA - The composite SF.
2032 
2033   Level: developer
2034 
2035   Notes:
2036   Currently, the two SFs must be defined on congruent communicators and they must be true star
2037   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
2038   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.
2039 
2040   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
2041   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2042   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2043   a Reduce on sfB, on connected roots.
2044 
2045 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
2046 @*/
2047 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
2048 {
2049   const PetscSFNode *remotePointsA,*remotePointsB;
2050   PetscSFNode       *remotePointsBA;
2051   const PetscInt    *localPointsA,*localPointsB;
2052   PetscSFNode       *reorderedRemotePointsA = NULL;
2053   PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
2054   MPI_Op            op;
2055 #if defined(PETSC_USE_64BIT_INDICES)
2056   PetscBool         iswin;
2057 #endif
2058 
2059   PetscFunctionBegin;
2060   PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
2061   PetscSFCheckGraphSet(sfA,1);
2062   PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
2063   PetscSFCheckGraphSet(sfB,2);
2064   PetscCheckSameComm(sfA,1,sfB,2);
2065   PetscValidPointer(sfBA,3);
2066   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2067   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2068 
2069   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2070   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2071 
2072   /* TODO: Check roots of sfB have degree of 1 */
2073   /* Once we implement it, we can replace the MPI_MAXLOC
2074      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2075      We use MPI_MAXLOC only to have a deterministic output from this routine if
2076      the root condition is not meet.
2077    */
2078   op = MPI_MAXLOC;
2079 #if defined(PETSC_USE_64BIT_INDICES)
2080   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2081   PetscCall(PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin));
2082   if (iswin) op = MPI_REPLACE;
2083 #endif
2084 
2085   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2086   PetscCall(PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA));
2087   for (i=0; i<maxleaf - minleaf + 1; i++) {
2088     reorderedRemotePointsA[i].rank = -1;
2089     reorderedRemotePointsA[i].index = -1;
2090   }
2091   if (localPointsA) {
2092     for (i=0; i<numLeavesA; i++) {
2093       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2094       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2095     }
2096   } else {
2097     for (i=0; i<numLeavesA; i++) {
2098       if (i > maxleaf || i < minleaf) continue;
2099       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2100     }
2101   }
2102 
2103   PetscCall(PetscMalloc1(numRootsB,&localPointsBA));
2104   PetscCall(PetscMalloc1(numRootsB,&remotePointsBA));
2105   for (i=0; i<numRootsB; i++) {
2106     remotePointsBA[i].rank = -1;
2107     remotePointsBA[i].index = -1;
2108   }
2109 
2110   PetscCall(PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
2111   PetscCall(PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
2112   PetscCall(PetscFree(reorderedRemotePointsA));
2113   for (i=0,numLeavesBA=0; i<numRootsB; i++) {
2114     if (remotePointsBA[i].rank == -1) continue;
2115     remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2116     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2117     localPointsBA[numLeavesBA] = i;
2118     numLeavesBA++;
2119   }
2120   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
2121   PetscCall(PetscSFSetFromOptions(*sfBA));
2122   PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 /*
2127   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
2128 
2129   Input Parameters:
2130 . sf - The global PetscSF
2131 
2132   Output Parameters:
2133 . out - The local PetscSF
2134  */
2135 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2136 {
2137   MPI_Comm           comm;
2138   PetscMPIInt        myrank;
2139   const PetscInt     *ilocal;
2140   const PetscSFNode  *iremote;
2141   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
2142   PetscSFNode        *liremote;
2143   PetscSF            lsf;
2144 
2145   PetscFunctionBegin;
2146   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2147   if (sf->ops->CreateLocalSF) {
2148     PetscCall((*sf->ops->CreateLocalSF)(sf,out));
2149   } else {
2150     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2151     PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
2152     PetscCallMPI(MPI_Comm_rank(comm,&myrank));
2153 
2154     /* Find out local edges and build a local SF */
2155     PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
2156     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2157     PetscCall(PetscMalloc1(lnleaves,&lilocal));
2158     PetscCall(PetscMalloc1(lnleaves,&liremote));
2159 
2160     for (i=j=0; i<nleaves; i++) {
2161       if (iremote[i].rank == (PetscInt)myrank) {
2162         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2163         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2164         liremote[j].index = iremote[i].index;
2165         j++;
2166       }
2167     }
2168     PetscCall(PetscSFCreate(PETSC_COMM_SELF,&lsf));
2169     PetscCall(PetscSFSetFromOptions(lsf));
2170     PetscCall(PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER));
2171     PetscCall(PetscSFSetUp(lsf));
2172     *out = lsf;
2173   }
2174   PetscFunctionReturn(0);
2175 }
2176 
/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0; errors if the SF type does not implement BcastToZero */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscMemType       rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  /* Logged under the ordinary BcastBegin event; there is no dedicated BcastToZero event */
  PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
  PetscCall(PetscGetMemType(rootdata,&rootmtype));
  PetscCall(PetscGetMemType(leafdata,&leafmtype));
  /* BcastToZero is an optional entry in the ops table */
  if (sf->ops->BcastToZero) {
    PetscCall((*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}
2194 
2195 /*@
2196   PetscSFConcatenate - concatenate multiple SFs into one
2197 
2198   Input Parameters:
2199 + comm - the communicator
2200 . nsfs - the number of input PetscSF
2201 . sfs  - the array of input PetscSF
2202 . shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE)
2203 - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage
2204 
2205   Output Parameters:
2206 . newsf - The resulting PetscSF
2207 
2208   Level: developer
2209 
2210   Notes:
2211   The communicator of all SFs in sfs must be comm.
2212 
2213   The offsets in leafOffsets are added to the original leaf indices.
2214 
2215   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2216   In this case, NULL is also passed as ilocal to the resulting SF.
2217 
2218   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2219   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2220 
2221 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
2222 @*/
2223 PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
2224 {
2225   PetscInt            i, s, nLeaves, nRoots;
2226   PetscInt           *leafArrayOffsets;
2227   PetscInt           *ilocal_new;
2228   PetscSFNode        *iremote_new;
2229   PetscInt           *rootOffsets;
2230   PetscBool           all_ilocal_null = PETSC_FALSE;
2231   PetscMPIInt         rank;
2232 
2233   PetscFunctionBegin;
2234   {
2235     PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2236 
2237     PetscCall(PetscSFCreate(comm,&dummy));
2238     PetscValidLogicalCollectiveInt(dummy,nsfs,2);
2239     PetscValidPointer(sfs,3);
2240     for (i=0; i<nsfs; i++) {
2241       PetscValidHeaderSpecific(sfs[i],PETSCSF_CLASSID,3);
2242       PetscCheckSameComm(dummy,1,sfs[i],3);
2243     }
2244     PetscValidLogicalCollectiveBool(dummy,shareRoots,4);
2245     if (leafOffsets) PetscValidIntPointer(leafOffsets,5);
2246     PetscValidPointer(newsf,6);
2247     PetscCall(PetscSFDestroy(&dummy));
2248   }
2249   if (!nsfs) {
2250     PetscCall(PetscSFCreate(comm, newsf));
2251     PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
2252     PetscFunctionReturn(0);
2253   }
2254   PetscCallMPI(MPI_Comm_rank(comm, &rank));
2255 
2256   PetscCall(PetscCalloc1(nsfs+1, &rootOffsets));
2257   if (shareRoots) {
2258     PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
2259     if (PetscDefined(USE_DEBUG)) {
2260       for (s=1; s<nsfs; s++) {
2261         PetscInt nr;
2262 
2263         PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2264         PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
2265       }
2266     }
2267   } else {
2268     for (s=0; s<nsfs; s++) {
2269       PetscInt nr;
2270 
2271       PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
2272       rootOffsets[s+1] = rootOffsets[s] + nr;
2273     }
2274     nRoots = rootOffsets[nsfs];
2275   }
2276 
2277   /* Calculate leaf array offsets and automatic root offsets */
2278   PetscCall(PetscMalloc1(nsfs+1,&leafArrayOffsets));
2279   leafArrayOffsets[0] = 0;
2280   for (s=0; s<nsfs; s++) {
2281     PetscInt        nl;
2282 
2283     PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
2284     leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl;
2285   }
2286   nLeaves = leafArrayOffsets[nsfs];
2287 
2288   if (!leafOffsets) {
2289     all_ilocal_null = PETSC_TRUE;
2290     for (s=0; s<nsfs; s++) {
2291       const PetscInt *ilocal;
2292 
2293       PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
2294       if (ilocal) {
2295         all_ilocal_null = PETSC_FALSE;
2296         break;
2297       }
2298     }
2299     PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
2300   }
2301 
2302   /* Renumber and concatenate local leaves */
2303   ilocal_new = NULL;
2304   if (!all_ilocal_null) {
2305     PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
2306     for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1;
2307     for (s = 0; s<nsfs; s++) {
2308       const PetscInt   *ilocal;
2309       PetscInt         *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
2310       PetscInt          i, nleaves_l;
2311 
2312       PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
2313       for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2314     }
2315   }
2316 
2317   /* Renumber and concatenate remote roots */
2318   PetscCall(PetscMalloc1(nLeaves, &iremote_new));
2319   for (i = 0; i < nLeaves; i++) {
2320     iremote_new[i].rank   = -1;
2321     iremote_new[i].index  = -1;
2322   }
2323   for (s = 0; s<nsfs; s++) {
2324     PetscInt            i, nl, nr;
2325     PetscSF             tmp_sf;
2326     const PetscSFNode  *iremote;
2327     PetscSFNode        *tmp_rootdata;
2328     PetscSFNode        *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];
2329 
2330     PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
2331     PetscCall(PetscSFCreate(comm, &tmp_sf));
2332     /* create helper SF with contiguous leaves */
2333     PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES));
2334     PetscCall(PetscSFSetUp(tmp_sf));
2335     PetscCall(PetscMalloc1(nr, &tmp_rootdata));
2336     for (i = 0; i < nr; i++) {
2337       tmp_rootdata[i].index = i + rootOffsets[s];
2338       tmp_rootdata[i].rank  = (PetscInt) rank;
2339     }
2340     PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2341     PetscCall(PetscSFBcastEnd(  tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
2342     PetscCall(PetscSFDestroy(&tmp_sf));
2343     PetscCall(PetscFree(tmp_rootdata));
2344   }
2345 
2346   /* Build the new SF */
2347   PetscCall(PetscSFCreate(comm, newsf));
2348   PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
2349   PetscCall(PetscSFSetUp(*newsf));
2350   PetscCall(PetscFree(rootOffsets));
2351   PetscCall(PetscFree(leafArrayOffsets));
2352   PetscFunctionReturn(0);
2353 }
2354