xref: /petsc/src/vec/is/sf/interface/sf.c (revision 08401ef684002a709c6d3db98a0c9f54a8bcf1ec)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petscctable.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <cuda_runtime.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 #endif
13 
#if defined(PETSC_CLANG_STATIC_ANALYZER)
void PetscSFCheckGraphSet(PetscSF,int);
#else
#if defined(PETSC_USE_DEBUG)
/* Raise an error (debug builds only) if PetscSFSetGraph()/PetscSFSetGraphWithPattern() has not been called on sf */
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
#endif
26 
/* Human-readable names for PetscSFDuplicateOption; entry order must match the enum values */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
28 
29 /*@
30    PetscSFCreate - create a star forest communication context
31 
32    Collective
33 
34    Input Parameter:
35 .  comm - communicator on which the star forest will operate
36 
37    Output Parameter:
38 .  sf - new star forest context
39 
40    Options Database Keys:
41 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
42 .  -sf_type window    -Use MPI-3 one-sided window for communication
43 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
44 
45    Level: intermediate
46 
47    Notes:
48    When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv,
49    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
50    SFs are optimized and they have better performance than general SFs.
51 
52 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
53 @*/
54 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
55 {
56   PetscSF        b;
57 
58   PetscFunctionBegin;
59   PetscValidPointer(sf,2);
60   PetscCall(PetscSFInitializePackage());
61 
62   PetscCall(PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView));
63 
64   b->nroots    = -1;
65   b->nleaves   = -1;
66   b->minleaf   = PETSC_MAX_INT;
67   b->maxleaf   = PETSC_MIN_INT;
68   b->nranks    = -1;
69   b->rankorder = PETSC_TRUE;
70   b->ingroup   = MPI_GROUP_NULL;
71   b->outgroup  = MPI_GROUP_NULL;
72   b->graphset  = PETSC_FALSE;
73 #if defined(PETSC_HAVE_DEVICE)
74   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
75   b->use_stream_aware_mpi = PETSC_FALSE;
76   b->unknown_input_stream= PETSC_FALSE;
77   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
78     b->backend = PETSCSF_BACKEND_KOKKOS;
79   #elif defined(PETSC_HAVE_CUDA)
80     b->backend = PETSCSF_BACKEND_CUDA;
81   #elif defined(PETSC_HAVE_HIP)
82     b->backend = PETSCSF_BACKEND_HIP;
83   #endif
84 
85   #if defined(PETSC_HAVE_NVSHMEM)
86     b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
87     b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
88     PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL));
89     PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL));
90   #endif
91 #endif
92   b->vscat.from_n = -1;
93   b->vscat.to_n   = -1;
94   b->vscat.unit   = MPIU_SCALAR;
95  *sf = b;
96   PetscFunctionReturn(0);
97 }
98 
99 /*@
100    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
101 
102    Collective
103 
104    Input Parameter:
105 .  sf - star forest
106 
107    Level: advanced
108 
109 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
110 @*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation release its own data first */
  if (sf->ops->Reset) PetscCall((*sf->ops->Reset)(sf));
  /* Return the graph description to the just-created state (mirrors PetscSFCreate()) */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup  != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
  /* Break the back-reference before destroying the multi-SF so destruction does not recurse into us */
  if (sf->multi) sf->multi->multi = NULL;
  PetscCall(PetscSFDestroy(&sf->multi));
  PetscCall(PetscLayoutDestroy(&sf->map));

 #if defined(PETSC_HAVE_DEVICE)
  /* Release device-side mirrors of the leaf index arrays */
  for (PetscInt i=0; i<2; i++) PetscCall(PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]));
 #endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
142 
143 /*@C
144    PetscSFSetType - Set the PetscSF communication implementation
145 
146    Collective on PetscSF
147 
148    Input Parameters:
149 +  sf - the PetscSF context
150 -  type - a known method
151 
152    Options Database Key:
153 .  -sf_type <type> - Sets the method; use -help for a list
154    of available methods (for instance, window, basic, neighbor)
155 
156    Notes:
157    See "include/petscsf.h" for available methods (for instance)
158 +    PETSCSFWINDOW - MPI-2/3 one-sided
159 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
160 
161   Level: intermediate
162 
163 .seealso: PetscSFType, PetscSFCreate()
164 @*/
165 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
166 {
167   PetscBool      match;
168   PetscErrorCode (*r)(PetscSF);
169 
170   PetscFunctionBegin;
171   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
172   PetscValidCharPointer(type,2);
173 
174   PetscCall(PetscObjectTypeCompare((PetscObject)sf,type,&match));
175   if (match) PetscFunctionReturn(0);
176 
177   PetscCall(PetscFunctionListFind(PetscSFList,type,&r));
178   PetscCheck(r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
179   /* Destroy the previous PetscSF implementation context */
180   if (sf->ops->Destroy) PetscCall((*(sf)->ops->Destroy)(sf));
181   PetscCall(PetscMemzero(sf->ops,sizeof(*sf->ops)));
182   PetscCall(PetscObjectChangeTypeName((PetscObject)sf,type));
183   PetscCall((*r)(sf));
184   PetscFunctionReturn(0);
185 }
186 
187 /*@C
188   PetscSFGetType - Get the PetscSF communication implementation
189 
190   Not Collective
191 
192   Input Parameter:
193 . sf  - the PetscSF context
194 
195   Output Parameter:
196 . type - the PetscSF type name
197 
198   Level: intermediate
199 
200 .seealso: PetscSFSetType(), PetscSFCreate()
201 @*/
202 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
203 {
204   PetscFunctionBegin;
205   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
206   PetscValidPointer(type,2);
207   *type = ((PetscObject)sf)->type_name;
208   PetscFunctionReturn(0);
209 }
210 
211 /*@C
212    PetscSFDestroy - destroy star forest
213 
214    Collective
215 
216    Input Parameter:
217 .  sf - address of star forest
218 
219    Level: intermediate
220 
221 .seealso: PetscSFCreate(), PetscSFReset()
222 @*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually destroys the object */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  PetscCall(PetscSFReset(*sf));
  if ((*sf)->ops->Destroy) PetscCall((*(*sf)->ops->Destroy)(*sf));
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* bs > 1 implies vscat.unit is a derived datatype we own (see PetscSFDuplicate()) and must free */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(0);
}
236 
237 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
238 {
239   PetscInt           i, nleaves;
240   PetscMPIInt        size;
241   const PetscInt    *ilocal;
242   const PetscSFNode *iremote;
243 
244   PetscFunctionBegin;
245   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
246   PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote));
247   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size));
248   for (i = 0; i < nleaves; i++) {
249     const PetscInt rank = iremote[i].rank;
250     const PetscInt remote = iremote[i].index;
251     const PetscInt leaf = ilocal ? ilocal[i] : i;
252     PetscCheckFalse(rank < 0 || rank >= size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size);
253     PetscCheck(remote >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i);
254     PetscCheck(leaf >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i);
255   }
256   PetscFunctionReturn(0);
257 }
258 
259 /*@
260    PetscSFSetUp - set up communication structures
261 
262    Collective
263 
264    Input Parameter:
265 .  sf - star forest communication object
266 
267    Level: beginner
268 
269 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
270 @*/
271 PetscErrorCode PetscSFSetUp(PetscSF sf)
272 {
273   PetscFunctionBegin;
274   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
275   PetscSFCheckGraphSet(sf,1);
276   if (sf->setupcalled) PetscFunctionReturn(0);
277   PetscCall(PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0));
278   PetscCall(PetscSFCheckGraphValid_Private(sf));
279   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf,PETSCSFBASIC)); /* Zero all sf->ops */
280   if (sf->ops->SetUp) PetscCall((*sf->ops->SetUp)(sf));
281 #if defined(PETSC_HAVE_CUDA)
282   if (sf->backend == PETSCSF_BACKEND_CUDA) {
283     sf->ops->Malloc = PetscSFMalloc_CUDA;
284     sf->ops->Free   = PetscSFFree_CUDA;
285   }
286 #endif
287 #if defined(PETSC_HAVE_HIP)
288   if (sf->backend == PETSCSF_BACKEND_HIP) {
289     sf->ops->Malloc = PetscSFMalloc_HIP;
290     sf->ops->Free   = PetscSFFree_HIP;
291   }
292 #endif
293 
294 #
295 #if defined(PETSC_HAVE_KOKKOS)
296   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
297     sf->ops->Malloc = PetscSFMalloc_Kokkos;
298     sf->ops->Free   = PetscSFFree_Kokkos;
299   }
300 #endif
301   PetscCall(PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0));
302   sf->setupcalled = PETSC_TRUE;
303   PetscFunctionReturn(0);
304 }
305 
306 /*@
307    PetscSFSetFromOptions - set PetscSF options using the options database
308 
309    Logically Collective
310 
311    Input Parameter:
312 .  sf - star forest
313 
314    Options Database Keys:
315 +  -sf_type               - implementation type, see PetscSFSetType()
316 .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
317 .  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
318                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
319                             If true, this option only works with -use_gpu_aware_mpi 1.
320 .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
321                                If true, this option only works with -use_gpu_aware_mpi 1.
322 
323 -  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
324                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
325                               the only available is kokkos.
326 
327    Level: intermediate
328 @*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);PetscCall(ierr);
  /* Present the current type (or basic) as the default for -sf_type */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  PetscCall(PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg));
  PetscCall(PetscSFSetType(sf,flg ? type : deft));
  PetscCall(PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL));
 #if defined(PETSC_HAVE_DEVICE)
  {
    char        backendstr[32] = {0};
    PetscBool   isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    PetscCall(PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL));
    PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL));
    PetscCall(PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set));
    /* Case-insensitive match of the requested backend name */
    PetscCall(PetscStrcasecmp("cuda",backendstr,&isCuda));
    PetscCall(PetscStrcasecmp("kokkos",backendstr,&isKokkos));
    PetscCall(PetscStrcasecmp("hip",backendstr,&isHip));
  #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else PetscCheck(!set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
  #elif defined(PETSC_HAVE_KOKKOS)
    /* Kokkos is the only backend available in this configuration */
    PetscCheck(!set || isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
   #endif
  }
 #endif
  /* PetscOptionsObject is declared by the PetscObjectOptionsBegin() macro above */
  if (sf->ops->SetFromOptions) PetscCall((*sf->ops->SetFromOptions)(PetscOptionsObject,sf));
  ierr = PetscOptionsEnd();PetscCall(ierr);
  PetscFunctionReturn(0);
}
368 
369 /*@
370    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
371 
372    Logically Collective
373 
374    Input Parameters:
375 +  sf - star forest
376 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
377 
378    Level: advanced
379 
380 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
381 @*/
382 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
383 {
384   PetscFunctionBegin;
385   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
386   PetscValidLogicalCollectiveBool(sf,flg,2);
387   PetscCheck(!sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
388   sf->rankorder = flg;
389   PetscFunctionReturn(0);
390 }
391 
392 /*@C
393    PetscSFSetGraph - Set a parallel star forest
394 
395    Collective
396 
397    Input Parameters:
398 +  sf - star forest
399 .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
400 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
401 .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
402 during setup in debug mode)
403 .  localmode - copy mode for ilocal
404 .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
405 during setup in debug mode)
406 -  remotemode - copy mode for iremote
407 
408    Level: intermediate
409 
410    Notes:
411    Leaf indices in ilocal must be unique, otherwise an error occurs.
412 
413    Input arrays ilocal and iremote follow the PetscCopyMode semantics.
414    In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
415    PETSc might modify the respective array;
416    if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
417    Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).
418 
419    Fortran Notes:
420    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.
421 
422    Developer Notes:
423    We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
424    This also allows to compare leaf sets of two SFs easily.
425 
426 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
427 @*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscBool       unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  PetscCheck(nroots  >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots);
  PetscCheck(nleaves >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves);
  PetscCheck(localmode  >= PETSC_COPY_VALUES && localmode  <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong localmode %d",localmode);
  PetscCheck(remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong remotemode %d",remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0));

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES we work on private copies; afterwards ilocal/iremote point at memory we own */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves,&tlocal));
    PetscCall(PetscArraycpy(tlocal,ilocal,nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves,&tremote));
    PetscCall(PetscArraycpy(tremote,iremote,nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode   work;

    /* Sort leaves (moving matching iremote entries along) to detect duplicates, find the
       min/max leaf, and recognize contiguous storage.  For PETSC_OWN_POINTER/PETSC_USE_POINTER
       this modifies the caller's arrays, as documented in the manpage above. */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    unique = PetscNot(unique); /* the check reports whether duplicates exist; negate to get uniqueness */
    PetscCheck(sf->allow_multi_leaves || unique,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves-1];
    contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1);
  } else {
    /* NULL ilocal means leaves are 0..nleaves-1 by definition */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* Contiguous leaves are represented by a NULL ilocal; drop (or free) the redundant array */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  sf->mine            = ilocal;
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc    = NULL;   /* not owned; the caller frees after PetscSFDestroy() */
  } else {
    sf->mine_alloc    = ilocal;
  }
  sf->remote          = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc  = NULL;   /* not owned */
  } else {
    sf->remote_alloc  = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
505 
506 /*@
507   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
508 
509   Collective
510 
511   Input Parameters:
512 + sf      - The PetscSF
513 . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
514 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
515 
516   Notes:
517   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
518   n and N are local and global sizes of x respectively.
519 
520   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
521   sequential vectors y on all ranks.
522 
523   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
524   sequential vector y on rank 0.
525 
526   In above cases, entries of x are roots and entries of y are leaves.
527 
528   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
529   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
530   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
531 not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
532   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
533 
534   In this case, roots and leaves are symmetric.
535 
536   Level: intermediate
537  @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern);
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* map is insignificant here; build a canonical layout with 'size' entries per rank */
    PetscCall(PetscLayoutCreate(comm,&sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map,size));
    PetscCall(PetscLayoutSetSize(sf->map,((PetscInt)size)*size));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map,&n));
    PetscCall(PetscLayoutGetSize(map,&N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    /* A single MPI_MAX reduction yields max(n) and -min(n); equal magnitudes mean uniform n */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map,&sf->map));
  }
  PetscCall(PetscSFSetType(sf,type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    sf->nleaves      = rank ? 0 : N;  /* only rank 0 has leaves */
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
604 
605 /*@
606    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
607 
608    Collective
609 
610    Input Parameter:
611 .  sf - star forest to invert
612 
613    Output Parameter:
614 .  isf - inverse of sf
615 
616    Level: advanced
617 
618    Notes:
619    All roots must have degree 1.
620 
621    The local space may be a permutation, but cannot be sparse.
622 
623 .seealso: PetscSFSetGraph()
624 @*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL));
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
  PetscCall(PetscMalloc2(nroots,&roots,maxlocal,&leaves));
  /* Each leaf slot names its own (rank, index) ... */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* ... and roots start as sentinels so untouched roots can be recognized afterwards */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's identity to its root; since all roots have degree 1, every root receives at most one value */
  PetscCall(PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;  /* dense: contiguous leaf storage for the inverse */
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    PetscCall(PetscMalloc1(count,&newilocal));
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* newilocal ownership transfers to the new SF; roots is copied by PetscSFSetGraph() and freed below */
  PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf));
  PetscCall(PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots,leaves));
  PetscFunctionReturn(0);
}
673 
674 /*@
675    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
676 
677    Collective
678 
679    Input Parameters:
680 +  sf - communication object to duplicate
681 -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
682 
683    Output Parameter:
684 .  newsf - new communication object
685 
686    Level: beginner
687 
688 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
689 @*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  MPI_Datatype   dtype=MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf));
  PetscCall(PetscSFGetType(sf,&type));
  if (type) PetscCall(PetscSFSetType(*newsf,type));
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
      /* PETSC_COPY_VALUES so the source SF's (const) arrays are left untouched */
      PetscCall(PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES));
    } else {
      PetscCall(PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit,&dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream= sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  if (sf->ops->Duplicate) PetscCall((*sf->ops->Duplicate)(sf,opt,*newsf));
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}
732 
733 /*@C
734    PetscSFGetGraph - Get the graph specifying a parallel star forest
735 
736    Not Collective
737 
738    Input Parameter:
739 .  sf - star forest
740 
741    Output Parameters:
742 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
743 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
744 .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
745 -  iremote - remote locations of root vertices for each leaf on the current process
746 
747    Notes:
748      We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
749 
750      The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
751      see its manpage for details.
752 
753    Fortran Notes:
754      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
755      want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
756 
757      To check for a NULL ilocal use
758 $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
759 
760    Level: intermediate
761 
762 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
763 @*/
764 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
765 {
766   PetscFunctionBegin;
767   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
768   if (sf->ops->GetGraph) {
769     PetscCall((sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote));
770   } else {
771     if (nroots) *nroots = sf->nroots;
772     if (nleaves) *nleaves = sf->nleaves;
773     if (ilocal) *ilocal = sf->mine;
774     if (iremote) *iremote = sf->remote;
775   }
776   PetscFunctionReturn(0);
777 }
778 
779 /*@
780    PetscSFGetLeafRange - Get the active leaf ranges
781 
782    Not Collective
783 
784    Input Parameter:
785 .  sf - star forest
786 
787    Output Parameters:
788 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
789 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
790 
791    Level: developer
792 
793 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
794 @*/
795 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
796 {
797   PetscFunctionBegin;
798   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
799   PetscSFCheckGraphSet(sf,1);
800   if (minleaf) *minleaf = sf->minleaf;
801   if (maxleaf) *maxleaf = sf->maxleaf;
802   PetscFunctionReturn(0);
803 }
804 
805 /*@C
806    PetscSFViewFromOptions - View from Options
807 
808    Collective on PetscSF
809 
810    Input Parameters:
811 +  A - the star forest
812 .  obj - Optional object
813 -  name - command line option
814 
815    Level: intermediate
816 .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
817 @*/
818 PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
819 {
820   PetscFunctionBegin;
821   PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
822   PetscCall(PetscObjectViewFromOptions((PetscObject)A,obj,name));
823   PetscFunctionReturn(0);
824 }
825 
826 /*@C
827    PetscSFView - view a star forest
828 
829    Collective
830 
831    Input Parameters:
832 +  sf - star forest
833 -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
834 
835    Level: beginner
836 
837 .seealso: PetscSFCreate(), PetscSFSetGraph()
838 @*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is given */
  if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer));
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Setting up fills in the rank/offset tables printed in the detail section below */
  if (sf->graphset) PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer));
    PetscCall(PetscViewerASCIIPushTab(viewer));
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        /* Nothing more to print without a graph; balance the PushTab before returning */
        PetscCall(PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n"));
        PetscCall(PetscViewerASCIIPopTab(viewer));
        PetscFunctionReturn(0);
      }
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks));
      /* One line per leaf: local leaf index <- (remote rank, remote root index) */
      for (i=0; i<sf->nleaves; i++) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerGetFormat(viewer,&format));
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list so output is ordered by rank; perm[] maps back to the original slots */
        PetscCall(PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm));
        PetscCall(PetscArraycpy(tmpranks,sf->ranks,sf->nranks));
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        PetscCall(PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank));
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          /* roffset[] is CSR-style: edges for rank i occupy [roffset[i],roffset[i+1]) in rmine/rremote */
          PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]));
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]));
          }
        }
        PetscCall(PetscFree2(tmpranks,perm));
      }
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  /* Let the implementation append type-specific information */
  if (sf->ops->View) PetscCall((*sf->ops->View)(sf,viewer));
  PetscFunctionReturn(0);
}
895 
896 /*@C
897    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
898 
899    Not Collective
900 
901    Input Parameter:
902 .  sf - star forest
903 
904    Output Parameters:
905 +  nranks - number of ranks referenced by local part
906 .  ranks - array of ranks
907 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
908 .  rmine - concatenated array holding local indices referencing each remote rank
909 -  rremote - concatenated array holding remote indices referenced for each remote rank
910 
911    Level: developer
912 
913 .seealso: PetscSFGetLeafRanks()
914 @*/
915 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
916 {
917   PetscFunctionBegin;
918   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
919   PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
920   if (sf->ops->GetRootRanks) {
921     PetscCall((sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote));
922   } else {
923     /* The generic implementation */
924     if (nranks)  *nranks  = sf->nranks;
925     if (ranks)   *ranks   = sf->ranks;
926     if (roffset) *roffset = sf->roffset;
927     if (rmine)   *rmine   = sf->rmine;
928     if (rremote) *rremote = sf->rremote;
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 /*@C
934    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
935 
936    Not Collective
937 
938    Input Parameter:
939 .  sf - star forest
940 
941    Output Parameters:
942 +  niranks - number of leaf ranks referencing roots on this process
943 .  iranks - array of ranks
944 .  ioffset - offset in irootloc for each rank (length niranks+1)
945 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
946 
947    Level: developer
948 
949 .seealso: PetscSFGetRootRanks()
950 @*/
951 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
952 {
953   PetscFunctionBegin;
954   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
955   PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
956   if (sf->ops->GetLeafRanks) {
957     PetscCall((sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc));
958   } else {
959     PetscSFType type;
960     PetscCall(PetscSFGetType(sf,&type));
961     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
962   }
963   PetscFunctionReturn(0);
964 }
965 
966 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
967   PetscInt i;
968   for (i=0; i<n; i++) {
969     if (needle == list[i]) return PETSC_TRUE;
970   }
971   return PETSC_FALSE;
972 }
973 
974 /*@C
975    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
976 
977    Collective
978 
979    Input Parameters:
980 +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
981 -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
982 
983    Level: developer
984 
985 .seealso: PetscSFGetRootRanks()
986 @*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size));
  /* Count how many leaves reference each remote rank: table maps (rank+1) -> leaf count */
  PetscCall(PetscTableCreate(10,size,&table));
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    PetscCall(PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES));
  }
  PetscCall(PetscTableGetCount(table,&sf->nranks));
  /* rmine/rremote are sized by nleaves: each leaf contributes exactly one entry */
  PetscCall(PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks));
  PetscCall(PetscTableGetHeadPosition(table,&pos));
  for (i=0; i<sf->nranks; i++) {
    PetscCall(PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]));
    ranks[i]--;             /* Convert back to 0-based */
  }
  PetscCall(PetscTableDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate each member of dgroup into its rank in the SF's communicator */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_size(dgroup,&groupsize));
    PetscCall(PetscMalloc1(groupsize,&dgroupranks));
    PetscCall(PetscMalloc1(groupsize,&groupranks));
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort the two partitions separately so each is ordered by rank, keeping counts aligned */
  PetscCall(PetscSortIntWithArray(sf->ndranks,ranks,rcount));
  PetscCall(PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks));
  /* Build CSR-style offsets: leaves referencing ranks[i] occupy [roffset[i],roffset[i+1]) in rmine/rremote */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i],sf->ranks+i));
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;   /* Reused below as the per-rank fill cursor */
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank));
      if (irank < 0) {
        /* Not distinguished; search the non-distinguished partition and shift the index */
        PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank));
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheck(irank >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank);
    /* Scatter this leaf's local and remote indices into its rank's segment */
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount,ranks));
  PetscFunctionReturn(0);
}
1076 
1077 /*@C
1078    PetscSFGetGroups - gets incoming and outgoing process groups
1079 
1080    Collective
1081 
1082    Input Parameter:
1083 .  sf - star forest
1084 
1085    Output Parameters:
1086 +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1087 -  outgoing - group of destination processes for outgoing edges (roots that I reference)
1088 
1089    Level: developer
1090 
1091 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1092 @*/
1093 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1094 {
1095   MPI_Group      group = MPI_GROUP_NULL;
1096 
1097   PetscFunctionBegin;
1098   PetscCheck(sf->nranks >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
1099   if (sf->ingroup == MPI_GROUP_NULL) {
1100     PetscInt       i;
1101     const PetscInt *indegree;
1102     PetscMPIInt    rank,*outranks,*inranks;
1103     PetscSFNode    *remote;
1104     PetscSF        bgcount;
1105 
1106     /* Compute the number of incoming ranks */
1107     PetscCall(PetscMalloc1(sf->nranks,&remote));
1108     for (i=0; i<sf->nranks; i++) {
1109       remote[i].rank  = sf->ranks[i];
1110       remote[i].index = 0;
1111     }
1112     PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount));
1113     PetscCall(PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER));
1114     PetscCall(PetscSFComputeDegreeBegin(bgcount,&indegree));
1115     PetscCall(PetscSFComputeDegreeEnd(bgcount,&indegree));
1116     /* Enumerate the incoming ranks */
1117     PetscCall(PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks));
1118     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
1119     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1120     PetscCall(PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks));
1121     PetscCall(PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks));
1122     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
1123     PetscCallMPI(MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup));
1124     PetscCallMPI(MPI_Group_free(&group));
1125     PetscCall(PetscFree2(inranks,outranks));
1126     PetscCall(PetscSFDestroy(&bgcount));
1127   }
1128   *incoming = sf->ingroup;
1129 
1130   if (sf->outgroup == MPI_GROUP_NULL) {
1131     PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
1132     PetscCallMPI(MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup));
1133     PetscCallMPI(MPI_Group_free(&group));
1134   }
1135   *outgoing = sf->outgroup;
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 /*@
1140    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1141 
1142    Collective
1143 
1144    Input Parameter:
1145 .  sf - star forest that may contain roots with 0 or with more than 1 vertex
1146 
1147    Output Parameter:
1148 .  multi - star forest with split roots, such that each root has degree exactly 1
1149 
1150    Level: developer
1151 
1152    Notes:
1153 
1154    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1155    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1156    edge, it is a candidate for future optimization that might involve its removal.
1157 
1158 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1159 @*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi));
    *multi = sf->multi;
    sf->multi->multi = sf->multi;  /* The multi-SF serves as its own multi-SF */
    PetscFunctionReturn(0);
  }
  /* The multi-SF is built lazily on first request and cached */
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    /* Count incoming edges at each root */
    PetscCall(PetscSFComputeDegreeBegin(sf,&indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf,&indegree));
    /* inoffset[] is the prefix sum of degrees: root i owns multi-root slots [inoffset[i],inoffset[i+1]) */
    PetscCall(PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset));
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Each leaf fetches its root's running offset and adds one, so every leaf receives a unique slot */
    PetscCall(PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM));
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        PetscCheckFalse(inoffset[i] + indegree[i] != inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    /* Same leaves as sf, but each leaf now targets its private multi-root slot */
    PetscCall(PetscMalloc1(sf->nleaves,&remote));
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER));
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset));
      /* Reduce each leaf's owning rank onto its multi-root slot */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset));
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf the new (rank-sorted) position of its slot */
      PetscCall(PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves,&newremote));
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      /* Reset the graph with the rank-sorted slot assignment */
      PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset));
    }
    PetscCall(PetscFree3(inoffset,outones,outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1230 
1231 /*@C
1232    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1233 
1234    Collective
1235 
1236    Input Parameters:
1237 +  sf - original star forest
1238 .  nselected  - number of selected roots on this process
1239 -  selected   - indices of the selected roots on this process
1240 
1241    Output Parameter:
1242 .  esf - new star forest
1243 
1244    Level: advanced
1245 
1246    Note:
1247    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1248    be done by calling PetscSFGetGraph().
1249 
1250 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1251 @*/
1252 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1253 {
1254   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1255   const PetscInt    *ilocal;
1256   signed char       *rootdata,*leafdata,*leafmem;
1257   const PetscSFNode *iremote;
1258   PetscSFNode       *new_iremote;
1259   MPI_Comm          comm;
1260 
1261   PetscFunctionBegin;
1262   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1263   PetscSFCheckGraphSet(sf,1);
1264   if (nselected) PetscValidIntPointer(selected,3);
1265   PetscValidPointer(esf,4);
1266 
1267   PetscCall(PetscSFSetUp(sf));
1268   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0));
1269   PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
1270   PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
1271 
1272   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1273     PetscBool dups;
1274     PetscCall(PetscCheckDupsInt(nselected,selected,&dups));
1275     PetscCheck(!dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1276     for (i=0; i<nselected; i++)
1277       PetscCheckFalse(selected[i] < 0 || selected[i] >= nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots);
1278   }
1279 
1280   if (sf->ops->CreateEmbeddedRootSF) {
1281     PetscCall((*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf));
1282   } else {
1283     /* A generic version of creating embedded sf */
1284     PetscCall(PetscSFGetLeafRange(sf,&minleaf,&maxleaf));
1285     maxlocal = maxleaf - minleaf + 1;
1286     PetscCall(PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem));
1287     leafdata = leafmem - minleaf;
1288     /* Tag selected roots and bcast to leaves */
1289     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1290     PetscCall(PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE));
1291     PetscCall(PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE));
1292 
1293     /* Build esf with leaves that are still connected */
1294     esf_nleaves = 0;
1295     for (i=0; i<nleaves; i++) {
1296       j = ilocal ? ilocal[i] : i;
1297       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1298          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1299       */
1300       esf_nleaves += (leafdata[j] ? 1 : 0);
1301     }
1302     PetscCall(PetscMalloc1(esf_nleaves,&new_ilocal));
1303     PetscCall(PetscMalloc1(esf_nleaves,&new_iremote));
1304     for (i=n=0; i<nleaves; i++) {
1305       j = ilocal ? ilocal[i] : i;
1306       if (leafdata[j]) {
1307         new_ilocal[n]        = j;
1308         new_iremote[n].rank  = iremote[i].rank;
1309         new_iremote[n].index = iremote[i].index;
1310         ++n;
1311       }
1312     }
1313     PetscCall(PetscSFCreate(comm,esf));
1314     PetscCall(PetscSFSetFromOptions(*esf));
1315     PetscCall(PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER));
1316     PetscCall(PetscFree2(rootdata,leafmem));
1317   }
1318   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0));
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 /*@C
1323   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1324 
1325   Collective
1326 
1327   Input Parameters:
1328 + sf - original star forest
1329 . nselected  - number of selected leaves on this process
1330 - selected   - indices of the selected leaves on this process
1331 
1332   Output Parameter:
1333 .  newsf - new star forest
1334 
1335   Level: advanced
1336 
1337 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
1338 @*/
1339 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1340 {
1341   const PetscSFNode *iremote;
1342   PetscSFNode       *new_iremote;
1343   const PetscInt    *ilocal;
1344   PetscInt          i,nroots,*leaves,*new_ilocal;
1345   MPI_Comm          comm;
1346 
1347   PetscFunctionBegin;
1348   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1349   PetscSFCheckGraphSet(sf,1);
1350   if (nselected) PetscValidIntPointer(selected,3);
1351   PetscValidPointer(newsf,4);
1352 
1353   /* Uniq selected[] and put results in leaves[] */
1354   PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
1355   PetscCall(PetscMalloc1(nselected,&leaves));
1356   PetscCall(PetscArraycpy(leaves,selected,nselected));
1357   PetscCall(PetscSortedRemoveDupsInt(&nselected,leaves));
1358   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves);
1359 
1360   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1361   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1362     PetscCall((*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf));
1363   } else {
1364     PetscCall(PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote));
1365     PetscCall(PetscMalloc1(nselected,&new_ilocal));
1366     PetscCall(PetscMalloc1(nselected,&new_iremote));
1367     for (i=0; i<nselected; ++i) {
1368       const PetscInt l     = leaves[i];
1369       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1370       new_iremote[i].rank  = iremote[l].rank;
1371       new_iremote[i].index = iremote[l].index;
1372     }
1373     PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf));
1374     PetscCall(PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER));
1375   }
1376   PetscCall(PetscFree(leaves));
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 /*@C
1381    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1382 
1383    Collective on PetscSF
1384 
1385    Input Parameters:
1386 +  sf - star forest on which to communicate
1387 .  unit - data type associated with each node
1388 .  rootdata - buffer to broadcast
1389 -  op - operation to use for reduction
1390 
1391    Output Parameter:
1392 .  leafdata - buffer to be reduced with values from each leaf's respective root
1393 
1394    Level: intermediate
1395 
1396    Notes:
    When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
    are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
    use PetscSFBcastWithMemTypeBegin() instead.
1400 .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
1401 @*/
1402 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1403 {
1404   PetscMemType   rootmtype,leafmtype;
1405 
1406   PetscFunctionBegin;
1407   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1408   PetscCall(PetscSFSetUp(sf));
1409   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
1410   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1411   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1412   PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op));
1413   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 /*@C
1418    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()
1419 
1420    Collective on PetscSF
1421 
1422    Input Parameters:
1423 +  sf - star forest on which to communicate
1424 .  unit - data type associated with each node
1425 .  rootmtype - memory type of rootdata
1426 .  rootdata - buffer to broadcast
1427 .  leafmtype - memory type of leafdata
1428 -  op - operation to use for reduction
1429 
1430    Output Parameter:
1431 .  leafdata - buffer to be reduced with values from each leaf's respective root
1432 
1433    Level: intermediate
1434 
1435 .seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
1436 @*/
1437 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1438 {
1439   PetscFunctionBegin;
1440   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1441   PetscCall(PetscSFSetUp(sf));
1442   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
1443   PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op));
1444   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 /*@C
1449    PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()
1450 
1451    Collective
1452 
1453    Input Parameters:
1454 +  sf - star forest
1455 .  unit - data type
1456 .  rootdata - buffer to broadcast
1457 -  op - operation to use for reduction
1458 
1459    Output Parameter:
1460 .  leafdata - buffer to be reduced with values from each leaf's respective root
1461 
1462    Level: intermediate
1463 
1464 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1465 @*/
1466 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1467 {
1468   PetscFunctionBegin;
1469   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1470   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0));
1471   PetscCall((*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op));
1472   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0));
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 /*@C
1477    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1478 
1479    Collective
1480 
1481    Input Parameters:
1482 +  sf - star forest
1483 .  unit - data type
1484 .  leafdata - values to reduce
1485 -  op - reduction operation
1486 
1487    Output Parameter:
1488 .  rootdata - result of reduction of values from all leaves of each root
1489 
1490    Level: intermediate
1491 
1492    Notes:
    When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
    are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
    use PetscSFReduceWithMemTypeBegin() instead.
1496 
1497 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1498 @*/
1499 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1500 {
1501   PetscMemType   rootmtype,leafmtype;
1502 
1503   PetscFunctionBegin;
1504   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1505   PetscCall(PetscSFSetUp(sf));
1506   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0));
1507   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1508   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1509   PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op));
1510   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0));
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 /*@C
1515    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1516 
1517    Collective
1518 
1519    Input Parameters:
1520 +  sf - star forest
1521 .  unit - data type
1522 .  leafmtype - memory type of leafdata
1523 .  leafdata - values to reduce
1524 .  rootmtype - memory type of rootdata
1525 -  op - reduction operation
1526 
1527    Output Parameter:
1528 .  rootdata - result of reduction of values from all leaves of each root
1529 
1530    Level: intermediate
1531 
1532 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1533 @*/
1534 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1535 {
1536   PetscFunctionBegin;
1537   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1538   PetscCall(PetscSFSetUp(sf));
1539   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0));
1540   PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op));
1541   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0));
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 /*@C
1546    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1547 
1548    Collective
1549 
1550    Input Parameters:
1551 +  sf - star forest
1552 .  unit - data type
1553 .  leafdata - values to reduce
1554 -  op - reduction operation
1555 
1556    Output Parameter:
1557 .  rootdata - result of reduction of values from all leaves of each root
1558 
1559    Level: intermediate
1560 
1561 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1562 @*/
1563 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1564 {
1565   PetscFunctionBegin;
1566   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1567   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0));
1568   PetscCall((*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op));
1569   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0));
1570   PetscFunctionReturn(0);
1571 }
1572 
1573 /*@C
1574    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1575 
1576    Collective
1577 
1578    Input Parameters:
1579 +  sf - star forest
1580 .  unit - data type
1581 .  leafdata - leaf values to use in reduction
1582 -  op - operation to use for reduction
1583 
1584    Output Parameters:
1585 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1586 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1587 
1588    Level: advanced
1589 
1590    Note:
1591    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1592    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1593    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1594    integers.
1595 
1596 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1597 @*/
1598 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1599 {
1600   PetscMemType   rootmtype,leafmtype,leafupdatemtype;
1601 
1602   PetscFunctionBegin;
1603   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1604   PetscCall(PetscSFSetUp(sf));
1605   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0));
1606   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1607   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1608   PetscCall(PetscGetMemType(leafupdate,&leafupdatemtype));
1609   PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1610   PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op));
1611   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0));
1612   PetscFunctionReturn(0);
1613 }
1614 
/*@C
   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootmtype - memory type of rootdata
.  leafmtype - memory type of leafdata
.  leafdata - leaf values to use in reduction
.  leafupdatemtype - memory type of leafupdate
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note: See PetscSFFetchAndOpBegin() for more details.

.seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  /* leafdata and leafupdate must be in the same memory type; mixed placement is unsupported (enforced here) */
  PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  /* Dispatch to the type-specific implementation with the caller-provided memory types */
  PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op));
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}
1650 
/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  /* Complete the communication started by the matching PetscSFFetchAndOpBegin()/PetscSFFetchAndOpWithMemTypeBegin() */
  PetscCall((*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op));
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  PetscFunctionReturn(0);
}
1679 
1680 /*@C
1681    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1682 
1683    Collective
1684 
1685    Input Parameter:
1686 .  sf - star forest
1687 
1688    Output Parameter:
1689 .  degree - degree of each root vertex
1690 
1691    Level: advanced
1692 
1693    Notes:
1694    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1695 
1696 .seealso: PetscSFGatherBegin()
1697 @*/
1698 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1699 {
1700   PetscFunctionBegin;
1701   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1702   PetscSFCheckGraphSet(sf,1);
1703   PetscValidPointer(degree,2);
1704   if (!sf->degreeknown) {
1705     PetscInt i, nroots = sf->nroots, maxlocal;
1706     PetscCheck(!sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1707     maxlocal = sf->maxleaf-sf->minleaf+1;
1708     PetscCall(PetscMalloc1(nroots,&sf->degree));
1709     PetscCall(PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1710     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1711     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1712     PetscCall(PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
1713   }
1714   *degree = NULL;
1715   PetscFunctionReturn(0);
1716 }
1717 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* sf->degreetmp is allocated by PetscSFComputeDegreeBegin(), so its absence means Begin was never called */
    PetscCheck(sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* finish the leaf-to-root MPI_SUM of ones started in PetscSFComputeDegreeBegin() */
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    /* cache the result; subsequent Begin/End pairs return it without communication */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1751 
1752 /*@C
1753    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1754    Each multi-root is assigned index of the corresponding original root.
1755 
1756    Collective
1757 
1758    Input Parameters:
1759 +  sf - star forest
1760 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1761 
1762    Output Parameters:
1763 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1764 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1765 
1766    Level: developer
1767 
1768    Notes:
1769    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1770 
1771 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1772 @*/
1773 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1774 {
1775   PetscSF             msf;
1776   PetscInt            i, j, k, nroots, nmroots;
1777 
1778   PetscFunctionBegin;
1779   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1780   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1781   if (nroots) PetscValidIntPointer(degree,2);
1782   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1783   PetscValidPointer(multiRootsOrigNumbering,4);
1784   PetscCall(PetscSFGetMultiSF(sf,&msf));
1785   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1786   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1787   for (i=0,j=0,k=0; i<nroots; i++) {
1788     if (!degree[i]) continue;
1789     for (j=0; j<degree[i]; j++,k++) {
1790       (*multiRootsOrigNumbering)[k] = i;
1791     }
1792   }
1793   PetscCheck(k == nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1794   if (nMultiRoots) *nMultiRoots = nmroots;
1795   PetscFunctionReturn(0);
1796 }
1797 
1798 /*@C
1799    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1800 
1801    Collective
1802 
1803    Input Parameters:
1804 +  sf - star forest
1805 .  unit - data type
1806 -  leafdata - leaf data to gather to roots
1807 
1808    Output Parameter:
1809 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1810 
1811    Level: intermediate
1812 
1813 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1814 @*/
1815 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1816 {
1817   PetscSF        multi = NULL;
1818 
1819   PetscFunctionBegin;
1820   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1821   PetscCall(PetscSFSetUp(sf));
1822   PetscCall(PetscSFGetMultiSF(sf,&multi));
1823   PetscCall(PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE));
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 /*@C
1828    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1829 
1830    Collective
1831 
1832    Input Parameters:
1833 +  sf - star forest
1834 .  unit - data type
1835 -  leafdata - leaf data to gather to roots
1836 
1837    Output Parameter:
1838 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1839 
1840    Level: intermediate
1841 
1842 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1843 @*/
1844 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1845 {
1846   PetscSF        multi = NULL;
1847 
1848   PetscFunctionBegin;
1849   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1850   PetscCall(PetscSFGetMultiSF(sf,&multi));
1851   PetscCall(PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE));
1852   PetscFunctionReturn(0);
1853 }
1854 
1855 /*@C
1856    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1857 
1858    Collective
1859 
1860    Input Parameters:
1861 +  sf - star forest
1862 .  unit - data type
1863 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1864 
1865    Output Parameter:
1866 .  leafdata - leaf data to be update with personal data from each respective root
1867 
1868    Level: intermediate
1869 
1870 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1871 @*/
1872 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1873 {
1874   PetscSF        multi = NULL;
1875 
1876   PetscFunctionBegin;
1877   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1878   PetscCall(PetscSFSetUp(sf));
1879   PetscCall(PetscSFGetMultiSF(sf,&multi));
1880   PetscCall(PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE));
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 /*@C
1885    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1886 
1887    Collective
1888 
1889    Input Parameters:
1890 +  sf - star forest
1891 .  unit - data type
1892 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1893 
1894    Output Parameter:
1895 .  leafdata - leaf data to be update with personal data from each respective root
1896 
1897    Level: intermediate
1898 
1899 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1900 @*/
1901 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1902 {
1903   PetscSF        multi = NULL;
1904 
1905   PetscFunctionBegin;
1906   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1907   PetscCall(PetscSFGetMultiSF(sf,&multi));
1908   PetscCall(PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE));
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1913 {
1914   PetscInt        i, n, nleaves;
1915   const PetscInt *ilocal = NULL;
1916   PetscHSetI      seen;
1917 
1918   PetscFunctionBegin;
1919   if (PetscDefined(USE_DEBUG)) {
1920     PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL));
1921     PetscCall(PetscHSetICreate(&seen));
1922     for (i = 0; i < nleaves; i++) {
1923       const PetscInt leaf = ilocal ? ilocal[i] : i;
1924       PetscCall(PetscHSetIAdd(seen,leaf));
1925     }
1926     PetscCall(PetscHSetIGetSize(seen,&n));
1927     PetscCheck(n == nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1928     PetscCall(PetscHSetIDestroy(&seen));
1929   }
1930   PetscFunctionReturn(0);
1931 }
1932 
/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA));
  PetscCall(PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB));
  /* If sfA uses indexed leaf storage, re-express its remote roots in sfB's root numbering:
     slot r holds the sfA root reached from leaf r, with rank=-1 marking "no sfA leaf here".
     Afterwards remotePointsA aliases this temporary array. */
  if (localPointsA) {
    PetscCall(PetscMalloc1(numRootsB,&reorderedRemotePointsA));
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      /* sfA leaves outside sfB's root space cannot be part of any composed path */
      if (localPointsA[i] >= numRootsB) continue;
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  PetscCall(PetscSFGetLeafRange(sfB,&minleaf,&maxleaf));
  PetscCall(PetscMalloc1(maxleaf-minleaf+1,&leafdataB));
  /* Broadcast the (possibly reordered) sfA root info through sfB: each sfB leaf then knows
     which sfA root it chains to (or rank=-1 if unconnected). leafdataB is shifted by minleaf. */
  PetscCall(PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* Count connected leaves; denseB stays true only if sfB has contiguous leaves and all are connected,
     in which case leafdataB can be used directly as the new remote array (ownership transfers below) */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB passes to the new SF */
  } else {
    /* Compact: keep only connected leaves, recording their local indices */
    PetscCall(PetscMalloc1(numLeavesBA,&localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA,&remotePointsBA));
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  /* PETSC_OWN_POINTER: the new SF takes over both arrays; do not free them here */
  PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}
2021 
/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  /* Lay out sfA's remote roots over sfB's leaf range [minleaf,maxleaf], with rank=-1
     marking positions that are not sfA leaves; the array is indexed shifted by minleaf */
  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA));
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      /* skip sfA leaves outside sfB's leaf range: they cannot be on any composed path */
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* contiguous sfA leaf storage: leaf index is simply i */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  /* Reduce the sfA root info from sfB's leaves onto sfB's roots; entries left at rank=-1
     are sfB roots that are not connected to any sfA root */
  PetscCall(PetscMalloc1(numRootsB,&localPointsBA));
  PetscCall(PetscMalloc1(numRootsB,&remotePointsBA));
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  PetscCall(PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
  PetscCall(PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* Compact in place: keep only connected sfB roots, recording their indices as new local leaves */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  /* PETSC_OWN_POINTER: the new SF takes over both arrays; do not free them here */
  PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}
2124 
2125 /*
2126   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
2127 
2128   Input Parameters:
2129 . sf - The global PetscSF
2130 
2131   Output Parameters:
2132 . out - The local PetscSF
2133  */
2134 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2135 {
2136   MPI_Comm           comm;
2137   PetscMPIInt        myrank;
2138   const PetscInt     *ilocal;
2139   const PetscSFNode  *iremote;
2140   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
2141   PetscSFNode        *liremote;
2142   PetscSF            lsf;
2143 
2144   PetscFunctionBegin;
2145   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2146   if (sf->ops->CreateLocalSF) {
2147     PetscCall((*sf->ops->CreateLocalSF)(sf,out));
2148   } else {
2149     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2150     PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
2151     PetscCallMPI(MPI_Comm_rank(comm,&myrank));
2152 
2153     /* Find out local edges and build a local SF */
2154     PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
2155     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2156     PetscCall(PetscMalloc1(lnleaves,&lilocal));
2157     PetscCall(PetscMalloc1(lnleaves,&liremote));
2158 
2159     for (i=j=0; i<nleaves; i++) {
2160       if (iremote[i].rank == (PetscInt)myrank) {
2161         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2162         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2163         liremote[j].index = iremote[i].index;
2164         j++;
2165       }
2166     }
2167     PetscCall(PetscSFCreate(PETSC_COMM_SELF,&lsf));
2168     PetscCall(PetscSFSetFromOptions(lsf));
2169     PetscCall(PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER));
2170     PetscCall(PetscSFSetUp(lsf));
2171     *out = lsf;
2172   }
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2177 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2178 {
2179   PetscMemType       rootmtype,leafmtype;
2180 
2181   PetscFunctionBegin;
2182   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2183   PetscCall(PetscSFSetUp(sf));
2184   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
2185   PetscCall(PetscGetMemType(rootdata,&rootmtype));
2186   PetscCall(PetscGetMemType(leafdata,&leafmtype));
2187   if (sf->ops->BcastToZero) {
2188     PetscCall((*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata));
2189   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2190   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 /*@
2195   PetscSFConcatenate - concatenate multiple SFs into one
2196 
2197   Input Parameters:
2198 + comm - the communicator
2199 . nsfs - the number of input PetscSF
2200 . sfs  - the array of input PetscSF
2201 . shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE)
2202 - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage
2203 
2204   Output Parameters:
2205 . newsf - The resulting PetscSF
2206 
2207   Level: developer
2208 
2209   Notes:
2210   The communicator of all SFs in sfs must be comm.
2211 
2212   The offsets in leafOffsets are added to the original leaf indices.
2213 
2214   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2215   In this case, NULL is also passed as ilocal to the resulting SF.
2216 
2217   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2218   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2219 
2220 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
2221 @*/
/* Concatenate nsfs star forests (all on the same communicator) into one SF.
   Roots are either shared (shareRoots == PETSC_TRUE: all inputs address the same
   root space, which must have the same local size on each) or stacked end-to-end
   (root numbers of sfs[s] are shifted by the sum of root counts of sfs[0..s-1]).
   Leaves are always stacked; leafOffsets[s] gives the shift applied to the leaf
   indices of sfs[s], and may be NULL only when every input has ilocal == NULL
   (contiguous leaves), in which case the result also has contiguous leaves. */
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt            i, s, nLeaves, nRoots;
  PetscInt           *leafArrayOffsets; /* prefix sums of input leaf counts: where each SF's leaves land in the concatenated arrays */
  PetscInt           *ilocal_new;       /* concatenated/renumbered ilocal; stays NULL when all inputs have contiguous leaves */
  PetscSFNode        *iremote_new;      /* concatenated/renumbered remote roots, filled by per-SF broadcasts below */
  PetscInt           *rootOffsets;      /* prefix sums of input root counts; all zero when shareRoots (calloc'd) */
  PetscBool           all_ilocal_null = PETSC_FALSE;
  PetscMPIInt         rank;

  PetscFunctionBegin;
  {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    PetscCall(PetscSFCreate(comm,&dummy));
    PetscValidLogicalCollectiveInt(dummy,nsfs,2);
    PetscValidPointer(sfs,3);
    for (i=0; i<nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i],PETSCSF_CLASSID,3);
      PetscCheckSameComm(dummy,1,sfs[i],3);
    }
    PetscValidLogicalCollectiveBool(dummy,shareRoots,4);
    if (leafOffsets) PetscValidIntPointer(leafOffsets,5);
    PetscValidPointer(newsf,6);
    PetscCall(PetscSFDestroy(&dummy));
  }
  /* Trivial case: no inputs yields an empty SF */
  if (!nsfs) {
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscFunctionReturn(0);
  }
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* Determine the root space of the result */
  PetscCall(PetscCalloc1(nsfs+1, &rootOffsets)); /* zero-initialized: shareRoots keeps all offsets at 0 */
  if (shareRoots) {
    /* All inputs address the same roots; in debug mode verify they agree on the local root count */
    PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
    if (PetscDefined(USE_DEBUG)) {
      for (s=1; s<nsfs; s++) {
        PetscInt nr;

        PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
      }
    }
  } else {
    /* Stack root spaces: sfs[s]'s roots are shifted by rootOffsets[s] */
    for (s=0; s<nsfs; s++) {
      PetscInt nr;

      PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
      rootOffsets[s+1] = rootOffsets[s] + nr;
    }
    nRoots = rootOffsets[nsfs];
  }

  /* Calculate leaf array offsets and automatic root offsets */
  PetscCall(PetscMalloc1(nsfs+1,&leafArrayOffsets));
  leafArrayOffsets[0] = 0;
  for (s=0; s<nsfs; s++) {
    PetscInt        nl;

    PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
    leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* leafOffsets may be omitted only if every input has contiguous leaves (ilocal == NULL) */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s=0; s<nsfs; s++) {
      const PetscInt *ilocal;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL;
  if (!all_ilocal_null) {
    /* Reaching here implies leafOffsets != NULL (enforced by the PetscCheck above) */
    PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
    for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1; /* sentinel; every slot is overwritten below */
    for (s = 0; s<nsfs; s++) {
      const PetscInt   *ilocal;
      PetscInt         *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
      PetscInt          i, nleaves_l;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
      /* NULL ilocal means the identity map 0..nleaves_l-1 */
      for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots */
  PetscCall(PetscMalloc1(nLeaves, &iremote_new));
  for (i = 0; i < nLeaves; i++) {
    iremote_new[i].rank   = -1; /* sentinel; overwritten by the broadcasts below */
    iremote_new[i].index  = -1;
  }
  /* For each input SF, broadcast each root's new (rank, shifted index) pair down to
     its leaves, writing straight into the appropriate slice of iremote_new */
  for (s = 0; s<nsfs; s++) {
    PetscInt            i, nl, nr;
    PetscSF             tmp_sf;
    const PetscSFNode  *iremote;
    PetscSFNode        *tmp_rootdata;
    PetscSFNode        *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];

    PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
    PetscCall(PetscSFCreate(comm, &tmp_sf));
    /* create helper SF with contiguous leaves */
    /* casting away const is safe here: PETSC_COPY_VALUES means iremote is only read */
    PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES));
    PetscCall(PetscSFSetUp(tmp_sf));
    PetscCall(PetscMalloc1(nr, &tmp_rootdata));
    for (i = 0; i < nr; i++) {
      tmp_rootdata[i].index = i + rootOffsets[s]; /* rootOffsets[s] == 0 when shareRoots */
      tmp_rootdata[i].rank  = (PetscInt) rank;
    }
    /* MPIU_2INT matches the two-PetscInt layout of PetscSFNode */
    PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(  tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFDestroy(&tmp_sf));
    PetscCall(PetscFree(tmp_rootdata));
  }

  /* Build the new SF */
  PetscCall(PetscSFCreate(comm, newsf));
  /* ownership of ilocal_new and iremote_new transfers to *newsf (PETSC_OWN_POINTER) */
  PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*newsf));
  PetscCall(PetscFree(rootOffsets));
  PetscCall(PetscFree(leafArrayOffsets));
  PetscFunctionReturn(0);
}
2353