xref: /petsc/src/vec/is/sf/interface/sf.c (revision 3307d110e72ee4e6d2468971620073eb5ff93529)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petscctable.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <cuda_runtime.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 #endif
13 
#if defined(PETSC_CLANG_STATIC_ANALYZER)
void PetscSFCheckGraphSet(PetscSF,int);
#else
#if defined(PETSC_USE_DEBUG)
/* Error out if PetscSFSetGraph()/PetscSFSetGraphWithPattern() has not been called on sf (argument number arg); compiled to a no-op in optimized builds */
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
#endif

/* Text names for the PetscSFDuplicateOption enum, used for option processing and viewing */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
28 
29 /*@
30    PetscSFCreate - create a star forest communication context
31 
32    Collective
33 
34    Input Parameter:
35 .  comm - communicator on which the star forest will operate
36 
37    Output Parameter:
38 .  sf - new star forest context
39 
40    Options Database Keys:
41 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
42 .  -sf_type window    -Use MPI-3 one-sided window for communication
43 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
44 
45    Level: intermediate
46 
47    Notes:
48    When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
49    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
50    SFs are optimized and they have better performance than general SFs.
51 
52 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
53 @*/
54 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
55 {
56   PetscSF        b;
57 
58   PetscFunctionBegin;
59   PetscValidPointer(sf,2);
60   PetscCall(PetscSFInitializePackage());
61 
62   PetscCall(PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView));
63 
64   b->nroots    = -1;
65   b->nleaves   = -1;
66   b->minleaf   = PETSC_MAX_INT;
67   b->maxleaf   = PETSC_MIN_INT;
68   b->nranks    = -1;
69   b->rankorder = PETSC_TRUE;
70   b->ingroup   = MPI_GROUP_NULL;
71   b->outgroup  = MPI_GROUP_NULL;
72   b->graphset  = PETSC_FALSE;
73 #if defined(PETSC_HAVE_DEVICE)
74   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
75   b->use_stream_aware_mpi = PETSC_FALSE;
76   b->unknown_input_stream= PETSC_FALSE;
77   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
78     b->backend = PETSCSF_BACKEND_KOKKOS;
79   #elif defined(PETSC_HAVE_CUDA)
80     b->backend = PETSCSF_BACKEND_CUDA;
81   #elif defined(PETSC_HAVE_HIP)
82     b->backend = PETSCSF_BACKEND_HIP;
83   #endif
84 
85   #if defined(PETSC_HAVE_NVSHMEM)
86     b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
87     b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
88     PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL));
89     PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL));
90   #endif
91 #endif
92   b->vscat.from_n = -1;
93   b->vscat.to_n   = -1;
94   b->vscat.unit   = MPIU_SCALAR;
95  *sf = b;
96   PetscFunctionReturn(0);
97 }
98 
/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Parameter:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Give the type-specific implementation a chance to release its own resources first */
  if (sf->ops->Reset) PetscCall((*sf->ops->Reset)(sf));
  /* Return the graph-describing fields to their PetscSFCreate() defaults */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine/remote above may alias mine_alloc/remote_alloc; only the _alloc pointers own memory */
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup  != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
  /* Break the back-reference from the multi-SF before destroying it to avoid recursive destruction */
  if (sf->multi) sf->multi->multi = NULL;
  PetscCall(PetscSFDestroy(&sf->multi));
  PetscCall(PetscLayoutDestroy(&sf->map));

 #if defined(PETSC_HAVE_DEVICE)
  /* Free the device-side copies of the rmine arrays (one per leaf/root direction) */
  for (PetscInt i=0; i<2; i++) PetscCall(PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]));
 #endif

  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
142 
143 /*@C
144    PetscSFSetType - Set the PetscSF communication implementation
145 
146    Collective on PetscSF
147 
148    Input Parameters:
149 +  sf - the PetscSF context
150 -  type - a known method
151 
152    Options Database Key:
153 .  -sf_type <type> - Sets the method; use -help for a list
154    of available methods (for instance, window, basic, neighbor)
155 
156    Notes:
157    See "include/petscsf.h" for available methods (for instance)
158 +    PETSCSFWINDOW - MPI-2/3 one-sided
159 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
160 
161   Level: intermediate
162 
163 .seealso: PetscSFType, PetscSFCreate()
164 @*/
165 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
166 {
167   PetscBool      match;
168   PetscErrorCode (*r)(PetscSF);
169 
170   PetscFunctionBegin;
171   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
172   PetscValidCharPointer(type,2);
173 
174   PetscCall(PetscObjectTypeCompare((PetscObject)sf,type,&match));
175   if (match) PetscFunctionReturn(0);
176 
177   PetscCall(PetscFunctionListFind(PetscSFList,type,&r));
178   PetscCheck(r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
179   /* Destroy the previous PetscSF implementation context */
180   if (sf->ops->Destroy) PetscCall((*(sf)->ops->Destroy)(sf));
181   PetscCall(PetscMemzero(sf->ops,sizeof(*sf->ops)));
182   PetscCall(PetscObjectChangeTypeName((PetscObject)sf,type));
183   PetscCall((*r)(sf));
184   PetscFunctionReturn(0);
185 }
186 
187 /*@C
188   PetscSFGetType - Get the PetscSF communication implementation
189 
190   Not Collective
191 
192   Input Parameter:
193 . sf  - the PetscSF context
194 
195   Output Parameter:
196 . type - the PetscSF type name
197 
198   Level: intermediate
199 
200 .seealso: PetscSFSetType(), PetscSFCreate()
201 @*/
202 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
203 {
204   PetscFunctionBegin;
205   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
206   PetscValidPointer(type,2);
207   *type = ((PetscObject)sf)->type_name;
208   PetscFunctionReturn(0);
209 }
210 
/*@C
   PetscSFDestroy - destroy star forest

   Collective

   Input Parameter:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually destroys the object; otherwise just drop our reference */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  PetscCall(PetscSFReset(*sf));
  if ((*sf)->ops->Destroy) PetscCall((*(*sf)->ops->Destroy)(*sf));
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* When bs > 1 the vscat unit is a derived datatype (duplicated in PetscSFDuplicate()) that must be freed */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
  PetscCall(PetscHeaderDestroy(sf));
  PetscFunctionReturn(0);
}
236 
237 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
238 {
239   PetscInt           i, nleaves;
240   PetscMPIInt        size;
241   const PetscInt    *ilocal;
242   const PetscSFNode *iremote;
243 
244   PetscFunctionBegin;
245   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
246   PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote));
247   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size));
248   for (i = 0; i < nleaves; i++) {
249     const PetscInt rank = iremote[i].rank;
250     const PetscInt remote = iremote[i].index;
251     const PetscInt leaf = ilocal ? ilocal[i] : i;
252     PetscCheckFalse(rank < 0 || rank >= size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size);
253     PetscCheck(remote >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i);
254     PetscCheck(leaf >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i);
255   }
256   PetscFunctionReturn(0);
257 }
258 
259 /*@
260    PetscSFSetUp - set up communication structures
261 
262    Collective
263 
264    Input Parameter:
265 .  sf - star forest communication object
266 
267    Level: beginner
268 
269 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
270 @*/
271 PetscErrorCode PetscSFSetUp(PetscSF sf)
272 {
273   PetscFunctionBegin;
274   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
275   PetscSFCheckGraphSet(sf,1);
276   if (sf->setupcalled) PetscFunctionReturn(0);
277   PetscCall(PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0));
278   PetscCall(PetscSFCheckGraphValid_Private(sf));
279   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf,PETSCSFBASIC)); /* Zero all sf->ops */
280   if (sf->ops->SetUp) PetscCall((*sf->ops->SetUp)(sf));
281 #if defined(PETSC_HAVE_CUDA)
282   if (sf->backend == PETSCSF_BACKEND_CUDA) {
283     sf->ops->Malloc = PetscSFMalloc_CUDA;
284     sf->ops->Free   = PetscSFFree_CUDA;
285   }
286 #endif
287 #if defined(PETSC_HAVE_HIP)
288   if (sf->backend == PETSCSF_BACKEND_HIP) {
289     sf->ops->Malloc = PetscSFMalloc_HIP;
290     sf->ops->Free   = PetscSFFree_HIP;
291   }
292 #endif
293 
294 #
295 #if defined(PETSC_HAVE_KOKKOS)
296   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
297     sf->ops->Malloc = PetscSFMalloc_Kokkos;
298     sf->ops->Free   = PetscSFFree_Kokkos;
299   }
300 #endif
301   PetscCall(PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0));
302   sf->setupcalled = PETSC_TRUE;
303   PetscFunctionReturn(0);
304 }
305 
306 /*@
307    PetscSFSetFromOptions - set PetscSF options using the options database
308 
309    Logically Collective
310 
311    Input Parameter:
312 .  sf - star forest
313 
314    Options Database Keys:
315 +  -sf_type               - implementation type, see PetscSFSetType()
316 .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
317 .  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
318                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
319                             If true, this option only works with -use_gpu_aware_mpi 1.
320 .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
321                                If true, this option only works with -use_gpu_aware_mpi 1.
322 
323 -  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
324                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
325                               the only available is kokkos.
326 
327    Level: intermediate
328 @*/
329 PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
330 {
331   PetscSFType    deft;
332   char           type[256];
333   PetscBool      flg;
334 
335   PetscFunctionBegin;
336   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
337   PetscObjectOptionsBegin((PetscObject)sf);
338   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
339   PetscCall(PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg));
340   PetscCall(PetscSFSetType(sf,flg ? type : deft));
341   PetscCall(PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL));
342  #if defined(PETSC_HAVE_DEVICE)
343   {
344     char        backendstr[32] = {0};
345     PetscBool   isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
346     /* Change the defaults set in PetscSFCreate() with command line options */
347     PetscCall(PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL));
348     PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL));
349     PetscCall(PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set));
350     PetscCall(PetscStrcasecmp("cuda",backendstr,&isCuda));
351     PetscCall(PetscStrcasecmp("kokkos",backendstr,&isKokkos));
352     PetscCall(PetscStrcasecmp("hip",backendstr,&isHip));
353   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
354     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
355     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
356     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
357     else PetscCheck(!set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
358   #elif defined(PETSC_HAVE_KOKKOS)
359     PetscCheck(!set || isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
360    #endif
361   }
362  #endif
363   if (sf->ops->SetFromOptions) PetscCall((*sf->ops->SetFromOptions)(PetscOptionsObject,sf));
364   PetscOptionsEnd();
365   PetscFunctionReturn(0);
366 }
367 
368 /*@
369    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
370 
371    Logically Collective
372 
373    Input Parameters:
374 +  sf - star forest
375 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
376 
377    Level: advanced
378 
379 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
380 @*/
381 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
382 {
383   PetscFunctionBegin;
384   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
385   PetscValidLogicalCollectiveBool(sf,flg,2);
386   PetscCheck(!sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
387   sf->rankorder = flg;
388   PetscFunctionReturn(0);
389 }
390 
/*@C
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Parameters:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   Leaf indices in ilocal must be unique, otherwise an error occurs.

   Input arrays ilocal and iremote follow the PetscCopyMode semantics.
   In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
   PETSc might modify the respective array;
   if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
   Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

   Fortran Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

   Developer Notes:
   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
   This also allows to compare leaf sets of two SFs easily.

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscBool       unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  PetscCheck(nroots  >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots);
  PetscCheck(nleaves >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves);
  PetscCheck(localmode  >= PETSC_COPY_VALUES && localmode  <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong localmode %d",localmode);
  PetscCheck(remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong remotemode %d",remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0));

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES, work on private copies so the caller's arrays are never modified */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves,&tlocal));
    PetscCall(PetscArraycpy(tlocal,ilocal,nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves,&tremote));
    PetscCall(PetscArraycpy(tremote,iremote,nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode   work;

    /* Co-sort (ilocal,iremote) by leaf location so duplicates, min/max leaf and contiguity are easy to determine */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    unique = PetscNot(unique);
    PetscCheck(sf->allow_multi_leaves || unique,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves-1];
    contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1);
  } else {
    /* NULL ilocal means leaves 0..nleaves-1 in order */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* Contiguous leaves are canonically represented by a NULL ilocal; drop the array */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  /* mine/remote point at the data; the _alloc fields record ownership (NULL for PETSC_USE_POINTER) */
  sf->mine            = ilocal;
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc    = NULL;
  } else {
    sf->mine_alloc    = ilocal;
  }
  sf->remote          = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc  = NULL;
  } else {
    sf->remote_alloc  = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
504 
/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern);
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* The user-provided map is ignored; build a layout with `size` roots per rank */
    type = PETSCSFALLTOALL;
    PetscCall(PetscLayoutCreate(comm,&sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map,size));
    PetscCall(PetscLayoutSetSize(sf->map,((PetscInt)size)*size));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map,&n));
    PetscCall(PetscLayoutGetSize(map,&N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map,&sf->map));
  }
  PetscCall(PetscSFSetType(sf,type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves for a gather */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
603 
/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Parameter:
.  sf - star forest to invert

   Output Parameter:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL));
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
  PetscCall(PetscMalloc2(nroots,&roots,maxlocal,&leaves));
  /* Each leaf records its own (rank, index) so a reduce tells every root who points at it */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Roots start as (-1,-1); any root still negative after the reduce received no leaf */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  PetscCall(PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    PetscCall(PetscMalloc1(count,&newilocal));
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* newilocal ownership transfers to *isf (PETSC_OWN_POINTER); roots is copied, so free it below */
  PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf));
  PetscCall(PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots,leaves));
  PetscFunctionReturn(0);
}
672 
/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Parameters:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Parameter:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  MPI_Datatype   dtype=MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf));
  PetscCall(PetscSFGetType(sf,&type));
  if (type) PetscCall(PetscSFSetType(*newsf,type));
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      /* PETSC_COPY_VALUES guarantees the const arrays are not modified despite the casts */
      PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
      PetscCall(PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES));
    } else {
      PetscCall(PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit,&dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream= sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  if (sf->ops->Duplicate) PetscCall((*sf->ops->Duplicate)(sf,opt,*newsf));
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}
731 
732 /*@C
733    PetscSFGetGraph - Get the graph specifying a parallel star forest
734 
735    Not Collective
736 
737    Input Parameter:
738 .  sf - star forest
739 
740    Output Parameters:
741 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
742 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
743 .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
744 -  iremote - remote locations of root vertices for each leaf on the current process
745 
746    Notes:
747      We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
748 
749      The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
750      see its manpage for details.
751 
752    Fortran Notes:
753      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
754      want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
755 
756      To check for a NULL ilocal use
757 $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
758 
759    Level: intermediate
760 
761 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
762 @*/
763 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
764 {
765   PetscFunctionBegin;
766   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
767   if (sf->ops->GetGraph) {
768     PetscCall((sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote));
769   } else {
770     if (nroots) *nroots = sf->nroots;
771     if (nleaves) *nleaves = sf->nleaves;
772     if (ilocal) *ilocal = sf->mine;
773     if (iremote) *iremote = sf->remote;
774   }
775   PetscFunctionReturn(0);
776 }
777 
778 /*@
779    PetscSFGetLeafRange - Get the active leaf ranges
780 
781    Not Collective
782 
783    Input Parameter:
784 .  sf - star forest
785 
786    Output Parameters:
787 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
788 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
789 
790    Level: developer
791 
792 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
793 @*/
794 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
795 {
796   PetscFunctionBegin;
797   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
798   PetscSFCheckGraphSet(sf,1);
799   if (minleaf) *minleaf = sf->minleaf;
800   if (maxleaf) *maxleaf = sf->maxleaf;
801   PetscFunctionReturn(0);
802 }
803 
804 /*@C
805    PetscSFViewFromOptions - View from Options
806 
807    Collective on PetscSF
808 
809    Input Parameters:
810 +  A - the star forest
811 .  obj - Optional object
812 -  name - command line option
813 
814    Level: intermediate
815 .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
816 @*/
817 PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
818 {
819   PetscFunctionBegin;
820   PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
821   PetscCall(PetscObjectViewFromOptions((PetscObject)A,obj,name));
822   PetscFunctionReturn(0);
823 }
824 
825 /*@C
826    PetscSFView - view a star forest
827 
828    Collective
829 
830    Input Parameters:
831 +  sf - star forest
832 -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
833 
834    Level: beginner
835 
836 .seealso: PetscSFCreate(), PetscSFSetGraph()
837 @*/
838 PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
839 {
840   PetscBool         iascii;
841   PetscViewerFormat format;
842 
843   PetscFunctionBegin;
844   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
845   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer));
846   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
847   PetscCheckSameComm(sf,1,viewer,2);
848   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
849   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
850   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
851     PetscMPIInt rank;
852     PetscInt    ii,i,j;
853 
854     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer));
855     PetscCall(PetscViewerASCIIPushTab(viewer));
856     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
857       if (!sf->graphset) {
858         PetscCall(PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n"));
859         PetscCall(PetscViewerASCIIPopTab(viewer));
860         PetscFunctionReturn(0);
861       }
862       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
863       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
864       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks));
865       for (i=0; i<sf->nleaves; i++) {
866         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index));
867       }
868       PetscCall(PetscViewerFlush(viewer));
869       PetscCall(PetscViewerGetFormat(viewer,&format));
870       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
871         PetscMPIInt *tmpranks,*perm;
872         PetscCall(PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm));
873         PetscCall(PetscArraycpy(tmpranks,sf->ranks,sf->nranks));
874         for (i=0; i<sf->nranks; i++) perm[i] = i;
875         PetscCall(PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm));
876         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank));
877         for (ii=0; ii<sf->nranks; ii++) {
878           i = perm[ii];
879           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]));
880           for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
881             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]));
882           }
883         }
884         PetscCall(PetscFree2(tmpranks,perm));
885       }
886       PetscCall(PetscViewerFlush(viewer));
887       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
888     }
889     PetscCall(PetscViewerASCIIPopTab(viewer));
890   }
891   if (sf->ops->View) PetscCall((*sf->ops->View)(sf,viewer));
892   PetscFunctionReturn(0);
893 }
894 
895 /*@C
896    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
897 
898    Not Collective
899 
900    Input Parameter:
901 .  sf - star forest
902 
903    Output Parameters:
904 +  nranks - number of ranks referenced by local part
905 .  ranks - array of ranks
906 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
907 .  rmine - concatenated array holding local indices referencing each remote rank
908 -  rremote - concatenated array holding remote indices referenced for each remote rank
909 
910    Level: developer
911 
912 .seealso: PetscSFGetLeafRanks()
913 @*/
914 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
915 {
916   PetscFunctionBegin;
917   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
918   PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
919   if (sf->ops->GetRootRanks) {
920     PetscCall((sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote));
921   } else {
922     /* The generic implementation */
923     if (nranks)  *nranks  = sf->nranks;
924     if (ranks)   *ranks   = sf->ranks;
925     if (roffset) *roffset = sf->roffset;
926     if (rmine)   *rmine   = sf->rmine;
927     if (rremote) *rremote = sf->rremote;
928   }
929   PetscFunctionReturn(0);
930 }
931 
932 /*@C
933    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
934 
935    Not Collective
936 
937    Input Parameter:
938 .  sf - star forest
939 
940    Output Parameters:
941 +  niranks - number of leaf ranks referencing roots on this process
942 .  iranks - array of ranks
943 .  ioffset - offset in irootloc for each rank (length niranks+1)
944 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
945 
946    Level: developer
947 
948 .seealso: PetscSFGetRootRanks()
949 @*/
950 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
951 {
952   PetscFunctionBegin;
953   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
954   PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
955   if (sf->ops->GetLeafRanks) {
956     PetscCall((sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc));
957   } else {
958     PetscSFType type;
959     PetscCall(PetscSFGetType(sf,&type));
960     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
961   }
962   PetscFunctionReturn(0);
963 }
964 
965 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
966   PetscInt i;
967   for (i=0; i<n; i++) {
968     if (needle == list[i]) return PETSC_TRUE;
969   }
970   return PETSC_FALSE;
971 }
972 
973 /*@C
974    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
975 
976    Collective
977 
978    Input Parameters:
979 +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
980 -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
981 
982    Level: developer
983 
984 .seealso: PetscSFGetRootRanks()
985 @*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size));
  /* Count leaves per remote rank; ranks are offset by +1 when used as table keys
     (presumably because PetscTable cannot store key 0 — see the -1 below) */
  PetscCall(PetscTableCreate(10,size,&table));
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    PetscCall(PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES));
  }
  PetscCall(PetscTableGetCount(table,&sf->nranks));
  /* rmine/rremote are sized for all leaves; ranks/roffset form a CSR-style index over them */
  PetscCall(PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks));
  /* Drain the table into ranks[] (remote rank ids) and rcount[] (leaf count per rank) */
  PetscCall(PetscTableGetHeadPosition(table,&pos));
  for (i=0; i<sf->nranks; i++) {
    PetscCall(PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]));
    ranks[i]--;             /* Convert back to 0-based */
  }
  PetscCall(PetscTableDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    /* Translate the distinguished group's members into rank numbers of sf's communicator */
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_size(dgroup,&groupsize));
    PetscCall(PetscMalloc1(groupsize,&dgroupranks));
    PetscCall(PetscMalloc1(groupsize,&groupranks));
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  /* Two-pointer partition: scan from both ends, swapping misplaced entries (and their counts) */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition by rank so the lookups below can use binary search (PetscFindMPIInt) */
  PetscCall(PetscSortIntWithArray(sf->ndranks,ranks,rcount));
  PetscCall(PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks));
  /* Prefix-sum the per-rank counts: rank i's leaves occupy rmine/rremote[roffset[i]..roffset[i+1]) */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i],sf->ranks+i));
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0; /* reset; reused below as a per-rank fill cursor */
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    /* Leaves with the same remote rank often come consecutively; reuse the previous lookup */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank));
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheck(irank >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount,ranks));
  PetscFunctionReturn(0);
}
1075 
1076 /*@C
1077    PetscSFGetGroups - gets incoming and outgoing process groups
1078 
1079    Collective
1080 
1081    Input Parameter:
1082 .  sf - star forest
1083 
1084    Output Parameters:
1085 +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1086 -  outgoing - group of destination processes for outgoing edges (roots that I reference)
1087 
1088    Level: developer
1089 
1090 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1091 @*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheck(sf->nranks >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  /* Both groups are constructed lazily on first request and cached on the SF */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    /* Helper SF: one root per process, one leaf per referenced remote rank; the root's
       degree is then the number of processes whose leaves reference roots here */
    PetscCall(PetscMalloc1(sf->nranks,&remote));
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount));
    PetscCall(PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount,&indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount,&indegree));
    /* Enumerate the incoming ranks */
    /* Every process sends its own rank; the gather collects the ids of all referencing processes */
    PetscCall(PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks));
    PetscCall(PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks,outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup;

  /* The outgoing group is simply the already-computed list of root ranks we reference */
  if (sf->outgroup == MPI_GROUP_NULL) {
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1137 
1138 /*@
1139    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1140 
1141    Collective
1142 
1143    Input Parameter:
1144 .  sf - star forest that may contain roots with 0 or with more than 1 vertex
1145 
1146    Output Parameter:
1147 .  multi - star forest with split roots, such that each root has degree exactly 1
1148 
1149    Level: developer
1150 
1151    Notes:
1152 
1153    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1154    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1155    edge, it is a candidate for future optimization that might involve its removal.
1156 
1157 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1158 @*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi));
    *multi = sf->multi;
    sf->multi->multi = sf->multi; /* the multi-SF of a multi-SF is itself */
    PetscFunctionReturn(0);
  }
  /* Build and cache the multi-SF on first request */
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    /* indegree[r] = number of leaves attached to root r */
    PetscCall(PetscSFComputeDegreeBegin(sf,&indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf,&indegree));
    PetscCall(PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset));
    /* Prefix-sum of degrees: root r's split copies occupy [inoffset[r],inoffset[r+1]) in the multi root space */
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands each leaf a distinct slot within its root's range (returned in outoffset) */
    PetscCall(PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM));
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        PetscCheckFalse(inoffset[i] + indegree[i] != inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    /* Same leaves as sf, but each now targets its private slot in the expanded root space */
    PetscCall(PetscMalloc1(sf->nleaves,&remote));
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER));
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset));
      /* Deliver each leaf's owning rank into its multi-root slot */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      PetscCall(PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset));
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its new (rank-sorted) slot index */
      PetscCall(PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves,&newremote));
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset));
    }
    PetscCall(PetscFree3(inoffset,outones,outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1229 
1230 /*@C
1231    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1232 
1233    Collective
1234 
1235    Input Parameters:
1236 +  sf - original star forest
1237 .  nselected  - number of selected roots on this process
1238 -  selected   - indices of the selected roots on this process
1239 
1240    Output Parameter:
1241 .  esf - new star forest
1242 
1243    Level: advanced
1244 
1245    Note:
1246    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1247    be done by calling PetscSFGetGraph().
1248 
1249 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1250 @*/
1251 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1252 {
1253   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1254   const PetscInt    *ilocal;
1255   signed char       *rootdata,*leafdata,*leafmem;
1256   const PetscSFNode *iremote;
1257   PetscSFNode       *new_iremote;
1258   MPI_Comm          comm;
1259 
1260   PetscFunctionBegin;
1261   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1262   PetscSFCheckGraphSet(sf,1);
1263   if (nselected) PetscValidIntPointer(selected,3);
1264   PetscValidPointer(esf,4);
1265 
1266   PetscCall(PetscSFSetUp(sf));
1267   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0));
1268   PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
1269   PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
1270 
1271   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1272     PetscBool dups;
1273     PetscCall(PetscCheckDupsInt(nselected,selected,&dups));
1274     PetscCheck(!dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1275     for (i=0; i<nselected; i++)
1276       PetscCheckFalse(selected[i] < 0 || selected[i] >= nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots);
1277   }
1278 
1279   if (sf->ops->CreateEmbeddedRootSF) {
1280     PetscCall((*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf));
1281   } else {
1282     /* A generic version of creating embedded sf */
1283     PetscCall(PetscSFGetLeafRange(sf,&minleaf,&maxleaf));
1284     maxlocal = maxleaf - minleaf + 1;
1285     PetscCall(PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem));
1286     leafdata = leafmem - minleaf;
1287     /* Tag selected roots and bcast to leaves */
1288     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1289     PetscCall(PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE));
1290     PetscCall(PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE));
1291 
1292     /* Build esf with leaves that are still connected */
1293     esf_nleaves = 0;
1294     for (i=0; i<nleaves; i++) {
1295       j = ilocal ? ilocal[i] : i;
1296       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1297          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1298       */
1299       esf_nleaves += (leafdata[j] ? 1 : 0);
1300     }
1301     PetscCall(PetscMalloc1(esf_nleaves,&new_ilocal));
1302     PetscCall(PetscMalloc1(esf_nleaves,&new_iremote));
1303     for (i=n=0; i<nleaves; i++) {
1304       j = ilocal ? ilocal[i] : i;
1305       if (leafdata[j]) {
1306         new_ilocal[n]        = j;
1307         new_iremote[n].rank  = iremote[i].rank;
1308         new_iremote[n].index = iremote[i].index;
1309         ++n;
1310       }
1311     }
1312     PetscCall(PetscSFCreate(comm,esf));
1313     PetscCall(PetscSFSetFromOptions(*esf));
1314     PetscCall(PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER));
1315     PetscCall(PetscFree2(rootdata,leafmem));
1316   }
1317   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0));
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 /*@C
1322   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1323 
1324   Collective
1325 
1326   Input Parameters:
1327 + sf - original star forest
1328 . nselected  - number of selected leaves on this process
1329 - selected   - indices of the selected leaves on this process
1330 
1331   Output Parameter:
1332 .  newsf - new star forest
1333 
1334   Level: advanced
1335 
1336 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
1337 @*/
1338 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1339 {
1340   const PetscSFNode *iremote;
1341   PetscSFNode       *new_iremote;
1342   const PetscInt    *ilocal;
1343   PetscInt          i,nroots,*leaves,*new_ilocal;
1344   MPI_Comm          comm;
1345 
1346   PetscFunctionBegin;
1347   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1348   PetscSFCheckGraphSet(sf,1);
1349   if (nselected) PetscValidIntPointer(selected,3);
1350   PetscValidPointer(newsf,4);
1351 
1352   /* Uniq selected[] and put results in leaves[] */
1353   PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
1354   PetscCall(PetscMalloc1(nselected,&leaves));
1355   PetscCall(PetscArraycpy(leaves,selected,nselected));
1356   PetscCall(PetscSortedRemoveDupsInt(&nselected,leaves));
1357   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves);
1358 
1359   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1360   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1361     PetscCall((*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf));
1362   } else {
1363     PetscCall(PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote));
1364     PetscCall(PetscMalloc1(nselected,&new_ilocal));
1365     PetscCall(PetscMalloc1(nselected,&new_iremote));
1366     for (i=0; i<nselected; ++i) {
1367       const PetscInt l     = leaves[i];
1368       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1369       new_iremote[i].rank  = iremote[l].rank;
1370       new_iremote[i].index = iremote[l].index;
1371     }
1372     PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf));
1373     PetscCall(PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER));
1374   }
1375   PetscCall(PetscFree(leaves));
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 /*@C
1380    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1381 
1382    Collective on PetscSF
1383 
1384    Input Parameters:
1385 +  sf - star forest on which to communicate
1386 .  unit - data type associated with each node
1387 .  rootdata - buffer to broadcast
1388 -  op - operation to use for reduction
1389 
1390    Output Parameter:
1391 .  leafdata - buffer to be reduced with values from each leaf's respective root
1392 
1393    Level: intermediate
1394 
1395    Notes:
1396     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
    are host pointers or device pointers, which may incur a noticeable cost. If you already know the info, you should
1398     use PetscSFBcastWithMemTypeBegin() instead.
1399 .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
1400 @*/
1401 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1402 {
1403   PetscMemType   rootmtype,leafmtype;
1404 
1405   PetscFunctionBegin;
1406   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1407   PetscCall(PetscSFSetUp(sf));
1408   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
1409   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1410   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1411   PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op));
1412   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 /*@C
1417    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()
1418 
1419    Collective on PetscSF
1420 
1421    Input Parameters:
1422 +  sf - star forest on which to communicate
1423 .  unit - data type associated with each node
1424 .  rootmtype - memory type of rootdata
1425 .  rootdata - buffer to broadcast
1426 .  leafmtype - memory type of leafdata
1427 -  op - operation to use for reduction
1428 
1429    Output Parameter:
1430 .  leafdata - buffer to be reduced with values from each leaf's respective root
1431 
1432    Level: intermediate
1433 
1434 .seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
1435 @*/
1436 PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1437 {
1438   PetscFunctionBegin;
1439   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1440   PetscCall(PetscSFSetUp(sf));
1441   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
1442   PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op));
1443   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 /*@C
1448    PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()
1449 
1450    Collective
1451 
1452    Input Parameters:
1453 +  sf - star forest
1454 .  unit - data type
1455 .  rootdata - buffer to broadcast
1456 -  op - operation to use for reduction
1457 
1458    Output Parameter:
1459 .  leafdata - buffer to be reduced with values from each leaf's respective root
1460 
1461    Level: intermediate
1462 
1463 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1464 @*/
1465 PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1466 {
1467   PetscFunctionBegin;
1468   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1469   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0));
1470   PetscCall((*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op));
1471   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0));
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 /*@C
1476    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1477 
1478    Collective
1479 
1480    Input Parameters:
1481 +  sf - star forest
1482 .  unit - data type
1483 .  leafdata - values to reduce
1484 -  op - reduction operation
1485 
1486    Output Parameter:
1487 .  rootdata - result of reduction of values from all leaves of each root
1488 
1489    Level: intermediate
1490 
1491    Notes:
1492     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
    are host pointers or device pointers, which may incur a noticeable cost. If you already know the info, you should
1494     use PetscSFReduceWithMemTypeBegin() instead.
1495 
1496 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1497 @*/
1498 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1499 {
1500   PetscMemType   rootmtype,leafmtype;
1501 
1502   PetscFunctionBegin;
1503   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1504   PetscCall(PetscSFSetUp(sf));
1505   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0));
1506   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1507   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1508   PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op));
1509   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0));
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 /*@C
1514    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1515 
1516    Collective
1517 
1518    Input Parameters:
1519 +  sf - star forest
1520 .  unit - data type
1521 .  leafmtype - memory type of leafdata
1522 .  leafdata - values to reduce
1523 .  rootmtype - memory type of rootdata
1524 -  op - reduction operation
1525 
1526    Output Parameter:
1527 .  rootdata - result of reduction of values from all leaves of each root
1528 
1529    Level: intermediate
1530 
1531 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1532 @*/
1533 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1534 {
1535   PetscFunctionBegin;
1536   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1537   PetscCall(PetscSFSetUp(sf));
1538   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0));
1539   PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op));
1540   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0));
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 /*@C
1545    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1546 
1547    Collective
1548 
1549    Input Parameters:
1550 +  sf - star forest
1551 .  unit - data type
1552 .  leafdata - values to reduce
1553 -  op - reduction operation
1554 
1555    Output Parameter:
1556 .  rootdata - result of reduction of values from all leaves of each root
1557 
1558    Level: intermediate
1559 
1560 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1561 @*/
1562 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1563 {
1564   PetscFunctionBegin;
1565   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1566   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0));
1567   PetscCall((*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op));
1568   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0));
1569   PetscFunctionReturn(0);
1570 }
1571 
/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  /* Determine host/device residency of each of the three buffers involved */
  PetscCall(PetscGetMemType(rootdata,&rootmtype));
  PetscCall(PetscGetMemType(leafdata,&leafmtype));
  PetscCall(PetscGetMemType(leafupdate,&leafupdatemtype));
  /* Implementations require leafdata and leafupdate to live in the same memory space */
  PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  /* Dispatch to the type-specific implementation; note leafupdate shares leafmtype (checked above) */
  PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op));
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}
1613 
/*@C
   PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Parameters:
+  sf - star forest
.  unit - data type
.  rootmtype - memory type of rootdata
.  leafmtype - memory type of leafdata
.  leafdata - leaf values to use in reduction
.  leafupdatemtype - memory type of leafupdate
-  op - operation to use for reduction

   Output Parameters:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note: See PetscSFFetchAndOpBegin() for more details.

.seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  /* Caller supplied memory types, so no PetscGetMemType() queries are needed here; still enforce
     that leafdata and leafupdate live in the same memory space, as the implementations require */
  PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op));
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}
1649 
1650 /*@C
1651    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1652 
1653    Collective
1654 
1655    Input Parameters:
1656 +  sf - star forest
1657 .  unit - data type
1658 .  leafdata - leaf values to use in reduction
1659 -  op - operation to use for reduction
1660 
1661    Output Parameters:
1662 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1663 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1664 
1665    Level: advanced
1666 
1667 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1668 @*/
1669 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1670 {
1671   PetscFunctionBegin;
1672   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1673   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0));
1674   PetscCall((*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op));
1675   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0));
1676   PetscFunctionReturn(0);
1677 }
1678 
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree being non-NULL means a Begin is already pending (cleared only by End or Destroy) */
    PetscCheck(!sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1; /* size of the contiguous local leaf index range */
    PetscCall(PetscMalloc1(nroots,&sf->degree));
    PetscCall(PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Reduce a vector of ones from leaves to roots: each root receives its leaf count (degree).
       degreetmp is biased by -minleaf so that leaf index l maps to degreetmp[l-minleaf]. */
    PetscCall(PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
  }
  /* Result is not available until PetscSFComputeDegreeEnd(); return NULL in the meantime */
  *degree = NULL;
  PetscFunctionReturn(0);
}
1716 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Parameter:
.  sf - star forest

   Output Parameter:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated (non-NULL, at least one entry) by PetscSFComputeDegreeBegin() */
    PetscCheck(sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Same -minleaf bias as in the matching ReduceBegin, see PetscSFComputeDegreeBegin() */
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    sf->degreeknown = PETSC_TRUE; /* cache result; subsequent Begin/End pairs are no-ops */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1750 
/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Parameters:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Parameters:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

.seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF             msf;
  PetscInt            i, j, k, nroots, nmroots;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
  if (nroots) PetscValidIntPointer(degree,2);
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
  PetscValidPointer(multiRootsOrigNumbering,4);
  PetscCall(PetscSFGetMultiSF(sf,&msf));
  PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
  PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
  /* Each original root i with degree[i] > 0 owns degree[i] consecutive multi-roots;
     expand that run-length encoding so multi-root k records its original root index */
  for (i=0,j=0,k=0; i<nroots; i++) {
    if (!degree[i]) continue; /* roots with no leaves have no multi-root */
    for (j=0; j<degree[i]; j++,k++) {
      (*multiRootsOrigNumbering)[k] = i;
    }
  }
  /* Total expansion must match the multi-SF root count */
  PetscCheck(k == nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}
1796 
1797 /*@C
1798    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1799 
1800    Collective
1801 
1802    Input Parameters:
1803 +  sf - star forest
1804 .  unit - data type
1805 -  leafdata - leaf data to gather to roots
1806 
1807    Output Parameter:
1808 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1809 
1810    Level: intermediate
1811 
1812 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1813 @*/
1814 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1815 {
1816   PetscSF        multi = NULL;
1817 
1818   PetscFunctionBegin;
1819   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1820   PetscCall(PetscSFSetUp(sf));
1821   PetscCall(PetscSFGetMultiSF(sf,&multi));
1822   PetscCall(PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE));
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 /*@C
1827    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1828 
1829    Collective
1830 
1831    Input Parameters:
1832 +  sf - star forest
1833 .  unit - data type
1834 -  leafdata - leaf data to gather to roots
1835 
1836    Output Parameter:
1837 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1838 
1839    Level: intermediate
1840 
1841 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1842 @*/
1843 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1844 {
1845   PetscSF        multi = NULL;
1846 
1847   PetscFunctionBegin;
1848   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1849   PetscCall(PetscSFGetMultiSF(sf,&multi));
1850   PetscCall(PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE));
1851   PetscFunctionReturn(0);
1852 }
1853 
1854 /*@C
1855    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1856 
1857    Collective
1858 
1859    Input Parameters:
1860 +  sf - star forest
1861 .  unit - data type
1862 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1863 
1864    Output Parameter:
1865 .  leafdata - leaf data to be update with personal data from each respective root
1866 
1867    Level: intermediate
1868 
1869 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1870 @*/
1871 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1872 {
1873   PetscSF        multi = NULL;
1874 
1875   PetscFunctionBegin;
1876   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1877   PetscCall(PetscSFSetUp(sf));
1878   PetscCall(PetscSFGetMultiSF(sf,&multi));
1879   PetscCall(PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE));
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 /*@C
1884    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1885 
1886    Collective
1887 
1888    Input Parameters:
1889 +  sf - star forest
1890 .  unit - data type
1891 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1892 
1893    Output Parameter:
1894 .  leafdata - leaf data to be update with personal data from each respective root
1895 
1896    Level: intermediate
1897 
1898 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1899 @*/
1900 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1901 {
1902   PetscSF        multi = NULL;
1903 
1904   PetscFunctionBegin;
1905   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1906   PetscCall(PetscSFGetMultiSF(sf,&multi));
1907   PetscCall(PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE));
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1912 {
1913   PetscInt        i, n, nleaves;
1914   const PetscInt *ilocal = NULL;
1915   PetscHSetI      seen;
1916 
1917   PetscFunctionBegin;
1918   if (PetscDefined(USE_DEBUG)) {
1919     PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL));
1920     PetscCall(PetscHSetICreate(&seen));
1921     for (i = 0; i < nleaves; i++) {
1922       const PetscInt leaf = ilocal ? ilocal[i] : i;
1923       PetscCall(PetscHSetIAdd(seen,leaf));
1924     }
1925     PetscCall(PetscHSetIGetSize(seen,&n));
1926     PetscCheck(n == nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1927     PetscCall(PetscHSetIDestroy(&seen));
1928   }
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 /*@
1933   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1934 
1935   Input Parameters:
1936 + sfA - The first PetscSF
1937 - sfB - The second PetscSF
1938 
1939   Output Parameters:
1940 . sfBA - The composite SF
1941 
1942   Level: developer
1943 
1944   Notes:
1945   Currently, the two SFs must be defined on congruent communicators and they must be true star
1946   forests, i.e. the same leaf is not connected with different roots.
1947 
1948   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
1949   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1950   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1951   Bcast on sfA, then a Bcast on sfB, on connected nodes.
1952 
1953 .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1954 @*/
1955 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1956 {
1957   const PetscSFNode *remotePointsA,*remotePointsB;
1958   PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1959   const PetscInt    *localPointsA,*localPointsB;
1960   PetscInt          *localPointsBA;
1961   PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1962   PetscBool         denseB;
1963 
1964   PetscFunctionBegin;
1965   PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
1966   PetscSFCheckGraphSet(sfA,1);
1967   PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
1968   PetscSFCheckGraphSet(sfB,2);
1969   PetscCheckSameComm(sfA,1,sfB,2);
1970   PetscValidPointer(sfBA,3);
1971   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
1972   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
1973 
1974   PetscCall(PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA));
1975   PetscCall(PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB));
1976   if (localPointsA) {
1977     PetscCall(PetscMalloc1(numRootsB,&reorderedRemotePointsA));
1978     for (i=0; i<numRootsB; i++) {
1979       reorderedRemotePointsA[i].rank = -1;
1980       reorderedRemotePointsA[i].index = -1;
1981     }
1982     for (i=0; i<numLeavesA; i++) {
1983       if (localPointsA[i] >= numRootsB) continue;
1984       reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1985     }
1986     remotePointsA = reorderedRemotePointsA;
1987   }
1988   PetscCall(PetscSFGetLeafRange(sfB,&minleaf,&maxleaf));
1989   PetscCall(PetscMalloc1(maxleaf-minleaf+1,&leafdataB));
1990   PetscCall(PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
1991   PetscCall(PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
1992   PetscCall(PetscFree(reorderedRemotePointsA));
1993 
1994   denseB = (PetscBool)!localPointsB;
1995   for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1996     if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
1997     else numLeavesBA++;
1998   }
1999   if (denseB) {
2000     localPointsBA  = NULL;
2001     remotePointsBA = leafdataB;
2002   } else {
2003     PetscCall(PetscMalloc1(numLeavesBA,&localPointsBA));
2004     PetscCall(PetscMalloc1(numLeavesBA,&remotePointsBA));
2005     for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
2006       const PetscInt l = localPointsB ? localPointsB[i] : i;
2007 
2008       if (leafdataB[l-minleaf].rank == -1) continue;
2009       remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
2010       localPointsBA[numLeavesBA] = l;
2011       numLeavesBA++;
2012     }
2013     PetscCall(PetscFree(leafdataB));
2014   }
2015   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
2016   PetscCall(PetscSFSetFromOptions(*sfBA));
2017   PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 /*@
2022   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
2023 
2024   Input Parameters:
2025 + sfA - The first PetscSF
2026 - sfB - The second PetscSF
2027 
2028   Output Parameters:
2029 . sfBA - The composite SF.
2030 
2031   Level: developer
2032 
2033   Notes:
2034   Currently, the two SFs must be defined on congruent communicators and they must be true star
2035   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
2036   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.
2037 
2038   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
2039   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2040   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2041   a Reduce on sfB, on connected roots.
2042 
2043 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
2044 @*/
2045 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
2046 {
2047   const PetscSFNode *remotePointsA,*remotePointsB;
2048   PetscSFNode       *remotePointsBA;
2049   const PetscInt    *localPointsA,*localPointsB;
2050   PetscSFNode       *reorderedRemotePointsA = NULL;
2051   PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
2052   MPI_Op            op;
2053 #if defined(PETSC_USE_64BIT_INDICES)
2054   PetscBool         iswin;
2055 #endif
2056 
2057   PetscFunctionBegin;
2058   PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
2059   PetscSFCheckGraphSet(sfA,1);
2060   PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
2061   PetscSFCheckGraphSet(sfB,2);
2062   PetscCheckSameComm(sfA,1,sfB,2);
2063   PetscValidPointer(sfBA,3);
2064   PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
2065   PetscCall(PetscSFCheckLeavesUnique_Private(sfB));
2066 
2067   PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
2068   PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));
2069 
2070   /* TODO: Check roots of sfB have degree of 1 */
2071   /* Once we implement it, we can replace the MPI_MAXLOC
2072      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2073      We use MPI_MAXLOC only to have a deterministic output from this routine if
2074      the root condition is not meet.
2075    */
2076   op = MPI_MAXLOC;
2077 #if defined(PETSC_USE_64BIT_INDICES)
2078   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2079   PetscCall(PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin));
2080   if (iswin) op = MPI_REPLACE;
2081 #endif
2082 
2083   PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
2084   PetscCall(PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA));
2085   for (i=0; i<maxleaf - minleaf + 1; i++) {
2086     reorderedRemotePointsA[i].rank = -1;
2087     reorderedRemotePointsA[i].index = -1;
2088   }
2089   if (localPointsA) {
2090     for (i=0; i<numLeavesA; i++) {
2091       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2092       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2093     }
2094   } else {
2095     for (i=0; i<numLeavesA; i++) {
2096       if (i > maxleaf || i < minleaf) continue;
2097       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2098     }
2099   }
2100 
2101   PetscCall(PetscMalloc1(numRootsB,&localPointsBA));
2102   PetscCall(PetscMalloc1(numRootsB,&remotePointsBA));
2103   for (i=0; i<numRootsB; i++) {
2104     remotePointsBA[i].rank = -1;
2105     remotePointsBA[i].index = -1;
2106   }
2107 
2108   PetscCall(PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
2109   PetscCall(PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
2110   PetscCall(PetscFree(reorderedRemotePointsA));
2111   for (i=0,numLeavesBA=0; i<numRootsB; i++) {
2112     if (remotePointsBA[i].rank == -1) continue;
2113     remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2114     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2115     localPointsBA[numLeavesBA] = i;
2116     numLeavesBA++;
2117   }
2118   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
2119   PetscCall(PetscSFSetFromOptions(*sfBA));
2120   PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 /*
2125   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
2126 
2127   Input Parameters:
2128 . sf - The global PetscSF
2129 
2130   Output Parameters:
2131 . out - The local PetscSF
2132  */
2133 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2134 {
2135   MPI_Comm           comm;
2136   PetscMPIInt        myrank;
2137   const PetscInt     *ilocal;
2138   const PetscSFNode  *iremote;
2139   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
2140   PetscSFNode        *liremote;
2141   PetscSF            lsf;
2142 
2143   PetscFunctionBegin;
2144   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2145   if (sf->ops->CreateLocalSF) {
2146     PetscCall((*sf->ops->CreateLocalSF)(sf,out));
2147   } else {
2148     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2149     PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
2150     PetscCallMPI(MPI_Comm_rank(comm,&myrank));
2151 
2152     /* Find out local edges and build a local SF */
2153     PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
2154     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2155     PetscCall(PetscMalloc1(lnleaves,&lilocal));
2156     PetscCall(PetscMalloc1(lnleaves,&liremote));
2157 
2158     for (i=j=0; i<nleaves; i++) {
2159       if (iremote[i].rank == (PetscInt)myrank) {
2160         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2161         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2162         liremote[j].index = iremote[i].index;
2163         j++;
2164       }
2165     }
2166     PetscCall(PetscSFCreate(PETSC_COMM_SELF,&lsf));
2167     PetscCall(PetscSFSetFromOptions(lsf));
2168     PetscCall(PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER));
2169     PetscCall(PetscSFSetUp(lsf));
2170     *out = lsf;
2171   }
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2176 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2177 {
2178   PetscMemType       rootmtype,leafmtype;
2179 
2180   PetscFunctionBegin;
2181   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2182   PetscCall(PetscSFSetUp(sf));
2183   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
2184   PetscCall(PetscGetMemType(rootdata,&rootmtype));
2185   PetscCall(PetscGetMemType(leafdata,&leafmtype));
2186   if (sf->ops->BcastToZero) {
2187     PetscCall((*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata));
2188   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2189   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 /*@
2194   PetscSFConcatenate - concatenate multiple SFs into one
2195 
2196   Input Parameters:
2197 + comm - the communicator
2198 . nsfs - the number of input PetscSF
2199 . sfs  - the array of input PetscSF
2200 . shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE)
2201 - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage
2202 
2203   Output Parameters:
2204 . newsf - The resulting PetscSF
2205 
2206   Level: developer
2207 
2208   Notes:
2209   The communicator of all SFs in sfs must be comm.
2210 
2211   The offsets in leafOffsets are added to the original leaf indices.
2212 
2213   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2214   In this case, NULL is also passed as ilocal to the resulting SF.
2215 
2216   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2217   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2218 
2219 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
2220 @*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt            i, s, nLeaves, nRoots;
  PetscInt           *leafArrayOffsets; /* prefix sums of the input SFs' leaf counts; offsets into ilocal_new / iremote_new */
  PetscInt           *ilocal_new;       /* concatenated leaf indices for the result (stays NULL when all inputs have contiguous leaves) */
  PetscSFNode        *iremote_new;      /* concatenated (rank, root index) pairs for the result; ownership passes to *newsf */
  PetscInt           *rootOffsets;      /* per-SF shift applied to local root indices (all zero when shareRoots, from PetscCalloc1) */
  PetscBool           all_ilocal_null = PETSC_FALSE;
  PetscMPIInt         rank;

  PetscFunctionBegin;
  {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    PetscCall(PetscSFCreate(comm,&dummy));
    PetscValidLogicalCollectiveInt(dummy,nsfs,2);
    PetscValidPointer(sfs,3);
    for (i=0; i<nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i],PETSCSF_CLASSID,3);
      PetscCheckSameComm(dummy,1,sfs[i],3);
    }
    PetscValidLogicalCollectiveBool(dummy,shareRoots,4);
    if (leafOffsets) PetscValidIntPointer(leafOffsets,5);
    PetscValidPointer(newsf,6);
    PetscCall(PetscSFDestroy(&dummy));
  }
  /* Trivial case: concatenating zero SFs yields an empty SF */
  if (!nsfs) {
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscFunctionReturn(0);
  }
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* Determine the root space of the result.
     shareRoots: all inputs address the same root space, so root indices are kept as-is
     (rootOffsets remains all-zero); otherwise the root spaces are stacked and each SF's
     root indices get shifted by the total root count of the preceding SFs. */
  PetscCall(PetscCalloc1(nsfs+1, &rootOffsets));
  if (shareRoots) {
    PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
    if (PetscDefined(USE_DEBUG)) {
      /* debug-only consistency check: a shared root space must have the same local size in every input SF */
      for (s=1; s<nsfs; s++) {
        PetscInt nr;

        PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
      }
    }
  } else {
    for (s=0; s<nsfs; s++) {
      PetscInt nr;

      PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
      rootOffsets[s+1] = rootOffsets[s] + nr;
    }
    nRoots = rootOffsets[nsfs];
  }

  /* Calculate leaf array offsets and automatic root offsets */
  PetscCall(PetscMalloc1(nsfs+1,&leafArrayOffsets));
  leafArrayOffsets[0] = 0;
  for (s=0; s<nsfs; s++) {
    PetscInt        nl;

    PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
    leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* leafOffsets == NULL is allowed only when every input has contiguous leaves (ilocal == NULL);
     the result then also gets contiguous leaves (ilocal_new stays NULL below) */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s=0; s<nsfs; s++) {
      const PetscInt *ilocal;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL;
  if (!all_ilocal_null) {
    PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
    for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1; /* sentinel so any entry left unset is conspicuous */
    for (s = 0; s<nsfs; s++) {
      const PetscInt   *ilocal;
      PetscInt         *ilocal_l = &ilocal_new[leafArrayOffsets[s]]; /* this SF's section of the concatenated ilocal */
      PetscInt          i, nleaves_l;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
      /* ilocal == NULL means leaves 0..nleaves_l-1; shift every leaf index by the caller-provided offset */
      for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots.
     rootOffsets[s] is a rank-local quantity, so a leaf cannot shift its stored remote root index
     itself — it needs the offset valid on the root's OWNER rank. Each owner therefore broadcasts
     (its rank, shifted root index) from roots to leaves through a helper SF with the same
     connectivity, writing the received pairs directly into the result's iremote array. */
  PetscCall(PetscMalloc1(nLeaves, &iremote_new));
  for (i = 0; i < nLeaves; i++) {
    iremote_new[i].rank   = -1;
    iremote_new[i].index  = -1;
  }
  for (s = 0; s<nsfs; s++) {
    PetscInt            i, nl, nr;
    PetscSF             tmp_sf;
    const PetscSFNode  *iremote;
    PetscSFNode        *tmp_rootdata;
    PetscSFNode        *tmp_leafdata = &iremote_new[leafArrayOffsets[s]]; /* receive straight into this SF's section */

    PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
    PetscCall(PetscSFCreate(comm, &tmp_sf));
    /* create helper SF with contiguous leaves */
    PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES));
    PetscCall(PetscSFSetUp(tmp_sf));
    PetscCall(PetscMalloc1(nr, &tmp_rootdata));
    for (i = 0; i < nr; i++) {
      tmp_rootdata[i].index = i + rootOffsets[s]; /* root index in the result's (possibly stacked) root space */
      tmp_rootdata[i].rank  = (PetscInt) rank;
    }
    /* MPIU_2INT is used to move PetscSFNode values (a rank/index pair of PetscInts) */
    PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(  tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFDestroy(&tmp_sf));
    PetscCall(PetscFree(tmp_rootdata));
  }

  /* Build the new SF; ilocal_new and iremote_new ownership is transferred (PETSC_OWN_POINTER) */
  PetscCall(PetscSFCreate(comm, newsf));
  PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*newsf));
  PetscCall(PetscFree(rootOffsets));
  PetscCall(PetscFree(leafArrayOffsets));
  PetscFunctionReturn(0);
}
2352