xref: /petsc/src/vec/is/sf/interface/sf.c (revision dfd676b1a855b7f967ece75a22ee7f6626d10f89)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petsc/private/viewerimpl.h>
4 #include <petscctable.h>
5 
6 #if defined(PETSC_HAVE_CUDA)
7   #include <cuda_runtime.h>
8 #endif
9 
10 #if defined(PETSC_HAVE_HIP)
11   #include <hip/hip_runtime.h>
12 #endif
13 
14 #if defined(PETSC_CLANG_STATIC_ANALYZER)
15 void PetscSFCheckGraphSet(PetscSF,int);
16 #else
17 #if defined(PETSC_USE_DEBUG)
18 #  define PetscSFCheckGraphSet(sf,arg) do {                          \
19     if (PetscUnlikely(!(sf)->graphset))                              \
20       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
21   } while (0)
22 #else
23 #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
24 #endif
25 #endif
26 
/* String table for PetscSFDuplicateOption; trailing entries give the enum name, prefix, and NULL sentinel expected by PetscOptionsEnum() */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
28 
/*@
   PetscSFCreate - create a star forest communication context

   Collective

   Input Parameter:
.  comm - communicator on which the star forest will operate

   Output Parameter:
.  sf - new star forest context

   Options Database Keys:
+  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
.  -sf_type window    -Use MPI-3 one-sided window for communication
-  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication

   Level: intermediate

   Notes:
   When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
   MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
   SFs are optimized and they have better performance than general SFs.

.seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
{
  PetscSF        b;

  PetscFunctionBegin;
  PetscValidPointer(sf,2);
  PetscCall(PetscSFInitializePackage());

  PetscCall(PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView));

  /* Mark the graph as unset: negative counts and an inverted [minleaf,maxleaf] range
     (PETSC_MAX_INT > PETSC_MIN_INT) that any real leaf range will replace */
  b->nroots    = -1;
  b->nleaves   = -1;
  b->minleaf   = PETSC_MAX_INT;
  b->maxleaf   = PETSC_MIN_INT;
  b->nranks    = -1;
  b->rankorder = PETSC_TRUE;
  b->ingroup   = MPI_GROUP_NULL;
  b->outgroup  = MPI_GROUP_NULL;
  b->graphset  = PETSC_FALSE;
#if defined(PETSC_HAVE_DEVICE)
  b->use_gpu_aware_mpi    = use_gpu_aware_mpi; /* inherit the global default; overridable via PetscSFSetFromOptions() */
  b->use_stream_aware_mpi = PETSC_FALSE;
  b->unknown_input_stream= PETSC_FALSE;
  #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
    b->backend = PETSCSF_BACKEND_KOKKOS;
  #elif defined(PETSC_HAVE_CUDA)
    b->backend = PETSCSF_BACKEND_CUDA;
  #elif defined(PETSC_HAVE_HIP)
    b->backend = PETSCSF_BACKEND_HIP;
  #endif

  #if defined(PETSC_HAVE_NVSHMEM)
    b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
    b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
    PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL));
    PetscCall(PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL));
  #endif
#endif
  /* Embedded VecScatter state: sizes unknown (-1) until a scatter is built on this SF */
  b->vscat.from_n = -1;
  b->vscat.to_n   = -1;
  b->vscat.unit   = MPIU_SCALAR;
 *sf = b;
  PetscFunctionReturn(0);
}
98 
/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Parameter:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (sf->ops->Reset) PetscCall((*sf->ops->Reset)(sf)); /* let the implementation release its own state first */
  /* Return the graph description to the "unset" state established in PetscSFCreate() */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  PetscCall(PetscFree(sf->mine_alloc));
  PetscCall(PetscFree(sf->remote_alloc));
  sf->nranks = -1;
  PetscCall(PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote));
  sf->degreeknown = PETSC_FALSE;
  PetscCall(PetscFree(sf->degree));
  if (sf->ingroup  != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->ingroup));
  if (sf->outgroup != MPI_GROUP_NULL) PetscCallMPI(MPI_Group_free(&sf->outgroup));
  if (sf->multi) sf->multi->multi = NULL; /* break the back-reference before destroying the multi-SF */
  PetscCall(PetscSFDestroy(&sf->multi));
  PetscCall(PetscLayoutDestroy(&sf->map));

 #if defined(PETSC_HAVE_DEVICE)
  /* release the device-side copies of the rmine arrays (host and device memtype slots) */
  for (PetscInt i=0; i<2; i++) PetscCall(PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]));
 #endif

  sf->setupcalled = PETSC_FALSE; /* a subsequent use must go through PetscSFSetUp() again */
  PetscFunctionReturn(0);
}
142 
143 /*@C
144    PetscSFSetType - Set the PetscSF communication implementation
145 
146    Collective on PetscSF
147 
148    Input Parameters:
149 +  sf - the PetscSF context
150 -  type - a known method
151 
152    Options Database Key:
153 .  -sf_type <type> - Sets the method; use -help for a list
154    of available methods (for instance, window, basic, neighbor)
155 
156    Notes:
157    See "include/petscsf.h" for available methods (for instance)
158 +    PETSCSFWINDOW - MPI-2/3 one-sided
159 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
160 
161   Level: intermediate
162 
163 .seealso: PetscSFType, PetscSFCreate()
164 @*/
165 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
166 {
167   PetscBool      match;
168   PetscErrorCode (*r)(PetscSF);
169 
170   PetscFunctionBegin;
171   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
172   PetscValidCharPointer(type,2);
173 
174   PetscCall(PetscObjectTypeCompare((PetscObject)sf,type,&match));
175   if (match) PetscFunctionReturn(0);
176 
177   PetscCall(PetscFunctionListFind(PetscSFList,type,&r));
178   PetscCheck(r,PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
179   /* Destroy the previous PetscSF implementation context */
180   if (sf->ops->Destroy) PetscCall((*(sf)->ops->Destroy)(sf));
181   PetscCall(PetscMemzero(sf->ops,sizeof(*sf->ops)));
182   PetscCall(PetscObjectChangeTypeName((PetscObject)sf,type));
183   PetscCall((*r)(sf));
184   PetscFunctionReturn(0);
185 }
186 
187 /*@C
188   PetscSFGetType - Get the PetscSF communication implementation
189 
190   Not Collective
191 
192   Input Parameter:
193 . sf  - the PetscSF context
194 
195   Output Parameter:
196 . type - the PetscSF type name
197 
198   Level: intermediate
199 
200 .seealso: PetscSFSetType(), PetscSFCreate()
201 @*/
202 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
203 {
204   PetscFunctionBegin;
205   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
206   PetscValidPointer(type,2);
207   *type = ((PetscObject)sf)->type_name;
208   PetscFunctionReturn(0);
209 }
210 
/*@C
   PetscSFDestroy - destroy star forest

   Collective

   Input Parameter:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Drop one reference; only the last reference actually destroys the object */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  PetscCall(PetscSFReset(*sf)); /* release graph data before the implementation context */
  if ((*sf)->ops->Destroy) PetscCall((*(*sf)->ops->Destroy)(*sf));
  PetscCall(PetscSFDestroy(&(*sf)->vscat.lsf));
  /* vscat.unit is a derived MPI datatype only when bs > 1 (see MPI_Type_dup in PetscSFDuplicate()) */
  if ((*sf)->vscat.bs > 1) PetscCallMPI(MPI_Type_free(&(*sf)->vscat.unit));
  PetscCall(PetscHeaderDestroy(sf)); /* sets *sf to NULL */
  PetscFunctionReturn(0);
}
236 
237 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
238 {
239   PetscInt           i, nleaves;
240   PetscMPIInt        size;
241   const PetscInt    *ilocal;
242   const PetscSFNode *iremote;
243 
244   PetscFunctionBegin;
245   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
246   PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote));
247   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size));
248   for (i = 0; i < nleaves; i++) {
249     const PetscInt rank = iremote[i].rank;
250     const PetscInt remote = iremote[i].index;
251     const PetscInt leaf = ilocal ? ilocal[i] : i;
252     PetscCheck(rank >= 0 && rank < size,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size);
253     PetscCheck(remote >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i);
254     PetscCheck(leaf >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i);
255   }
256   PetscFunctionReturn(0);
257 }
258 
259 /*@
260    PetscSFSetUp - set up communication structures
261 
262    Collective
263 
264    Input Parameter:
265 .  sf - star forest communication object
266 
267    Level: beginner
268 
269 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
270 @*/
271 PetscErrorCode PetscSFSetUp(PetscSF sf)
272 {
273   PetscFunctionBegin;
274   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
275   PetscSFCheckGraphSet(sf,1);
276   if (sf->setupcalled) PetscFunctionReturn(0);
277   PetscCall(PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0));
278   PetscCall(PetscSFCheckGraphValid_Private(sf));
279   if (!((PetscObject)sf)->type_name) PetscCall(PetscSFSetType(sf,PETSCSFBASIC)); /* Zero all sf->ops */
280   if (sf->ops->SetUp) PetscCall((*sf->ops->SetUp)(sf));
281 #if defined(PETSC_HAVE_CUDA)
282   if (sf->backend == PETSCSF_BACKEND_CUDA) {
283     sf->ops->Malloc = PetscSFMalloc_CUDA;
284     sf->ops->Free   = PetscSFFree_CUDA;
285   }
286 #endif
287 #if defined(PETSC_HAVE_HIP)
288   if (sf->backend == PETSCSF_BACKEND_HIP) {
289     sf->ops->Malloc = PetscSFMalloc_HIP;
290     sf->ops->Free   = PetscSFFree_HIP;
291   }
292 #endif
293 
294 #
295 #if defined(PETSC_HAVE_KOKKOS)
296   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
297     sf->ops->Malloc = PetscSFMalloc_Kokkos;
298     sf->ops->Free   = PetscSFFree_Kokkos;
299   }
300 #endif
301   PetscCall(PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0));
302   sf->setupcalled = PETSC_TRUE;
303   PetscFunctionReturn(0);
304 }
305 
/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Parameter:
.  sf - star forest

   Options Database Keys:
+  -sf_type                  - implementation type, see PetscSFSetType()
.  -sf_rank_order            - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
.  -sf_unknown_input_stream  - Assume the SF root/leafdata is computed on arbitrary streams unknown to SF, so SF must synchronize
                               before using the data (default: false).
.  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                               If true, this option only works with -use_gpu_aware_mpi 1.

-  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
                              On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                              the only available is kokkos.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscObjectOptionsBegin((PetscObject)sf);
  /* Use the current type (if any) as the default offered for -sf_type */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  PetscCall(PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg));
  PetscCall(PetscSFSetType(sf,flg ? type : deft));
  PetscCall(PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL));
 #if defined(PETSC_HAVE_DEVICE)
  {
    char        backendstr[32] = {0};
    PetscBool   isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    PetscCall(PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL));
    PetscCall(PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL));
    PetscCall(PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set));
    /* Case-insensitive match of the requested backend name */
    PetscCall(PetscStrcasecmp("cuda",backendstr,&isCuda));
    PetscCall(PetscStrcasecmp("kokkos",backendstr,&isKokkos));
    PetscCall(PetscStrcasecmp("hip",backendstr,&isHip));
  #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else PetscCheck(!set,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
  #elif defined(PETSC_HAVE_KOKKOS)
    /* Kokkos-only build: any explicit -sf_backend other than kokkos is an error */
    PetscCheck(!set || isKokkos,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
   #endif
  }
 #endif
  if (sf->ops->SetFromOptions) PetscCall((*sf->ops->SetFromOptions)(PetscOptionsObject,sf));
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}
367 
/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Parameters:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF is built lazily using the current rankorder setting, so it must not exist yet */
  PetscCheck(!sf->multi,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}
390 
/*@C
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Parameters:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
   Leaf indices in ilocal must be unique, otherwise an error occurs.

   Input arrays ilocal and iremote follow the PetscCopyMode semantics.
   In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
   PETSc might modify the respective array;
   if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
   Only if PETSC_COPY_VALUES is used, the respective array is guaranteed to stay intact and a const array can be passed (but a cast to non-const is needed).

   Fortran Notes:
   In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

   Developer Notes:
   We sort leaves to check for duplicates and contiguousness and to find minleaf/maxleaf.
   This also allows to compare leaf sets of two SFs easily.

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscBool       unique, contiguous;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  PetscCheck(nroots  >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %" PetscInt_FMT ", cannot be negative",nroots);
  PetscCheck(nleaves >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %" PetscInt_FMT ", cannot be negative",nleaves);
  /* enums may be handled as unsigned by some compilers, NVHPC for example, the int cast
   * below is to prevent NVHPC from warning about meaningless comparison of unsigned with zero */
  PetscCheck((int)localmode  >= PETSC_COPY_VALUES && localmode  <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong localmode %d",localmode);
  PetscCheck((int)remotemode >= PETSC_COPY_VALUES && remotemode <= PETSC_USE_POINTER,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Wrong remotemode %d",remotemode);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    PetscCall(PetscSFReset(sf));
  }

  PetscCall(PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0));

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* With PETSC_COPY_VALUES, work on private copies so the caller's arrays are never modified;
     from here on, ilocal/iremote refer to arrays this SF may sort and (for non-USE_POINTER modes) owns */
  if (localmode == PETSC_COPY_VALUES && ilocal) {
    PetscInt *tlocal = NULL;

    PetscCall(PetscMalloc1(nleaves,&tlocal));
    PetscCall(PetscArraycpy(tlocal,ilocal,nleaves));
    ilocal = tlocal;
  }
  if (remotemode == PETSC_COPY_VALUES) {
    PetscSFNode *tremote = NULL;

    PetscCall(PetscMalloc1(nleaves,&tremote));
    PetscCall(PetscArraycpy(tremote,iremote,nleaves));
    iremote = tremote;
  }

  if (nleaves && ilocal) {
    PetscSFNode   work;

    /* Sort leaves by location, permuting iremote alongside, so dups/contiguity/min/max are cheap to find */
    PetscCall(PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work));
    PetscCall(PetscSortedCheckDupsInt(nleaves, ilocal, &unique));
    unique = PetscNot(unique); /* the check reports "duplicates found"; flip it to mean "unique" */
    PetscCheck(sf->allow_multi_leaves || unique,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Input ilocal has duplicate entries which is not allowed for this PetscSF");
    sf->minleaf = ilocal[0];
    sf->maxleaf = ilocal[nleaves-1];
    /* sorted, unique leaves covering 0..nleaves-1 exactly <=> contiguous */
    contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1);
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
    unique      = PETSC_TRUE;
    contiguous  = PETSC_TRUE;
  }

  /* Contiguous leaves are canonically represented by ilocal == NULL, so drop the array */
  if (contiguous) {
    if (localmode == PETSC_USE_POINTER) {
      ilocal = NULL;
    } else {
      PetscCall(PetscFree(ilocal));
    }
  }
  /* Record the arrays; *_alloc tracks ownership (NULL when the caller keeps ownership via PETSC_USE_POINTER) */
  sf->mine            = ilocal;
  if (localmode == PETSC_USE_POINTER) {
    sf->mine_alloc    = NULL;
  } else {
    sf->mine_alloc    = ilocal;
  }
  sf->remote          = iremote;
  if (remotemode == PETSC_USE_POINTER) {
    sf->remote_alloc  = NULL;
  } else {
    sf->remote_alloc  = iremote;
  }
  PetscCall(PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0));
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
506 
/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (pattern != PETSCSF_PATTERN_ALLTOALL) PetscValidPointer(map,2);
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCheck(pattern >= PETSCSF_PATTERN_ALLGATHER && pattern <= PETSCSF_PATTERN_ALLTOALL,comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %d",pattern);
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* ALLTOALL ignores the user's map: build a layout with "size" roots per rank */
    type = PETSCSFALLTOALL;
    PetscCall(PetscLayoutCreate(comm,&sf->map));
    PetscCall(PetscLayoutSetLocalSize(sf->map,size));
    PetscCall(PetscLayoutSetSize(sf->map,((PetscInt)size)*size));
    PetscCall(PetscLayoutSetUp(sf->map));
  } else {
    PetscCall(PetscLayoutGetLocalSize(map,&n));
    PetscCall(PetscLayoutGetSize(map,&N));
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm));
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      /* uneven local sizes need the "v" (variable-count) variants */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    PetscCall(PetscLayoutReference(map,&sf->map));
  }
  PetscCall(PetscSFSetType(sf,type));

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* only rank 0 receives leaves; other ranks have an empty (inverted) leaf range */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
605 
/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Parameter:
.  sf - star forest to invert

   Output Parameter:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL));
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
  PetscCall(PetscMalloc2(nroots,&roots,maxlocal,&leaves));
  /* Each leaf slot holds its own (rank,index) so a reduce tells every root who points at it */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Initialize roots to (-1,-1) so roots not covered by any leaf remain marked invalid */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push leaf identities to their roots; MPI_REPLACE keeps the last writer (degree is 1, so unique) */
  PetscCall(PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE));

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* every root covered => inverse leaves are contiguous */
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    PetscCall(PetscMalloc1(count,&newilocal));
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* The inverse swaps roles: old leaf space (size maxlocal) becomes the root space */
  PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf));
  PetscCall(PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES));
  PetscCall(PetscFree2(roots,leaves));
  PetscFunctionReturn(0);
}
674 
/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Parameters:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Parameter:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  MPI_Datatype   dtype=MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf));
  PetscCall(PetscSFGetType(sf,&type));
  if (type) PetscCall(PetscSFSetType(*newsf,type));
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* general graph: deep-copy the leaf/remote arrays */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
      PetscCall(PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES));
    } else {
      /* patterned graph: rebuild from the layout and pattern instead of copying arrays */
      PetscCall(PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern));
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) PetscCallMPI(MPI_Type_dup(sf->vscat.unit,&dtype));
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  (*newsf)->backend              = sf->backend;
  (*newsf)->unknown_input_stream= sf->unknown_input_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  if (sf->ops->Duplicate) PetscCall((*sf->ops->Duplicate)(sf,opt,*newsf));
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}
733 
734 /*@C
735    PetscSFGetGraph - Get the graph specifying a parallel star forest
736 
737    Not Collective
738 
739    Input Parameter:
740 .  sf - star forest
741 
742    Output Parameters:
743 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
744 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
745 .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
746 -  iremote - remote locations of root vertices for each leaf on the current process
747 
748    Notes:
749      We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
750 
751      The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
752      see its manpage for details.
753 
754    Fortran Notes:
755      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
756      want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
757 
758      To check for a NULL ilocal use
759 $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
760 
761    Level: intermediate
762 
763 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
764 @*/
765 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
766 {
767   PetscFunctionBegin;
768   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
769   if (sf->ops->GetGraph) {
770     PetscCall((sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote));
771   } else {
772     if (nroots) *nroots = sf->nroots;
773     if (nleaves) *nleaves = sf->nleaves;
774     if (ilocal) *ilocal = sf->mine;
775     if (iremote) *iremote = sf->remote;
776   }
777   PetscFunctionReturn(0);
778 }
779 
780 /*@
781    PetscSFGetLeafRange - Get the active leaf ranges
782 
783    Not Collective
784 
785    Input Parameter:
786 .  sf - star forest
787 
788    Output Parameters:
789 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
790 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
791 
792    Level: developer
793 
794 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
795 @*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  /* Both outputs are optional (pass NULL to skip); values are cached on the SF */
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}
805 
806 /*@C
807    PetscSFViewFromOptions - View from Options
808 
809    Collective on PetscSF
810 
811    Input Parameters:
812 +  A - the star forest
813 .  obj - Optional object
814 -  name - command line option
815 
816    Level: intermediate
817 .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
818 @*/
PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  /* Delegate to the generic object-level viewer-from-options machinery */
  PetscCall(PetscObjectViewFromOptions((PetscObject)A,obj,name));
  PetscFunctionReturn(0);
}
826 
827 /*@C
828    PetscSFView - view a star forest
829 
830    Collective
831 
832    Input Parameters:
833 +  sf - star forest
834 -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
835 
836    Level: beginner
837 
838 .seealso: PetscSFCreate(), PetscSFSetGraph()
839 @*/
840 PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
841 {
842   PetscBool         iascii;
843   PetscViewerFormat format;
844 
845   PetscFunctionBegin;
846   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
847   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer));
848   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
849   PetscCheckSameComm(sf,1,viewer,2);
850   if (sf->graphset) PetscCall(PetscSFSetUp(sf));
851   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
852   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
853     PetscMPIInt rank;
854     PetscInt    ii,i,j;
855 
856     PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer));
857     PetscCall(PetscViewerASCIIPushTab(viewer));
858     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
859       if (!sf->graphset) {
860         PetscCall(PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n"));
861         PetscCall(PetscViewerASCIIPopTab(viewer));
862         PetscFunctionReturn(0);
863       }
864       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
865       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
866       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks));
867       for (i=0; i<sf->nleaves; i++) {
868         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index));
869       }
870       PetscCall(PetscViewerFlush(viewer));
871       PetscCall(PetscViewerGetFormat(viewer,&format));
872       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
873         PetscMPIInt *tmpranks,*perm;
874         PetscCall(PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm));
875         PetscCall(PetscArraycpy(tmpranks,sf->ranks,sf->nranks));
876         for (i=0; i<sf->nranks; i++) perm[i] = i;
877         PetscCall(PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm));
878         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank));
879         for (ii=0; ii<sf->nranks; ii++) {
880           i = perm[ii];
881           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]));
882           for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
883             PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]));
884           }
885         }
886         PetscCall(PetscFree2(tmpranks,perm));
887       }
888       PetscCall(PetscViewerFlush(viewer));
889       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
890     }
891     PetscCall(PetscViewerASCIIPopTab(viewer));
892   }
893   if (sf->ops->View) PetscCall((*sf->ops->View)(sf,viewer));
894   PetscFunctionReturn(0);
895 }
896 
897 /*@C
898    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
899 
900    Not Collective
901 
902    Input Parameter:
903 .  sf - star forest
904 
905    Output Parameters:
906 +  nranks - number of ranks referenced by local part
907 .  ranks - array of ranks
908 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
909 .  rmine - concatenated array holding local indices referencing each remote rank
910 -  rremote - concatenated array holding remote indices referenced for each remote rank
911 
912    Level: developer
913 
914 .seealso: PetscSFGetLeafRanks()
915 @*/
916 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
917 {
918   PetscFunctionBegin;
919   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
920   PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
921   if (sf->ops->GetRootRanks) {
922     PetscCall((sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote));
923   } else {
924     /* The generic implementation */
925     if (nranks)  *nranks  = sf->nranks;
926     if (ranks)   *ranks   = sf->ranks;
927     if (roffset) *roffset = sf->roffset;
928     if (rmine)   *rmine   = sf->rmine;
929     if (rremote) *rremote = sf->rremote;
930   }
931   PetscFunctionReturn(0);
932 }
933 
934 /*@C
935    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
936 
937    Not Collective
938 
939    Input Parameter:
940 .  sf - star forest
941 
942    Output Parameters:
943 +  niranks - number of leaf ranks referencing roots on this process
944 .  iranks - array of ranks
945 .  ioffset - offset in irootloc for each rank (length niranks+1)
946 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
947 
948    Level: developer
949 
950 .seealso: PetscSFGetRootRanks()
951 @*/
952 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
953 {
954   PetscFunctionBegin;
955   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
956   PetscCheck(sf->setupcalled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
957   if (sf->ops->GetLeafRanks) {
958     PetscCall((sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc));
959   } else {
960     PetscSFType type;
961     PetscCall(PetscSFGetType(sf,&type));
962     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
963   }
964   PetscFunctionReturn(0);
965 }
966 
967 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
968   PetscInt i;
969   for (i=0; i<n; i++) {
970     if (needle == list[i]) return PETSC_TRUE;
971   }
972   return PETSC_FALSE;
973 }
974 
975 /*@C
976    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
977 
978    Collective
979 
980    Input Parameters:
981 +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
982 -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
983 
984    Level: developer
985 
986 .seealso: PetscSFGetRootRanks()
987 @*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size));
  /* Count, per remote rank, how many of this process's leaves reference a root there
     (hash table keyed by rank, values accumulated with ADD_VALUES) */
  PetscCall(PetscTableCreate(10,size,&table));
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    PetscCall(PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES));
  }
  PetscCall(PetscTableGetCount(table,&sf->nranks));
  /* rmine/rremote hold one entry per leaf edge, grouped by remote rank via roffset (CSR-like layout) */
  PetscCall(PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote));
  PetscCall(PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks));
  PetscCall(PetscTableGetHeadPosition(table,&pos));
  for (i=0; i<sf->nranks; i++) {
    PetscCall(PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]));
    ranks[i]--;             /* Convert back to 0-based */
  }
  PetscCall(PetscTableDestroy(&table));

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    /* Translate the distinguished group's ranks into ranks of the SF's communicator */
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_size(dgroup,&groupsize));
    PetscCall(PetscMalloc1(groupsize,&dgroupranks));
    PetscCall(PetscMalloc1(groupsize,&groupranks));
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) PetscCallMPI(MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree(dgroupranks));
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  /* Two-pointer partition (quicksort-partition style); rcount[] entries are swapped in tandem */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  PetscCall(PetscFree(groupranks));
  /* Sort each partition by rank so the PetscFindMPIInt() lookups below can binary-search */
  PetscCall(PetscSortIntWithArray(sf->ndranks,ranks,rcount));
  PetscCall(PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks));
  /* Build the offset array; rcount[] is reset and reused below as a per-rank fill cursor */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    PetscCall(PetscMPIIntCast(ranks[i],sf->ranks+i));
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Fill rmine/rremote; orank caches the last leaf's remote rank so consecutive leaves
     pointing at the same rank skip the lookup */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank));
      if (irank < 0) {
        PetscCall(PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank));
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    PetscCheck(irank >= 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %" PetscInt_FMT " in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  PetscCall(PetscFree2(rcount,ranks));
  PetscFunctionReturn(0);
}
1077 
1078 /*@C
1079    PetscSFGetGroups - gets incoming and outgoing process groups
1080 
1081    Collective
1082 
1083    Input Parameter:
1084 .  sf - star forest
1085 
1086    Output Parameters:
1087 +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1088 -  outgoing - group of destination processes for outgoing edges (roots that I reference)
1089 
1090    Level: developer
1091 
1092 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1093 @*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  PetscCheck(sf->nranks >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    /* Build an auxiliary SF with a single root and one leaf per connected rank; the root's
       degree then equals the number of processes whose leaves reference roots here */
    PetscCall(PetscMalloc1(sf->nranks,&remote));
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount));
    PetscCall(PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER));
    PetscCall(PetscSFComputeDegreeBegin(bgcount,&indegree));
    PetscCall(PetscSFComputeDegreeEnd(bgcount,&indegree));
    /* Enumerate the incoming ranks: every process gathers its own rank id to the roots it references */
    PetscCall(PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks));
    PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    PetscCall(PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks));
    PetscCall(PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks));
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup));
    PetscCallMPI(MPI_Group_free(&group));
    PetscCall(PetscFree2(inranks,outranks));
    PetscCall(PetscSFDestroy(&bgcount));
  }
  *incoming = sf->ingroup; /* cached on the SF, computed at most once */

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing ranks are directly available: they are the rank list from PetscSFSetUpRanks() */
    PetscCallMPI(MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group));
    PetscCallMPI(MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup));
    PetscCallMPI(MPI_Group_free(&group));
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1139 
1140 /*@
1141    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1142 
1143    Collective
1144 
1145    Input Parameter:
1146 .  sf - star forest that may contain roots with 0 or with more than 1 vertex
1147 
1148    Output Parameter:
1149 .  multi - star forest with split roots, such that each root has degree exactly 1
1150 
1151    Level: developer
1152 
1153    Notes:
1154 
1155    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1156    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1157    edge, it is a candidate for future optimization that might involve its removal.
1158 
1159 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1160 @*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi));
    *multi = sf->multi;
    sf->multi->multi = sf->multi; /* a multi-SF is its own multi-SF */
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {
    /* Lazily build the multi-SF: each root is split into as many "slots" as its degree,
       and each leaf is repointed at a distinct slot of its root */
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    PetscCall(PetscSFComputeDegreeBegin(sf,&indegree));
    PetscCall(PetscSFComputeDegreeEnd(sf,&indegree));
    /* inoffset[] is the prefix sum of root degrees: root i owns slots [inoffset[i],inoffset[i+1]) */
    PetscCall(PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset));
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands every leaf a unique slot index within its root's range */
    PetscCall(PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM));
    PetscCall(PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM));
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        PetscCheck(inoffset[i] + indegree[i] == inoffset[i+1],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    PetscCall(PetscMalloc1(sf->nleaves,&remote));
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi));
    sf->multi->multi = sf->multi;
    PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER));
    if (sf->rankorder) {        /* Sort the ranks */
      /* Renumber the slots so that, within each root, incoming edges are ordered by sending rank */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank));
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      PetscCall(PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset));
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      /* Collect, at each slot, the rank of the leaf currently occupying it */
      PetscCall(PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE));
      PetscCall(PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE));
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        PetscCall(PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset));
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell every leaf its new slot index */
      PetscCall(PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE));
      PetscCall(PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE));
      PetscCall(PetscMalloc1(sf->nleaves,&newremote));
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      PetscCall(PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER));
      PetscCall(PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset));
    }
    PetscCall(PetscFree3(inoffset,outones,outoffset));
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1231 
1232 /*@C
1233    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1234 
1235    Collective
1236 
1237    Input Parameters:
1238 +  sf - original star forest
1239 .  nselected  - number of selected roots on this process
1240 -  selected   - indices of the selected roots on this process
1241 
1242    Output Parameter:
1243 .  esf - new star forest
1244 
1245    Level: advanced
1246 
1247    Note:
1248    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1249    be done by calling PetscSFGetGraph().
1250 
1251 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1252 @*/
1253 PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1254 {
1255   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1256   const PetscInt    *ilocal;
1257   signed char       *rootdata,*leafdata,*leafmem;
1258   const PetscSFNode *iremote;
1259   PetscSFNode       *new_iremote;
1260   MPI_Comm          comm;
1261 
1262   PetscFunctionBegin;
1263   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1264   PetscSFCheckGraphSet(sf,1);
1265   if (nselected) PetscValidIntPointer(selected,3);
1266   PetscValidPointer(esf,4);
1267 
1268   PetscCall(PetscSFSetUp(sf));
1269   PetscCall(PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0));
1270   PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
1271   PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
1272 
1273   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1274     PetscBool dups;
1275     PetscCall(PetscCheckDupsInt(nselected,selected,&dups));
1276     PetscCheck(!dups,comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1277     for (i=0; i<nselected; i++)
1278       PetscCheck(selected[i] >= 0 && selected[i] < nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots);
1279   }
1280 
1281   if (sf->ops->CreateEmbeddedRootSF) {
1282     PetscCall((*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf));
1283   } else {
1284     /* A generic version of creating embedded sf */
1285     PetscCall(PetscSFGetLeafRange(sf,&minleaf,&maxleaf));
1286     maxlocal = maxleaf - minleaf + 1;
1287     PetscCall(PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem));
1288     leafdata = leafmem - minleaf;
1289     /* Tag selected roots and bcast to leaves */
1290     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1291     PetscCall(PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE));
1292     PetscCall(PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE));
1293 
1294     /* Build esf with leaves that are still connected */
1295     esf_nleaves = 0;
1296     for (i=0; i<nleaves; i++) {
1297       j = ilocal ? ilocal[i] : i;
1298       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1299          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1300       */
1301       esf_nleaves += (leafdata[j] ? 1 : 0);
1302     }
1303     PetscCall(PetscMalloc1(esf_nleaves,&new_ilocal));
1304     PetscCall(PetscMalloc1(esf_nleaves,&new_iremote));
1305     for (i=n=0; i<nleaves; i++) {
1306       j = ilocal ? ilocal[i] : i;
1307       if (leafdata[j]) {
1308         new_ilocal[n]        = j;
1309         new_iremote[n].rank  = iremote[i].rank;
1310         new_iremote[n].index = iremote[i].index;
1311         ++n;
1312       }
1313     }
1314     PetscCall(PetscSFCreate(comm,esf));
1315     PetscCall(PetscSFSetFromOptions(*esf));
1316     PetscCall(PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER));
1317     PetscCall(PetscFree2(rootdata,leafmem));
1318   }
1319   PetscCall(PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0));
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 /*@C
1324   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1325 
1326   Collective
1327 
1328   Input Parameters:
1329 + sf - original star forest
1330 . nselected  - number of selected leaves on this process
1331 - selected   - indices of the selected leaves on this process
1332 
1333   Output Parameter:
1334 .  newsf - new star forest
1335 
1336   Level: advanced
1337 
1338 .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
1339 @*/
1340 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1341 {
1342   const PetscSFNode *iremote;
1343   PetscSFNode       *new_iremote;
1344   const PetscInt    *ilocal;
1345   PetscInt          i,nroots,*leaves,*new_ilocal;
1346   MPI_Comm          comm;
1347 
1348   PetscFunctionBegin;
1349   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1350   PetscSFCheckGraphSet(sf,1);
1351   if (nselected) PetscValidIntPointer(selected,3);
1352   PetscValidPointer(newsf,4);
1353 
1354   /* Uniq selected[] and put results in leaves[] */
1355   PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
1356   PetscCall(PetscMalloc1(nselected,&leaves));
1357   PetscCall(PetscArraycpy(leaves,selected,nselected));
1358   PetscCall(PetscSortedRemoveDupsInt(&nselected,leaves));
1359   PetscCheck(!nselected || !(leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves),comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",leaves[0],leaves[nselected-1],sf->nleaves);
1360 
1361   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1362   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1363     PetscCall((*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf));
1364   } else {
1365     PetscCall(PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote));
1366     PetscCall(PetscMalloc1(nselected,&new_ilocal));
1367     PetscCall(PetscMalloc1(nselected,&new_iremote));
1368     for (i=0; i<nselected; ++i) {
1369       const PetscInt l     = leaves[i];
1370       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1371       new_iremote[i].rank  = iremote[l].rank;
1372       new_iremote[i].index = iremote[l].index;
1373     }
1374     PetscCall(PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf));
1375     PetscCall(PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER));
1376   }
1377   PetscCall(PetscFree(leaves));
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 /*@C
1382    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1383 
1384    Collective on PetscSF
1385 
1386    Input Parameters:
1387 +  sf - star forest on which to communicate
1388 .  unit - data type associated with each node
1389 .  rootdata - buffer to broadcast
1390 -  op - operation to use for reduction
1391 
1392    Output Parameter:
1393 .  leafdata - buffer to be reduced with values from each leaf's respective root
1394 
1395    Level: intermediate
1396 
1397    Notes:
1398     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
    are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1400     use PetscSFBcastWithMemTypeBegin() instead.
1401 .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
1402 @*/
1403 PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1404 {
1405   PetscMemType   rootmtype,leafmtype;
1406 
1407   PetscFunctionBegin;
1408   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1409   PetscCall(PetscSFSetUp(sf));
1410   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
1411   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1412   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1413   PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op));
1414   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 /*@C
1419    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()
1420 
1421    Collective on PetscSF
1422 
1423    Input Parameters:
1424 +  sf - star forest on which to communicate
1425 .  unit - data type associated with each node
1426 .  rootmtype - memory type of rootdata
1427 .  rootdata - buffer to broadcast
1428 .  leafmtype - memory type of leafdata
1429 -  op - operation to use for reduction
1430 
1431    Output Parameter:
1432 .  leafdata - buffer to be reduced with values from each leaf's respective root
1433 
1434    Level: intermediate
1435 
1436 .seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
1437 @*/
PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  /* Event logging is suppressed when sf->vscat.logging is set (presumably when the SF backs a VecScatter, which logs its own events) */
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
  /* Caller-supplied memory types skip the pointer-inspection cost paid by PetscSFBcastBegin() */
  PetscCall((*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op));
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}
1448 
1449 /*@C
1450    PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()
1451 
1452    Collective
1453 
1454    Input Parameters:
1455 +  sf - star forest
1456 .  unit - data type
1457 .  rootdata - buffer to broadcast
1458 -  op - operation to use for reduction
1459 
1460    Output Parameter:
1461 .  leafdata - buffer to be reduced with values from each leaf's respective root
1462 
1463    Level: intermediate
1464 
1465 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1466 @*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0));
  /* Complete the communication started by PetscSFBcastBegin()/PetscSFBcastWithMemTypeBegin() */
  PetscCall((*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op));
  if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0));
  PetscFunctionReturn(0);
}
1476 
1477 /*@C
1478    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1479 
1480    Collective
1481 
1482    Input Parameters:
1483 +  sf - star forest
1484 .  unit - data type
1485 .  leafdata - values to reduce
1486 -  op - reduction operation
1487 
1488    Output Parameter:
1489 .  rootdata - result of reduction of values from all leaves of each root
1490 
1491    Level: intermediate
1492 
1493    Notes:
1494     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
    are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1496     use PetscSFReduceWithMemTypeBegin() instead.
1497 
1498 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1499 @*/
1500 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1501 {
1502   PetscMemType   rootmtype,leafmtype;
1503 
1504   PetscFunctionBegin;
1505   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1506   PetscCall(PetscSFSetUp(sf));
1507   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0));
1508   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1509   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1510   PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op));
1511   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0));
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 /*@C
1516    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1517 
1518    Collective
1519 
1520    Input Parameters:
1521 +  sf - star forest
1522 .  unit - data type
1523 .  leafmtype - memory type of leafdata
1524 .  leafdata - values to reduce
1525 .  rootmtype - memory type of rootdata
1526 -  op - reduction operation
1527 
1528    Output Parameter:
1529 .  rootdata - result of reduction of values from all leaves of each root
1530 
1531    Level: intermediate
1532 
1533 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1534 @*/
1535 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1536 {
1537   PetscFunctionBegin;
1538   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1539   PetscCall(PetscSFSetUp(sf));
1540   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0));
1541   PetscCall((sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op));
1542   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0));
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 /*@C
1547    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1548 
1549    Collective
1550 
1551    Input Parameters:
1552 +  sf - star forest
1553 .  unit - data type
1554 .  leafdata - values to reduce
1555 -  op - reduction operation
1556 
1557    Output Parameter:
1558 .  rootdata - result of reduction of values from all leaves of each root
1559 
1560    Level: intermediate
1561 
1562 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1563 @*/
1564 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1565 {
1566   PetscFunctionBegin;
1567   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1568   if (!sf->vscat.logging) PetscCall(PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0));
1569   PetscCall((*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op));
1570   if (!sf->vscat.logging) PetscCall(PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0));
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 /*@C
1575    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1576 
1577    Collective
1578 
1579    Input Parameters:
1580 +  sf - star forest
1581 .  unit - data type
1582 .  leafdata - leaf values to use in reduction
1583 -  op - operation to use for reduction
1584 
1585    Output Parameters:
1586 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1587 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1588 
1589    Level: advanced
1590 
1591    Note:
1592    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1593    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1594    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1595    integers.
1596 
1597 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1598 @*/
1599 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1600 {
1601   PetscMemType   rootmtype,leafmtype,leafupdatemtype;
1602 
1603   PetscFunctionBegin;
1604   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1605   PetscCall(PetscSFSetUp(sf));
1606   PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0));
1607   PetscCall(PetscGetMemType(rootdata,&rootmtype));
1608   PetscCall(PetscGetMemType(leafdata,&leafmtype));
1609   PetscCall(PetscGetMemType(leafupdate,&leafupdatemtype));
1610   PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1611   PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op));
1612   PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0));
1613   PetscFunctionReturn(0);
1614 }
1615 
1616 /*@C
1617    PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1618 
1619    Collective
1620 
1621    Input Parameters:
1622 +  sf - star forest
1623 .  unit - data type
1624 .  rootmtype - memory type of rootdata
1625 .  leafmtype - memory type of leafdata
1626 .  leafdata - leaf values to use in reduction
1627 .  leafupdatemtype - memory type of leafupdate
1628 -  op - operation to use for reduction
1629 
1630    Output Parameters:
1631 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1632 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1633 
1634    Level: advanced
1635 
1636    Note: See PetscSFFetchAndOpBegin() for more details.
1637 
1638 .seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1639 @*/
PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscSFSetUp(sf));
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  /* Unlike PetscSFFetchAndOpBegin(), the caller supplies all memory types, so the
     PetscGetMemType() autodetection (and its cost) is skipped entirely */
  PetscCheck(leafmtype == leafupdatemtype,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  PetscCall((*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op));
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0));
  PetscFunctionReturn(0);
}
1651 
1652 /*@C
1653    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1654 
1655    Collective
1656 
1657    Input Parameters:
1658 +  sf - star forest
1659 .  unit - data type
1660 .  leafdata - leaf values to use in reduction
1661 -  op - operation to use for reduction
1662 
1663    Output Parameters:
1664 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1665 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1666 
1667    Level: advanced
1668 
1669 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1670 @*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscCall(PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  /* Dispatch to the implementation set up by the matching PetscSFFetchAndOpBegin()/
     PetscSFFetchAndOpWithMemTypeBegin(); memory types were resolved there */
  PetscCall((*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op));
  PetscCall(PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0));
  PetscFunctionReturn(0);
}
1680 
1681 /*@C
1682    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1683 
1684    Collective
1685 
1686    Input Parameter:
1687 .  sf - star forest
1688 
1689    Output Parameter:
1690 .  degree - degree of each root vertex
1691 
1692    Level: advanced
1693 
1694    Notes:
1695    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1696 
1697 .seealso: PetscSFGatherBegin()
1698 @*/
1699 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1700 {
1701   PetscFunctionBegin;
1702   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1703   PetscSFCheckGraphSet(sf,1);
1704   PetscValidPointer(degree,2);
1705   if (!sf->degreeknown) {
1706     PetscInt i, nroots = sf->nroots, maxlocal;
1707     PetscCheck(!sf->degree,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1708     maxlocal = sf->maxleaf-sf->minleaf+1;
1709     PetscCall(PetscMalloc1(nroots,&sf->degree));
1710     PetscCall(PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp)); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1711     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1712     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1713     PetscCall(PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
1714   }
1715   *degree = NULL;
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 /*@C
1720    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1721 
1722    Collective
1723 
1724    Input Parameter:
1725 .  sf - star forest
1726 
1727    Output Parameter:
1728 .  degree - degree of each root vertex
1729 
1730    Level: developer
1731 
1732    Notes:
1733    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1734 
.seealso: PetscSFComputeDegreeBegin()
1736 @*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    PetscCheck(sf->degreetmp,PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    PetscCall(PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM));
    PetscCall(PetscFree(sf->degreetmp));
    sf->degreeknown = PETSC_TRUE; /* cached: later Begin/End pairs become no-ops */
  }
  *degree = sf->degree; /* owned by the SF; freed in PetscSFDestroy(), not by the caller */
  PetscFunctionReturn(0);
}
1752 
1753 /*@C
1754    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1755    Each multi-root is assigned index of the corresponding original root.
1756 
1757    Collective
1758 
1759    Input Parameters:
1760 +  sf - star forest
1761 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1762 
1763    Output Parameters:
1764 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1765 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1766 
1767    Level: developer
1768 
1769    Notes:
1770    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1771 
1772 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1773 @*/
1774 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1775 {
1776   PetscSF             msf;
1777   PetscInt            i, j, k, nroots, nmroots;
1778 
1779   PetscFunctionBegin;
1780   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1781   PetscCall(PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL));
1782   if (nroots) PetscValidIntPointer(degree,2);
1783   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1784   PetscValidPointer(multiRootsOrigNumbering,4);
1785   PetscCall(PetscSFGetMultiSF(sf,&msf));
1786   PetscCall(PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL));
1787   PetscCall(PetscMalloc1(nmroots, multiRootsOrigNumbering));
1788   for (i=0,j=0,k=0; i<nroots; i++) {
1789     if (!degree[i]) continue;
1790     for (j=0; j<degree[i]; j++,k++) {
1791       (*multiRootsOrigNumbering)[k] = i;
1792     }
1793   }
1794   PetscCheck(k == nmroots,PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1795   if (nMultiRoots) *nMultiRoots = nmroots;
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 /*@C
1800    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1801 
1802    Collective
1803 
1804    Input Parameters:
1805 +  sf - star forest
1806 .  unit - data type
1807 -  leafdata - leaf data to gather to roots
1808 
1809    Output Parameter:
1810 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1811 
1812    Level: intermediate
1813 
1814 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1815 @*/
1816 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1817 {
1818   PetscSF        multi = NULL;
1819 
1820   PetscFunctionBegin;
1821   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1822   PetscCall(PetscSFSetUp(sf));
1823   PetscCall(PetscSFGetMultiSF(sf,&multi));
1824   PetscCall(PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE));
1825   PetscFunctionReturn(0);
1826 }
1827 
1828 /*@C
1829    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1830 
1831    Collective
1832 
1833    Input Parameters:
1834 +  sf - star forest
1835 .  unit - data type
1836 -  leafdata - leaf data to gather to roots
1837 
1838    Output Parameter:
1839 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1840 
1841    Level: intermediate
1842 
1843 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1844 @*/
1845 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1846 {
1847   PetscSF        multi = NULL;
1848 
1849   PetscFunctionBegin;
1850   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1851   PetscCall(PetscSFGetMultiSF(sf,&multi));
1852   PetscCall(PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE));
1853   PetscFunctionReturn(0);
1854 }
1855 
1856 /*@C
1857    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1858 
1859    Collective
1860 
1861    Input Parameters:
1862 +  sf - star forest
1863 .  unit - data type
1864 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1865 
1866    Output Parameter:
.  leafdata - leaf data to be updated with personal data from each respective root
1868 
1869    Level: intermediate
1870 
.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
1872 @*/
1873 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1874 {
1875   PetscSF        multi = NULL;
1876 
1877   PetscFunctionBegin;
1878   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1879   PetscCall(PetscSFSetUp(sf));
1880   PetscCall(PetscSFGetMultiSF(sf,&multi));
1881   PetscCall(PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE));
1882   PetscFunctionReturn(0);
1883 }
1884 
1885 /*@C
1886    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1887 
1888    Collective
1889 
1890    Input Parameters:
1891 +  sf - star forest
1892 .  unit - data type
1893 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1894 
1895    Output Parameter:
.  leafdata - leaf data to be updated with personal data from each respective root
1897 
1898    Level: intermediate
1899 
.seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
1901 @*/
1902 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1903 {
1904   PetscSF        multi = NULL;
1905 
1906   PetscFunctionBegin;
1907   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1908   PetscCall(PetscSFGetMultiSF(sf,&multi));
1909   PetscCall(PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE));
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1914 {
1915   PetscInt        i, n, nleaves;
1916   const PetscInt *ilocal = NULL;
1917   PetscHSetI      seen;
1918 
1919   PetscFunctionBegin;
1920   if (PetscDefined(USE_DEBUG)) {
1921     PetscCall(PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL));
1922     PetscCall(PetscHSetICreate(&seen));
1923     for (i = 0; i < nleaves; i++) {
1924       const PetscInt leaf = ilocal ? ilocal[i] : i;
1925       PetscCall(PetscHSetIAdd(seen,leaf));
1926     }
1927     PetscCall(PetscHSetIGetSize(seen,&n));
1928     PetscCheck(n == nleaves,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1929     PetscCall(PetscHSetIDestroy(&seen));
1930   }
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 /*@
1935   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1936 
1937   Input Parameters:
1938 + sfA - The first PetscSF
1939 - sfB - The second PetscSF
1940 
1941   Output Parameters:
1942 . sfBA - The composite SF
1943 
1944   Level: developer
1945 
1946   Notes:
1947   Currently, the two SFs must be defined on congruent communicators and they must be true star
1948   forests, i.e. the same leaf is not connected with different roots.
1949 
1950   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
1951   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1952   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1953   Bcast on sfA, then a Bcast on sfB, on connected nodes.
1954 
1955 .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1956 @*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Composition requires true star forests: a leaf connected to two roots would be ambiguous (debug-only check) */
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA));
  PetscCall(PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB));
  /* Scatter sfA's remote root info into an array indexed by sfA's leaf number, which doubles as
     sfB's root number; slots receiving no sfA edge keep rank = -1 as a "no path" marker */
  if (localPointsA) {
    PetscCall(PetscMalloc1(numRootsB,&reorderedRemotePointsA));
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf beyond sfB's root space cannot be on a path */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* NOTE(review): when localPointsA is NULL, remotePointsA (length numLeavesA) is used directly as
     sfB's root buffer (conceptual length numRootsB); this appears to assume sfB roots beyond
     numLeavesA are never read -- confirm against the SF implementations */
  PetscCall(PetscSFGetLeafRange(sfB,&minleaf,&maxleaf));
  PetscCall(PetscMalloc1(maxleaf-minleaf+1,&leafdataB));
  /* Push sfA's root info down sfB so every sfB leaf learns which sfA root (if any) reaches it;
     one MPIU_2INT unit carries one PetscSFNode (rank,index) pair */
  PetscCall(PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE));
  PetscCall(PetscFree(reorderedRemotePointsA));

  /* The result can use contiguous leaf storage (ilocal = NULL) only if sfB does and every sfB leaf got a root */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB transfers to the new SF below */
  } else {
    PetscCall(PetscMalloc1(numLeavesBA,&localPointsBA));
    PetscCall(PetscMalloc1(numLeavesBA,&remotePointsBA));
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue; /* sfB leaf not reached by any sfA root: drop it */
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    PetscCall(PetscFree(leafdataB));
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  /* PETSC_OWN_POINTER: the new SF takes over both arrays, so no copies and no frees here */
  PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}
2022 
2023 /*@
2024   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
2025 
2026   Input Parameters:
2027 + sfA - The first PetscSF
2028 - sfB - The second PetscSF
2029 
2030   Output Parameters:
2031 . sfBA - The composite SF.
2032 
2033   Level: developer
2034 
2035   Notes:
2036   Currently, the two SFs must be defined on congruent communicators and they must be true star
2037   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
2038   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.
2039 
2040   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
2041   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
2042   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2043   a Reduce on sfB, on connected roots.
2044 
2045 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
2046 @*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* True star forests only: duplicated leaves would make the composition ambiguous (debug-only check) */
  PetscCall(PetscSFCheckLeavesUnique_Private(sfA));
  PetscCall(PetscSFCheckLeavesUnique_Private(sfB));

  PetscCall(PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA));
  PetscCall(PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB));

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  PetscCall(PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin));
  if (iswin) op = MPI_REPLACE;
#endif

  PetscCall(PetscSFGetLeafRange(sfB, &minleaf, &maxleaf));
  /* Stage sfA's remote root info in a buffer indexed by leaf number (the leaf space shared by
     sfA and sfB); rank = -1 marks leaves of sfB that receive nothing from sfA */
  PetscCall(PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA));
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* sfA leaf outside sfB's leaf range: no path */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* contiguous sfA leaf storage: leaf number is simply i */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  /* Allocate for the worst case (every sfB root connected); rank = -1 marks unconnected roots */
  PetscCall(PetscMalloc1(numRootsB,&localPointsBA));
  PetscCall(PetscMalloc1(numRootsB,&remotePointsBA));
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce along sfB: each sfB root receives the sfA root info of its leaf (a single
     leaf when the degree-1 condition above holds); one MPIU_2INT unit per PetscSFNode */
  PetscCall(PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
  PetscCall(PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op));
  PetscCall(PetscFree(reorderedRemotePointsA));
  /* Compact in place: keep only connected sfB roots as leaves of the result */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA));
  PetscCall(PetscSFSetFromOptions(*sfBA));
  /* PETSC_OWN_POINTER: the new SF takes over both arrays (including the unused tail slots) */
  PetscCall(PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER));
  PetscFunctionReturn(0);
}
2125 
2126 /*
2127   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
2128 
2129   Input Parameters:
2130 . sf - The global PetscSF
2131 
2132   Output Parameters:
2133 . out - The local PetscSF
2134  */
2135 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2136 {
2137   MPI_Comm           comm;
2138   PetscMPIInt        myrank;
2139   const PetscInt     *ilocal;
2140   const PetscSFNode  *iremote;
2141   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
2142   PetscSFNode        *liremote;
2143   PetscSF            lsf;
2144 
2145   PetscFunctionBegin;
2146   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2147   if (sf->ops->CreateLocalSF) {
2148     PetscCall((*sf->ops->CreateLocalSF)(sf,out));
2149   } else {
2150     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2151     PetscCall(PetscObjectGetComm((PetscObject)sf,&comm));
2152     PetscCallMPI(MPI_Comm_rank(comm,&myrank));
2153 
2154     /* Find out local edges and build a local SF */
2155     PetscCall(PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote));
2156     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2157     PetscCall(PetscMalloc1(lnleaves,&lilocal));
2158     PetscCall(PetscMalloc1(lnleaves,&liremote));
2159 
2160     for (i=j=0; i<nleaves; i++) {
2161       if (iremote[i].rank == (PetscInt)myrank) {
2162         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2163         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2164         liremote[j].index = iremote[i].index;
2165         j++;
2166       }
2167     }
2168     PetscCall(PetscSFCreate(PETSC_COMM_SELF,&lsf));
2169     PetscCall(PetscSFSetFromOptions(lsf));
2170     PetscCall(PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER));
2171     PetscCall(PetscSFSetUp(lsf));
2172     *out = lsf;
2173   }
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2178 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2179 {
2180   PetscMemType       rootmtype,leafmtype;
2181 
2182   PetscFunctionBegin;
2183   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2184   PetscCall(PetscSFSetUp(sf));
2185   PetscCall(PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0));
2186   PetscCall(PetscGetMemType(rootdata,&rootmtype));
2187   PetscCall(PetscGetMemType(leafdata,&leafmtype));
2188   if (sf->ops->BcastToZero) {
2189     PetscCall((*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata));
2190   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2191   PetscCall(PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0));
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 /*@
2196   PetscSFConcatenate - concatenate multiple SFs into one
2197 
2198   Input Parameters:
2199 + comm - the communicator
2200 . nsfs - the number of input PetscSF
2201 . sfs  - the array of input PetscSF
2202 . shareRoots - the flag whether roots of input PetscSFs are taken as shared (PETSC_TRUE), or separate and concatenated (PETSC_FALSE)
2203 - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage
2204 
2205   Output Parameters:
2206 . newsf - The resulting PetscSF
2207 
2208   Level: developer
2209 
2210   Notes:
2211   The communicator of all SFs in sfs must be comm.
2212 
2213   The offsets in leafOffsets are added to the original leaf indices.
2214 
2215   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2216   In this case, NULL is also passed as ilocal to the resulting SF.
2217 
2218   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2219   In this case, user is responsible to provide correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2220 
2221 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
2222 @*/
PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
{
  PetscInt            i, s, nLeaves, nRoots;
  PetscInt           *leafArrayOffsets;            /* [nsfs+1] prefix sums of leaf counts: position of each SF's leaves in the concatenated ilocal/iremote arrays */
  PetscInt           *ilocal_new;                  /* concatenated leaf indices for the new SF (NULL if all inputs have contiguous leaves and leafOffsets==NULL) */
  PetscSFNode        *iremote_new;                 /* concatenated (rank, root index) pairs for the new SF */
  PetscInt           *rootOffsets;                 /* [nsfs+1] prefix sums of root counts; all zero when shareRoots, so roots are not renumbered */
  PetscBool           all_ilocal_null = PETSC_FALSE;
  PetscMPIInt         rank;

  PetscFunctionBegin;
  {
    PetscSF dummy; /* just to have a PetscObject on comm for input validation */

    /* The collective validation macros need a PetscObject living on comm;
       none of the inputs provides one directly, hence the throwaway SF. */
    PetscCall(PetscSFCreate(comm,&dummy));
    PetscValidLogicalCollectiveInt(dummy,nsfs,2);
    PetscValidPointer(sfs,3);
    for (i=0; i<nsfs; i++) {
      PetscValidHeaderSpecific(sfs[i],PETSCSF_CLASSID,3);
      PetscCheckSameComm(dummy,1,sfs[i],3);
    }
    PetscValidLogicalCollectiveBool(dummy,shareRoots,4);
    if (leafOffsets) PetscValidIntPointer(leafOffsets,5);
    PetscValidPointer(newsf,6);
    PetscCall(PetscSFDestroy(&dummy));
  }
  /* Concatenating zero SFs yields an empty SF */
  if (!nsfs) {
    PetscCall(PetscSFCreate(comm, newsf));
    PetscCall(PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscFunctionReturn(0);
  }
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* Determine the local root space.
     shareRoots: all inputs refer to the same root space, which is reused as-is
     (rootOffsets stays all-zero from PetscCalloc1, so root indices are untouched below).
     Otherwise: root spaces are stacked, and rootOffsets[s] is the shift applied
     to the root indices of sfs[s]. */
  PetscCall(PetscCalloc1(nsfs+1, &rootOffsets));
  if (shareRoots) {
    PetscCall(PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL));
    if (PetscDefined(USE_DEBUG)) {
      /* Debug-only sanity check: shared root space requires identical local root counts */
      for (s=1; s<nsfs; s++) {
        PetscInt nr;

        PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
        PetscCheck(nr == nRoots, comm, PETSC_ERR_ARG_SIZ, "shareRoots = PETSC_TRUE but sfs[%" PetscInt_FMT "] has a different number of roots (%" PetscInt_FMT ") than sfs[0] (%" PetscInt_FMT ")", s, nr, nRoots);
      }
    }
  } else {
    for (s=0; s<nsfs; s++) {
      PetscInt nr;

      PetscCall(PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL));
      rootOffsets[s+1] = rootOffsets[s] + nr;
    }
    nRoots = rootOffsets[nsfs];
  }

  /* Calculate leaf array offsets and automatic root offsets */
  PetscCall(PetscMalloc1(nsfs+1,&leafArrayOffsets));
  leafArrayOffsets[0] = 0;
  for (s=0; s<nsfs; s++) {
    PetscInt        nl;

    PetscCall(PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL));
    leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl;
  }
  nLeaves = leafArrayOffsets[nsfs];

  /* If the caller gave no leaf offsets, every input SF must have contiguous
     leaves (ilocal == NULL); otherwise we would not know where to place them. */
  if (!leafOffsets) {
    all_ilocal_null = PETSC_TRUE;
    for (s=0; s<nsfs; s++) {
      const PetscInt *ilocal;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL));
      if (ilocal) {
        all_ilocal_null = PETSC_FALSE;
        break;
      }
    }
    PetscCheck(all_ilocal_null, PETSC_COMM_SELF, PETSC_ERR_ARG_NULL, "leafOffsets can be passed as NULL only if all SFs have ilocal = NULL");
  }

  /* Renumber and concatenate local leaves */
  ilocal_new = NULL;
  if (!all_ilocal_null) {
    /* Only reached when leafOffsets was provided (the NULL case errors out above),
       so dereferencing leafOffsets[s] below is safe. */
    PetscCall(PetscMalloc1(nLeaves, &ilocal_new));
    for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1;   /* -1 marks slots not yet filled; aids debugging */
    for (s = 0; s<nsfs; s++) {
      const PetscInt   *ilocal;
      PetscInt         *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
      PetscInt          i, nleaves_l;

      PetscCall(PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL));
      /* contiguous leaves (ilocal == NULL) are treated as identity 0..nleaves_l-1 */
      for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
    }
  }

  /* Renumber and concatenate remote roots */
  PetscCall(PetscMalloc1(nLeaves, &iremote_new));
  for (i = 0; i < nLeaves; i++) {
    iremote_new[i].rank   = -1;
    iremote_new[i].index  = -1;
  }
  for (s = 0; s<nsfs; s++) {
    PetscInt            i, nl, nr;
    PetscSF             tmp_sf;
    const PetscSFNode  *iremote;
    PetscSFNode        *tmp_rootdata;
    PetscSFNode        *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];

    PetscCall(PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote));
    PetscCall(PetscSFCreate(comm, &tmp_sf));
    /* create helper SF with contiguous leaves */
    PetscCall(PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES));
    PetscCall(PetscSFSetUp(tmp_sf));
    /* Each root broadcasts its own (rank, shifted index) pair to its leaves,
       so the leaves learn the root's position in the concatenated root space.
       NOTE(review): MPIU_2INT is assumed to match the PetscSFNode layout of
       two PetscInts — defined elsewhere in PETSc; not visible here. */
    PetscCall(PetscMalloc1(nr, &tmp_rootdata));
    for (i = 0; i < nr; i++) {
      tmp_rootdata[i].index = i + rootOffsets[s];
      tmp_rootdata[i].rank  = (PetscInt) rank;
    }
    PetscCall(PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(  tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE));
    PetscCall(PetscSFDestroy(&tmp_sf));
    PetscCall(PetscFree(tmp_rootdata));
  }

  /* Build the new SF; ownership of ilocal_new/iremote_new transfers to it */
  PetscCall(PetscSFCreate(comm, newsf));
  PetscCall(PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(*newsf));
  PetscCall(PetscFree(rootOffsets));
  PetscCall(PetscFree(leafArrayOffsets));
  PetscFunctionReturn(0);
}
2354