xref: /petsc/src/vec/is/sf/interface/sf.c (revision d7cc930e14e615e9907267aaa472dd0ccceeab82)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petscctable.h>
4 
5 #if defined(PETSC_HAVE_CUDA)
6   #include <cuda_runtime.h>
7 #endif
8 
9 #if defined(PETSC_HAVE_HIP)
10   #include <hip/hip_runtime.h>
11 #endif
12 
/* PetscSFCheckGraphSet(sf,arg): in debug builds, raise an error if the graph of sf (function
   argument number arg) has not been set via PetscSFSetGraph()/PetscSFSetGraphWithPattern();
   compiles to a no-op in optimized builds */
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
21 
22 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
23 
/* Classify a pointer as referencing host or device memory.

   data - pointer to classify; may be NULL, in which case PETSC_MEMTYPE_HOST is reported
   type - set to PETSC_MEMTYPE_DEVICE only when the device runtime positively identifies
          device memory; PETSC_MEMTYPE_HOST otherwise
*/
PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
{
  PetscFunctionBegin;
  PetscValidPointer(type,2);
  *type = PETSC_MEMTYPE_HOST;
#if defined(PETSC_HAVE_CUDA)
  if (PetscCUDAInitialized && data) {
    cudaError_t                  cerr;
    struct cudaPointerAttributes attr;
    enum cudaMemoryType          mtype;
    cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
    cudaGetLastError(); /* Reset the last error */
    #if (CUDART_VERSION < 10000)
      mtype = attr.memoryType; /* field renamed to 'type' in CUDA 10 */
    #else
      mtype = attr.type;
    #endif
    if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  if (PetscHIPInitialized && data) {
    hipError_t                   cerr;
    struct hipPointerAttribute_t attr;
    enum hipMemoryType           mtype;
    cerr = hipPointerGetAttributes(&attr,data); /* Do not check error since passing a host pointer may be reported as an error, analogous to the CUDA case above */
    hipGetLastError(); /* Reset the last error */
    mtype = attr.memoryType;
    if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif
  PetscFunctionReturn(0);
}
58 
59 /*@
60    PetscSFCreate - create a star forest communication context
61 
62    Collective
63 
64    Input Arguments:
65 .  comm - communicator on which the star forest will operate
66 
67    Output Arguments:
68 .  sf - new star forest context
69 
70    Options Database Keys:
71 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
72 .  -sf_type window    -Use MPI-3 one-sided window for communication
73 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
74 
75    Level: intermediate
76 
77    Notes:
78    When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv,
79    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
80    SFs are optimized and they have better performance than general SFs.
81 
82 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
83 @*/
84 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
85 {
86   PetscErrorCode ierr;
87   PetscSF        b;
88 
89   PetscFunctionBegin;
90   PetscValidPointer(sf,2);
91   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
92 
93   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
94 
95   b->nroots    = -1;
96   b->nleaves   = -1;
97   b->minleaf   = PETSC_MAX_INT;
98   b->maxleaf   = PETSC_MIN_INT;
99   b->nranks    = -1;
100   b->rankorder = PETSC_TRUE;
101   b->ingroup   = MPI_GROUP_NULL;
102   b->outgroup  = MPI_GROUP_NULL;
103   b->graphset  = PETSC_FALSE;
104 #if defined(PETSC_HAVE_DEVICE)
105   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
106   b->use_stream_aware_mpi = PETSC_FALSE;
107   b->use_default_stream   = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
108   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
109     b->backend = PETSCSF_BACKEND_KOKKOS;
110   #elif defined(PETSC_HAVE_CUDA)
111     b->backend = PETSCSF_BACKEND_CUDA;
112   #endif
113 #endif
114   *sf = b;
115   PetscFunctionReturn(0);
116 }
117 
118 /*@
119    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
120 
121    Collective
122 
123    Input Arguments:
124 .  sf - star forest
125 
126    Level: advanced
127 
128 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
129 @*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation release its own data first, while the graph is still intact */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Restore the "no graph set" sentinels established in PetscSFCreate() */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine/remote above may alias these owned buffers; free the buffers, not the aliases */
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  /* Free the device-memory copies of rmine (two slots) */
  for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  /* The multi-SF and root layout are derived objects; drop them so they are rebuilt on demand */
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
160 
161 /*@C
162    PetscSFSetType - Set the PetscSF communication implementation
163 
164    Collective on PetscSF
165 
166    Input Parameters:
167 +  sf - the PetscSF context
168 -  type - a known method
169 
170    Options Database Key:
171 .  -sf_type <type> - Sets the method; use -help for a list
172    of available methods (for instance, window, basic, neighbor)
173 
174    Notes:
175    See "include/petscsf.h" for available methods (for instance)
176 +    PETSCSFWINDOW - MPI-2/3 one-sided
177 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
178 
179   Level: intermediate
180 
181 .seealso: PetscSFType, PetscSFCreate()
182 @*/
183 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
184 {
185   PetscErrorCode ierr,(*r)(PetscSF);
186   PetscBool      match;
187 
188   PetscFunctionBegin;
189   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
190   PetscValidCharPointer(type,2);
191 
192   ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
193   if (match) PetscFunctionReturn(0);
194 
195   ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
196   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
197   /* Destroy the previous PetscSF implementation context */
198   if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
199   ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
200   ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
201   ierr = (*r)(sf);CHKERRQ(ierr);
202   PetscFunctionReturn(0);
203 }
204 
205 /*@C
206   PetscSFGetType - Get the PetscSF communication implementation
207 
208   Not Collective
209 
210   Input Parameter:
211 . sf  - the PetscSF context
212 
213   Output Parameter:
214 . type - the PetscSF type name
215 
216   Level: intermediate
217 
218 .seealso: PetscSFSetType(), PetscSFCreate()
219 @*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  /* The type name is stored on the base object; NULL if no type has been set yet */
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}
228 
229 /*@
230    PetscSFDestroy - destroy star forest
231 
232    Collective
233 
234    Input Arguments:
235 .  sf - address of star forest
236 
237    Level: intermediate
238 
239 .seealso: PetscSFCreate(), PetscSFReset()
240 @*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0); /* destroying a NULL SF is a no-op */
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference tears the object down; otherwise just drop the caller's handle */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); /* frees the header and sets *sf to NULL */
  PetscFunctionReturn(0);
}
254 
255 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
256 {
257   PetscInt           i, nleaves;
258   PetscMPIInt        size;
259   const PetscInt    *ilocal;
260   const PetscSFNode *iremote;
261   PetscErrorCode     ierr;
262 
263   PetscFunctionBegin;
264   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
265   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
266   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
267   for (i = 0; i < nleaves; i++) {
268     const PetscInt rank = iremote[i].rank;
269     const PetscInt remote = iremote[i].index;
270     const PetscInt leaf = ilocal ? ilocal[i] : i;
271     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
272     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
273     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
274   }
275   PetscFunctionReturn(0);
276 }
277 
278 
279 
280 
281 /*@
282    PetscSFSetUp - set up communication structures
283 
284    Collective
285 
286    Input Arguments:
287 .  sf - star forest communication object
288 
289    Level: beginner
290 
291 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
292 @*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0); /* idempotent: already set up */
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); /* debug-mode validation of ranks/indices */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* PetscSFSetType() zeroes sf->ops before filling it */
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  /* Install backend-specific device allocators; must come after type SetUp so they are not overwritten */
#if defined(PETSC_HAVE_CUDA)
  if (sf->backend == PETSCSF_BACKEND_CUDA) {
    sf->ops->Malloc = PetscSFMalloc_Cuda;
    sf->ops->Free   = PetscSFFree_Cuda;
  }
#endif

#if defined(PETSC_HAVE_KOKKOS)
  if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
    sf->ops->Malloc = PetscSFMalloc_Kokkos;
    sf->ops->Free   = PetscSFFree_Kokkos;
  }
#endif
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
322 
323 /*@
324    PetscSFSetFromOptions - set PetscSF options using the options database
325 
326    Logically Collective
327 
328    Input Arguments:
329 .  sf - star forest
330 
331    Options Database Keys:
332 +  -sf_type               - implementation type, see PetscSFSetType()
333 .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
334 .  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
335                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
336                             If true, this option only works with -use_cuda_aware_mpi 1.
337 .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
338                                If true, this option only works with -use_cuda_aware_mpi 1.
339 
340 -  -sf_backend cuda | kokkos -Select the device backend SF uses. Currently SF has two backends: cuda and Kokkos.
341                               On CUDA devices, one can choose cuda or kokkos with the default being cuda. On other devices,
342                               the only available is kokkos.
343 
344    Level: intermediate
345 @*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Default to the current type if one is set, otherwise basic */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  {
    char        backendstr[32] = {0};
    PetscBool   isCuda = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr);
    /* Case-insensitive match of the user-supplied backend name */
    ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr);
  #if defined(PETSC_HAVE_CUDA)
    /* CUDA builds accept either backend; any other explicit choice is an error */
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported on cuda devices. You may choose cuda or kokkos (if installed)", backendstr);
  #elif defined(PETSC_HAVE_KOKKOS)
    /* Without CUDA, kokkos is the only valid explicit backend choice */
    if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
  #endif
  }
#endif
  /* Give the implementation a chance to process its own options */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
383 
384 /*@
385    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
386 
387    Logically Collective
388 
389    Input Arguments:
390 +  sf - star forest
391 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
392 
393    Level: advanced
394 
395 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
396 @*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF is built using the ordering in effect at creation; changing it afterwards would be inconsistent */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}
406 
407 /*@
408    PetscSFSetGraph - Set a parallel star forest
409 
410    Collective
411 
412    Input Arguments:
413 +  sf - star forest
414 .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
415 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
416 .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
417 during setup in debug mode)
418 .  localmode - copy mode for ilocal
419 .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
420 during setup in debug mode)
421 -  remotemode - copy mode for iremote
422 
423    Level: intermediate
424 
425    Notes:
426     In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode
427 
428    Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
429    encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
430    needed
431 
432    Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
433    indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).
434 
435 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
436 @*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    /* One pass: compute the leaf range and detect the identity permutation */
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* Identity ilocal encodes contiguous storage; discard it (freeing it if ownership was transferred) */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    /* Contiguous leaves: range is [0,nleaves-1], an empty interval when nleaves == 0 */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  /* Store the leaf index array according to the requested copy mode */
  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      sf->mine_alloc = NULL; /* not owned; caller keeps the array alive */
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  /* Store the remote root locations the same way */
  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL; /* not owned; caller keeps the array alive */
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
520 
521 /*@
522   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
523 
524   Collective
525 
526   Input Parameters:
527 + sf      - The PetscSF
528 . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
529 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
530 
531   Notes:
532   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
533   n and N are local and global sizes of x respectively.
534 
535   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
536   sequential vectors y on all ranks.
537 
538   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
539   sequential vector y on rank 0.
540 
541   In above cases, entries of x are roots and entries of y are leaves.
542 
543   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
544   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
545   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
546   not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
547   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
548 
549   In this case, roots and leaves are symmetric.
550 
551   Level: intermediate
552  @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* The caller's map is ignored; build the size-per-rank layout the alltoall type expects */
    type = PETSCSFALLTOALL;
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n: max(n) == -max(-n) == min(n) */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    /* Every rank receives all N roots as leaves */
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves (all N of them) */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* Symmetric: each rank has exactly 'size' roots and 'size' leaves */
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
618 
619 /*@
620    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
621 
622    Collective
623 
624    Input Arguments:
625 
626 .  sf - star forest to invert
627 
628    Output Arguments:
629 .  isf - inverse of sf
630    Level: advanced
631 
632    Notes:
633    All roots must have degree 1.
634 
635    The local space may be a permutation, but cannot be sparse.
636 
637 .seealso: PetscSFSetGraph()
638 @*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf slot records its own (rank,index) so a reduce tells every root where its leaf lives */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Sentinel (-1,-1) marks roots that receive no leaf */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's identity to its root; REPLACE is valid because all roots have degree <= 1 */
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* dense: contiguous leaf storage in the inverse */
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* The inverse swaps the roles: old leaf space becomes the new root space */
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
688 
689 /*@
690    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
691 
692    Collective
693 
694    Input Arguments:
695 +  sf - communication object to duplicate
696 -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
697 
698    Output Arguments:
699 .  newsf - new communication object
700 
701    Level: beginner
702 
703 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
704 @*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      /* Deep-copy the general graph so the duplicate owns its own arrays */
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graphs are cheap to rebuild from the layout and pattern */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_HAVE_DEVICE)
  /* Carry over device/GPU configuration regardless of the duplicate option */
  (*newsf)->backend              = sf->backend;
  (*newsf)->use_default_stream   = sf->use_default_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}
739 
740 /*@C
741    PetscSFGetGraph - Get the graph specifying a parallel star forest
742 
743    Not Collective
744 
745    Input Arguments:
746 .  sf - star forest
747 
748    Output Arguments:
749 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
750 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
751 .  ilocal - locations of leaves in leafdata buffers
752 -  iremote - remote locations of root vertices for each leaf on the current process
753 
754    Notes:
755    We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
756 
757    When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
758    want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.
759 
760    Level: intermediate
761 
762 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
763 @*/
764 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
765 {
766   PetscErrorCode ierr;
767 
768   PetscFunctionBegin;
769   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
770   if (sf->ops->GetGraph) {
771     ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
772   } else {
773     if (nroots) *nroots = sf->nroots;
774     if (nleaves) *nleaves = sf->nleaves;
775     if (ilocal) *ilocal = sf->mine;
776     if (iremote) *iremote = sf->remote;
777   }
778   PetscFunctionReturn(0);
779 }
780 
781 /*@
782    PetscSFGetLeafRange - Get the active leaf ranges
783 
784    Not Collective
785 
786    Input Arguments:
787 .  sf - star forest
788 
789    Output Arguments:
790 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
791 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
792 
793    Level: developer
794 
795 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
796 @*/
797 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
798 {
799   PetscFunctionBegin;
800   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
801   PetscSFCheckGraphSet(sf,1);
802   if (minleaf) *minleaf = sf->minleaf;
803   if (maxleaf) *maxleaf = sf->maxleaf;
804   PetscFunctionReturn(0);
805 }
806 
807 /*@C
808    PetscSFViewFromOptions - View from Options
809 
810    Collective on PetscSF
811 
812    Input Parameters:
813 +  A - the star forest
814 .  obj - Optional object
815 -  name - command line option
816 
817    Level: intermediate
818 .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
819 @*/
820 PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
821 {
822   PetscErrorCode ierr;
823 
824   PetscFunctionBegin;
825   PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
826   ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
827   PetscFunctionReturn(0);
828 }
829 
/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is supplied */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Setting up first ensures rank/offset arrays printed below are populated */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    /* Give the implementation a chance to print type-specific information first */
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      /* Per-process summary followed by the leaf <- (rank,root) edge list */
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Print edges grouped by remote rank in ascending rank order; sort a copy so sf->ranks is left untouched */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
900 
901 /*@C
902    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
903 
904    Not Collective
905 
906    Input Arguments:
907 .  sf - star forest
908 
909    Output Arguments:
910 +  nranks - number of ranks referenced by local part
911 .  ranks - array of ranks
912 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
913 .  rmine - concatenated array holding local indices referencing each remote rank
914 -  rremote - concatenated array holding remote indices referenced for each remote rank
915 
916    Level: developer
917 
918 .seealso: PetscSFGetLeafRanks()
919 @*/
920 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
921 {
922   PetscErrorCode ierr;
923 
924   PetscFunctionBegin;
925   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
926   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
927   if (sf->ops->GetRootRanks) {
928     ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
929   } else {
930     /* The generic implementation */
931     if (nranks)  *nranks  = sf->nranks;
932     if (ranks)   *ranks   = sf->ranks;
933     if (roffset) *roffset = sf->roffset;
934     if (rmine)   *rmine   = sf->rmine;
935     if (rremote) *rremote = sf->rremote;
936   }
937   PetscFunctionReturn(0);
938 }
939 
940 /*@C
941    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
942 
943    Not Collective
944 
945    Input Arguments:
946 .  sf - star forest
947 
948    Output Arguments:
949 +  niranks - number of leaf ranks referencing roots on this process
950 .  iranks - array of ranks
951 .  ioffset - offset in irootloc for each rank (length niranks+1)
952 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
953 
954    Level: developer
955 
956 .seealso: PetscSFGetRootRanks()
957 @*/
958 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
959 {
960   PetscErrorCode ierr;
961 
962   PetscFunctionBegin;
963   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
964   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
965   if (sf->ops->GetLeafRanks) {
966     ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
967   } else {
968     PetscSFType type;
969     ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
970     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
971   }
972   PetscFunctionReturn(0);
973 }
974 
975 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
976   PetscInt i;
977   for (i=0; i<n; i++) {
978     if (needle == list[i]) return PETSC_TRUE;
979   }
980   return PETSC_FALSE;
981 }
982 
/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Notes:
   Fills sf->nranks, sf->ndranks, sf->ranks, sf->roffset, sf->rmine and sf->rremote; the first sf->ndranks
   entries of sf->ranks are the distinguished ranks (members of dgroup).

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count how many of this process's leaves reference each remote rank, via a hash table keyed on rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  /* Flatten the table into parallel (rank, leaf-count) arrays */
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the members of dgroup into rank numbers of the SF's own communicator */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort the two partitions independently so each can be searched with PetscFindMPIInt() below */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR-style offsets: roffset[i+1]-roffset[i] is the number of leaves referencing ranks[i] */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;             /* Reused below as a per-rank fill counter */
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    /* Group local leaf indices and the remote root indices they reference by owning rank */
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1086 
/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Notes:
   The groups are created lazily on first call and cached on the SF, so repeated calls return the same groups.

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    /* Build an auxiliary SF with one root per process and one leaf per entry of sf->ranks,
       each leaf pointing at root 0 of that rank; the root's degree then equals the number
       of processes whose leaves reference this process. */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    /* Each process gathers the rank ids of all processes referencing it */
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* The outgoing group is simply the already-known list of ranks this process references */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1149 
/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  /* The multi-SF is built lazily and cached on the SF */
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    /* inoffset[] is the prefix sum of root degrees: root i owns slots [inoffset[i],inoffset[i+1]) in the split-root space */
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands each leaf a distinct slot (returned in outoffset) under its root */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    /* Build the multi graph: same remote ranks, but each leaf targets its assigned split-root slot */
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Send each leaf's owning rank into the split-root slot it was assigned above */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its slot in the rank-sorted ordering, then rebuild the graph with those slots */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1241 
1242 /*@C
1243    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
1244 
1245    Collective
1246 
1247    Input Arguments:
1248 +  sf - original star forest
1249 .  nselected  - number of selected roots on this process
1250 -  selected   - indices of the selected roots on this process
1251 
1252    Output Arguments:
1253 .  esf - new star forest
1254 
1255    Level: advanced
1256 
1257    Note:
1258    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1259    be done by calling PetscSFGetGraph().
1260 
1261 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1262 @*/
1263 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1264 {
1265   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1266   const PetscInt    *ilocal;
1267   signed char       *rootdata,*leafdata,*leafmem;
1268   const PetscSFNode *iremote;
1269   PetscSFNode       *new_iremote;
1270   MPI_Comm          comm;
1271   PetscErrorCode    ierr;
1272 
1273   PetscFunctionBegin;
1274   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1275   PetscSFCheckGraphSet(sf,1);
1276   if (nselected) PetscValidPointer(selected,3);
1277   PetscValidPointer(esf,4);
1278 
1279   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1280   ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1281   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1282   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
1283 
1284   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1285     PetscBool dups;
1286     ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr);
1287     if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1288     for (i=0; i<nselected; i++)
1289       if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots);
1290   }
1291 
1292   if (sf->ops->CreateEmbeddedSF) {
1293     ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr);
1294   } else {
1295     /* A generic version of creating embedded sf */
1296     ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
1297     maxlocal = maxleaf - minleaf + 1;
1298     ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
1299     leafdata = leafmem - minleaf;
1300     /* Tag selected roots and bcast to leaves */
1301     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1302     ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
1303     ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
1304 
1305     /* Build esf with leaves that are still connected */
1306     esf_nleaves = 0;
1307     for (i=0; i<nleaves; i++) {
1308       j = ilocal ? ilocal[i] : i;
1309       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1310          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1311       */
1312       esf_nleaves += (leafdata[j] ? 1 : 0);
1313     }
1314     ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
1315     ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
1316     for (i=n=0; i<nleaves; i++) {
1317       j = ilocal ? ilocal[i] : i;
1318       if (leafdata[j]) {
1319         new_ilocal[n]        = j;
1320         new_iremote[n].rank  = iremote[i].rank;
1321         new_iremote[n].index = iremote[i].index;
1322         ++n;
1323       }
1324     }
1325     ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr);
1326     ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr);
1327     ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1328     ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
1329   }
1330   ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 /*@C
1335   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1336 
1337   Collective
1338 
1339   Input Arguments:
1340 + sf - original star forest
1341 . nselected  - number of selected leaves on this process
1342 - selected   - indices of the selected leaves on this process
1343 
1344   Output Arguments:
1345 .  newsf - new star forest
1346 
1347   Level: advanced
1348 
1349 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1350 @*/
1351 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1352 {
1353   const PetscSFNode *iremote;
1354   PetscSFNode       *new_iremote;
1355   const PetscInt    *ilocal;
1356   PetscInt          i,nroots,*leaves,*new_ilocal;
1357   MPI_Comm          comm;
1358   PetscErrorCode    ierr;
1359 
1360   PetscFunctionBegin;
1361   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1362   PetscSFCheckGraphSet(sf,1);
1363   if (nselected) PetscValidPointer(selected,3);
1364   PetscValidPointer(newsf,4);
1365 
1366   /* Uniq selected[] and put results in leaves[] */
1367   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1368   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1369   ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1370   ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1371   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1372 
1373   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1374   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1375     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1376   } else {
1377     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1378     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1379     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1380     for (i=0; i<nselected; ++i) {
1381       const PetscInt l     = leaves[i];
1382       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1383       new_iremote[i].rank  = iremote[l].rank;
1384       new_iremote[i].index = iremote[l].index;
1385     }
1386     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
1387     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1388   }
1389   ierr = PetscFree(leaves);CHKERRQ(ierr);
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 /*@C
1394    PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()
1395 
1396    Collective on PetscSF
1397 
1398    Input Arguments:
1399 +  sf - star forest on which to communicate
1400 .  unit - data type associated with each node
1401 .  rootdata - buffer to broadcast
1402 -  op - operation to use for reduction
1403 
1404    Output Arguments:
1405 .  leafdata - buffer to be reduced with values from each leaf's respective root
1406 
1407    Level: intermediate
1408 
1409 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
1410 @*/
1411 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1412 {
1413   PetscErrorCode ierr;
1414   PetscMemType   rootmtype,leafmtype;
1415 
1416   PetscFunctionBegin;
1417   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1418   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1419   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1420   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1421   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1422   ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1423   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 /*@C
1428    PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()
1429 
1430    Collective
1431 
1432    Input Arguments:
1433 +  sf - star forest
1434 .  unit - data type
1435 .  rootdata - buffer to broadcast
1436 -  op - operation to use for reduction
1437 
1438    Output Arguments:
1439 .  leafdata - buffer to be reduced with values from each leaf's respective root
1440 
1441    Level: intermediate
1442 
1443 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1444 @*/
1445 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1446 {
1447   PetscErrorCode ierr;
1448 
1449   PetscFunctionBegin;
1450   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1451   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1452   ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
1453   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 /*@C
1458    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1459 
1460    Collective
1461 
1462    Input Arguments:
1463 +  sf - star forest
1464 .  unit - data type
1465 .  leafdata - values to reduce
1466 -  op - reduction operation
1467 
1468    Output Arguments:
1469 .  rootdata - result of reduction of values from all leaves of each root
1470 
1471    Level: intermediate
1472 
1473 .seealso: PetscSFBcastBegin()
1474 @*/
1475 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1476 {
1477   PetscErrorCode ierr;
1478   PetscMemType   rootmtype,leafmtype;
1479 
1480   PetscFunctionBegin;
1481   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1482   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1483   ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1484   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1485   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1486   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1487   ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 /*@C
1492    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1493 
1494    Collective
1495 
1496    Input Arguments:
1497 +  sf - star forest
1498 .  unit - data type
1499 .  leafdata - values to reduce
1500 -  op - reduction operation
1501 
1502    Output Arguments:
1503 .  rootdata - result of reduction of values from all leaves of each root
1504 
1505    Level: intermediate
1506 
1507 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1508 @*/
1509 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1510 {
1511   PetscErrorCode ierr;
1512 
1513   PetscFunctionBegin;
1514   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1515   ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1516   ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1517   ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1518   PetscFunctionReturn(0);
1519 }
1520 
1521 /*@C
1522    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1523 
1524    Collective
1525 
1526    Input Arguments:
1527 +  sf - star forest
1528 .  unit - data type
1529 .  leafdata - leaf values to use in reduction
1530 -  op - operation to use for reduction
1531 
1532    Output Arguments:
1533 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1534 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1535 
1536    Level: advanced
1537 
1538    Note:
1539    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1540    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1541    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1542    integers.
1543 
1544 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1545 @*/
1546 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1547 {
1548   PetscErrorCode ierr;
1549   PetscMemType   rootmtype,leafmtype,leafupdatemtype;
1550 
1551   PetscFunctionBegin;
1552   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1553   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1554   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1555   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1556   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1557   ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
1558   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1559   ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
1560   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 /*@C
1565    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1566 
1567    Collective
1568 
1569    Input Arguments:
1570 +  sf - star forest
1571 .  unit - data type
1572 .  leafdata - leaf values to use in reduction
1573 -  op - operation to use for reduction
1574 
1575    Output Arguments:
1576 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1577 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1578 
1579    Level: advanced
1580 
1581 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1582 @*/
1583 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1584 {
1585   PetscErrorCode ierr;
1586 
1587   PetscFunctionBegin;
1588   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1589   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1590   ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
1591   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1592   PetscFunctionReturn(0);
1593 }
1594 
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
   This routine sets *degree to NULL; the array becomes valid only after the matching PetscSFComputeDegreeEnd().

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* A live sf->degree with degreeknown unset means a Begin without a matching End */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1; /* size of the contiguous leaf range [minleaf,maxleaf] */
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Summing a 1 from every leaf into its root yields the root's degree; degreetmp-minleaf shifts
       the buffer so it can be indexed by leaf numbers in [minleaf,maxleaf] */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  *degree = NULL;
  PetscFunctionReturn(0);
}
1634 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp was allocated (with at least one entry) by PetscSFComputeDegreeBegin() */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Same shifted buffer as in Begin; completes the MPI_SUM of the per-leaf 1s into sf->degree */
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE; /* cache the result; subsequent Begin/End pairs are no-ops */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1670 
1671 
1672 /*@C
1673    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1674    Each multi-root is assigned index of the corresponding original root.
1675 
1676    Collective
1677 
1678    Input Arguments:
1679 +  sf - star forest
1680 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1681 
1682    Output Arguments:
1683 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1684 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1685 
1686    Level: developer
1687 
1688    Notes:
1689    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1690 
1691 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1692 @*/
1693 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1694 {
1695   PetscSF             msf;
1696   PetscInt            i, j, k, nroots, nmroots;
1697   PetscErrorCode      ierr;
1698 
1699   PetscFunctionBegin;
1700   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1701   ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
1702   if (nroots) PetscValidIntPointer(degree,2);
1703   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1704   PetscValidPointer(multiRootsOrigNumbering,4);
1705   ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
1706   ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
1707   ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
1708   for (i=0,j=0,k=0; i<nroots; i++) {
1709     if (!degree[i]) continue;
1710     for (j=0; j<degree[i]; j++,k++) {
1711       (*multiRootsOrigNumbering)[k] = i;
1712     }
1713   }
1714   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1715   if (nMultiRoots) *nMultiRoots = nmroots;
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 /*@C
1720    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1721 
1722    Collective
1723 
1724    Input Arguments:
1725 +  sf - star forest
1726 .  unit - data type
1727 -  leafdata - leaf data to gather to roots
1728 
1729    Output Argument:
1730 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1731 
1732    Level: intermediate
1733 
1734 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1735 @*/
1736 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1737 {
1738   PetscErrorCode ierr;
1739   PetscSF        multi = NULL;
1740 
1741   PetscFunctionBegin;
1742   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1743   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1744   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1745   ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1746   PetscFunctionReturn(0);
1747 }
1748 
1749 /*@C
1750    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1751 
1752    Collective
1753 
1754    Input Arguments:
1755 +  sf - star forest
1756 .  unit - data type
1757 -  leafdata - leaf data to gather to roots
1758 
1759    Output Argument:
1760 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1761 
1762    Level: intermediate
1763 
1764 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1765 @*/
1766 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1767 {
1768   PetscErrorCode ierr;
1769   PetscSF        multi = NULL;
1770 
1771   PetscFunctionBegin;
1772   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1773   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1774   ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 
/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* A scatter is a broadcast over the multi-SF, which has one root slot per leaf */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1807 
/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Complete the broadcast over the multi-SF started in PetscSFScatterBegin() */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1836 
1837 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1838 {
1839   PetscInt        i, n, nleaves;
1840   const PetscInt *ilocal = NULL;
1841   PetscHSetI      seen;
1842   PetscErrorCode  ierr;
1843 
1844   PetscFunctionBegin;
1845   if (!PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
1846   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
1847   ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
1848   for (i = 0; i < nleaves; i++) {
1849     const PetscInt leaf = ilocal ? ilocal[i] : i;
1850     ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
1851   }
1852   ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
1853   if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1854   ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 /*@
1859   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1860 
1861   Input Parameters:
1862 + sfA - The first PetscSF
1863 - sfB - The second PetscSF
1864 
1865   Output Parameters:
1866 . sfBA - The composite SF
1867 
1868   Level: developer
1869 
1870   Notes:
1871   Currently, the two SFs must be defined on congruent communicators and they must be true star
1872   forests, i.e. the same leaf is not connected with different roots.
1873 
1874   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
1875   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1876   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1877   Bcast on sfA, then a Bcast on sfB, on connected nodes.
1878 
1879 .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1880 @*/
1881 PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1882 {
1883   PetscErrorCode    ierr;
1884   const PetscSFNode *remotePointsA,*remotePointsB;
1885   PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1886   const PetscInt    *localPointsA,*localPointsB;
1887   PetscInt          *localPointsBA;
1888   PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1889   PetscBool         denseB;
1890 
1891   PetscFunctionBegin;
1892   PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
1893   PetscSFCheckGraphSet(sfA,1);
1894   PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
1895   PetscSFCheckGraphSet(sfB,2);
1896   PetscCheckSameComm(sfA,1,sfB,2);
1897   PetscValidPointer(sfBA,3);
1898   ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
1899   ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
1900 
1901   ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
1902   ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
1903   if (localPointsA) {
1904     ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
1905     for (i=0; i<numRootsB; i++) {
1906       reorderedRemotePointsA[i].rank = -1;
1907       reorderedRemotePointsA[i].index = -1;
1908     }
1909     for (i=0; i<numLeavesA; i++) {
1910       if (localPointsA[i] >= numRootsB) continue;
1911       reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1912     }
1913     remotePointsA = reorderedRemotePointsA;
1914   }
1915   ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
1916   ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
1917   ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
1918   ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
1919   ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
1920 
1921   denseB = (PetscBool)!localPointsB;
1922   for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1923     if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
1924     else numLeavesBA++;
1925   }
1926   if (denseB) {
1927     localPointsBA  = NULL;
1928     remotePointsBA = leafdataB;
1929   } else {
1930     ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
1931     ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
1932     for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1933       const PetscInt l = localPointsB ? localPointsB[i] : i;
1934 
1935       if (leafdataB[l-minleaf].rank == -1) continue;
1936       remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
1937       localPointsBA[numLeavesBA] = l;
1938       numLeavesBA++;
1939     }
1940     ierr = PetscFree(leafdataB);CHKERRQ(ierr);
1941   }
1942   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
1943   ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
1944   ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 /*@
1949   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
1950 
1951   Input Parameters:
1952 + sfA - The first PetscSF
1953 - sfB - The second PetscSF
1954 
1955   Output Parameters:
1956 . sfBA - The composite SF.
1957 
1958   Level: developer
1959 
1960   Notes:
1961   Currently, the two SFs must be defined on congruent communicators and they must be true star
1962   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
1963   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.
1964 
1965   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
1966   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
1967   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
1968   a Reduce on sfB, on connected roots.
1969 
1970 .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
1971 @*/
1972 PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1973 {
1974   PetscErrorCode    ierr;
1975   const PetscSFNode *remotePointsA,*remotePointsB;
1976   PetscSFNode       *remotePointsBA;
1977   const PetscInt    *localPointsA,*localPointsB;
1978   PetscSFNode       *reorderedRemotePointsA = NULL;
1979   PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
1980   MPI_Op            op;
1981 #if defined(PETSC_USE_64BIT_INDICES)
1982   PetscBool         iswin;
1983 #endif
1984 
1985   PetscFunctionBegin;
1986   PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
1987   PetscSFCheckGraphSet(sfA,1);
1988   PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
1989   PetscSFCheckGraphSet(sfB,2);
1990   PetscCheckSameComm(sfA,1,sfB,2);
1991   PetscValidPointer(sfBA,3);
1992   ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
1993   ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
1994 
1995   ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
1996   ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
1997 
1998   /* TODO: Check roots of sfB have degree of 1 */
1999   /* Once we implement it, we can replace the MPI_MAXLOC
2000      with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
2001      We use MPI_MAXLOC only to have a deterministic output from this routine if
2002      the root condition is not meet.
2003    */
2004   op = MPI_MAXLOC;
2005 #if defined(PETSC_USE_64BIT_INDICES)
2006   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2007   ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
2008   if (iswin) op = MPIU_REPLACE;
2009 #endif
2010 
2011   ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
2012   ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
2013   for (i=0; i<maxleaf - minleaf + 1; i++) {
2014     reorderedRemotePointsA[i].rank = -1;
2015     reorderedRemotePointsA[i].index = -1;
2016   }
2017   if (localPointsA) {
2018     for (i=0; i<numLeavesA; i++) {
2019       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2020       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2021     }
2022   } else {
2023     for (i=0; i<numLeavesA; i++) {
2024       if (i > maxleaf || i < minleaf) continue;
2025       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2026     }
2027   }
2028 
2029   ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
2030   ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
2031   for (i=0; i<numRootsB; i++) {
2032     remotePointsBA[i].rank = -1;
2033     remotePointsBA[i].index = -1;
2034   }
2035 
2036   ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
2037   ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
2038   ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
2039   for (i=0,numLeavesBA=0; i<numRootsB; i++) {
2040     if (remotePointsBA[i].rank == -1) continue;
2041     remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2042     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2043     localPointsBA[numLeavesBA] = i;
2044     numLeavesBA++;
2045   }
2046   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
2047   ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
2048   ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 /*
2053   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
2054 
2055   Input Parameters:
2056 . sf - The global PetscSF
2057 
2058   Output Parameters:
2059 . out - The local PetscSF
2060  */
2061 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2062 {
2063   MPI_Comm           comm;
2064   PetscMPIInt        myrank;
2065   const PetscInt     *ilocal;
2066   const PetscSFNode  *iremote;
2067   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
2068   PetscSFNode        *liremote;
2069   PetscSF            lsf;
2070   PetscErrorCode     ierr;
2071 
2072   PetscFunctionBegin;
2073   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2074   if (sf->ops->CreateLocalSF) {
2075     ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
2076   } else {
2077     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2078     ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
2079     ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);
2080 
2081     /* Find out local edges and build a local SF */
2082     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
2083     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2084     ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
2085     ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);
2086 
2087     for (i=j=0; i<nleaves; i++) {
2088       if (iremote[i].rank == (PetscInt)myrank) {
2089         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2090         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2091         liremote[j].index = iremote[i].index;
2092         j++;
2093       }
2094     }
2095     ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
2096     ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
2097     ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
2098     ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
2099     *out = lsf;
2100   }
2101   PetscFunctionReturn(0);
2102 }
2103 
/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0; errors out if the SF
   implementation does not provide a BcastToZero method */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode     ierr;
  PetscMemType       rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Logged under the BcastAndOp event; there is no dedicated event for BcastToZero */
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Detect host vs device residency of the buffers for the backend */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2122 
2123