xref: /petsc/src/vec/is/sf/interface/sf.c (revision d5c9c0c4eebc2f2a01a1bd0c86fca87e2acd2a03)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petscctable.h>
4 
/* PetscSFCheckGraphSet(sf,arg) - raise PETSC_ERR_ARG_WRONGSTATE unless PetscSFSetGraph() or
   PetscSFSetGraphWithPattern() has already been called on sf (argument number arg of the caller).
   Active only in debug builds; compiles to a no-op otherwise. */
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
13 
/* Printable names for the PetscSFDuplicateOption enum; trailing entries (enum type name,
   value prefix, NULL terminator) follow the PETSc convention for enum string tables */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};
15 
16 /*@
17    PetscSFCreate - create a star forest communication context
18 
19    Collective
20 
21    Input Arguments:
22 .  comm - communicator on which the star forest will operate
23 
24    Output Arguments:
25 .  sf - new star forest context
26 
27    Options Database Keys:
28 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
29 .  -sf_type window    -Use MPI-3 one-sided window for communication
30 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
31 
32    Level: intermediate
33 
34    Notes:
35    When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
36    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
37    SFs are optimized and they have better performance than general SFs.
38 
39 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
40 @*/
41 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
42 {
43   PetscErrorCode ierr;
44   PetscSF        b;
45 
46   PetscFunctionBegin;
47   PetscValidPointer(sf,2);
48   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
49 
50   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
51 
52   b->nroots    = -1;
53   b->nleaves   = -1;
54   b->minleaf   = PETSC_MAX_INT;
55   b->maxleaf   = PETSC_MIN_INT;
56   b->nranks    = -1;
57   b->rankorder = PETSC_TRUE;
58   b->ingroup   = MPI_GROUP_NULL;
59   b->outgroup  = MPI_GROUP_NULL;
60   b->graphset  = PETSC_FALSE;
61 
62   *sf = b;
63   PetscFunctionReturn(0);
64 }
65 
/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the type-specific implementation release its own data first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Return the graph description to its just-created state */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* PetscFree is a no-op on NULL, so USE_POINTER graphs (mine_alloc==NULL) are safe here */
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  { /* Release device-side copies of rmine, if they were ever allocated */
  PetscInt  i;
  for (i=0; i<2; i++) {if (sf->rmine_d[i]) {cudaError_t err = cudaFree(sf->rmine_d[i]);CHKERRCUDA(err);sf->rmine_d[i]=NULL;}}
  }
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  /* The multi-SF and root layout are derived from the graph; they will be rebuilt on demand */
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
111 
/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, basic, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

  Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* Setting the type the object already has is a no-op */
  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  /* Look up the registered constructor for the requested type */
  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  /* Clear the ops table before the new constructor fills it in */
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
155 
156 /*@C
157   PetscSFGetType - Get the PetscSF communication implementation
158 
159   Not Collective
160 
161   Input Parameter:
162 . sf  - the PetscSF context
163 
164   Output Parameter:
165 . type - the PetscSF type name
166 
167   Level: intermediate
168 
169 .seealso: PetscSFSetType(), PetscSFCreate()
170 @*/
171 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
172 {
173   PetscFunctionBegin;
174   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
175   PetscValidPointer(type,2);
176   *type = ((PetscObject)sf)->type_name;
177   PetscFunctionReturn(0);
178 }
179 
/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

   Notes:
   Decrements the reference count; the object is freed only when the count reaches zero.
   On return *sf is set to NULL (presumably also by PetscHeaderDestroy() on the final
   reference -- verify against its definition).

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the holder of the last reference performs the actual destruction */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
205 
206 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
207 {
208   PetscInt           i, nleaves;
209   PetscMPIInt        size;
210   const PetscInt    *ilocal;
211   const PetscSFNode *iremote;
212   PetscErrorCode     ierr;
213 
214   PetscFunctionBegin;
215   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
216   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
217   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
218   for (i = 0; i < nleaves; i++) {
219     const PetscInt rank = iremote[i].rank;
220     const PetscInt remote = iremote[i].index;
221     const PetscInt leaf = ilocal ? ilocal[i] : i;
222     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
223     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
224     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
225   }
226   PetscFunctionReturn(0);
227 }
228 
/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0); /* idempotent: second call is a no-op */
  /* Sanity-check graph ranks/indices (only active in debug builds) */
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  /* Snapshot the global GPU-aware-MPI flag at setup time */
  sf->use_gpu_aware_mpi = use_gpu_aware_mpi;
  /* Default to the basic (two-sided) implementation if no type was chosen */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
258 
/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
+  -sf_type               - implementation type, see PetscSFSetType()
.  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
.  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
                            use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
                            If true, this option only works with -use_cuda_aware_mpi 1.
-  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                               If true, this option only works with -use_cuda_aware_mpi 1.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* NOTE: PetscOptionsObject used below is presumably declared by the PetscObjectOptionsBegin macro */
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Use the current type, if any, as the default for -sf_type */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);

#if defined(PETSC_HAVE_CUDA)
  sf->use_default_stream = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
  ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr);
  sf->use_stream_aware_mpi = PETSC_FALSE;
  ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
#endif
  /* Give the implementation a chance to process its own options */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
303 
/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Arguments:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF bakes in the ordering, so it must not have been built yet */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}
326 
/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    /* One pass: record the leaf range and detect the identity permutation */
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* Identity ilocal encodes contiguous storage: drop it (freeing it if we would own it) */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    /* Contiguous leaves: range is simply [0, nleaves-1] (empty range when nleaves==0) */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  /* Store ilocal according to the requested copy mode; mine_alloc tracks what we must free */
  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      sf->mine_alloc = NULL; /* caller retains ownership; nothing to free on reset */
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  /* Same treatment for iremote according to remotemode */
  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL; /* caller retains ownership; nothing to free on reset */
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
440 
/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* map is ignored for ALLTOALL; build a size x size layout (size roots per rank) internally */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
538 
/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Arguments:
.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf slot names itself as (rank, local index) ... */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* ... and roots start invalid so we can tell afterwards which ones received data */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's identity to its root; roots[] then records the leaf attached to each root */
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* dense: contiguous leaves in the inverse */
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* Inverse: old leaf space (maxlocal) becomes roots, touched old roots (count) become leaves */
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
608 
/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  /* Copy the type over, if one has been set */
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graphs are deep-copied */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graphs are regenerated from the pattern and layout */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Let the implementation copy any type-specific state */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
652 
653 /*@C
654    PetscSFGetGraph - Get the graph specifying a parallel star forest
655 
656    Not Collective
657 
658    Input Arguments:
659 .  sf - star forest
660 
661    Output Arguments:
662 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
663 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
664 .  ilocal - locations of leaves in leafdata buffers
665 -  iremote - remote locations of root vertices for each leaf on the current process
666 
667    Notes:
668    We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
669 
670    When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
671    want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.
672 
673    Level: intermediate
674 
675 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
676 @*/
677 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
678 {
679   PetscErrorCode ierr;
680 
681   PetscFunctionBegin;
682   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
683   if (sf->ops->GetGraph) {
684     ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
685   } else {
686     if (nroots) *nroots = sf->nroots;
687     if (nleaves) *nleaves = sf->nleaves;
688     if (ilocal) *ilocal = sf->mine;
689     if (iremote) *iremote = sf->remote;
690   }
691   PetscFunctionReturn(0);
692 }
693 
/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
-  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  /* Both outputs are optional; the range was cached by PetscSFSetGraph() */
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}
720 
/*@C
   PetscSFViewFromOptions - View from Options

   Collective on PetscSF

   Input Parameters:
+  A - the star forest
.  obj - Optional object
-  name - command line option

   Level: intermediate
.seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
@*/
PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  /* Thin wrapper: the generic object-level viewer handles the option lookup and viewing */
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
743 
/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is supplied */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Ensure derived data (ranks etc.) exists before printing it */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      /* One line per leaf: leaf location <- (remote rank, remote index) */
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* Print per-rank edge lists; sort a copy of the rank array so output is in rank order */
        PetscMPIInt *tmpranks,*perm;
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
814 
815 /*@C
816    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
817 
818    Not Collective
819 
820    Input Arguments:
821 .  sf - star forest
822 
823    Output Arguments:
824 +  nranks - number of ranks referenced by local part
825 .  ranks - array of ranks
826 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
827 .  rmine - concatenated array holding local indices referencing each remote rank
828 -  rremote - concatenated array holding remote indices referenced for each remote rank
829 
830    Level: developer
831 
832 .seealso: PetscSFGetLeafRanks()
833 @*/
834 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
835 {
836   PetscErrorCode ierr;
837 
838   PetscFunctionBegin;
839   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
840   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
841   if (sf->ops->GetRootRanks) {
842     ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
843   } else {
844     /* The generic implementation */
845     if (nranks)  *nranks  = sf->nranks;
846     if (ranks)   *ranks   = sf->ranks;
847     if (roffset) *roffset = sf->roffset;
848     if (rmine)   *rmine   = sf->rmine;
849     if (rremote) *rremote = sf->rremote;
850   }
851   PetscFunctionReturn(0);
852 }
853 
854 /*@C
855    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
856 
857    Not Collective
858 
859    Input Arguments:
860 .  sf - star forest
861 
862    Output Arguments:
863 +  niranks - number of leaf ranks referencing roots on this process
864 .  iranks - array of ranks
865 .  ioffset - offset in irootloc for each rank (length niranks+1)
866 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
867 
868    Level: developer
869 
870 .seealso: PetscSFGetRootRanks()
871 @*/
872 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
873 {
874   PetscErrorCode ierr;
875 
876   PetscFunctionBegin;
877   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
878   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
879   if (sf->ops->GetLeafRanks) {
880     ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
881   } else {
882     PetscSFType type;
883     ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
884     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
885   }
886   PetscFunctionReturn(0);
887 }
888 
889 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
890   PetscInt i;
891   for (i=0; i<n; i++) {
892     if (needle == list[i]) return PETSC_TRUE;
893   }
894   return PETSC_FALSE;
895 }
896 
/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

   Notes:
   Fills sf->nranks, sf->ranks, sf->roffset, sf->rmine and sf->rremote; the first sf->ndranks
   entries of sf->ranks are the distinguished ranks (members of dgroup).

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count, via a hash table, how many leaves reference each remote rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  /* Drain the table into parallel arrays: ranks[i] = remote rank, rcount[i] = number of leaves referencing it */
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the members of dgroup into ranks of sf's own communicator, stored in groupranks[] */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition by rank so both can be binary-searched with PetscFindMPIInt() below */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR-style offsets: entries roffset[i]..roffset[i+1] of rmine/rremote belong to ranks[i] */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;           /* reused below as per-rank fill counter */
  }
  /* Bucket every leaf into the slot for its root's rank */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: reuse irank from the previous iteration when the rank repeats */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks: first among the distinguished ranks, then the rest */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1000 
/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

   Notes:
   The groups are computed lazily on first call and cached in sf->ingroup/sf->outgroup, so
   subsequent calls simply return the cached handles.

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build a temporary SF with one leaf per remote rank,
       all pointing at local root 0, whose indegree is then the number of processes that reference us */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks: every process gathers its own rank to its referenced roots */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing group is simply the ranks this process references, known locally from sf->ranks */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1063 
/*@
   PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

   The multi-SF is built lazily on first call and cached in sf->multi.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* inoffset[] is the prefix sum of root degrees: root i owns multi-root slots inoffset[i]..inoffset[i+1) */
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands each leaf a distinct multi-root slot (in outoffset), incrementing inoffset as it goes */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    /* Each leaf keeps its original remote rank but now points at its private multi-root slot */
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Gather the contributing rank for each multi-root slot so slots can be re-sorted by source rank */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell every leaf its new (rank-ordered) multi-root slot and rebuild the graph accordingly */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1155 
1156 /*@C
1157    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
1158 
1159    Collective
1160 
1161    Input Arguments:
1162 +  sf - original star forest
1163 .  nselected  - number of selected roots on this process
1164 -  selected   - indices of the selected roots on this process
1165 
1166    Output Arguments:
1167 .  esf - new star forest
1168 
1169    Level: advanced
1170 
1171    Note:
1172    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1173    be done by calling PetscSFGetGraph().
1174 
1175 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1176 @*/
1177 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1178 {
1179   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1180   const PetscInt    *ilocal;
1181   signed char       *rootdata,*leafdata,*leafmem;
1182   const PetscSFNode *iremote;
1183   PetscSFNode       *new_iremote;
1184   MPI_Comm          comm;
1185   PetscErrorCode    ierr;
1186 
1187   PetscFunctionBegin;
1188   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1189   PetscSFCheckGraphSet(sf,1);
1190   if (nselected) PetscValidPointer(selected,3);
1191   PetscValidPointer(esf,4);
1192 
1193   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1194 
1195   ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1196   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1197   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
1198 
1199   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1200 
1201     PetscBool dups;
1202     ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr);
1203     if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1204     for (i=0; i<nselected; i++)
1205       if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots);
1206   }
1207 
1208   if (sf->ops->CreateEmbeddedSF) {
1209     ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr);
1210   } else {
1211     /* A generic version of creating embedded sf */
1212     ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
1213     maxlocal = maxleaf - minleaf + 1;
1214     ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
1215     leafdata = leafmem - minleaf;
1216     /* Tag selected roots and bcast to leaves */
1217     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1218     ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
1219     ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
1220 
1221     /* Build esf with leaves that are still connected */
1222     esf_nleaves = 0;
1223     for (i=0; i<nleaves; i++) {
1224       j = ilocal ? ilocal[i] : i;
1225       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1226          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1227       */
1228       esf_nleaves += (leafdata[j] ? 1 : 0);
1229     }
1230     ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
1231     ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
1232     for (i=n=0; i<nleaves; i++) {
1233       j = ilocal ? ilocal[i] : i;
1234       if (leafdata[j]) {
1235         new_ilocal[n]        = j;
1236         new_iremote[n].rank  = iremote[i].rank;
1237         new_iremote[n].index = iremote[i].index;
1238         ++n;
1239       }
1240     }
1241     ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr);
1242     ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr);
1243     ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1244     ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
1245   }
1246   ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 /*@C
1251   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1252 
1253   Collective
1254 
1255   Input Arguments:
1256 + sf - original star forest
1257 . nselected  - number of selected leaves on this process
1258 - selected   - indices of the selected leaves on this process
1259 
1260   Output Arguments:
1261 .  newsf - new star forest
1262 
1263   Level: advanced
1264 
1265 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1266 @*/
1267 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1268 {
1269   const PetscSFNode *iremote;
1270   PetscSFNode       *new_iremote;
1271   const PetscInt    *ilocal;
1272   PetscInt          i,nroots,*leaves,*new_ilocal;
1273   MPI_Comm          comm;
1274   PetscErrorCode    ierr;
1275 
1276   PetscFunctionBegin;
1277   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1278   PetscSFCheckGraphSet(sf,1);
1279   if (nselected) PetscValidPointer(selected,3);
1280   PetscValidPointer(newsf,4);
1281 
1282   /* Uniq selected[] and put results in leaves[] */
1283   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1284   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1285   ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1286   ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1287   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1288 
1289   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1290   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1291     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1292   } else {
1293     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1294     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1295     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1296     for (i=0; i<nselected; ++i) {
1297       const PetscInt l     = leaves[i];
1298       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1299       new_iremote[i].rank  = iremote[l].rank;
1300       new_iremote[i].index = iremote[l].index;
1301     }
1302     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
1303     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1304   }
1305   ierr = PetscFree(leaves);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 /*@C
1310    PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()
1311 
1312    Collective on PetscSF
1313 
1314    Input Arguments:
1315 +  sf - star forest on which to communicate
1316 .  unit - data type associated with each node
1317 .  rootdata - buffer to broadcast
1318 -  op - operation to use for reduction
1319 
1320    Output Arguments:
1321 .  leafdata - buffer to be reduced with values from each leaf's respective root
1322 
1323    Level: intermediate
1324 
1325 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
1326 @*/
1327 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1328 {
1329   PetscErrorCode ierr;
1330   PetscMemType   rootmtype,leafmtype;
1331 
1332   PetscFunctionBegin;
1333   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1334   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1335   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1336   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1337   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1338   ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1339   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 /*@C
1344    PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()
1345 
1346    Collective
1347 
1348    Input Arguments:
1349 +  sf - star forest
1350 .  unit - data type
1351 .  rootdata - buffer to broadcast
1352 -  op - operation to use for reduction
1353 
1354    Output Arguments:
1355 .  leafdata - buffer to be reduced with values from each leaf's respective root
1356 
1357    Level: intermediate
1358 
1359 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1360 @*/
1361 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1362 {
1363   PetscErrorCode ierr;
1364 
1365   PetscFunctionBegin;
1366   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1367   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1368   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1369   ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
1370   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 /*@C
1375    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1376 
1377    Collective
1378 
1379    Input Arguments:
1380 +  sf - star forest
1381 .  unit - data type
1382 .  leafdata - values to reduce
1383 -  op - reduction operation
1384 
1385    Output Arguments:
1386 .  rootdata - result of reduction of values from all leaves of each root
1387 
1388    Level: intermediate
1389 
1390 .seealso: PetscSFBcastBegin()
1391 @*/
1392 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1393 {
1394   PetscErrorCode ierr;
1395   PetscMemType   rootmtype,leafmtype;
1396 
1397   PetscFunctionBegin;
1398   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1399   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1400   ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1401   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1402   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1403   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1404   ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 /*@C
1409    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1410 
1411    Collective
1412 
1413    Input Arguments:
1414 +  sf - star forest
1415 .  unit - data type
1416 .  leafdata - values to reduce
1417 -  op - reduction operation
1418 
1419    Output Arguments:
1420 .  rootdata - result of reduction of values from all leaves of each root
1421 
1422    Level: intermediate
1423 
1424 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1425 @*/
1426 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1427 {
1428   PetscErrorCode ierr;
1429 
1430   PetscFunctionBegin;
1431   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1432   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1433   ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1434   ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1435   ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 /*@C
1440    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1441 
1442    Collective
1443 
1444    Input Arguments:
1445 +  sf - star forest
1446 .  unit - data type
1447 .  leafdata - leaf values to use in reduction
1448 -  op - operation to use for reduction
1449 
1450    Output Arguments:
1451 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1452 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1453 
1454    Level: advanced
1455 
1456    Note:
1457    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1458    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1459    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1460    integers.
1461 
1462 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1463 @*/
1464 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1465 {
1466   PetscErrorCode ierr;
1467   PetscMemType   rootmtype,leafmtype,leafupdatemtype;
1468 
1469   PetscFunctionBegin;
1470   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1471   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1472   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1473   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1474   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1475   ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
1476   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1477   ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
1478   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 /*@C
1483    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1484 
1485    Collective
1486 
1487    Input Arguments:
1488 +  sf - star forest
1489 .  unit - data type
1490 .  leafdata - leaf values to use in reduction
1491 -  op - operation to use for reduction
1492 
1493    Output Arguments:
1494 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1495 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1496 
1497    Level: advanced
1498 
1499 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1500 @*/
1501 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1502 {
1503   PetscErrorCode ierr;
1504 
1505   PetscFunctionBegin;
1506   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1507   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1508   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1509   ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
1510   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1511   PetscFunctionReturn(0);
1512 }
1513 
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

   *degree is set to NULL here; the actual array becomes available from the matching PetscSFComputeDegreeEnd().
   The result is cached in sf->degree, so the reduction is only performed on the first call.

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Sum a 1 from every leaf into its root; degreetmp is offset by minleaf so the reduce can index
       leaves by their actual (possibly non-zero-based) local indices */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  *degree = NULL;
  PetscFunctionReturn(0);
}
1553 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated (with at least one entry) by PetscSFComputeDegreeBegin() */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    /* Cache the result; subsequent Begin/End pairs return the stored array without communication */
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1589 
1590 
/*@C
   PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
   Each multi-root is assigned index of the corresponding original root.

   Collective

   Input Arguments:
+  sf - star forest
-  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

   Output Arguments:
+  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
-  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

   Level: developer

   Notes:
   The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

.seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
@*/
PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
{
  PetscSF             msf;
  PetscInt            i, j, k, nroots, nmroots;
  PetscErrorCode      ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
  /* degree may legitimately be NULL when this rank owns no roots */
  if (nroots) PetscValidIntPointer(degree,2);
  if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
  PetscValidPointer(multiRootsOrigNumbering,4);
  ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
  /* Multi-SF has one root per (root,leaf) edge, ordered by original root: root i with
     degree d contributes d consecutive multi-roots, each mapped back to index i. */
  for (i=0,j=0,k=0; i<nroots; i++) {
    if (!degree[i]) continue;
    for (j=0; j<degree[i]; j++,k++) {
      (*multiRootsOrigNumbering)[k] = i;
    }
  }
  /* The degrees must sum exactly to the number of multi-roots */
  if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
  if (nMultiRoots) *nMultiRoots = nmroots;
  PetscFunctionReturn(0);
}
1637 
1638 /*@C
1639    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1640 
1641    Collective
1642 
1643    Input Arguments:
1644 +  sf - star forest
1645 .  unit - data type
1646 -  leafdata - leaf data to gather to roots
1647 
1648    Output Argument:
1649 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1650 
1651    Level: intermediate
1652 
1653 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1654 @*/
1655 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1656 {
1657   PetscErrorCode ierr;
1658   PetscSF        multi = NULL;
1659 
1660   PetscFunctionBegin;
1661   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1662   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1663   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1664   ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1665   PetscFunctionReturn(0);
1666 }
1667 
1668 /*@C
1669    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1670 
1671    Collective
1672 
1673    Input Arguments:
1674 +  sf - star forest
1675 .  unit - data type
1676 -  leafdata - leaf data to gather to roots
1677 
1678    Output Argument:
1679 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1680 
1681    Level: intermediate
1682 
1683 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1684 @*/
1685 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1686 {
1687   PetscErrorCode ierr;
1688   PetscSF        multi = NULL;
1689 
1690   PetscFunctionBegin;
1691   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1692   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1693   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1694   ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 /*@C
1699    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1700 
1701    Collective
1702 
1703    Input Arguments:
1704 +  sf - star forest
1705 .  unit - data type
1706 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1707 
1708    Output Argument:
1709 .  leafdata - leaf data to be update with personal data from each respective root
1710 
1711    Level: intermediate
1712 
1713 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1714 @*/
1715 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1716 {
1717   PetscErrorCode ierr;
1718   PetscSF        multi = NULL;
1719 
1720   PetscFunctionBegin;
1721   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1722   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1723   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1724   ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
1725   PetscFunctionReturn(0);
1726 }
1727 
1728 /*@C
1729    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1730 
1731    Collective
1732 
1733    Input Arguments:
1734 +  sf - star forest
1735 .  unit - data type
1736 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1737 
1738    Output Argument:
1739 .  leafdata - leaf data to be update with personal data from each respective root
1740 
1741    Level: intermediate
1742 
1743 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1744 @*/
1745 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1746 {
1747   PetscErrorCode ierr;
1748   PetscSF        multi = NULL;
1749 
1750   PetscFunctionBegin;
1751   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1752   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1753   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1754   ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
1755   PetscFunctionReturn(0);
1756 }
1757 
1758 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1759 {
1760   PetscInt        i, n, nleaves;
1761   const PetscInt *ilocal = NULL;
1762   PetscHSetI      seen;
1763   PetscErrorCode  ierr;
1764 
1765   PetscFunctionBegin;
1766   if (!PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
1767   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
1768   ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
1769   for (i = 0; i < nleaves; i++) {
1770     const PetscInt leaf = ilocal ? ilocal[i] : i;
1771     ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
1772   }
1773   ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
1774   if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1775   ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
1776   PetscFunctionReturn(0);
1777 }
1778 
/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Debug-only: composition assumes true star forests (unique leaves) */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  /* If sfA's leaves are not contiguous, scatter its remote points into an array indexed by
     leaf number (= sfB root number), with rank/index = -1 marking "no sfA leaf here" */
  if (localPointsA) {
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf not covered by sfB's root space */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Push sfA's root locations through sfB: each sfB leaf learns which sfA root it reaches.
     leafdataB-minleaf makes the buffer indexable by global leaf number. */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* Count connected leaves; the graph is "dense" if leaves are contiguous and all connected */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    /* Contiguous case: hand leafdataB over to the new SF directly (ownership transfer below) */
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    /* Sparse case: compact connected leaves into freshly allocated arrays */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue; /* leaf has no path back to an sfA root */
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  /* PETSC_OWN_POINTER: the new SF takes ownership of localPointsBA/remotePointsBA */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1867 
/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Debug-only: composition assumes true star forests (unique leaves) */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPIU_REPLACE;
#endif

  /* Build an array of sfA's remote points indexed by sfB leaf number (shifted by minleaf);
     rank/index = -1 marks sfB leaves that carry no sfA edge */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* sfA leaf outside sfB's leaf range */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* NULL localPointsA means sfA's leaves are contiguous starting at 0 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  /* Reduce through sfB: each sfB root receives the sfA root reachable via its (single) leaf.
     Roots left at rank == -1 have no path and are dropped below. */
  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact connected roots in place; numLeavesBA <= i so entries are never overwritten before read */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  /* PETSC_OWN_POINTER: the new SF takes ownership of localPointsBA/remotePointsBA */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1970 
/*
  PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

  Input Parameters:
. sf - The global PetscSF

  Output Parameters:
. out - The local PetscSF

  Notes:
  The caller is responsible for destroying *out. The local SF lives on PETSC_COMM_SELF and is
  already set up on return.
 */
PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
{
  MPI_Comm           comm;
  PetscMPIInt        myrank;
  const PetscInt     *ilocal;
  const PetscSFNode  *iremote;
  PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
  PetscSFNode        *liremote;
  PetscSF            lsf;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation provide an optimized version when it has one */
  if (sf->ops->CreateLocalSF) {
    ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
  } else {
    /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
    ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);

    /* Find out local edges and build a local SF */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    /* First pass: count edges whose root lives on this rank, to size the allocations */
    for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
    ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);

    /* Second pass: copy the intra-process edges, remapping the remote rank to 0 (self) */
    for (i=j=0; i<nleaves; i++) {
      if (iremote[i].rank == (PetscInt)myrank) {
        lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
        liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
        liremote[j].index = iremote[i].index;
        j++;
      }
    }
    ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
    *out = lsf;
  }
  PetscFunctionReturn(0);
}
2021 
/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0.
   Blocking (no separate Begin/End pair); errors with PETSC_ERR_SUP when the SF type
   does not provide a BcastToZero implementation. */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode     ierr;
  PetscMemType       rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* NOTE(review): both log calls use PETSCSF_BcastAndOpBegin (no dedicated BcastToZero
     event) — presumably intentional for a private helper; confirm if adding profiling */
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Detect host vs device buffers so the implementation can pick the right path */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2040 
2041