xref: /petsc/src/vec/is/sf/interface/sf.c (revision 2da392cc7c10228af19ad9843ce5155178acb644)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petscctable.h>
4 
/* PetscSFCheckGraphSet(sf,arg) - in debug builds, error out if sf's graph has not
   been set with PetscSFSetGraph()/PetscSFSetGraphWithPattern(); arg is the 1-based
   argument position reported in the message. Compiles to a no-op otherwise. */
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
13 
14 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
15 
16 /*@
17    PetscSFCreate - create a star forest communication context
18 
19    Collective
20 
21    Input Arguments:
22 .  comm - communicator on which the star forest will operate
23 
24    Output Arguments:
25 .  sf - new star forest context
26 
27    Options Database Keys:
28 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
29 .  -sf_type window    -Use MPI-3 one-sided window for communication
30 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
31 
32    Level: intermediate
33 
34    Notes:
35    When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
36    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
37    SFs are optimized and they have better performance than general SFs.
38 
39 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
40 @*/
41 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
42 {
43   PetscErrorCode ierr;
44   PetscSF        b;
45 
46   PetscFunctionBegin;
47   PetscValidPointer(sf,2);
48   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
49 
50   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
51 
52   b->nroots    = -1;
53   b->nleaves   = -1;
54   b->minleaf   = PETSC_MAX_INT;
55   b->maxleaf   = PETSC_MIN_INT;
56   b->nranks    = -1;
57   b->rankorder = PETSC_TRUE;
58   b->ingroup   = MPI_GROUP_NULL;
59   b->outgroup  = MPI_GROUP_NULL;
60   b->graphset  = PETSC_FALSE;
61 
62   *sf = b;
63   PetscFunctionReturn(0);
64 }
65 
/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Give the implementation a chance to release its own resources first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Return the graph description to the "unset" state established by PetscSFCreate() */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine/remote above may alias these owned buffers; free the buffers after clearing the aliases */
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  /* ranks/roffset/rmine/rremote were allocated together in one PetscMalloc4 */
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  {
  PetscInt  i;
  /* Release the device-side rmine buffers and clear the pointers so a later setup re-allocates */
  for (i=0; i<2; i++) {if (sf->rmine_d[i]) {cudaError_t err = cudaFree(sf->rmine_d[i]);CHKERRCUDA(err);sf->rmine_d[i]=NULL;}}
  }
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  /* The multi-SF and layout are derived objects; destroying them forces recreation on demand */
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
111 
/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, basic, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

  Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* No-op if the SF already has the requested type */
  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  /* Look up the registered constructor before tearing anything down */
  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  /* Clear the ops table so the new type only provides what it implements */
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  /* Run the new type's constructor */
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
155 
156 /*@C
157   PetscSFGetType - Get the PetscSF communication implementation
158 
159   Not Collective
160 
161   Input Parameter:
162 . sf  - the PetscSF context
163 
164   Output Parameter:
165 . type - the PetscSF type name
166 
167   Level: intermediate
168 
169 .seealso: PetscSFSetType(), PetscSFCreate()
170 @*/
171 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
172 {
173   PetscFunctionBegin;
174   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
175   PetscValidPointer(type,2);
176   *type = ((PetscObject)sf)->type_name;
177   PetscFunctionReturn(0);
178 }
179 
/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually destroys; earlier ones just NULL the caller's pointer */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  /* Release graph/rank data, then the implementation's context, then the header (which NULLs *sf) */
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
205 
206 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
207 {
208   PetscInt           i, nleaves;
209   PetscMPIInt        size;
210   const PetscInt    *ilocal;
211   const PetscSFNode *iremote;
212   PetscErrorCode     ierr;
213 
214   PetscFunctionBegin;
215   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
216   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
217   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
218   for (i = 0; i < nleaves; i++) {
219     const PetscInt rank = iremote[i].rank;
220     const PetscInt remote = iremote[i].index;
221     const PetscInt leaf = ilocal ? ilocal[i] : i;
222     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
223     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
224     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
225   }
226   PetscFunctionReturn(0);
227 }
228 
/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  /* Idempotent: a second call is a no-op */
  if (sf->setupcalled) PetscFunctionReturn(0);
  /* Validate ranks/indices in debug mode before building communication structures */
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  sf->use_gpu_aware_mpi = use_gpu_aware_mpi; /* snapshot the global flag at setup time */
  /* Default the implementation if the user never chose one */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
258 
/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
+  -sf_type               - implementation type, see PetscSFSetType()
.  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
.  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
                            use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
                            If true, this option only works with -use_cuda_aware_mpi 1.
-  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                               If true, this option only works with -use_cuda_aware_mpi 1.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* NOTE: PetscObjectOptionsBegin declares PetscOptionsObject, used by the ops->SetFromOptions call below */
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Keep the current type if one is set, otherwise offer BASIC as the default */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);

#if defined(PETSC_HAVE_CUDA)
  sf->use_default_stream = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
  ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr);
  sf->use_stream_aware_mpi = PETSC_FALSE;
  ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
#endif
  /* Let the implementation add its own options to the same option block */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
303 
304 /*@
305    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
306 
307    Logically Collective
308 
309    Input Arguments:
310 +  sf - star forest
311 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
312 
313    Level: advanced
314 
315 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
316 @*/
317 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
318 {
319   PetscFunctionBegin;
320   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
321   PetscValidLogicalCollectiveBool(sf,flg,2);
322   if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
323   sf->rankorder = flg;
324   PetscFunctionReturn(0);
325 }
326 
/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    /* Compute the leaf range and detect the identity permutation in one pass */
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* Identity ilocal is equivalent to NULL (contiguous storage); drop it, freeing if we own it */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    /* Contiguous leaves: range is [0,nleaves-1] (empty, i.e. [0,-1], when nleaves==0) */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    /* mine_alloc is the buffer we own (if any); mine is what we actually read from */
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Caller retains ownership; sf must not outlive the array */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  /* Same ownership scheme for the remote side */
  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
440 
/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* ALLTOALL ignores the caller's map; build a size-per-rank layout internally */
    type = PETSCSFALLTOALL;
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it: a single
       MPI_MAX reduction yields both max(n) and -min(n); they agree iff all n are equal */
    ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      /* Unequal local sizes need the "v" (variable-count) variants */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
538 
/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Arguments:

.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf
   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf advertises its own (rank,location) ... */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* ... while roots start "unclaimed" so un-reduced entries are detectable below */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Reduce leaf identities onto their roots; with degree 1 each root receives exactly one */
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* Build the inverse: old roots become leaves, communicator/type copied from sf.
     newilocal ownership transfers to *isf (PETSC_OWN_POINTER); roots is copied and freed here. */
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
608 
/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  /* Copy the type if one was set; an untyped sf yields an untyped duplicate */
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graphs are deep-copied */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graphs are regenerated from the pattern and (referenced) layout */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Let the implementation copy its own state (respecting opt) */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
652 
653 /*@C
654    PetscSFGetGraph - Get the graph specifying a parallel star forest
655 
656    Not Collective
657 
658    Input Arguments:
659 .  sf - star forest
660 
661    Output Arguments:
662 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
663 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
664 .  ilocal - locations of leaves in leafdata buffers
665 -  iremote - remote locations of root vertices for each leaf on the current process
666 
667    Notes:
668    We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
669 
670    When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
671    want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.
672 
673    Level: intermediate
674 
675 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
676 @*/
677 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
678 {
679   PetscErrorCode ierr;
680 
681   PetscFunctionBegin;
682   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
683   if (sf->ops->GetGraph) {
684     ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
685   } else {
686     if (nroots) *nroots = sf->nroots;
687     if (nleaves) *nleaves = sf->nleaves;
688     if (ilocal) *ilocal = sf->mine;
689     if (iremote) *iremote = sf->remote;
690   }
691   PetscFunctionReturn(0);
692 }
693 
694 /*@
695    PetscSFGetLeafRange - Get the active leaf ranges
696 
697    Not Collective
698 
699    Input Arguments:
700 .  sf - star forest
701 
702    Output Arguments:
703 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
704 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
705 
706    Level: developer
707 
708 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
709 @*/
710 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
711 {
712   PetscFunctionBegin;
713   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
714   PetscSFCheckGraphSet(sf,1);
715   if (minleaf) *minleaf = sf->minleaf;
716   if (maxleaf) *maxleaf = sf->maxleaf;
717   PetscFunctionReturn(0);
718 }
719 
720 /*@C
721    PetscSFViewFromOptions - View from Options
722 
723    Collective on PetscSF
724 
725    Input Parameters:
726 +  A - the star forest
727 .  obj - Optional object
728 -  name - command line option
729 
730    Level: intermediate
731 .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
732 @*/
733 PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
734 {
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
739   ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the sf's communicator when no viewer is given */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Ensure rank data exists before printing it, but only if a graph was set */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      /* Synchronized printing keeps per-rank output ordered by rank */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* Detailed view: list roots referenced by this rank's leaves, sorted by remote rank.
           Sort a copy of sf->ranks with a permutation so the stored order is untouched. */
        PetscMPIInt *tmpranks,*perm;
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
813 
814 /*@C
815    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
816 
817    Not Collective
818 
819    Input Arguments:
820 .  sf - star forest
821 
822    Output Arguments:
823 +  nranks - number of ranks referenced by local part
824 .  ranks - array of ranks
825 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
826 .  rmine - concatenated array holding local indices referencing each remote rank
827 -  rremote - concatenated array holding remote indices referenced for each remote rank
828 
829    Level: developer
830 
831 .seealso: PetscSFGetLeafRanks()
832 @*/
833 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
834 {
835   PetscErrorCode ierr;
836 
837   PetscFunctionBegin;
838   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
839   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
840   if (sf->ops->GetRootRanks) {
841     ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
842   } else {
843     /* The generic implementation */
844     if (nranks)  *nranks  = sf->nranks;
845     if (ranks)   *ranks   = sf->ranks;
846     if (roffset) *roffset = sf->roffset;
847     if (rmine)   *rmine   = sf->rmine;
848     if (rremote) *rremote = sf->rremote;
849   }
850   PetscFunctionReturn(0);
851 }
852 
853 /*@C
854    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
855 
856    Not Collective
857 
858    Input Arguments:
859 .  sf - star forest
860 
861    Output Arguments:
862 +  niranks - number of leaf ranks referencing roots on this process
863 .  iranks - array of ranks
864 .  ioffset - offset in irootloc for each rank (length niranks+1)
865 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
866 
867    Level: developer
868 
869 .seealso: PetscSFGetRootRanks()
870 @*/
871 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
872 {
873   PetscErrorCode ierr;
874 
875   PetscFunctionBegin;
876   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
877   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
878   if (sf->ops->GetLeafRanks) {
879     ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
880   } else {
881     PetscSFType type;
882     ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
883     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
884   }
885   PetscFunctionReturn(0);
886 }
887 
888 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
889   PetscInt i;
890   for (i=0; i<n; i++) {
891     if (needle == list[i]) return PETSC_TRUE;
892   }
893   return PETSC_FALSE;
894 }
895 
/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  /* Count the leaves referencing each remote rank using a hash table keyed on the rank */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    /* ranks[i] receives a referenced rank; rcount[i] the number of leaves referencing it */
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    /* Translate dgroup's members into rank numbers of sf's communicator, stored in groupranks[] */
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  /* Two-pointer pass: i scans down for a distinguished rank, sf->ndranks scans up for a
     non-distinguished one, then the pair is swapped (rcount[] is swapped alongside) */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition independently so each is in increasing rank order */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR-style offsets into rmine/rremote; rcount[] is zeroed and reused below as a per-rank fill cursor */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Bucket each leaf into its rank's segment; orank caches the previous rank so
     consecutive leaves to the same rank skip the binary searches */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
999 
/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  /* Both groups are computed lazily and cached on the SF (sf->ingroup / sf->outgroup) */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    /* Auxiliary SF: one leaf per referenced rank, each pointing at root 0 on that rank,
       so indegree[0] counts how many processes reference me */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks */
    /* Each process sends its own rank; the gather collects the ranks of all referencing processes */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing group is simply the already-computed list of referenced root ranks */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1062 
/*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {             /* Computed lazily and cached in sf->multi */
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* inoffset[] is the prefix sum of the indegrees: inoffset[i] is the first multi-root slot belonging to root i */
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands each leaf a distinct slot number (outoffset) within its root's range */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
    if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
      for (i=0; i<sf->nroots; i++) {
        if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
      }
    }
    /* Retarget each leaf at its private multi-root slot */
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Deliver each leaf's owning rank into its multi-root slot */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its new (rank-sorted) slot index, then rebuild the graph with it */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1154 
1155 /*@C
1156    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
1157 
1158    Collective
1159 
1160    Input Arguments:
1161 +  sf - original star forest
1162 .  nselected  - number of selected roots on this process
1163 -  selected   - indices of the selected roots on this process
1164 
1165    Output Arguments:
1166 .  esf - new star forest
1167 
1168    Level: advanced
1169 
1170    Note:
1171    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1172    be done by calling PetscSFGetGraph().
1173 
1174 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1175 @*/
1176 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1177 {
1178   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1179   const PetscInt    *ilocal;
1180   signed char       *rootdata,*leafdata,*leafmem;
1181   const PetscSFNode *iremote;
1182   PetscSFNode       *new_iremote;
1183   MPI_Comm          comm;
1184   PetscErrorCode    ierr;
1185 
1186   PetscFunctionBegin;
1187   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1188   PetscSFCheckGraphSet(sf,1);
1189   if (nselected) PetscValidPointer(selected,3);
1190   PetscValidPointer(esf,4);
1191 
1192   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1193 
1194   ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1195   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1196   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
1197 
1198   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1199 
1200     PetscBool dups;
1201     ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr);
1202     if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1203     for (i=0; i<nselected; i++)
1204       if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots);
1205   }
1206 
1207   if (sf->ops->CreateEmbeddedSF) {
1208     ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr);
1209   } else {
1210     /* A generic version of creating embedded sf */
1211     ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
1212     maxlocal = maxleaf - minleaf + 1;
1213     ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
1214     leafdata = leafmem - minleaf;
1215     /* Tag selected roots and bcast to leaves */
1216     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1217     ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
1218     ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
1219 
1220     /* Build esf with leaves that are still connected */
1221     esf_nleaves = 0;
1222     for (i=0; i<nleaves; i++) {
1223       j = ilocal ? ilocal[i] : i;
1224       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1225          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1226       */
1227       esf_nleaves += (leafdata[j] ? 1 : 0);
1228     }
1229     ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
1230     ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
1231     for (i=n=0; i<nleaves; i++) {
1232       j = ilocal ? ilocal[i] : i;
1233       if (leafdata[j]) {
1234         new_ilocal[n]        = j;
1235         new_iremote[n].rank  = iremote[i].rank;
1236         new_iremote[n].index = iremote[i].index;
1237         ++n;
1238       }
1239     }
1240     ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr);
1241     ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr);
1242     ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1243     ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
1244   }
1245   ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 /*@C
1250   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1251 
1252   Collective
1253 
1254   Input Arguments:
1255 + sf - original star forest
1256 . nselected  - number of selected leaves on this process
1257 - selected   - indices of the selected leaves on this process
1258 
1259   Output Arguments:
1260 .  newsf - new star forest
1261 
1262   Level: advanced
1263 
1264 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1265 @*/
1266 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1267 {
1268   const PetscSFNode *iremote;
1269   PetscSFNode       *new_iremote;
1270   const PetscInt    *ilocal;
1271   PetscInt          i,nroots,*leaves,*new_ilocal;
1272   MPI_Comm          comm;
1273   PetscErrorCode    ierr;
1274 
1275   PetscFunctionBegin;
1276   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1277   PetscSFCheckGraphSet(sf,1);
1278   if (nselected) PetscValidPointer(selected,3);
1279   PetscValidPointer(newsf,4);
1280 
1281   /* Uniq selected[] and put results in leaves[] */
1282   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1283   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1284   ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1285   ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1286   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1287 
1288   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1289   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1290     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1291   } else {
1292     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1293     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1294     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1295     for (i=0; i<nselected; ++i) {
1296       const PetscInt l     = leaves[i];
1297       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1298       new_iremote[i].rank  = iremote[l].rank;
1299       new_iremote[i].index = iremote[l].index;
1300     }
1301     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
1302     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1303   }
1304   ierr = PetscFree(leaves);CHKERRQ(ierr);
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 /*@C
1309    PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()
1310 
1311    Collective on PetscSF
1312 
1313    Input Arguments:
1314 +  sf - star forest on which to communicate
1315 .  unit - data type associated with each node
1316 .  rootdata - buffer to broadcast
1317 -  op - operation to use for reduction
1318 
1319    Output Arguments:
1320 .  leafdata - buffer to be reduced with values from each leaf's respective root
1321 
1322    Level: intermediate
1323 
1324 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
1325 @*/
1326 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1327 {
1328   PetscErrorCode ierr;
1329   PetscMemType   rootmtype,leafmtype;
1330 
1331   PetscFunctionBegin;
1332   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1333   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1334   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1335   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1336   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1337   ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1338   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 /*@C
1343    PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()
1344 
1345    Collective
1346 
1347    Input Arguments:
1348 +  sf - star forest
1349 .  unit - data type
1350 .  rootdata - buffer to broadcast
1351 -  op - operation to use for reduction
1352 
1353    Output Arguments:
1354 .  leafdata - buffer to be reduced with values from each leaf's respective root
1355 
1356    Level: intermediate
1357 
1358 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1359 @*/
1360 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1361 {
1362   PetscErrorCode ierr;
1363 
1364   PetscFunctionBegin;
1365   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1366   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1367   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1368   ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
1369   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 /*@C
1374    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1375 
1376    Collective
1377 
1378    Input Arguments:
1379 +  sf - star forest
1380 .  unit - data type
1381 .  leafdata - values to reduce
1382 -  op - reduction operation
1383 
1384    Output Arguments:
1385 .  rootdata - result of reduction of values from all leaves of each root
1386 
1387    Level: intermediate
1388 
1389 .seealso: PetscSFBcastBegin()
1390 @*/
1391 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1392 {
1393   PetscErrorCode ierr;
1394   PetscMemType   rootmtype,leafmtype;
1395 
1396   PetscFunctionBegin;
1397   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1398   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1399   ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1400   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1401   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1402   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1403   ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 /*@C
1408    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1409 
1410    Collective
1411 
1412    Input Arguments:
1413 +  sf - star forest
1414 .  unit - data type
1415 .  leafdata - values to reduce
1416 -  op - reduction operation
1417 
1418    Output Arguments:
1419 .  rootdata - result of reduction of values from all leaves of each root
1420 
1421    Level: intermediate
1422 
1423 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1424 @*/
1425 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1426 {
1427   PetscErrorCode ierr;
1428 
1429   PetscFunctionBegin;
1430   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1431   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1432   ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1433   ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1434   ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 /*@C
1439    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1440 
1441    Collective
1442 
1443    Input Arguments:
1444 +  sf - star forest
1445 .  unit - data type
1446 .  leafdata - leaf values to use in reduction
1447 -  op - operation to use for reduction
1448 
1449    Output Arguments:
1450 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1451 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1452 
1453    Level: advanced
1454 
1455    Note:
1456    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1457    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1458    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1459    integers.
1460 
1461 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1462 @*/
1463 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1464 {
1465   PetscErrorCode ierr;
1466   PetscMemType   rootmtype,leafmtype,leafupdatemtype;
1467 
1468   PetscFunctionBegin;
1469   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1470   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1471   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1472   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1473   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1474   ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
1475   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1476   ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
1477   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 /*@C
1482    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1483 
1484    Collective
1485 
1486    Input Arguments:
1487 +  sf - star forest
1488 .  unit - data type
1489 .  leafdata - leaf values to use in reduction
1490 -  op - operation to use for reduction
1491 
1492    Output Arguments:
1493 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1494 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1495 
1496    Level: advanced
1497 
1498 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1499 @*/
1500 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1501 {
1502   PetscErrorCode ierr;
1503 
1504   PetscFunctionBegin;
1505   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1506   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1507   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1508   ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
1509   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1510   PetscFunctionReturn(0);
1511 }
1512 
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
   *degree is set to NULL here; the actual array becomes available from PetscSFComputeDegreeEnd().

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree doubles as the "a degree computation is in flight" flag */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Reduce a 1 from every leaf into its root: the sum at each root is its degree.
       degreetmp is shifted by minleaf so the reduce sees a buffer indexed from leaf 0. */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  *degree = NULL; /* result is only delivered by PetscSFComputeDegreeEnd() */
  PetscFunctionReturn(0);
}
1552 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr); /* scratch buffer no longer needed once the reduction completed */
    sf->degreeknown = PETSC_TRUE; /* cache the result; subsequent Begin/End pairs are no-ops */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1588 
1589 
1590 /*@C
1591    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1592    Each multi-root is assigned index of the corresponding original root.
1593 
1594    Collective
1595 
1596    Input Arguments:
1597 +  sf - star forest
1598 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1599 
1600    Output Arguments:
1601 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1602 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1603 
1604    Level: developer
1605 
1606    Notes:
1607    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1608 
1609 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1610 @*/
1611 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1612 {
1613   PetscSF             msf;
1614   PetscInt            i, j, k, nroots, nmroots;
1615   PetscErrorCode      ierr;
1616 
1617   PetscFunctionBegin;
1618   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1619   ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
1620   if (nroots) PetscValidIntPointer(degree,2);
1621   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1622   PetscValidPointer(multiRootsOrigNumbering,4);
1623   ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
1624   ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
1625   ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
1626   for (i=0,j=0,k=0; i<nroots; i++) {
1627     if (!degree[i]) continue;
1628     for (j=0; j<degree[i]; j++,k++) {
1629       (*multiRootsOrigNumbering)[k] = i;
1630     }
1631   }
1632   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1633   if (nMultiRoots) *nMultiRoots = nmroots;
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 /*@C
1638    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1639 
1640    Collective
1641 
1642    Input Arguments:
1643 +  sf - star forest
1644 .  unit - data type
1645 -  leafdata - leaf data to gather to roots
1646 
1647    Output Argument:
1648 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1649 
1650    Level: intermediate
1651 
1652 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1653 @*/
1654 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1655 {
1656   PetscErrorCode ierr;
1657   PetscSF        multi = NULL;
1658 
1659   PetscFunctionBegin;
1660   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1661   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1662   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1663   ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 /*@C
1668    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1669 
1670    Collective
1671 
1672    Input Arguments:
1673 +  sf - star forest
1674 .  unit - data type
1675 -  leafdata - leaf data to gather to roots
1676 
1677    Output Argument:
1678 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1679 
1680    Level: intermediate
1681 
1682 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1683 @*/
1684 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1685 {
1686   PetscErrorCode ierr;
1687   PetscSF        multi = NULL;
1688 
1689   PetscFunctionBegin;
1690   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1691   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1692   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1693   ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1694   PetscFunctionReturn(0);
1695 }
1696 
/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd(), PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* A scatter is a broadcast over the multi-SF: each multi-root slot goes to its own leaf */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1726 
/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin(), PetscSFGatherEnd()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Complete the multi-SF broadcast started by PetscSFScatterBegin() */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1756 
1757 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1758 {
1759   PetscInt        i, n, nleaves;
1760   const PetscInt *ilocal = NULL;
1761   PetscHSetI      seen;
1762   PetscErrorCode  ierr;
1763 
1764   PetscFunctionBegin;
1765   if (!PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
1766   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
1767   ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
1768   for (i = 0; i < nleaves; i++) {
1769     const PetscInt leaf = ilocal ? ilocal[i] : i;
1770     ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
1771   }
1772   ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
1773   if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1774   ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 
/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* sfA has an explicit leaf numbering: scatter its remote points into an array indexed
       by sfB's root numbering; slots with no incoming edge keep the sentinel rank/index -1 */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      /* a leaf of sfA outside sfB's root space cannot lie on an sfA-root -> sfB-leaf path */
      if (localPointsA[i] >= numRootsB) continue;
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Broadcast the (rank,index) location of each sfA root down sfB's edges; MPIU_2INT matches
     the two-PetscInt layout of PetscSFNode. leafdataB is shifted by -minleaf so the Bcast can
     index it with global leaf numbers. */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: sfB uses contiguous leaf storage AND every leaf received a root location,
     so leafdataB can be handed to the composite SF as-is with ilocal == NULL */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership passes to the new SF below */
  } else {
    /* Compress out the leaves that never received a root (sentinel rank -1) */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1866 
/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not meet.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPIU_REPLACE;
#endif

  /* Spread sfA's remote points into an array covering sfB's dense leaf range [minleaf,maxleaf];
     slots with no corresponding sfA leaf keep the sentinel rank/index -1 */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      /* sfA leaves outside sfB's leaf range cannot be part of any path */
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* contiguous leaf storage: leaf i of sfA is leaf number i */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce sfB leaf values up to its roots: each sfB root receives the location of the sfA
     root reachable through it (if any); MPIU_2INT matches the two-PetscInt PetscSFNode layout */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place, dropping sfB roots that received nothing (sentinel rank -1) */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1969 
1970 /*
1971   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
1972 
1973   Input Parameters:
1974 . sf - The global PetscSF
1975 
1976   Output Parameters:
1977 . out - The local PetscSF
1978  */
1979 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
1980 {
1981   MPI_Comm           comm;
1982   PetscMPIInt        myrank;
1983   const PetscInt     *ilocal;
1984   const PetscSFNode  *iremote;
1985   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
1986   PetscSFNode        *liremote;
1987   PetscSF            lsf;
1988   PetscErrorCode     ierr;
1989 
1990   PetscFunctionBegin;
1991   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1992   if (sf->ops->CreateLocalSF) {
1993     ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
1994   } else {
1995     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
1996     ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1997     ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);
1998 
1999     /* Find out local edges and build a local SF */
2000     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
2001     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2002     ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
2003     ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);
2004 
2005     for (i=j=0; i<nleaves; i++) {
2006       if (iremote[i].rank == (PetscInt)myrank) {
2007         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2008         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2009         liremote[j].index = iremote[i].index;
2010         j++;
2011       }
2012     }
2013     ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
2014     ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
2015     ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
2016     *out = lsf;
2017   }
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2022 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2023 {
2024   PetscErrorCode     ierr;
2025   PetscMemType       rootmtype,leafmtype;
2026 
2027   PetscFunctionBegin;
2028   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2029   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
2030   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
2031   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
2032   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
2033   if (sf->ops->BcastToZero) {
2034     ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
2035   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2036   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
2037   PetscFunctionReturn(0);
2038 }
2039 
2040