xref: /petsc/src/vec/is/sf/interface/sf.c (revision 5bd1e5768a3b141382a3ebb2e780cd2d3b3bfbf0)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petscctable.h>
4 
#if defined(PETSC_USE_DEBUG)
/* Debug-only guard: error out if sf's graph has not been set (via PetscSFSetGraph()
   or PetscSFSetGraphWithPattern()) before the calling function uses it.
   arg is the argument number of sf in the caller, used in the error message. */
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
/* Optimized builds: the check compiles away entirely */
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
13 
14 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};
15 
16 /*@
17    PetscSFCreate - create a star forest communication context
18 
19    Collective
20 
21    Input Arguments:
22 .  comm - communicator on which the star forest will operate
23 
24    Output Arguments:
25 .  sf - new star forest context
26 
27    Options Database Keys:
28 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
29 .  -sf_type window    -Use MPI-3 one-sided window for communication
30 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
31 
32    Level: intermediate
33 
34    Notes:
35    When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
36    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
37    SFs are optimized and they have better performance than general SFs.
38 
39 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
40 @*/
41 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
42 {
43   PetscErrorCode ierr;
44   PetscSF        b;
45 
46   PetscFunctionBegin;
47   PetscValidPointer(sf,2);
48   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
49 
50   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
51 
52   b->nroots    = -1;
53   b->nleaves   = -1;
54   b->minleaf   = PETSC_MAX_INT;
55   b->maxleaf   = PETSC_MIN_INT;
56   b->nranks    = -1;
57   b->rankorder = PETSC_TRUE;
58   b->ingroup   = MPI_GROUP_NULL;
59   b->outgroup  = MPI_GROUP_NULL;
60   b->graphset  = PETSC_FALSE;
61 
62   *sf = b;
63   PetscFunctionReturn(0);
64 }
65 
/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation release its own data first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Invalidate the graph: counts negative, empty leaf range */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine/remote may alias mine_alloc/remote_alloc (owned) or user memory (not owned);
     only the _alloc copies are freed here */
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  /* MPI groups are only created lazily; free them if they exist */
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  /* Force PetscSFSetUp() to run again before the next communication */
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
105 
106 /*@C
107    PetscSFSetType - Set the PetscSF communication implementation
108 
109    Collective on PetscSF
110 
111    Input Parameters:
112 +  sf - the PetscSF context
113 -  type - a known method
114 
115    Options Database Key:
116 .  -sf_type <type> - Sets the method; use -help for a list
117    of available methods (for instance, window, pt2pt, neighbor)
118 
119    Notes:
120    See "include/petscsf.h" for available methods (for instance)
121 +    PETSCSFWINDOW - MPI-2/3 one-sided
122 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
123 
124   Level: intermediate
125 
126 .seealso: PetscSFType, PetscSFCreate()
127 @*/
128 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
129 {
130   PetscErrorCode ierr,(*r)(PetscSF);
131   PetscBool      match;
132 
133   PetscFunctionBegin;
134   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
135   PetscValidCharPointer(type,2);
136 
137   ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
138   if (match) PetscFunctionReturn(0);
139 
140   ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
141   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
142   /* Destroy the previous PetscSF implementation context */
143   if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
144   ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
145   ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
146   ierr = (*r)(sf);CHKERRQ(ierr);
147   PetscFunctionReturn(0);
148 }
149 
150 /*@C
151   PetscSFGetType - Get the PetscSF communication implementation
152 
153   Not Collective
154 
155   Input Parameter:
156 . sf  - the PetscSF context
157 
158   Output Parameter:
159 . type - the PetscSF type name
160 
161   Level: intermediate
162 
163 .seealso: PetscSFSetType(), PetscSFCreate()
164 @*/
165 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
166 {
167   PetscFunctionBegin;
168   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
169   PetscValidPointer(type,2);
170   *type = ((PetscObject)sf)->type_name;
171   PetscFunctionReturn(0);
172 }
173 
/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the final reference actually destroys the object */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  /* Implementation-specific teardown before freeing the header */
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);   /* also sets *sf = NULL */
  PetscFunctionReturn(0);
}
199 
200 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
201 {
202 #if defined(PETSC_USE_DEBUG)
203   PetscInt           i, nleaves;
204   PetscMPIInt        size;
205   const PetscInt    *ilocal;
206   const PetscSFNode *iremote;
207   PetscErrorCode     ierr;
208 
209   PetscFunctionBegin;
210   if (!sf->graphset) PetscFunctionReturn(0);
211   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
212   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
213   for (i = 0; i < nleaves; i++) {
214     const PetscInt rank = iremote[i].rank;
215     const PetscInt remote = iremote[i].index;
216     const PetscInt leaf = ilocal ? ilocal[i] : i;
217     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
218     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
219     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
220   }
221   PetscFunctionReturn(0);
222 #else
223   PetscFunctionBegin;
224   PetscFunctionReturn(0);
225 #endif
226 }
227 
/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0);   /* idempotent: already set up */
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);   /* validates the graph in debug builds */
  /* Default to the basic implementation if no type was chosen */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
256 
/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
+  -sf_type - implementation type, see PetscSFSetType()
-  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise

   Level: intermediate

.seealso: PetscSFWindowSetSyncType()
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Keep the current type as the default shown in -help; fall back to basic */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
  /* PetscOptionsObject is declared by the PetscObjectOptionsBegin() macro above */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
291 
292 /*@
293    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
294 
295    Logically Collective
296 
297    Input Arguments:
298 +  sf - star forest
299 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
300 
301    Level: advanced
302 
303 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
304 @*/
305 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
306 {
307 
308   PetscFunctionBegin;
309   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
310   PetscValidLogicalCollectiveBool(sf,flg,2);
311   if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
312   sf->rankorder = flg;
313   PetscFunctionReturn(0);
314 }
315 
/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  /* Any previously set graph and derived data are discarded */
  ierr = PetscSFReset(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    /* Compute the leaf range and detect the identity permutation, which is
       equivalent to contiguous storage (ilocal == NULL) */
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* Identity indices are redundant; free them if we were given ownership */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    /* Contiguous leaves: range is [0,nleaves-1]; empty range when nleaves == 0 */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    /* mine_alloc holds memory owned by sf; mine may alias it or user memory */
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  /* Same ownership scheme for the remote (root) side */
  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
426 
/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* map is ignored: build a layout with size roots per rank (size*size global) */
    type = PETSCSFALLTOALL;
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it
       (one MPI_MAX reduction gives both max(n) and -min(n)) */
    ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves (receives the gathered vector) */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
524 
/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Arguments:

.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf
   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf advertises its own (rank,index) so the reduce tells every root
     which leaf points at it */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* rank == -1 marks roots that no leaf references */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* MPIU_REPLACE is well defined only because every root has degree at most 1 */
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;   /* dense: contiguous leaf storage in the inverse */
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* newilocal ownership transfers to the new SF; roots is copied, so free both arrays below */
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
594 
/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  /* The source may not have a type set yet; only propagate one that exists */
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graph: deep-copy the leaf/root arrays */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graph: recreate from the pattern and root layout */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Let the implementation copy any type-specific state */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
638 
639 /*@C
640    PetscSFGetGraph - Get the graph specifying a parallel star forest
641 
642    Not Collective
643 
644    Input Arguments:
645 .  sf - star forest
646 
647    Output Arguments:
648 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
649 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
650 .  ilocal - locations of leaves in leafdata buffers
651 -  iremote - remote locations of root vertices for each leaf on the current process
652 
653    Notes:
654    We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
655 
656    Level: intermediate
657 
658 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
659 @*/
660 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
661 {
662   PetscErrorCode ierr;
663 
664   PetscFunctionBegin;
665   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
666   if (sf->ops->GetGraph) {
667     ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
668   } else {
669     if (nroots) *nroots = sf->nroots;
670     if (nleaves) *nleaves = sf->nleaves;
671     if (ilocal) *ilocal = sf->mine;
672     if (iremote) *iremote = sf->remote;
673   }
674   PetscFunctionReturn(0);
675 }
676 
677 /*@
678    PetscSFGetLeafRange - Get the active leaf ranges
679 
680    Not Collective
681 
682    Input Arguments:
683 .  sf - star forest
684 
685    Output Arguments:
686 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
687 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
688 
689    Level: developer
690 
691 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
692 @*/
693 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
694 {
695 
696   PetscFunctionBegin;
697   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
698   PetscSFCheckGraphSet(sf,1);
699   if (minleaf) *minleaf = sf->minleaf;
700   if (maxleaf) *maxleaf = sf->maxleaf;
701   PetscFunctionReturn(0);
702 }
703 
/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is given */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Set up first so rank/offset data below are available */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      /* Synchronized printing keeps per-rank output ordered by rank */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      /* One line per leaf: local location <- (remote rank, remote index) */
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* Sort a copy of the rank list so output is in ascending rank order;
           perm maps sorted position back to the original index */
        PetscMPIInt *tmpranks,*perm;
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
774 
775 /*@C
776    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
777 
778    Not Collective
779 
780    Input Arguments:
781 .  sf - star forest
782 
783    Output Arguments:
784 +  nranks - number of ranks referenced by local part
785 .  ranks - array of ranks
786 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
787 .  rmine - concatenated array holding local indices referencing each remote rank
788 -  rremote - concatenated array holding remote indices referenced for each remote rank
789 
790    Level: developer
791 
792 .seealso: PetscSFGetLeafRanks()
793 @*/
794 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
795 {
796   PetscErrorCode ierr;
797 
798   PetscFunctionBegin;
799   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
800   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
801   if (sf->ops->GetRootRanks) {
802     ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
803   } else {
804     /* The generic implementation */
805     if (nranks)  *nranks  = sf->nranks;
806     if (ranks)   *ranks   = sf->ranks;
807     if (roffset) *roffset = sf->roffset;
808     if (rmine)   *rmine   = sf->rmine;
809     if (rremote) *rremote = sf->rremote;
810   }
811   PetscFunctionReturn(0);
812 }
813 
814 /*@C
815    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
816 
817    Not Collective
818 
819    Input Arguments:
820 .  sf - star forest
821 
822    Output Arguments:
823 +  niranks - number of leaf ranks referencing roots on this process
824 .  iranks - array of ranks
825 .  ioffset - offset in irootloc for each rank (length niranks+1)
826 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
827 
828    Level: developer
829 
830 .seealso: PetscSFGetRootRanks()
831 @*/
832 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
833 {
834   PetscErrorCode ierr;
835 
836   PetscFunctionBegin;
837   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
838   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
839   if (sf->ops->GetLeafRanks) {
840     ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
841   } else {
842     PetscSFType type;
843     ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
844     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
845   }
846   PetscFunctionReturn(0);
847 }
848 
849 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
850   PetscInt i;
851   for (i=0; i<n; i++) {
852     if (needle == list[i]) return PETSC_TRUE;
853   }
854   return PETSC_FALSE;
855 }
856 
/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Notes:
   Fills sf->nranks, sf->ndranks, sf->ranks, sf->roffset, sf->rmine, and sf->rremote. Distinguished
   ranks (members of dgroup) are placed first in sf->ranks, each partition sorted ascending.

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count, per remote rank, how many leaves reference it; the table maps rank -> count */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  /* Drain the table into parallel arrays ranks[] (which rank) and rcount[] (how many leaves) */
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate every member of dgroup into a rank of the SF's communicator */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  /* Two-pointer partition: sf->ndranks advances over distinguished entries from the front,
     i retreats over non-distinguished entries from the back; mismatched pairs are swapped */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt    tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition (distinguished, then the rest) ascending by rank, carrying counts along */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR-style offsets into rmine/rremote; reset rcount[] to reuse as per-rank fill cursors */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Scatter each leaf's local and remote index into the slot of its owning rank */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
960 
/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Notes:
   Both groups are computed on first use and cached on the SF (sf->ingroup, sf->outgroup);
   subsequent calls return the cached handles.

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    /* Build an auxiliary SF with one root and one leaf per connected rank, so the root's
       degree equals the number of processes whose leaves reference this process's roots */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);

    /* Enumerate the incoming ranks */
    /* Each process sends its own rank to the roots it references; gathering these at the
       single root yields the list of incoming ranks */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* The outgoing group is simply the ranks this process references, already known locally */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1024 
/*@
   PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

   The multi-SF is built on first use and cached on the SF; subsequent calls return the cached object.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* inoffset[] is the prefix sum of root degrees: multi-root i' for original root i lives in
       [inoffset[i],inoffset[i+1]) of the multi-SF root space */
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of ones assigns each leaf a unique slot within its root's multi-root range;
       outoffset[leaf] receives the slot index that leaf claimed */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
#if 0
#if defined(PETSC_USE_DEBUG)                                 /* Check that the expected number of increments occurred */
    for (i=0; i<sf->nroots; i++) {
      if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
    }
#endif
#endif
    /* Each leaf now points at its private multi-root slot on the same owner rank */
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      /* Reduce each leaf's owner rank to its multi-root so roots can see who claimed each slot */
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Broadcast the rank-sorted slot assignment back to the leaves and rebuild the graph */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1118 
1119 /*@C
1120    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
1121 
1122    Collective
1123 
1124    Input Arguments:
1125 +  sf - original star forest
1126 .  nselected  - number of selected roots on this process
1127 -  selected   - indices of the selected roots on this process
1128 
1129    Output Arguments:
1130 .  newsf - new star forest
1131 
1132    Level: advanced
1133 
1134    Note:
1135    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1136    be done by calling PetscSFGetGraph().
1137 
1138 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1139 @*/
1140 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1141 {
1142   PetscInt          i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal;
1143   const PetscSFNode *iremote;
1144   PetscSFNode       *new_iremote;
1145   PetscSF           tmpsf;
1146   MPI_Comm          comm;
1147   PetscErrorCode    ierr;
1148 
1149   PetscFunctionBegin;
1150   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1151   PetscSFCheckGraphSet(sf,1);
1152   if (nselected) PetscValidPointer(selected,3);
1153   PetscValidPointer(newsf,4);
1154 
1155   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1156 
1157   /* Uniq selected[] and put results in roots[] */
1158   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1159   ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr);
1160   ierr = PetscArraycpy(roots,selected,nselected);CHKERRQ(ierr);
1161   ierr = PetscSortedRemoveDupsInt(&nselected,roots);CHKERRQ(ierr);
1162   if (nselected && (roots[0] < 0 || roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots);
1163 
1164   if (sf->ops->CreateEmbeddedSF) {
1165     ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);CHKERRQ(ierr);
1166   } else {
1167     /* A generic version of creating embedded sf. Note that we called PetscSFSetGraph() twice, which is certainly expensive */
1168     /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */
1169     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);CHKERRQ(ierr);
1170     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);CHKERRQ(ierr);
1171     ierr = PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);CHKERRQ(ierr);
1172     ierr = PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr);
1173     for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1;
1174     ierr = PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
1175     ierr = PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
1176     ierr = PetscSFDestroy(&tmpsf);CHKERRQ(ierr);
1177 
1178     /* Build newsf with leaves that are still connected */
1179     connected_leaves = 0;
1180     for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i];
1181     ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
1182     ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
1183     for (i=0, n=0; i<nleaves; ++i) {
1184       if (leafdata[i]) {
1185         new_ilocal[n]        = sf->mine ? sf->mine[i] : i;
1186         new_iremote[n].rank  = sf->remote[i].rank;
1187         new_iremote[n].index = sf->remote[i].index;
1188         ++n;
1189       }
1190     }
1191 
1192     if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves);
1193     ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr);
1194     ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr);
1195     ierr = PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1196     ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
1197   }
1198   ierr = PetscFree(roots);CHKERRQ(ierr);
1199   PetscFunctionReturn(0);
1200 }
1201 
1202 /*@C
1203   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1204 
1205   Collective
1206 
1207   Input Arguments:
1208 + sf - original star forest
1209 . nselected  - number of selected leaves on this process
1210 - selected   - indices of the selected leaves on this process
1211 
1212   Output Arguments:
1213 .  newsf - new star forest
1214 
1215   Level: advanced
1216 
1217 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1218 @*/
1219 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1220 {
1221   const PetscSFNode *iremote;
1222   PetscSFNode       *new_iremote;
1223   const PetscInt    *ilocal;
1224   PetscInt          i,nroots,*leaves,*new_ilocal;
1225   MPI_Comm          comm;
1226   PetscErrorCode    ierr;
1227 
1228   PetscFunctionBegin;
1229   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1230   PetscSFCheckGraphSet(sf,1);
1231   if (nselected) PetscValidPointer(selected,3);
1232   PetscValidPointer(newsf,4);
1233 
1234   /* Uniq selected[] and put results in leaves[] */
1235   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1236   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1237   ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1238   ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1239   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1240 
1241   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1242   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1243     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1244   } else {
1245     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1246     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1247     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1248     for (i=0; i<nselected; ++i) {
1249       const PetscInt l     = leaves[i];
1250       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1251       new_iremote[i].rank  = iremote[l].rank;
1252       new_iremote[i].index = iremote[l].index;
1253     }
1254     ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr);
1255     ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr);
1256     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1257   }
1258   ierr = PetscFree(leaves);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 /*@C
1263    PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()
1264 
1265    Collective on PetscSF
1266 
1267    Input Arguments:
1268 +  sf - star forest on which to communicate
1269 .  unit - data type associated with each node
1270 .  rootdata - buffer to broadcast
1271 -  op - operation to use for reduction
1272 
1273    Output Arguments:
1274 .  leafdata - buffer to be reduced with values from each leaf's respective root
1275 
1276    Level: intermediate
1277 
1278 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
1279 @*/
1280 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1281 {
1282   PetscErrorCode ierr;
1283 
1284   PetscFunctionBegin;
1285   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1286   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1287   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1288   ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
1289   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 /*@C
1294    PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()
1295 
1296    Collective
1297 
1298    Input Arguments:
1299 +  sf - star forest
1300 .  unit - data type
1301 .  rootdata - buffer to broadcast
1302 -  op - operation to use for reduction
1303 
1304    Output Arguments:
1305 .  leafdata - buffer to be reduced with values from each leaf's respective root
1306 
1307    Level: intermediate
1308 
1309 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1310 @*/
1311 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1312 {
1313   PetscErrorCode ierr;
1314 
1315   PetscFunctionBegin;
1316   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1317   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1318   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1319   ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
1320   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 /*@C
1325    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1326 
1327    Collective
1328 
1329    Input Arguments:
1330 +  sf - star forest
1331 .  unit - data type
1332 .  leafdata - values to reduce
1333 -  op - reduction operation
1334 
1335    Output Arguments:
1336 .  rootdata - result of reduction of values from all leaves of each root
1337 
1338    Level: intermediate
1339 
1340 .seealso: PetscSFBcastBegin()
1341 @*/
1342 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1343 {
1344   PetscErrorCode ierr;
1345 
1346   PetscFunctionBegin;
1347   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1348   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1349   ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1350   ierr = (sf->ops->ReduceBegin)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1351   ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 /*@C
1356    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1357 
1358    Collective
1359 
1360    Input Arguments:
1361 +  sf - star forest
1362 .  unit - data type
1363 .  leafdata - values to reduce
1364 -  op - reduction operation
1365 
1366    Output Arguments:
1367 .  rootdata - result of reduction of values from all leaves of each root
1368 
1369    Level: intermediate
1370 
1371 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1372 @*/
1373 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1374 {
1375   PetscErrorCode ierr;
1376 
1377   PetscFunctionBegin;
1378   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1379   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1380   ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1381   ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1382   ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1383   PetscFunctionReturn(0);
1384 }
1385 
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

   The degree is computed by reducing a value of 1 from every leaf into its root with MPI_SUM;
   *degree is set to NULL here and only becomes valid after PetscSFComputeDegreeEnd().

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1;  /* TODO: We should use PetscSFGetLeafRange() */
    /* sf->degree non-NULL with degreeknown false means a Begin is already in flight */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Sum a 1 from every leaf into its root; completed in PetscSFComputeDegreeEnd() */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  /* Result is not available until the matching End; always hand back NULL here */
  *degree = NULL;
  PetscFunctionReturn(0);
}
1424 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

   Once computed, the degree is cached on the SF and subsequent Begin/End pairs return the cached array.

.seealso:
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1460 
1461 
1462 /*@C
1463    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1464    Each multi-root is assigned index of the corresponding original root.
1465 
1466    Collective
1467 
1468    Input Arguments:
1469 +  sf - star forest
1470 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1471 
1472    Output Arguments:
1473 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1474 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1475 
1476    Level: developer
1477 
1478    Notes:
1479    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1480 
1481 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1482 @*/
1483 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1484 {
1485   PetscSF             msf;
1486   PetscInt            i, j, k, nroots, nmroots;
1487   PetscErrorCode      ierr;
1488 
1489   PetscFunctionBegin;
1490   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1491   ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
1492   if (nroots) PetscValidIntPointer(degree,2);
1493   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1494   PetscValidPointer(multiRootsOrigNumbering,4);
1495   ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
1496   ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
1497   ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
1498   for (i=0,j=0,k=0; i<nroots; i++) {
1499     if (!degree[i]) continue;
1500     for (j=0; j<degree[i]; j++,k++) {
1501       (*multiRootsOrigNumbering)[k] = i;
1502     }
1503   }
1504 #if defined(PETSC_USE_DEBUG)
1505   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1506 #endif
1507   if (nMultiRoots) *nMultiRoots = nmroots;
1508   PetscFunctionReturn(0);
1509 }
1510 
/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr); /* make sure the communication pattern is built before dispatching */
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Dispatch to the type-specific implementation (basic, window, neighbor, ...) */
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1548 
/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Complete the communication started by the matching FetchAndOpBegin */
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1580 
/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* The multi-SF has one root slot per (root,leaf) edge, so a REPLACE reduce on it
     collects every leaf value without combining them -- i.e. a pointwise gather. */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1610 
/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Finish the REPLACE reduce on the multi-SF started by PetscSFGatherBegin() */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1640 
/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* A Bcast on the multi-SF sends each leaf its own dedicated root slot,
     giving every leaf a distinct ("personal") value -- i.e. a pointwise scatter. */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1670 
/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Finish the Bcast on the multi-SF started by PetscSFScatterBegin() */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1700 
1701 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1702 {
1703 #if defined(PETSC_USE_DEBUG)
1704   PetscInt        i, n, nleaves;
1705   const PetscInt *ilocal = NULL;
1706   PetscHSetI      seen;
1707   PetscErrorCode  ierr;
1708 
1709   PetscFunctionBegin;
1710   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
1711   ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
1712   for (i = 0; i < nleaves; i++) {
1713     const PetscInt leaf = ilocal ? ilocal[i] : i;
1714     ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
1715   }
1716   ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
1717   if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1718   ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
1719   PetscFunctionReturn(0);
1720 #else
1721   PetscFunctionBegin;
1722   PetscFunctionReturn(0);
1723 #endif
1724 }
/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF, whose local space may be a permutation, but can not be sparse.
- sfB - The second PetscSF, whose number of roots must be equal to number of leaves of sfA on each processor

  Output Parameters:
. sfBA - The composite SF. Doing a Bcast on the new SF is equivalent to doing Bcast on sfA, then Bcast on sfB

  Level: developer

  Notes:
  For the resulting composed SF to be valid, the input SFs must be true star forests: the leaves must be unique. This is
  checked in debug mode.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  MPI_Comm          comm;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB,*localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscObjectGetComm((PetscObject)sfA,&comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);CHKERRQ(ierr);
  if (numRootsB != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of roots must be equal to the first SF's number of leaves");
  if (maxleaf+1 != numLeavesA || minleaf) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF can not have sparse local space");
  /* The above check is fast, but not sufficient, since we cannot guarantee that the SF has unique leaves. So in debug
   mode, check properly. */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  if (localPointsA) {
    /* Local space is dense permutation of identity. Need to rewire order of the remote points */
    ierr = PetscMalloc1(numLeavesA,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numLeavesA; i++) reorderedRemotePointsA[localPointsA[i]-minleaf] = remotePointsA[i];
    remotePointsA = reorderedRemotePointsA;
  }
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  /* leafdataB-minleaf shifts the buffer so it can be indexed by sfB's (possibly offset) leaf numbers;
     each (rank,index) pair of sfA (as MPIU_2INT) is broadcast down sfB to its leaves */
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* sfB's leaves must be unique, otherwise BcastAndOp(B o A) != BcastAndOp(B) o BcastAndOp(A) */
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
  if (minleaf == 0 && maxleaf + 1 == numLeavesB) { /* Local space of sfB is an identity or permutation */
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB transfers to the new SF below */
  } else {
    localPointsBA  = localPointsB;
    ierr = PetscMalloc1(numLeavesB,&remotePointsBA);CHKERRQ(ierr);
    for (i=0; i<numLeavesB; i++) remotePointsBA[i] = leafdataB[localPointsB[i]-minleaf];
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(comm, sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesB,localPointsBA,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1794 
/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF, whose local space may be a permutation, but can not be sparse.
- sfB - The second PetscSF, whose number of leaves must be equal to number of leaves of sfA on each processor. All roots must have degree 1.

  Output Parameters:
. sfBA - The composite SF. Doing a Bcast on the new SF is equivalent to doing Bcast on sfA, then Bcast on inverse of sfB

  Level: developer

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  MPI_Comm          comm;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscObjectGetComm((PetscObject) sfA, &comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
  ierr = PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);CHKERRQ(ierr);
  if (maxleaf+1-minleaf != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF can not have sparse local space");
  if (numLeavesA != numLeavesB) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of leaves must be equal to the first SF's number of leaves");
  /* Reducing sfA's remote points (as MPIU_2INT) up sfB with REPLACE deposits, at each root of sfB,
     the (rank,index) target of the corresponding leaf of sfA -- effectively applying sfB's inverse.
     REPLACE is well-defined here because all roots of sfB are required to have degree 1. */
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numRootsB,NULL,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1837 
1838 /*
1839   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
1840 
1841   Input Parameters:
1842 . sf - The global PetscSF
1843 
1844   Output Parameters:
1845 . out - The local PetscSF
1846  */
1847 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
1848 {
1849   MPI_Comm           comm;
1850   PetscMPIInt        myrank;
1851   const PetscInt     *ilocal;
1852   const PetscSFNode  *iremote;
1853   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
1854   PetscSFNode        *liremote;
1855   PetscSF            lsf;
1856   PetscErrorCode     ierr;
1857 
1858   PetscFunctionBegin;
1859   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1860   if (sf->ops->CreateLocalSF) {
1861     ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
1862   } else {
1863     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
1864     ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1865     ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);
1866 
1867     /* Find out local edges and build a local SF */
1868     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
1869     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
1870     ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
1871     ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);
1872 
1873     for (i=j=0; i<nleaves; i++) {
1874       if (iremote[i].rank == (PetscInt)myrank) {
1875         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
1876         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
1877         liremote[j].index = iremote[i].index;
1878         j++;
1879       }
1880     }
1881     ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
1882     ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1883     ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
1884     *out = lsf;
1885   }
1886   PetscFunctionReturn(0);
1887 }
1888 
/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0.
   Blocking (combines Begin/End); errors with PETSC_ERR_SUP when the SF type
   does not implement BcastToZero. Logged under the BcastAndOpBegin event. */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootdata,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1904 
1905