xref: /petsc/src/vec/is/sf/interface/sf.c (revision 00816365229eff0fcb3567887feb4c4abe2b0e86)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petscctable.h>
4 
5 #if defined(PETSC_USE_DEBUG)
6 #  define PetscSFCheckGraphSet(sf,arg) do {                          \
7     if (PetscUnlikely(!(sf)->graphset))                              \
8       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
9   } while (0)
10 #else
11 #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
12 #endif
13 
/* Human-readable names for PetscSFDuplicateOption values, in enum order, followed by
   the enum type name, the value prefix, and a NULL terminator (PETSc options-list
   convention). Use NULL rather than 0 for the terminating null pointer. */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
15 
16 /*@
17    PetscSFCreate - create a star forest communication context
18 
19    Collective
20 
21    Input Arguments:
22 .  comm - communicator on which the star forest will operate
23 
24    Output Arguments:
25 .  sf - new star forest context
26 
27    Options Database Keys:
28 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
29 .  -sf_type window    -Use MPI-3 one-sided window for communication
30 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
31 
32    Level: intermediate
33 
34    Notes:
35    When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv,
36    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
37    SFs are optimized and they have better performance than general SFs.
38 
39 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
40 @*/
41 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
42 {
43   PetscErrorCode ierr;
44   PetscSF        b;
45 
46   PetscFunctionBegin;
47   PetscValidPointer(sf,2);
48   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
49 
50   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
51 
52   b->nroots    = -1;
53   b->nleaves   = -1;
54   b->minleaf   = PETSC_MAX_INT;
55   b->maxleaf   = PETSC_MIN_INT;
56   b->nranks    = -1;
57   b->rankorder = PETSC_TRUE;
58   b->ingroup   = MPI_GROUP_NULL;
59   b->outgroup  = MPI_GROUP_NULL;
60   b->graphset  = PETSC_FALSE;
61 
62   *sf = b;
63   PetscFunctionReturn(0);
64 }
65 
66 /*@
67    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
68 
69    Collective
70 
71    Input Arguments:
72 .  sf - star forest
73 
74    Level: advanced
75 
76 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
77 @*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation release its type-specific resources first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Return the graph description to its just-created state */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine/remote may alias mine_alloc/remote_alloc (copy or owned modes) or be
     caller-owned (PETSC_USE_POINTER), in which case the _alloc fields are NULL
     and these frees are no-ops */
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  {
  PetscInt  i;
  /* Free device-side copies of rmine; entries are NULL if never allocated */
  for (i=0; i<2; i++) {if (sf->rmine_d[i]) {cudaError_t err = cudaFree(sf->rmine_d[i]);CHKERRCUDA(err);sf->rmine_d[i]=NULL;}}
  }
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  /* Release lazily-created MPI groups, if any */
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  /* The multi-SF and layout are derived from the graph, so drop them too */
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
111 
112 /*@C
113    PetscSFSetType - Set the PetscSF communication implementation
114 
115    Collective on PetscSF
116 
117    Input Parameters:
118 +  sf - the PetscSF context
119 -  type - a known method
120 
121    Options Database Key:
122 .  -sf_type <type> - Sets the method; use -help for a list
123    of available methods (for instance, window, basic, neighbor)
124 
125    Notes:
126    See "include/petscsf.h" for available methods (for instance)
127 +    PETSCSFWINDOW - MPI-2/3 one-sided
128 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
129 
130   Level: intermediate
131 
132 .seealso: PetscSFType, PetscSFCreate()
133 @*/
134 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
135 {
136   PetscErrorCode ierr,(*r)(PetscSF);
137   PetscBool      match;
138 
139   PetscFunctionBegin;
140   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
141   PetscValidCharPointer(type,2);
142 
143   ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
144   if (match) PetscFunctionReturn(0);
145 
146   ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
147   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
148   /* Destroy the previous PetscSF implementation context */
149   if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
150   ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
151   ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
152   ierr = (*r)(sf);CHKERRQ(ierr);
153   PetscFunctionReturn(0);
154 }
155 
156 /*@C
157   PetscSFGetType - Get the PetscSF communication implementation
158 
159   Not Collective
160 
161   Input Parameter:
162 . sf  - the PetscSF context
163 
164   Output Parameter:
165 . type - the PetscSF type name
166 
167   Level: intermediate
168 
169 .seealso: PetscSFSetType(), PetscSFCreate()
170 @*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  /* The type name lives on the base PetscObject header; it is NULL until
     PetscSFSetType() (or setup) has been called */
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}
179 
180 /*@
181    PetscSFDestroy - destroy star forest
182 
183    Collective
184 
185    Input Arguments:
186 .  sf - address of star forest
187 
188    Level: intermediate
189 
190 .seealso: PetscSFCreate(), PetscSFReset()
191 @*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0); /* destroying a NULL SF is a no-op */
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Reference counted: only the holder of the last reference tears down the object */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  /* Give the implementation a chance to free anything PetscSFReset() did not */
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
205 
/* Verify (debug builds only) that the currently-set graph is well formed: every
   remote rank must lie in [0,size) of the SF's communicator, and every remote
   index and leaf location must be nonnegative. A no-op when no graph is set or
   in optimized builds. */
static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
{
#if defined(PETSC_USE_DEBUG)
  PetscInt           i, nleaves;
  PetscMPIInt        size;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  if (!sf->graphset) PetscFunctionReturn(0);
  ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  for (i = 0; i < nleaves; i++) {
    const PetscInt rank = iremote[i].rank;
    const PetscInt remote = iremote[i].index;
    const PetscInt leaf = ilocal ? ilocal[i] : i; /* NULL ilocal means contiguous leaves */
    if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
    if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
  }
  PetscFunctionReturn(0);
#else
  PetscFunctionBegin;
  PetscFunctionReturn(0);
#endif
}
233 
234 /*@
235    PetscSFSetUp - set up communication structures
236 
237    Collective
238 
239    Input Arguments:
240 .  sf - star forest communication object
241 
242    Level: beginner
243 
244 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
245 @*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1); /* the graph must be set before setup */
  if (sf->setupcalled) PetscFunctionReturn(0); /* idempotent */
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); /* validates only in debug builds */
  /* Fall back to the basic implementation if the user never chose a type */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
262 
263 /*@
264    PetscSFSetFromOptions - set PetscSF options using the options database
265 
266    Logically Collective
267 
268    Input Arguments:
269 .  sf - star forest
270 
271    Options Database Keys:
272 +  -sf_type               - implementation type, see PetscSFSetType()
273 .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
274 -  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
275                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller.
276 
277    Level: intermediate
278 @*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* PetscObjectOptionsBegin() brings PetscOptionsObject (used below) into scope */
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Show the current type as the default in -help, falling back to basic */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
  sf->use_default_stream = PETSC_FALSE; /* option default: do not assume caller uses the default CUDA stream */
  ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr);
  /* Let the implementation process its own options */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
299 
300 /*@
301    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
302 
303    Logically Collective
304 
305    Input Arguments:
306 +  sf - star forest
307 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
308 
309    Level: advanced
310 
311 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
312 @*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* Once the multi-SF exists the ordering is already baked in, so changing it
     now would be inconsistent */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}
322 
323 /*@
324    PetscSFSetGraph - Set a parallel star forest
325 
326    Collective
327 
328    Input Arguments:
329 +  sf - star forest
330 .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
331 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
332 .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
333 during setup in debug mode)
334 .  localmode - copy mode for ilocal
335 .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
336 during setup in debug mode)
337 -  remotemode - copy mode for iremote
338 
339    Level: intermediate
340 
341    Notes:
342     In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode
343 
344    Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
345    encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
346    needed
347 
348    Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
349    indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).
350 
351 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
352 @*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    /* Single pass over ilocal: record the active leaf range and detect the
       identity permutation, which encodes contiguous storage */
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* Contiguous leaves need no index array; if we were handed ownership,
         free it now since we will not keep it */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    /* NULL ilocal means leaves occupy [0,nleaves) contiguously */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  /* Store ilocal according to the requested copy mode */
  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Caller keeps ownership; mine_alloc stays NULL so PetscSFReset() never frees it */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  /* Store iremote according to the requested copy mode */
  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    /* Caller keeps ownership; remote_alloc stays NULL so PetscSFReset() never frees it */
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
436 
437 /*@
438   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
439 
440   Collective
441 
442   Input Parameters:
443 + sf      - The PetscSF
444 . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
445 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
446 
447   Notes:
448   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
449   n and N are local and global sizes of x respectively.
450 
451   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
452   sequential vectors y on all ranks.
453 
454   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
455   sequential vector y on rank 0.
456 
457   In above cases, entries of x are roots and entries of y are leaves.
458 
459   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
460   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
461   of rank j. Here 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
462   not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
463   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
464 
465   In this case, roots and leaves are symmetric.
466 
467   Level: intermediate
468  @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* map is ignored for ALLTOALL; build a layout with 'size' roots per rank */
    type = PETSCSFALLTOALL;
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it:
       max(n) == -max(-n) == min(n) exactly when every rank has the same n */
    ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    /* Every rank receives all N roots as leaves */
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves (all N of them) */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* Each rank has exactly one leaf and one root per peer rank */
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
534 
535 /*@
536    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
537 
538    Collective
539 
540    Input Arguments:
541 
542 .  sf - star forest to invert
543 
544    Output Arguments:
545 .  isf - inverse of sf

   Level: advanced
547 
548    Notes:
549    All roots must have degree 1.
550 
551    The local space may be a permutation, but cannot be sparse.
552 
553 .seealso: PetscSFSetGraph()
554 @*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Tag every leaf slot with its own (rank,index) identity */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Roots start invalid; any root not reached by a leaf keeps (-1,-1) */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's identity to its root; since all roots have degree 1,
     MPIU_REPLACE leaves exactly one leaf identity on each reached root */
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* dense: contiguous leaf storage suffices */
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* The inverse SF has our leaves as roots and the compacted roots as leaves;
     newilocal ownership transfers to the new SF (PETSC_OWN_POINTER) */
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
604 
605 /*@
606    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
607 
608    Collective
609 
610    Input Arguments:
611 +  sf - communication object to duplicate
612 -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
613 
614    Output Arguments:
615 .  newsf - new communication object
616 
617    Level: beginner
618 
619 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
620 @*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} /* type is NULL if never set */
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graphs are deep-copied */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graphs are recreated cheaply from the layout and pattern */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Implementation-specific state (including PETSCSF_DUPLICATE_RANKS handling)
     is copied by the type's Duplicate */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
648 
649 /*@C
650    PetscSFGetGraph - Get the graph specifying a parallel star forest
651 
652    Not Collective
653 
654    Input Arguments:
655 .  sf - star forest
656 
657    Output Arguments:
658 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
659 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
660 .  ilocal - locations of leaves in leafdata buffers
661 -  iremote - remote locations of root vertices for each leaf on the current process
662 
663    Notes:
664    We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
665 
666    When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
667    want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.
668 
669    Level: intermediate
670 
671 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
672 @*/
673 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
674 {
675   PetscErrorCode ierr;
676 
677   PetscFunctionBegin;
678   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
679   if (sf->ops->GetGraph) {
680     ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
681   } else {
682     if (nroots) *nroots = sf->nroots;
683     if (nleaves) *nleaves = sf->nleaves;
684     if (ilocal) *ilocal = sf->mine;
685     if (iremote) *iremote = sf->remote;
686   }
687   PetscFunctionReturn(0);
688 }
689 
690 /*@
691    PetscSFGetLeafRange - Get the active leaf ranges
692 
693    Not Collective
694 
695    Input Arguments:
696 .  sf - star forest
697 
698    Output Arguments:
699 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
700 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
701 
702    Level: developer
703 
704 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
705 @*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  /* Both outputs are optional; the cached range was computed when the graph was set */
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}
716 
717 /*@C
718    PetscSFViewFromOptions - View from Options
719 
720    Collective on PetscSF
721 
722    Input Parameters:
723 +  A - the star forest
724 .  obj - Optional object
725 -  name - command line option
726 
727    Level: intermediate
728 .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
729 @*/
PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  /* Thin wrapper: delegate to the generic PetscObject options-driven viewer */
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
739 
740 /*@C
741    PetscSFView - view a star forest
742 
743    Collective
744 
745    Input Arguments:
746 +  sf - star forest
747 -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
748 
749    Level: beginner
750 
751 .seealso: PetscSFCreate(), PetscSFSetGraph()
752 @*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator if no viewer was supplied */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);} /* rank tables are built in setup */
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    /* Only general (non-patterned) graphs have per-leaf data to print */
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      /* Synchronized printing keeps per-rank output in rank order */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* Print edges grouped by (sorted) remote rank; sort a scratch copy so
           the stored rank arrays remain untouched */
        PetscMPIInt *tmpranks,*perm;
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
810 
811 /*@C
812    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
813 
814    Not Collective
815 
816    Input Arguments:
817 .  sf - star forest
818 
819    Output Arguments:
820 +  nranks - number of ranks referenced by local part
821 .  ranks - array of ranks
822 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
823 .  rmine - concatenated array holding local indices referencing each remote rank
824 -  rremote - concatenated array holding remote indices referenced for each remote rank
825 
826    Level: developer
827 
828 .seealso: PetscSFGetLeafRanks()
829 @*/
830 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
831 {
832   PetscErrorCode ierr;
833 
834   PetscFunctionBegin;
835   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
836   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
837   if (sf->ops->GetRootRanks) {
838     ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
839   } else {
840     /* The generic implementation */
841     if (nranks)  *nranks  = sf->nranks;
842     if (ranks)   *ranks   = sf->ranks;
843     if (roffset) *roffset = sf->roffset;
844     if (rmine)   *rmine   = sf->rmine;
845     if (rremote) *rremote = sf->rremote;
846   }
847   PetscFunctionReturn(0);
848 }
849 
850 /*@C
851    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
852 
853    Not Collective
854 
855    Input Arguments:
856 .  sf - star forest
857 
858    Output Arguments:
859 +  niranks - number of leaf ranks referencing roots on this process
860 .  iranks - array of ranks
861 .  ioffset - offset in irootloc for each rank (length niranks+1)
862 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
863 
864    Level: developer
865 
866 .seealso: PetscSFGetRootRanks()
867 @*/
868 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
869 {
870   PetscErrorCode ierr;
871 
872   PetscFunctionBegin;
873   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
874   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
875   if (sf->ops->GetLeafRanks) {
876     ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
877   } else {
878     PetscSFType type;
879     ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
880     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
881   }
882   PetscFunctionReturn(0);
883 }
884 
885 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
886   PetscInt i;
887   for (i=0; i<n; i++) {
888     if (needle == list[i]) return PETSC_TRUE;
889   }
890   return PETSC_FALSE;
891 }
892 
893 /*@C
894    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
895 
896    Collective
897 
898    Input Arguments:
899 +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
900 -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
901 
902    Level: developer
903 
904 .seealso: PetscSFGetRootRanks()
905 @*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count the number of leaves that reference each remote rank: table maps rank+1 -> leaf count */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  /* rmine/rremote are sized by nleaves because each leaf appears exactly once in the concatenated per-rank lists */
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  /* Flatten the table into parallel arrays: ranks[] (remote rank) and rcount[] (leaf count for that rank) */
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate every rank of dgroup into the corresponding rank in sf's communicator (result in groupranks[]) */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition by rank so PetscFindMPIInt() below can binary-search within it */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR-style offsets: leaves destined for sf->ranks[i] occupy rmine/rremote[roffset[i]..roffset[i+1]) */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0; /* Reused below as a per-rank fill cursor */
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        /* Not a distinguished rank; search the non-distinguished partition */
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    /* Record the local leaf index and the remote root index in this rank's segment */
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
996 
997 /*@C
998    PetscSFGetGroups - gets incoming and outgoing process groups
999 
1000    Collective
1001 
1002    Input Argument:
1003 .  sf - star forest
1004 
1005    Output Arguments:
1006 +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1007 -  outgoing - group of destination processes for outgoing edges (roots that I reference)
1008 
1009    Level: developer
1010 
1011 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1012 @*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    /* bgcount has one root (index 0) per process and one leaf per remote rank I reference;
       its root degree is therefore the number of processes whose leaves reference my roots */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks */
    /* Every process sends its own rank along each edge; gathering at the root collects
       the ranks of all processes that reference my roots into inranks[0..indegree[0]) */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  /* Groups are cached on the SF; repeated calls return the same (uncopied) groups */
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* The outgoing group is simply the ranks my leaves reference, already known from setup */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1059 
1060 /*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1062 
1063    Collective
1064 
1065    Input Argument:
1066 .  sf - star forest that may contain roots with 0 or with more than 1 vertex
1067 
1068    Output Arguments:
1069 .  multi - star forest with split roots, such that each root has degree exactly 1
1070 
1071    Level: developer
1072 
1073    Notes:
1074 
1075    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1076    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1077    edge, it is a candidate for future optimization that might involve its removal.
1078 
1079 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1080 @*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* inoffset[] is a CSR-style prefix sum of the degrees: root i's slots in the multi-root
       space are inoffset[i]..inoffset[i+1]) */
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add 1 from every leaf: each leaf gets back (in outoffset) a distinct slot
       index within its root's range, so every multi-root has exactly one incoming edge */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
#if defined(PETSC_USE_DEBUG)                                 /* Check that the expected number of increments occurred */
    for (i=0; i<sf->nroots; i++) {
      if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
    }
#endif
    /* Point each leaf at its private slot on the owner of its original root */
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      /* Reduce each leaf's owning rank onto its multi-root so roots learn who feeds each slot */
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Broadcast each leaf its rank-ordered slot index and rebuild the multi graph with it */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1151 
1152 /*@C
1153    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
1154 
1155    Collective
1156 
1157    Input Arguments:
1158 +  sf - original star forest
1159 .  nselected  - number of selected roots on this process
1160 -  selected   - indices of the selected roots on this process
1161 
1162    Output Arguments:
1163 .  esf - new star forest
1164 
1165    Level: advanced
1166 
1167    Note:
1168    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1169    be done by calling PetscSFGetGraph().
1170 
1171 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1172 @*/
1173 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1174 {
1175   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1176   const PetscInt    *ilocal;
1177   signed char       *rootdata,*leafdata,*leafmem;
1178   const PetscSFNode *iremote;
1179   PetscSFNode       *new_iremote;
1180   MPI_Comm          comm;
1181   PetscErrorCode    ierr;
1182 
1183   PetscFunctionBegin;
1184   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1185   PetscSFCheckGraphSet(sf,1);
1186   if (nselected) PetscValidPointer(selected,3);
1187   PetscValidPointer(esf,4);
1188 
1189   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1190 
1191   ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1192   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1193   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
1194 
1195 #if defined(PETSC_USE_DEBUG)
1196   /* Error out if selected[] has dups or  out of range indices */
1197   {
1198     PetscBool dups;
1199     ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr);
1200     if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1201     for (i=0; i<nselected; i++)
1202       if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots);
1203   }
1204 #endif
1205 
1206   if (sf->ops->CreateEmbeddedSF) {
1207     ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr);
1208   } else {
1209     /* A generic version of creating embedded sf */
1210     ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
1211     maxlocal = maxleaf - minleaf + 1;
1212     ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
1213     leafdata = leafmem - minleaf;
1214     /* Tag selected roots and bcast to leaves */
1215     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1216     ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
1217     ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
1218 
1219     /* Build esf with leaves that are still connected */
1220     esf_nleaves = 0;
1221     for (i=0; i<nleaves; i++) {
1222       j = ilocal ? ilocal[i] : i;
1223       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1224          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1225       */
1226       esf_nleaves += (leafdata[j] ? 1 : 0);
1227     }
1228     ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
1229     ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
1230     for (i=n=0; i<nleaves; i++) {
1231       j = ilocal ? ilocal[i] : i;
1232       if (leafdata[j]) {
1233         new_ilocal[n]        = j;
1234         new_iremote[n].rank  = iremote[i].rank;
1235         new_iremote[n].index = iremote[i].index;
1236         ++n;
1237       }
1238     }
1239     ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr);
1240     ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr);
1241     ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1242     ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
1243   }
1244   ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 /*@C
1249   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1250 
1251   Collective
1252 
1253   Input Arguments:
1254 + sf - original star forest
1255 . nselected  - number of selected leaves on this process
1256 - selected   - indices of the selected leaves on this process
1257 
1258   Output Arguments:
1259 .  newsf - new star forest
1260 
1261   Level: advanced
1262 
1263 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1264 @*/
1265 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1266 {
1267   const PetscSFNode *iremote;
1268   PetscSFNode       *new_iremote;
1269   const PetscInt    *ilocal;
1270   PetscInt          i,nroots,*leaves,*new_ilocal;
1271   MPI_Comm          comm;
1272   PetscErrorCode    ierr;
1273 
1274   PetscFunctionBegin;
1275   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1276   PetscSFCheckGraphSet(sf,1);
1277   if (nselected) PetscValidPointer(selected,3);
1278   PetscValidPointer(newsf,4);
1279 
1280   /* Uniq selected[] and put results in leaves[] */
1281   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1282   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1283   ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1284   ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1285   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1286 
1287   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1288   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1289     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1290   } else {
1291     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1292     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1293     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1294     for (i=0; i<nselected; ++i) {
1295       const PetscInt l     = leaves[i];
1296       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1297       new_iremote[i].rank  = iremote[l].rank;
1298       new_iremote[i].index = iremote[l].index;
1299     }
1300     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
1301     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1302   }
1303   ierr = PetscFree(leaves);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 /*@C
1308    PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()
1309 
1310    Collective on PetscSF
1311 
1312    Input Arguments:
1313 +  sf - star forest on which to communicate
1314 .  unit - data type associated with each node
1315 .  rootdata - buffer to broadcast
1316 -  op - operation to use for reduction
1317 
1318    Output Arguments:
1319 .  leafdata - buffer to be reduced with values from each leaf's respective root
1320 
1321    Level: intermediate
1322 
1323 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
1324 @*/
1325 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1326 {
1327   PetscErrorCode ierr;
1328   PetscMemType   rootmtype,leafmtype;
1329 
1330   PetscFunctionBegin;
1331   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1332   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1333   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1334   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1335   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1336   ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1337   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 /*@C
1342    PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()
1343 
1344    Collective
1345 
1346    Input Arguments:
1347 +  sf - star forest
1348 .  unit - data type
1349 .  rootdata - buffer to broadcast
1350 -  op - operation to use for reduction
1351 
1352    Output Arguments:
1353 .  leafdata - buffer to be reduced with values from each leaf's respective root
1354 
1355    Level: intermediate
1356 
1357 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1358 @*/
1359 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1360 {
1361   PetscErrorCode ierr;
1362 
1363   PetscFunctionBegin;
1364   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1365   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1366   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1367   ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
1368   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 /*@C
1373    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1374 
1375    Collective
1376 
1377    Input Arguments:
1378 +  sf - star forest
1379 .  unit - data type
1380 .  leafdata - values to reduce
1381 -  op - reduction operation
1382 
1383    Output Arguments:
1384 .  rootdata - result of reduction of values from all leaves of each root
1385 
1386    Level: intermediate
1387 
1388 .seealso: PetscSFBcastBegin()
1389 @*/
1390 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1391 {
1392   PetscErrorCode ierr;
1393   PetscMemType   rootmtype,leafmtype;
1394 
1395   PetscFunctionBegin;
1396   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1397   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1398   ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1399   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1400   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1401   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1402   ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 /*@C
1407    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1408 
1409    Collective
1410 
1411    Input Arguments:
1412 +  sf - star forest
1413 .  unit - data type
1414 .  leafdata - values to reduce
1415 -  op - reduction operation
1416 
1417    Output Arguments:
1418 .  rootdata - result of reduction of values from all leaves of each root
1419 
1420    Level: intermediate
1421 
1422 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1423 @*/
1424 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1425 {
1426   PetscErrorCode ierr;
1427 
1428   PetscFunctionBegin;
1429   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1430   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1431   ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1432   ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1433   ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 /*@C
1438    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1439 
1440    Collective
1441 
1442    Input Arguments:
1443 +  sf - star forest
1444 .  unit - data type
1445 .  leafdata - leaf values to use in reduction
1446 -  op - operation to use for reduction
1447 
1448    Output Arguments:
1449 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1450 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1451 
1452    Level: advanced
1453 
1454    Note:
1455    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1456    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1457    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1458    integers.
1459 
1460 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1461 @*/
1462 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1463 {
1464   PetscErrorCode ierr;
1465   PetscMemType   rootmtype,leafmtype,leafupdatemtype;
1466 
1467   PetscFunctionBegin;
1468   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1469   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1470   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1471   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1472   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1473   ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
1474   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1475   ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
1476   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 /*@C
1481    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1482 
1483    Collective
1484 
1485    Input Arguments:
1486 +  sf - star forest
1487 .  unit - data type
1488 .  leafdata - leaf values to use in reduction
1489 -  op - operation to use for reduction
1490 
1491    Output Arguments:
1492 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1493 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1494 
1495    Level: advanced
1496 
1497 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1498 @*/
1499 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1500 {
1501   PetscErrorCode ierr;
1502 
1503   PetscFunctionBegin;
1504   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1505   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1506   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1507   ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
1508   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 /*@C
1513    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1514 
1515    Collective
1516 
1517    Input Arguments:
1518 .  sf - star forest
1519 
1520    Output Arguments:
1521 .  degree - degree of each root vertex
1522 
1523    Level: advanced
1524 
1525    Notes:
1526    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1527 
1528 .seealso: PetscSFGatherBegin()
1529 @*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree non-NULL here means a Begin without a matching End is already in flight */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1; /* size of the contiguous leaf index range */
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Summing 1 from every leaf into its root yields the root degree; the leaf buffer is
       offset by -minleaf so it can be indexed with the raw leaf index. Completed in
       PetscSFComputeDegreeEnd(). */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  *degree = NULL; /* result only becomes valid after PetscSFComputeDegreeEnd() */
  PetscFunctionReturn(0);
}
1551 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated (with at least one entry) by PetscSFComputeDegreeBegin(); NULL means Begin was never called */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    /* The scratch leaf buffer is only needed for the reduction; sf->degree is cached until PetscSFDestroy() */
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE;
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1587 
1588 
1589 /*@C
1590    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1591    Each multi-root is assigned index of the corresponding original root.
1592 
1593    Collective
1594 
1595    Input Arguments:
1596 +  sf - star forest
1597 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1598 
1599    Output Arguments:
1600 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1601 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1602 
1603    Level: developer
1604 
1605    Notes:
1606    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1607 
1608 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1609 @*/
1610 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1611 {
1612   PetscSF             msf;
1613   PetscInt            i, j, k, nroots, nmroots;
1614   PetscErrorCode      ierr;
1615 
1616   PetscFunctionBegin;
1617   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1618   ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
1619   if (nroots) PetscValidIntPointer(degree,2);
1620   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1621   PetscValidPointer(multiRootsOrigNumbering,4);
1622   ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
1623   ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
1624   ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
1625   for (i=0,j=0,k=0; i<nroots; i++) {
1626     if (!degree[i]) continue;
1627     for (j=0; j<degree[i]; j++,k++) {
1628       (*multiRootsOrigNumbering)[k] = i;
1629     }
1630   }
1631 #if defined(PETSC_USE_DEBUG)
1632   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1633 #endif
1634   if (nMultiRoots) *nMultiRoots = nmroots;
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 /*@C
1639    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1640 
1641    Collective
1642 
1643    Input Arguments:
1644 +  sf - star forest
1645 .  unit - data type
1646 -  leafdata - leaf data to gather to roots
1647 
1648    Output Argument:
1649 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1650 
1651    Level: intermediate
1652 
1653 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1654 @*/
1655 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1656 {
1657   PetscErrorCode ierr;
1658   PetscSF        multi;
1659 
1660   PetscFunctionBegin;
1661   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1662   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1663   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1664   ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1665   PetscFunctionReturn(0);
1666 }
1667 
1668 /*@C
1669    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1670 
1671    Collective
1672 
1673    Input Arguments:
1674 +  sf - star forest
1675 .  unit - data type
1676 -  leafdata - leaf data to gather to roots
1677 
1678    Output Argument:
1679 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1680 
1681    Level: intermediate
1682 
1683 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1684 @*/
1685 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1686 {
1687   PetscErrorCode ierr;
1688   PetscSF        multi;
1689 
1690   PetscFunctionBegin;
1691   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1692   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1693   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1694   ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 
/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Broadcast over the multi-SF: each leaf has its own dedicated multi-root slot, so this
     sends each leaf the value that was gathered from it (or placed for it) at the root */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1727 
/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Complete the broadcast over the multi-SF started in PetscSFScatterBegin() */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1757 
1758 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1759 {
1760 #if defined(PETSC_USE_DEBUG)
1761   PetscInt        i, n, nleaves;
1762   const PetscInt *ilocal = NULL;
1763   PetscHSetI      seen;
1764   PetscErrorCode  ierr;
1765 
1766   PetscFunctionBegin;
1767   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
1768   ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
1769   for (i = 0; i < nleaves; i++) {
1770     const PetscInt leaf = ilocal ? ilocal[i] : i;
1771     ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
1772   }
1773   ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
1774   if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1775   ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
1776   PetscFunctionReturn(0);
1777 #else
1778   PetscFunctionBegin;
1779   PetscFunctionReturn(0);
1780 #endif
1781 }
1782 
/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* sfA's leaves are non-contiguous: build a dense array indexed by sfA leaf number, which
       plays the role of sfB root number. rank=-1 marks sfB roots with no incoming sfA edge. */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* sfA leaf outside sfB's root space; no path through it */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Push sfA's root locations through sfB so each sfB leaf learns which sfA root (if any) it is
     transitively connected to; leafdataB is offset by -minleaf so it is indexed by leaf number */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: sfB's leaves are contiguous AND every one of them received a valid sfA root,
     in which case leafdataB can be handed to the new SF as-is */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership passes to the new SF below */
  } else {
    /* Compact: keep only connected leaves */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1871 
/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPIU_REPLACE;
#endif

  /* Build a dense array over sfB's leaf range holding sfA's remote roots, so it can be used
     as sfB leafdata; rank=-1 marks positions with no sfA edge */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* outside sfB's leaf range; no path */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* NULL localPointsA means sfA's leaves are the contiguous range 0..numLeavesA-1 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;  /* rank=-1 survives the reduction for sfB roots with no connected leaf */
    remotePointsBA[i].index = -1;
  }

  /* Reduce sfA's root info from sfB's leaves to sfB's roots; the leaf buffer is offset by
     -minleaf so it is indexed by leaf number */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only sfB roots that received a valid sfA root */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1974 
1975 /*
1976   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
1977 
1978   Input Parameters:
1979 . sf - The global PetscSF
1980 
1981   Output Parameters:
1982 . out - The local PetscSF
1983  */
1984 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
1985 {
1986   MPI_Comm           comm;
1987   PetscMPIInt        myrank;
1988   const PetscInt     *ilocal;
1989   const PetscSFNode  *iremote;
1990   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
1991   PetscSFNode        *liremote;
1992   PetscSF            lsf;
1993   PetscErrorCode     ierr;
1994 
1995   PetscFunctionBegin;
1996   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1997   if (sf->ops->CreateLocalSF) {
1998     ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
1999   } else {
2000     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2001     ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
2002     ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);
2003 
2004     /* Find out local edges and build a local SF */
2005     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
2006     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2007     ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
2008     ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);
2009 
2010     for (i=j=0; i<nleaves; i++) {
2011       if (iremote[i].rank == (PetscInt)myrank) {
2012         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2013         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2014         liremote[j].index = iremote[i].index;
2015         j++;
2016       }
2017     }
2018     ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
2019     ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
2020     ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
2021     *out = lsf;
2022   }
2023   PetscFunctionReturn(0);
2024 }
2025 
/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0.
   Errors with PETSC_ERR_SUP when the SF implementation does not provide BcastToZero. */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode     ierr;
  PetscMemType       rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* No dedicated log event exists; the PETSCSF_BcastAndOpBegin event brackets the whole call */
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Detect where the buffers live so the implementation can choose the appropriate path */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2044 
2045