xref: /petsc/src/vec/is/sf/interface/sf.c (revision 140a147293a25251a9cb594f525bcfe7ceee4ef4)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petscctable.h>
4 
/* PetscSFCheckGraphSet(sf,arg): in debug builds, raise an error if neither
   PetscSFSetGraph() nor PetscSFSetGraphWithPattern() has been called on 'sf'
   (argument number 'arg' of the calling function). Compiles to a no-op in
   optimized builds. */
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
13 
/* String names corresponding to the PetscSFDuplicateOption enum values (used for options/viewing); the trailing entries are the enum type name, option prefix, and a NULL terminator */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};
15 
16 /*@
17    PetscSFCreate - create a star forest communication context
18 
19    Collective
20 
21    Input Arguments:
22 .  comm - communicator on which the star forest will operate
23 
24    Output Arguments:
25 .  sf - new star forest context
26 
27    Options Database Keys:
28 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
29 .  -sf_type window    -Use MPI-3 one-sided window for communication
30 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
31 
32    Level: intermediate
33 
34    Notes:
35    When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv,
36    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
37    SFs are optimized and they have better performance than general SFs.
38 
39 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
40 @*/
41 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
42 {
43   PetscErrorCode ierr;
44   PetscSF        b;
45 
46   PetscFunctionBegin;
47   PetscValidPointer(sf,2);
48   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
49 
50   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
51 
52   b->nroots    = -1;
53   b->nleaves   = -1;
54   b->minleaf   = PETSC_MAX_INT;
55   b->maxleaf   = PETSC_MIN_INT;
56   b->nranks    = -1;
57   b->rankorder = PETSC_TRUE;
58   b->ingroup   = MPI_GROUP_NULL;
59   b->outgroup  = MPI_GROUP_NULL;
60   b->graphset  = PETSC_FALSE;
61 
62   *sf = b;
63   PetscFunctionReturn(0);
64 }
65 
66 /*@
67    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
68 
69    Collective
70 
71    Input Arguments:
72 .  sf - star forest
73 
74    Level: advanced
75 
76 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
77 @*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation tear down its own communication state first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Restore the "graph not set" sentinels used by PetscSFCreate() */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* Free owned copies of the leaf/root index arrays (PetscFree is a no-op on NULL) */
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  /* ranks/roffset/rmine/rremote were allocated together with PetscMalloc4 during setup */
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* Device-side copy of rmine, if one was allocated */
  if (sf->rmine_d) {cudaError_t err = cudaFree(sf->rmine_d);CHKERRCUDA(err);sf->rmine_d=NULL;}
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  /* The multi-SF and root layout are derived objects; they are rebuilt on demand */
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
108 
109 /*@C
110    PetscSFSetType - Set the PetscSF communication implementation
111 
112    Collective on PetscSF
113 
114    Input Parameters:
115 +  sf - the PetscSF context
116 -  type - a known method
117 
118    Options Database Key:
119 .  -sf_type <type> - Sets the method; use -help for a list
120    of available methods (for instance, basic, window, neighbor)
121 
122    Notes:
123    See "include/petscsf.h" for available methods (for instance)
124 +    PETSCSFWINDOW - MPI-2/3 one-sided
125 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
126 
127   Level: intermediate
128 
129 .seealso: PetscSFType, PetscSFCreate()
130 @*/
131 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
132 {
133   PetscErrorCode ierr,(*r)(PetscSF);
134   PetscBool      match;
135 
136   PetscFunctionBegin;
137   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
138   PetscValidCharPointer(type,2);
139 
140   ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
141   if (match) PetscFunctionReturn(0);
142 
143   ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
144   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
145   /* Destroy the previous PetscSF implementation context */
146   if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
147   ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
148   ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
149   ierr = (*r)(sf);CHKERRQ(ierr);
150   PetscFunctionReturn(0);
151 }
152 
153 /*@C
154   PetscSFGetType - Get the PetscSF communication implementation
155 
156   Not Collective
157 
158   Input Parameter:
159 . sf  - the PetscSF context
160 
161   Output Parameter:
162 . type - the PetscSF type name
163 
164   Level: intermediate
165 
166 .seealso: PetscSFSetType(), PetscSFCreate()
167 @*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  /* Type name is stored on the PetscObject base; NULL if no type has been set yet */
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}
176 
177 /*@
178    PetscSFDestroy - destroy star forest
179 
180    Collective
181 
182    Input Arguments:
183 .  sf - address of star forest
184 
185    Level: intermediate
186 
187 .seealso: PetscSFCreate(), PetscSFReset()
188 @*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Decrement the reference count; only the final reference actually destroys the object */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  /* Release graph/generic state, then the implementation context, then the header itself */
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
202 
203 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
204 {
205 #if defined(PETSC_USE_DEBUG)
206   PetscInt           i, nleaves;
207   PetscMPIInt        size;
208   const PetscInt    *ilocal;
209   const PetscSFNode *iremote;
210   PetscErrorCode     ierr;
211 
212   PetscFunctionBegin;
213   if (!sf->graphset) PetscFunctionReturn(0);
214   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
215   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
216   for (i = 0; i < nleaves; i++) {
217     const PetscInt rank = iremote[i].rank;
218     const PetscInt remote = iremote[i].index;
219     const PetscInt leaf = ilocal ? ilocal[i] : i;
220     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
221     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
222     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
223   }
224   PetscFunctionReturn(0);
225 #else
226   PetscFunctionBegin;
227   PetscFunctionReturn(0);
228 #endif
229 }
230 
231 /*@
232    PetscSFSetUp - set up communication structures
233 
234    Collective
235 
236    Input Arguments:
237 .  sf - star forest communication object
238 
239    Level: beginner
240 
241 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
242 @*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0); /* setup is performed at most once */
  /* In debug builds, validate graph ranks/indices before building communication structures */
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  /* Default to the basic implementation when no type was chosen */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
259 
260 /*@
261    PetscSFSetFromOptions - set PetscSF options using the options database
262 
263    Logically Collective
264 
265    Input Arguments:
266 .  sf - star forest
267 
268    Options Database Keys:
269 +  -sf_type - implementation type, see PetscSFSetType()
270 -  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
271 
272    Level: intermediate
273 
274 .seealso: PetscSFWindowSetSyncType()
275 @*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* NOTE: PetscObjectOptionsBegin() introduces the PetscOptionsObject variable referenced below */
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Default type is the current one if set, otherwise basic */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
  /* Give the chosen implementation a chance to process its own options */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
294 
295 /*@
296    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
297 
298    Logically Collective
299 
300    Input Arguments:
301 +  sf - star forest
302 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
303 
304    Level: advanced
305 
306 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
307 @*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF bakes in the ordering, so it must not have been created yet */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}
318 
319 /*@
320    PetscSFSetGraph - Set a parallel star forest
321 
322    Collective
323 
324    Input Arguments:
325 +  sf - star forest
326 .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
327 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
328 .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
329 during setup in debug mode)
330 .  localmode - copy mode for ilocal
331 .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
332 during setup in debug mode)
333 -  remotemode - copy mode for iremote
334 
335    Level: intermediate
336 
337    Notes:
338     In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode
339 
340    Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
341    encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
342    needed
343 
344    Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
345    indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).
346 
347 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
348 @*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  /* Discard any previously set graph so different sizes/neighbors can be installed */
  ierr = PetscSFReset(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    /* One pass over ilocal: record the leaf range and detect the identity permutation */
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* Identity permutation encodes contiguous storage; drop ilocal, freeing it if ownership was transferred */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    /* Contiguous leaves occupy [0,nleaves); an empty graph yields the empty range [0,-1] */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  /* Store ilocal honoring the requested copy mode (only reached for non-contiguous leaves) */
  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Caller retains ownership; mine_alloc stays NULL so PetscSFReset() frees nothing */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  /* Store iremote honoring the requested copy mode */
  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
429 
430 /*@
431   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
432 
433   Collective
434 
435   Input Parameters:
436 + sf      - The PetscSF
437 . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
438 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
439 
440   Notes:
441   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
442   n and N are local and global sizes of x respectively.
443 
444   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
445   sequential vectors y on all ranks.
446 
447   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
448   sequential vector y on rank 0.
449 
450   In above cases, entries of x are roots and entries of y are leaves.
451 
452   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
453   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
454   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
455   not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
456   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
457 
458   In this case, roots and leaves are symmetric.
459 
460   Level: intermediate
461  @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* map is ignored for ALLTOALL; build the canonical layout with 'size' roots per rank */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    /* After MPI_MAX, res[0]=max(n) and res[1]=max(-n)=-min(n); equality of res[0] and -res[1] means all local sizes agree */
    ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    /* Every rank holds leaves for all N roots */
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 holds leaves (the gathered copy); other ranks have the empty range [0,-1] */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* One leaf and one root per process */
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
527 
528 /*@
529    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
530 
531    Collective
532 
533    Input Arguments:
534 
535 .  sf - star forest to invert
536 
537    Output Arguments:
538 .  isf - inverse of sf
539    Level: advanced
540 
541    Notes:
542    All roots must have degree 1.
543 
544    The local space may be a permutation, but cannot be sparse.
545 
546 .seealso: PetscSFSetGraph()
547 @*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each local leaf slot is tagged with its own (rank,index) identity */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Roots start invalid; any root not reached below keeps rank -1 */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's identity to its root: afterwards roots[i] names the (rank,leaf) that references root i */
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* Roles swap: sf's leaf space (size maxlocal) becomes isf's root space, and the reduced roots become isf's leaves */
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
597 
598 /*@
599    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
600 
601    Collective
602 
603    Input Arguments:
604 +  sf - communication object to duplicate
605 -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
606 
607    Output Arguments:
608 .  newsf - new communication object
609 
610    Level: beginner
611 
612 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
613 @*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graphs are deep-copied */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graphs are reconstructed from the stored layout and pattern */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Let the implementation copy any type-specific state */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
641 
642 /*@C
643    PetscSFGetGraph - Get the graph specifying a parallel star forest
644 
645    Not Collective
646 
647    Input Arguments:
648 .  sf - star forest
649 
650    Output Arguments:
651 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
652 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
653 .  ilocal - locations of leaves in leafdata buffers
654 -  iremote - remote locations of root vertices for each leaf on the current process
655 
656    Notes:
657    We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
658 
659    When called from Fortran, the returned iremote array is a copy and must deallocated after use. Consequently, if you
660    want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.
661 
662    Level: intermediate
663 
664 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
665 @*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* An implementation may override graph retrieval (e.g. to build it on demand) */
  if (sf->ops->GetGraph) {
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    /* All outputs are optional; *ilocal == NULL means contiguous leaf storage */
    if (nroots) *nroots = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal) *ilocal = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}
682 
683 /*@
684    PetscSFGetLeafRange - Get the active leaf ranges
685 
686    Not Collective
687 
688    Input Arguments:
689 .  sf - star forest
690 
691    Output Arguments:
692 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
693 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
694 
695    Level: developer
696 
697 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
698 @*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  /* Both outputs optional; an SF without leaves reports the empty range [0,-1] */
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}
709 
710 /*@C
711    PetscSFViewFromOptions - View from Options
712 
713    Collective on PetscSF
714 
715    Input Parameters:
716 +  A - the star forest
717 .  obj - Optional object
718 -  name - command line option
719 
720    Level: intermediate
721 .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
722 @*/
PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
  /* Thin wrapper: option lookup and viewer creation are handled by the generic PetscObject path */
  ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
732 
733 /*@C
734    PetscSFView - view a star forest
735 
736    Collective
737 
738    Input Arguments:
739 +  sf - star forest
740 -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
741 
742    Level: beginner
743 
744 .seealso: PetscSFCreate(), PetscSFSetGraph()
745 @*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the object's communicator when no viewer is given */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Setup ensures the per-rank data printed below (ranks/roffset/rmine/rremote) exists */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      /* One line per leaf: local location <- (remote rank, remote index) */
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list (via a permutation) so output is in ascending rank order without disturbing sf->ranks */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
803 
804 /*@C
805    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
806 
807    Not Collective
808 
809    Input Arguments:
810 .  sf - star forest
811 
812    Output Arguments:
813 +  nranks - number of ranks referenced by local part
814 .  ranks - array of ranks
815 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
816 .  rmine - concatenated array holding local indices referencing each remote rank
817 -  rremote - concatenated array holding remote indices referenced for each remote rank
818 
819    Level: developer
820 
821 .seealso: PetscSFGetLeafRanks()
822 @*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Rank connectivity is computed during setup, so setup must already have run */
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation */
    /* All outputs optional; arrays are borrowed references owned by sf */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}
842 
843 /*@C
844    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
845 
846    Not Collective
847 
848    Input Arguments:
849 .  sf - star forest
850 
851    Output Arguments:
852 +  niranks - number of leaf ranks referencing roots on this process
853 .  iranks - array of ranks
854 .  ioffset - offset in irootloc for each rank (length niranks+1)
855 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
856 
857    Level: developer
858 
859 .seealso: PetscSFGetRootRanks()
860 @*/
861 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
862 {
863   PetscErrorCode ierr;
864 
865   PetscFunctionBegin;
866   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
867   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
868   if (sf->ops->GetLeafRanks) {
869     ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
870   } else {
871     PetscSFType type;
872     ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
873     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
874   }
875   PetscFunctionReturn(0);
876 }
877 
878 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
879   PetscInt i;
880   for (i=0; i<n; i++) {
881     if (needle == list[i]) return PETSC_TRUE;
882   }
883   return PETSC_FALSE;
884 }
885 
/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

   Notes (added in review):
   Fills sf->nranks, sf->ranks, sf->roffset, sf->rmine and sf->rremote. The first sf->ndranks
   entries of sf->ranks are the "distinguished" ranks (members of dgroup); each of the two
   partitions is sorted by rank. rmine/rremote hold, per remote rank, the leaf (local) and
   root (remote) indices of every edge to that rank, in leaf order within each rank segment.

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count edges per remote rank with a hash table keyed by rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (PetscTable cannot store key 0) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  /* Drain the table into parallel arrays: ranks[] (which ranks) and rcount[] (edges per rank) */
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate dgroup members into rank numbers of sf's communicator (groupranks[]) */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  /* Two-pointer sweep: i walks backward to find distinguished ranks, sf->ndranks walks forward
     to find non-distinguished ones; out-of-place pairs are swapped (rcount[] swapped in step). */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt    tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition independently so lookups below can binary-search within a partition */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Prefix-sum the per-rank counts into roffset[]; rcount[] is reset for reuse as a running cursor */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Bucket every leaf edge by remote rank into rmine/rremote */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      /* Try the distinguished partition first, then the non-distinguished one */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
989 
/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

   Notes (added in review):
   Both groups are computed lazily on first call and cached on the SF; the returned handles are
   owned by the SF and must not be freed by the caller.

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    /* Build an auxiliary SF with one leaf per remote rank, all pointing at root 0 of that rank;
       its root degree is then the number of ranks whose leaves reference my roots */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);

    /* Enumerate the incoming ranks */
    /* Gather: every referencing process sends its own rank number to my root 0 */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing group is simply the set of root ranks already computed by PetscSFSetUpRanks() */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1053 
1054 /*@
1055    PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters
1056 
1057    Collective
1058 
1059    Input Argument:
1060 .  sf - star forest that may contain roots with 0 or with more than 1 vertex
1061 
1062    Output Arguments:
1063 .  multi - star forest with split roots, such that each root has degree exactly 1
1064 
1065    Level: developer
1066 
1067    Notes:
1068 
1069    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1070    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1071    edge, it is a candidate for future optimization that might involve its removal.
1072 
1073 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1074 @*/
1075 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1076 {
1077   PetscErrorCode ierr;
1078 
1079   PetscFunctionBegin;
1080   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1081   PetscValidPointer(multi,2);
1082   if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
1083     ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
1084     *multi = sf->multi;
1085     PetscFunctionReturn(0);
1086   }
1087   if (!sf->multi) {
1088     const PetscInt *indegree;
1089     PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
1090     PetscSFNode    *remote;
1091     maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1092     ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
1093     ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
1094     ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
1095     inoffset[0] = 0;
1096     for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1097     for (i=0; i<maxlocal; i++) outones[i] = 1;
1098     ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
1099     ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
1100     for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1101 #if 0
1102 #if defined(PETSC_USE_DEBUG)                                 /* Check that the expected number of increments occurred */
1103     for (i=0; i<sf->nroots; i++) {
1104       if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1105     }
1106 #endif
1107 #endif
1108     ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
1109     for (i=0; i<sf->nleaves; i++) {
1110       remote[i].rank  = sf->remote[i].rank;
1111       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1112     }
1113     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
1114     ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1115     if (sf->rankorder) {        /* Sort the ranks */
1116       PetscMPIInt rank;
1117       PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1118       PetscSFNode *newremote;
1119       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
1120       for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1121       ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
1122       for (i=0; i<maxlocal; i++) outranks[i] = rank;
1123       ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
1124       ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
1125       /* Sort the incoming ranks at each vertex, build the inverse map */
1126       for (i=0; i<sf->nroots; i++) {
1127         PetscInt j;
1128         for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1129         ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
1130         for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1131       }
1132       ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
1133       ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
1134       ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
1135       for (i=0; i<sf->nleaves; i++) {
1136         newremote[i].rank  = sf->remote[i].rank;
1137         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1138       }
1139       ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1140       ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
1141     }
1142     ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
1143   }
1144   *multi = sf->multi;
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 /*@C
1149    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
1150 
1151    Collective
1152 
1153    Input Arguments:
1154 +  sf - original star forest
1155 .  nselected  - number of selected roots on this process
1156 -  selected   - indices of the selected roots on this process
1157 
1158    Output Arguments:
1159 .  newsf - new star forest
1160 
1161    Level: advanced
1162 
1163    Note:
1164    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1165    be done by calling PetscSFGetGraph().
1166 
1167 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1168 @*/
1169 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1170 {
1171   PetscInt          i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal;
1172   const PetscSFNode *iremote;
1173   PetscSFNode       *new_iremote;
1174   PetscSF           tmpsf;
1175   MPI_Comm          comm;
1176   PetscErrorCode    ierr;
1177 
1178   PetscFunctionBegin;
1179   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1180   PetscSFCheckGraphSet(sf,1);
1181   if (nselected) PetscValidPointer(selected,3);
1182   PetscValidPointer(newsf,4);
1183 
1184   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1185   ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1186   /* Uniq selected[] and put results in roots[] */
1187   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1188   ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr);
1189   ierr = PetscArraycpy(roots,selected,nselected);CHKERRQ(ierr);
1190   ierr = PetscSortedRemoveDupsInt(&nselected,roots);CHKERRQ(ierr);
1191   if (nselected && (roots[0] < 0 || roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots);
1192 
1193   if (sf->ops->CreateEmbeddedSF) {
1194     ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);CHKERRQ(ierr);
1195   } else {
1196     /* A generic version of creating embedded sf. Note that we called PetscSFSetGraph() twice, which is certainly expensive */
1197     /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */
1198     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);CHKERRQ(ierr);
1199     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);CHKERRQ(ierr);
1200     ierr = PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);CHKERRQ(ierr);
1201     ierr = PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr);
1202     for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1;
1203     ierr = PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
1204     ierr = PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
1205     ierr = PetscSFDestroy(&tmpsf);CHKERRQ(ierr);
1206 
1207     /* Build newsf with leaves that are still connected */
1208     connected_leaves = 0;
1209     for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i];
1210     ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
1211     ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
1212     for (i=0, n=0; i<nleaves; ++i) {
1213       if (leafdata[i]) {
1214         new_ilocal[n]        = sf->mine ? sf->mine[i] : i;
1215         new_iremote[n].rank  = sf->remote[i].rank;
1216         new_iremote[n].index = sf->remote[i].index;
1217         ++n;
1218       }
1219     }
1220 
1221     if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves);
1222     ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr);
1223     ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr);
1224     ierr = PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1225     ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
1226   }
1227   ierr = PetscFree(roots);CHKERRQ(ierr);
1228   ierr = PetscSFSetUp(*newsf);CHKERRQ(ierr);
1229   ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 /*@C
1234   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1235 
1236   Collective
1237 
1238   Input Arguments:
1239 + sf - original star forest
1240 . nselected  - number of selected leaves on this process
1241 - selected   - indices of the selected leaves on this process
1242 
1243   Output Arguments:
1244 .  newsf - new star forest
1245 
1246   Level: advanced
1247 
1248 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1249 @*/
1250 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1251 {
1252   const PetscSFNode *iremote;
1253   PetscSFNode       *new_iremote;
1254   const PetscInt    *ilocal;
1255   PetscInt          i,nroots,*leaves,*new_ilocal;
1256   MPI_Comm          comm;
1257   PetscErrorCode    ierr;
1258 
1259   PetscFunctionBegin;
1260   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1261   PetscSFCheckGraphSet(sf,1);
1262   if (nselected) PetscValidPointer(selected,3);
1263   PetscValidPointer(newsf,4);
1264 
1265   /* Uniq selected[] and put results in leaves[] */
1266   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1267   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1268   ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1269   ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1270   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1271 
1272   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1273   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1274     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1275   } else {
1276     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1277     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1278     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1279     for (i=0; i<nselected; ++i) {
1280       const PetscInt l     = leaves[i];
1281       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1282       new_iremote[i].rank  = iremote[l].rank;
1283       new_iremote[i].index = iremote[l].index;
1284     }
1285     ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr);
1286     ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr);
1287     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1288   }
1289   ierr = PetscFree(leaves);CHKERRQ(ierr);
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 /*@C
1294    PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()
1295 
1296    Collective on PetscSF
1297 
1298    Input Arguments:
1299 +  sf - star forest on which to communicate
1300 .  unit - data type associated with each node
1301 .  rootdata - buffer to broadcast
1302 -  op - operation to use for reduction
1303 
1304    Output Arguments:
1305 .  leafdata - buffer to be reduced with values from each leaf's respective root
1306 
1307    Level: intermediate
1308 
1309 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
1310 @*/
1311 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1312 {
1313   PetscErrorCode ierr;
1314   PetscMemType   rootmtype,leafmtype;
1315 
1316   PetscFunctionBegin;
1317   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1318   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1319   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1320   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1321   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1322 #if defined(PETSC_HAVE_CUDA)
1323   /*  Shall we assume rootdata, leafdata are ready to use (instead of being computed by some asynchronous kernels)?
1324     To be similar to MPI, I'd like to have this assumption, since MPI does not have a concept of stream.
1325     But currently this assumption is not enforecd in Petsc. To be safe, I do synchronization here. Otherwise, if
1326     we do not sync now and call the Pack kernel directly on the default NULL stream (assume petsc objects are also
1327     computed on it), we have to sync the NULL stream before calling MPI routines. So, it looks a cudaDeviceSynchronize
1328     is inevitable. We do it now and put pack/unpack kernels to non-NULL streams.
1329    */
1330   if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
1331 #endif
1332   ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1333   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 /*@C
1338    PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()
1339 
1340    Collective
1341 
1342    Input Arguments:
1343 +  sf - star forest
1344 .  unit - data type
1345 .  rootdata - buffer to broadcast
1346 -  op - operation to use for reduction
1347 
1348    Output Arguments:
1349 .  leafdata - buffer to be reduced with values from each leaf's respective root
1350 
1351    Level: intermediate
1352 
1353 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1354 @*/
1355 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1356 {
1357   PetscErrorCode ierr;
1358   PetscMemType   rootmtype,leafmtype;
1359 
1360   PetscFunctionBegin;
1361   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1362   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1363   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1364   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1365   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1366   ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1367   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 /*@C
1372    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1373 
1374    Collective
1375 
1376    Input Arguments:
1377 +  sf - star forest
1378 .  unit - data type
1379 .  leafdata - values to reduce
1380 -  op - reduction operation
1381 
1382    Output Arguments:
1383 .  rootdata - result of reduction of values from all leaves of each root
1384 
1385    Level: intermediate
1386 
1387 .seealso: PetscSFBcastBegin()
1388 @*/
1389 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1390 {
1391   PetscErrorCode ierr;
1392   PetscMemType   rootmtype,leafmtype;
1393 
1394   PetscFunctionBegin;
1395   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1396   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1397   ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1398   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1399   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1400 #if defined(PETSC_HAVE_CUDA)
1401   if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
1402 #endif
1403   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1404   ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 /*@C
1409    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1410 
1411    Collective
1412 
1413    Input Arguments:
1414 +  sf - star forest
1415 .  unit - data type
1416 .  leafdata - values to reduce
1417 -  op - reduction operation
1418 
1419    Output Arguments:
1420 .  rootdata - result of reduction of values from all leaves of each root
1421 
1422    Level: intermediate
1423 
1424 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1425 @*/
1426 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1427 {
1428   PetscErrorCode ierr;
1429   PetscMemType   rootmtype,leafmtype;
1430 
1431   PetscFunctionBegin;
1432   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1433   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1434   ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1435   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1436   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1437   ierr = (*sf->ops->ReduceEnd)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1438   ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 /*@C
1443    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1444 
1445    Collective
1446 
1447    Input Arguments:
1448 +  sf - star forest
1449 .  unit - data type
1450 .  leafdata - leaf values to use in reduction
1451 -  op - operation to use for reduction
1452 
1453    Output Arguments:
1454 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1455 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1456 
1457    Level: advanced
1458 
1459    Note:
1460    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1461    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1462    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1463    integers.
1464 
1465 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1466 @*/
1467 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1468 {
1469   PetscErrorCode ierr;
1470   PetscMemType   rootmtype,leafmtype,leafupdatemtype;
1471 
1472   PetscFunctionBegin;
1473   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1474   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1475   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1476   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1477   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1478   ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
1479   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1480 #if defined(PETSC_HAVE_CUDA)
1481   if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
1482 #endif
1483   ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
1484   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 /*@C
1489    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1490 
1491    Collective
1492 
1493    Input Arguments:
1494 +  sf - star forest
1495 .  unit - data type
1496 .  leafdata - leaf values to use in reduction
1497 -  op - operation to use for reduction
1498 
1499    Output Arguments:
1500 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1501 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1502 
1503    Level: advanced
1504 
1505 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1506 @*/
1507 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1508 {
1509   PetscErrorCode ierr;
1510   PetscMemType   rootmtype,leafmtype,leafupdatemtype;
1511 
1512   PetscFunctionBegin;
1513   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1514   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1515   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1516   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1517   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1518   ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
1519   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1520   ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
1521   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 /*@C
1526    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1527 
1528    Collective
1529 
1530    Input Arguments:
1531 .  sf - star forest
1532 
1533    Output Arguments:
1534 .  degree - degree of each root vertex
1535 
1536    Level: advanced
1537 
1538    Notes:
1539    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1540 
1541 .seealso: PetscSFGatherBegin()
1542 @*/
1543 PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1544 {
1545   PetscErrorCode ierr;
1546 
1547   PetscFunctionBegin;
1548   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1549   PetscSFCheckGraphSet(sf,1);
1550   PetscValidPointer(degree,2);
1551   if (!sf->degreeknown) {
1552     PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1;  /* TODO: We should use PetscSFGetLeafRange() */
1553     if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
1554     ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
1555     ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1556     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1557     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1558     ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
1559   }
1560   *degree = NULL;
1561   PetscFunctionReturn(0);
1562 }
1563 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Finish the MPI_SUM reduction of ones started in Begin, then drop the scratch buffer */
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE; /* cache: subsequent Begin/End pairs skip the communication */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1599 
1600 
1601 /*@C
1602    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1603    Each multi-root is assigned index of the corresponding original root.
1604 
1605    Collective
1606 
1607    Input Arguments:
1608 +  sf - star forest
1609 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1610 
1611    Output Arguments:
1612 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1613 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1614 
1615    Level: developer
1616 
1617    Notes:
1618    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1619 
1620 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1621 @*/
1622 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1623 {
1624   PetscSF             msf;
1625   PetscInt            i, j, k, nroots, nmroots;
1626   PetscErrorCode      ierr;
1627 
1628   PetscFunctionBegin;
1629   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1630   ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
1631   if (nroots) PetscValidIntPointer(degree,2);
1632   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1633   PetscValidPointer(multiRootsOrigNumbering,4);
1634   ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
1635   ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
1636   ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
1637   for (i=0,j=0,k=0; i<nroots; i++) {
1638     if (!degree[i]) continue;
1639     for (j=0; j<degree[i]; j++,k++) {
1640       (*multiRootsOrigNumbering)[k] = i;
1641     }
1642   }
1643 #if defined(PETSC_USE_DEBUG)
1644   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1645 #endif
1646   if (nMultiRoots) *nMultiRoots = nmroots;
1647   PetscFunctionReturn(0);
1648 }
1649 
1650 /*@C
1651    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1652 
1653    Collective
1654 
1655    Input Arguments:
1656 +  sf - star forest
1657 .  unit - data type
1658 -  leafdata - leaf data to gather to roots
1659 
1660    Output Argument:
1661 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1662 
1663    Level: intermediate
1664 
1665 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1666 @*/
1667 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1668 {
1669   PetscErrorCode ierr;
1670   PetscSF        multi;
1671 
1672   PetscFunctionBegin;
1673   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1674   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1675   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1676   ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 /*@C
1681    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1682 
1683    Collective
1684 
1685    Input Arguments:
1686 +  sf - star forest
1687 .  unit - data type
1688 -  leafdata - leaf data to gather to roots
1689 
1690    Output Argument:
1691 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1692 
1693    Level: intermediate
1694 
1695 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1696 @*/
1697 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1698 {
1699   PetscErrorCode ierr;
1700   PetscSF        multi;
1701 
1702   PetscFunctionBegin;
1703   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1704   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1705   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1706   ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1707   PetscFunctionReturn(0);
1708 }
1709 
1710 /*@C
1711    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1712 
1713    Collective
1714 
1715    Input Arguments:
1716 +  sf - star forest
1717 .  unit - data type
1718 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1719 
1720    Output Argument:
1721 .  leafdata - leaf data to be update with personal data from each respective root
1722 
1723    Level: intermediate
1724 
1725 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1726 @*/
1727 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1728 {
1729   PetscErrorCode ierr;
1730   PetscSF        multi;
1731 
1732   PetscFunctionBegin;
1733   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1734   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1735   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1736   ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 /*@C
1741    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1742 
1743    Collective
1744 
1745    Input Arguments:
1746 +  sf - star forest
1747 .  unit - data type
1748 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1749 
1750    Output Argument:
1751 .  leafdata - leaf data to be update with personal data from each respective root
1752 
1753    Level: intermediate
1754 
1755 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1756 @*/
1757 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1758 {
1759   PetscErrorCode ierr;
1760   PetscSF        multi;
1761 
1762   PetscFunctionBegin;
1763   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1764   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1765   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1766   ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
1767   PetscFunctionReturn(0);
1768 }
1769 
1770 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1771 {
1772 #if defined(PETSC_USE_DEBUG)
1773   PetscInt        i, n, nleaves;
1774   const PetscInt *ilocal = NULL;
1775   PetscHSetI      seen;
1776   PetscErrorCode  ierr;
1777 
1778   PetscFunctionBegin;
1779   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
1780   ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
1781   for (i = 0; i < nleaves; i++) {
1782     const PetscInt leaf = ilocal ? ilocal[i] : i;
1783     ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
1784   }
1785   ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
1786   if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1787   ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
1788   PetscFunctionReturn(0);
1789 #else
1790   PetscFunctionBegin;
1791   PetscFunctionReturn(0);
1792 #endif
1793 }
1794 
/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Leaf uniqueness (true star forest) is only verified in debug builds */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* Re-index sfA's remote points by sfB's root space so they can be pushed through sfB.
       Roots of sfB not reached by any leaf of sfA keep (rank,index) = (-1,-1), marking "no path". */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue; /* leaf of sfA outside sfB's root space: unconnected */
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  /* Broadcast each (rank,index) pair through sfB; leafdataB is offset by -minleaf so that
     leaf l of sfB lands at leafdataB[l-minleaf]. PetscSFNode is communicated as MPIU_2INT. */
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* sfBA can reuse contiguous leaf storage (ilocal = NULL) only if sfB used it and every leaf received a valid root */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB; /* ownership of leafdataB transfers to sfBA via PETSC_OWN_POINTER below */
  } else {
    /* Compress: keep only the leaves of sfB whose received root is valid (rank != -1) */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1883 
/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  /* Leaf uniqueness (true star forest) is only verified in debug builds */
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
  /* TODO: Check roots of sfB have degree of 1 */

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
  /* Lay out sfA's remote points over sfB's leaf range [minleaf,maxleaf]; slots with no
     corresponding leaf of sfA keep (rank,index) = (-1,-1), marking "no path" */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; /* outside sfB's leaf range */
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* ilocal=NULL: leaf i of sfA is stored contiguously at index i */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  /* Pre-fill with (-1,-1) so roots of sfB that receive nothing stay marked as unconnected */
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Once we implement the TODO above (check all roots of sfB have degree of 1), we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
   */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,MPI_MAXLOC);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,MPI_MAXLOC);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compress in place: keep only roots of sfB that received a valid (rank,index) pair */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1972 
1973 /*
1974   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
1975 
1976   Input Parameters:
1977 . sf - The global PetscSF
1978 
1979   Output Parameters:
1980 . out - The local PetscSF
1981  */
1982 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
1983 {
1984   MPI_Comm           comm;
1985   PetscMPIInt        myrank;
1986   const PetscInt     *ilocal;
1987   const PetscSFNode  *iremote;
1988   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
1989   PetscSFNode        *liremote;
1990   PetscSF            lsf;
1991   PetscErrorCode     ierr;
1992 
1993   PetscFunctionBegin;
1994   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1995   if (sf->ops->CreateLocalSF) {
1996     ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
1997   } else {
1998     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
1999     ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
2000     ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);
2001 
2002     /* Find out local edges and build a local SF */
2003     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
2004     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2005     ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
2006     ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);
2007 
2008     for (i=j=0; i<nleaves; i++) {
2009       if (iremote[i].rank == (PetscInt)myrank) {
2010         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2011         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2012         liremote[j].index = iremote[i].index;
2013         j++;
2014       }
2015     }
2016     ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
2017     ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
2018     ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
2019     *out = lsf;
2020   }
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2025 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2026 {
2027   PetscErrorCode     ierr;
2028   PetscMemType       rootmtype,leafmtype;
2029 
2030   PetscFunctionBegin;
2031   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2032   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
2033   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
2034   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
2035   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
2036   if (sf->ops->BcastToZero) {
2037     ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
2038   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2039   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
2040   PetscFunctionReturn(0);
2041 }
2042 
2043