xref: /petsc/src/vec/is/sf/interface/sf.c (revision 124bfe34137104d2555f0fd7cbcd22e554ad3a82)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petscctable.h>
4 
#if defined(PETSC_USE_DEBUG)
/* Error out if the graph of (sf) has not been set yet; (arg) is the position of sf
   in the caller's argument list. Expands to a no-op in optimized builds. */
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
13 
14 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};
15 
16 /*@
17    PetscSFCreate - create a star forest communication context
18 
19    Collective
20 
21    Input Arguments:
22 .  comm - communicator on which the star forest will operate
23 
24    Output Arguments:
25 .  sf - new star forest context
26 
27    Options Database Keys:
28 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
29 .  -sf_type window    -Use MPI-3 one-sided window for communication
30 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
31 
32    Level: intermediate
33 
34    Notes:
   When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv,
36    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
37    SFs are optimized and they have better performance than general SFs.
38 
39 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
40 @*/
41 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
42 {
43   PetscErrorCode ierr;
44   PetscSF        b;
45 
46   PetscFunctionBegin;
47   PetscValidPointer(sf,2);
48   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
49 
50   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
51 
52   b->nroots    = -1;
53   b->nleaves   = -1;
54   b->minleaf   = PETSC_MAX_INT;
55   b->maxleaf   = PETSC_MIN_INT;
56   b->nranks    = -1;
57   b->rankorder = PETSC_TRUE;
58   b->ingroup   = MPI_GROUP_NULL;
59   b->outgroup  = MPI_GROUP_NULL;
60   b->graphset  = PETSC_FALSE;
61 
62   *sf = b;
63   PetscFunctionReturn(0);
64 }
65 
66 /*@
67    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
68 
69    Collective
70 
71    Input Arguments:
72 .  sf - star forest
73 
74    Level: advanced
75 
76 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
77 @*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the type-specific implementation release its private data first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Return the graph description to the freshly-created state (matches PetscSFCreate()) */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine/remote above may alias these allocations; free only the owned copies */
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  /* These four arrays are allocated together (PetscMalloc4-style), freed together */
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* Device-side copy of rmine, if one was created */
  if (sf->rmine_d) {cudaError_t err = cudaFree(sf->rmine_d);CHKERRCUDA(err);sf->rmine_d=NULL;}
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  /* Derived objects (multi-SF, root layout) are discarded along with the graph */
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
108 
109 /*@C
110    PetscSFSetType - Set the PetscSF communication implementation
111 
112    Collective on PetscSF
113 
114    Input Parameters:
115 +  sf - the PetscSF context
116 -  type - a known method
117 
118    Options Database Key:
119 .  -sf_type <type> - Sets the method; use -help for a list
120    of available methods (for instance, window, pt2pt, neighbor)
121 
122    Notes:
123    See "include/petscsf.h" for available methods (for instance)
124 +    PETSCSFWINDOW - MPI-2/3 one-sided
125 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
126 
127   Level: intermediate
128 
129 .seealso: PetscSFType, PetscSFCreate()
130 @*/
131 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
132 {
133   PetscErrorCode ierr,(*r)(PetscSF);
134   PetscBool      match;
135 
136   PetscFunctionBegin;
137   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
138   PetscValidCharPointer(type,2);
139 
140   ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
141   if (match) PetscFunctionReturn(0);
142 
143   ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
144   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
145   /* Destroy the previous PetscSF implementation context */
146   if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
147   ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
148   ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
149   ierr = (*r)(sf);CHKERRQ(ierr);
150   PetscFunctionReturn(0);
151 }
152 
153 /*@C
154   PetscSFGetType - Get the PetscSF communication implementation
155 
156   Not Collective
157 
158   Input Parameter:
159 . sf  - the PetscSF context
160 
161   Output Parameter:
162 . type - the PetscSF type name
163 
164   Level: intermediate
165 
166 .seealso: PetscSFSetType(), PetscSFCreate()
167 @*/
168 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
169 {
170   PetscFunctionBegin;
171   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
172   PetscValidPointer(type,2);
173   *type = ((PetscObject)sf)->type_name;
174   PetscFunctionReturn(0);
175 }
176 
177 /*@
178    PetscSFDestroy - destroy star forest
179 
180    Collective
181 
182    Input Arguments:
183 .  sf - address of star forest
184 
185    Level: intermediate
186 
187 .seealso: PetscSFCreate(), PetscSFReset()
188 @*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* NULL pointer means nothing to destroy */
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually destroys the object; earlier callers just NULL their pointer */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  /* Release graph/rank data, then implementation-private data, then the header itself */
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
202 
203 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
204 {
205 #if defined(PETSC_USE_DEBUG)
206   PetscInt           i, nleaves;
207   PetscMPIInt        size;
208   const PetscInt    *ilocal;
209   const PetscSFNode *iremote;
210   PetscErrorCode     ierr;
211 
212   PetscFunctionBegin;
213   if (!sf->graphset) PetscFunctionReturn(0);
214   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
215   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
216   for (i = 0; i < nleaves; i++) {
217     const PetscInt rank = iremote[i].rank;
218     const PetscInt remote = iremote[i].index;
219     const PetscInt leaf = ilocal ? ilocal[i] : i;
220     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
221     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
222     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
223   }
224   PetscFunctionReturn(0);
225 #else
226   PetscFunctionBegin;
227   PetscFunctionReturn(0);
228 #endif
229 }
230 
231 /*@
232    PetscSFSetUp - set up communication structures
233 
234    Collective
235 
236    Input Arguments:
237 .  sf - star forest communication object
238 
239    Level: beginner
240 
241 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
242 @*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  /* Setting up twice is a cheap no-op */
  if (sf->setupcalled) PetscFunctionReturn(0);
  /* Validate graph ranks/indices (debug builds only) */
  ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
  /* Fall back to the default implementation if the user never chose one */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  /* Implementation builds its communication structures here */
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
259 
260 /*@
261    PetscSFSetFromOptions - set PetscSF options using the options database
262 
263    Logically Collective
264 
265    Input Arguments:
266 .  sf - star forest
267 
268    Options Database Keys:
269 +  -sf_type - implementation type, see PetscSFSetType()
270 -  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
271 
272    Level: intermediate
273 
274 .seealso: PetscSFWindowSetSyncType()
275 @*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* NOTE: PetscObjectOptionsBegin() brings PetscOptionsObject into scope for use below */
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Default to the current type if already set, else the basic implementation */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
  /* Give the implementation a chance to process its own options */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
294 
295 /*@
296    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
297 
298    Logically Collective
299 
300    Input Arguments:
301 +  sf - star forest
302 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
303 
304    Level: advanced
305 
306 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
307 @*/
308 PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
309 {
310 
311   PetscFunctionBegin;
312   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
313   PetscValidLogicalCollectiveBool(sf,flg,2);
314   if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
315   sf->rankorder = flg;
316   PetscFunctionReturn(0);
317 }
318 
319 /*@
320    PetscSFSetGraph - Set a parallel star forest
321 
322    Collective
323 
324    Input Arguments:
325 +  sf - star forest
326 .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
327 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
328 .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
329 during setup in debug mode)
330 .  localmode - copy mode for ilocal
331 .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
332 during setup in debug mode)
333 -  remotemode - copy mode for iremote
334 
335    Level: intermediate
336 
337    Notes:
338     In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode
339 
340    Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
341    encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
342    needed
343 
344    Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
345    indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).
346 
347 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
348 @*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  /* Discard any previously set graph before installing the new one */
  ierr = PetscSFReset(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    /* Compute the leaf range and detect whether ilocal is the identity permutation */
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    /* An identity ilocal encodes contiguous storage; drop it (see Developers Note in the manpage) */
    if (contiguous) {
      if (localmode == PETSC_OWN_POINTER) {
        /* We were given ownership, so the now-redundant array must be freed here */
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    /* Contiguous leaves: range is [0,nleaves-1], i.e. empty ([0,-1]) when nleaves==0 */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  /* Store ilocal honoring the requested copy mode; mine_alloc is what PetscSFReset() frees */
  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Caller keeps ownership; nothing for PetscSFReset() to free */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  /* Store iremote with the same copy-mode handling as ilocal above */
  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
429 
430 /*@
431   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
432 
433   Collective
434 
435   Input Parameters:
436 + sf      - The PetscSF
437 . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
438 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
439 
440   Notes:
441   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
442   n and N are local and global sizes of x respectively.
443 
444   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
445   sequential vectors y on all ranks.
446 
447   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
448   sequential vector y on rank 0.
449 
450   In above cases, entries of x are roots and entries of y are leaves.
451 
452   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
453   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
454   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
456   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
457 
458   In this case, roots and leaves are symmetric.
459 
460   Level: intermediate
461  @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* map is ignored for ALLTOALL (per the manpage); build a layout with size roots per rank */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    /* Every rank receives all N roots as leaves */
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves (all N of them); other ranks get the empty range [0,-1] */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* Each rank has one leaf and one root per peer rank */
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
527 
528 /*@
529    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
530 
531    Collective
532 
   Input Arguments:
.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf

   Level: advanced
540 
541    Notes:
542    All roots must have degree 1.
543 
544    The local space may be a permutation, but cannot be sparse.
545 
546 .seealso: PetscSFSetGraph()
547 @*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf slot names itself as a (rank,index) pair */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Roots start invalid; any root not reached by a leaf keeps rank == -1 */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's identity onto its root; since all roots have degree 1
     (precondition in the manpage), each root receives at most one leaf */
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* dense: contiguous leaf storage in the inverse */
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* newilocal ownership transfers to the inverse SF; roots is copied and freed below */
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
597 
598 /*@
599    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
600 
601    Collective
602 
603    Input Arguments:
604 +  sf - communication object to duplicate
605 -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
606 
607    Output Arguments:
608 .  newsf - new communication object
609 
610    Level: beginner
611 
612 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
613 @*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  /* Propagate the type (if one has been set) before copying any graph data */
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graphs are deep-copied */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graphs are reconstructed from the layout and pattern */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Let the implementation duplicate its private data */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
641 
642 /*@C
643    PetscSFGetGraph - Get the graph specifying a parallel star forest
644 
645    Not Collective
646 
647    Input Arguments:
648 .  sf - star forest
649 
650    Output Arguments:
651 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
652 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
653 .  ilocal - locations of leaves in leafdata buffers
654 -  iremote - remote locations of root vertices for each leaf on the current process
655 
656    Notes:
657    We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
658 
659    When called from Fortran, the returned iremote array is a copy and must deallocated after use. Consequently, if you
660    want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.
661 
662    Level: intermediate
663 
664 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
665 @*/
666 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
667 {
668   PetscErrorCode ierr;
669 
670   PetscFunctionBegin;
671   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
672   if (sf->ops->GetGraph) {
673     ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
674   } else {
675     if (nroots) *nroots = sf->nroots;
676     if (nleaves) *nleaves = sf->nleaves;
677     if (ilocal) *ilocal = sf->mine;
678     if (iremote) *iremote = sf->remote;
679   }
680   PetscFunctionReturn(0);
681 }
682 
683 /*@
684    PetscSFGetLeafRange - Get the active leaf ranges
685 
686    Not Collective
687 
688    Input Arguments:
689 .  sf - star forest
690 
691    Output Arguments:
692 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
693 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
694 
695    Level: developer
696 
697 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
698 @*/
699 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
700 {
701 
702   PetscFunctionBegin;
703   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
704   PetscSFCheckGraphSet(sf,1);
705   if (minleaf) *minleaf = sf->minleaf;
706   if (maxleaf) *maxleaf = sf->maxleaf;
707   PetscFunctionReturn(0);
708 }
709 
710 /*@C
711    PetscSFViewFromOptions - View from Options
712 
713    Collective on PetscSF
714 
715    Input Parameters:
716 +  A - the star forest
717 .  obj - Optional object
718 -  name - command line option
719 
720    Level: intermediate
721 .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
722 @*/
723 PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
724 {
725   PetscErrorCode ierr;
726 
727   PetscFunctionBegin;
728   PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
729   ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
730   PetscFunctionReturn(0);
731 }
732 
733 /*@C
734    PetscSFView - view a star forest
735 
736    Collective
737 
738    Input Arguments:
739 +  sf - star forest
740 -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
741 
742    Level: beginner
743 
744 .seealso: PetscSFCreate(), PetscSFSetGraph()
745 @*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to the stdout viewer on the SF's communicator */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Set up first so the per-rank tables printed below are available */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    /* Implementation-specific output first */
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      /* Synchronized printing so per-rank output appears in rank order */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      /* One line per leaf: local location <- (remote rank, remote index); NULL mine means contiguous */
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        /* Detailed view: edge lists grouped by remote rank, printed in sorted rank order */
        PetscMPIInt *tmpranks,*perm;
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
803 
804 /*@C
805    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
806 
807    Not Collective
808 
809    Input Arguments:
810 .  sf - star forest
811 
812    Output Arguments:
813 +  nranks - number of ranks referenced by local part
814 .  ranks - array of ranks
815 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
816 .  rmine - concatenated array holding local indices referencing each remote rank
817 -  rremote - concatenated array holding remote indices referenced for each remote rank
818 
819    Level: developer
820 
821 .seealso: PetscSFGetLeafRanks()
822 @*/
823 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
824 {
825   PetscErrorCode ierr;
826 
827   PetscFunctionBegin;
828   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
829   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
830   if (sf->ops->GetRootRanks) {
831     ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
832   } else {
833     /* The generic implementation */
834     if (nranks)  *nranks  = sf->nranks;
835     if (ranks)   *ranks   = sf->ranks;
836     if (roffset) *roffset = sf->roffset;
837     if (rmine)   *rmine   = sf->rmine;
838     if (rremote) *rremote = sf->rremote;
839   }
840   PetscFunctionReturn(0);
841 }
842 
843 /*@C
844    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
845 
846    Not Collective
847 
848    Input Arguments:
849 .  sf - star forest
850 
851    Output Arguments:
852 +  niranks - number of leaf ranks referencing roots on this process
853 .  iranks - array of ranks
854 .  ioffset - offset in irootloc for each rank (length niranks+1)
855 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
856 
857    Level: developer
858 
859 .seealso: PetscSFGetRootRanks()
860 @*/
861 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
862 {
863   PetscErrorCode ierr;
864 
865   PetscFunctionBegin;
866   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
867   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
868   if (sf->ops->GetLeafRanks) {
869     ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
870   } else {
871     PetscSFType type;
872     ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
873     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
874   }
875   PetscFunctionReturn(0);
876 }
877 
878 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
879   PetscInt i;
880   for (i=0; i<n; i++) {
881     if (needle == list[i]) return PETSC_TRUE;
882   }
883   return PETSC_FALSE;
884 }
885 
/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count how many of my leaves reference each remote rank: the table maps rank -> leaf count */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank (converted back to 0-based when the table is drained below) */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  /* rmine/rremote are sized by nleaves: each leaf lands in exactly one per-rank segment */
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  /* Drain the table into parallel arrays: ranks[i] is a remote rank, rcount[i] its leaf count */
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished ranks of dgroup into the SF communicator's numbering (groupranks[]) */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt    tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort the two partitions independently so each is ordered by rank */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Turn per-rank counts into CSR-style offsets; rcount[] is zeroed for reuse as a fill cursor below */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Bucket every leaf into its rank's segment of rmine/rremote */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit: reuse irank while consecutive leaves reference the same remote rank */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks, trying the distinguished block first */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
989 
990 /*@C
991    PetscSFGetGroups - gets incoming and outgoing process groups
992 
993    Collective
994 
995    Input Argument:
996 .  sf - star forest
997 
998    Output Arguments:
999 +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1000 -  outgoing - group of destination processes for outgoing edges (roots that I reference)
1001 
1002    Level: developer
1003 
1004 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1005 @*/
1006 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1007 {
1008   PetscErrorCode ierr;
1009   MPI_Group      group = MPI_GROUP_NULL;
1010 
1011   PetscFunctionBegin;
1012   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups");
1013   if (sf->ingroup == MPI_GROUP_NULL) {
1014     PetscInt       i;
1015     const PetscInt *indegree;
1016     PetscMPIInt    rank,*outranks,*inranks;
1017     PetscSFNode    *remote;
1018     PetscSF        bgcount;
1019 
1020     /* Compute the number of incoming ranks */
1021     ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
1022     for (i=0; i<sf->nranks; i++) {
1023       remote[i].rank  = sf->ranks[i];
1024       remote[i].index = 0;
1025     }
1026     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
1027     ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1028     ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
1029     ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
1030 
1031     /* Enumerate the incoming ranks */
1032     ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
1033     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
1034     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1035     ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
1036     ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
1037     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
1038     ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
1039     ierr = MPI_Group_free(&group);CHKERRQ(ierr);
1040     ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
1041     ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
1042   }
1043   *incoming = sf->ingroup;
1044 
1045   if (sf->outgroup == MPI_GROUP_NULL) {
1046     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
1047     ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
1048     ierr = MPI_Group_free(&group);CHKERRQ(ierr);
1049   }
1050   *outgoing = sf->outgroup;
1051   PetscFunctionReturn(0);
1052 }
1053 
/*@
   PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  /* The multi-SF is built lazily and cached on sf; subsequent calls return the cached one */
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* inoffset[] is the prefix sum of root degrees: the start of each root's slot range in the
       expanded (multi) root space */
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Atomically fetch-and-add 1 per incoming leaf: each leaf receives (in outoffset) a unique
       slot index within its root's range, while inoffset[] is advanced past each range */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
#if 0
#if defined(PETSC_USE_DEBUG)                                 /* Check that the expected number of increments occurred */
    for (i=0; i<sf->nroots; i++) {
      if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
    }
#endif
#endif
    /* Rewire each leaf to point at its assigned slot in the expanded root space */
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Deliver each leaf's owning rank to its slot so roots can see who occupies each slot */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Broadcast the rank-sorted slot numbers back to the leaves and rebuild the graph */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1147 
1148 /*@C
1149    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
1150 
1151    Collective
1152 
1153    Input Arguments:
1154 +  sf - original star forest
1155 .  nselected  - number of selected roots on this process
1156 -  selected   - indices of the selected roots on this process
1157 
1158    Output Arguments:
1159 .  newsf - new star forest
1160 
1161    Level: advanced
1162 
1163    Note:
1164    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1165    be done by calling PetscSFGetGraph().
1166 
1167 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1168 @*/
1169 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1170 {
1171   PetscInt          i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal;
1172   const PetscSFNode *iremote;
1173   PetscSFNode       *new_iremote;
1174   PetscSF           tmpsf;
1175   MPI_Comm          comm;
1176   PetscErrorCode    ierr;
1177 
1178   PetscFunctionBegin;
1179   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1180   PetscSFCheckGraphSet(sf,1);
1181   if (nselected) PetscValidPointer(selected,3);
1182   PetscValidPointer(newsf,4);
1183 
1184   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1185 
1186   /* Uniq selected[] and put results in roots[] */
1187   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1188   ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr);
1189   ierr = PetscArraycpy(roots,selected,nselected);CHKERRQ(ierr);
1190   ierr = PetscSortedRemoveDupsInt(&nselected,roots);CHKERRQ(ierr);
1191   if (nselected && (roots[0] < 0 || roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots);
1192 
1193   if (sf->ops->CreateEmbeddedSF) {
1194     ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);CHKERRQ(ierr);
1195   } else {
1196     /* A generic version of creating embedded sf. Note that we called PetscSFSetGraph() twice, which is certainly expensive */
1197     /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */
1198     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);CHKERRQ(ierr);
1199     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);CHKERRQ(ierr);
1200     ierr = PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);CHKERRQ(ierr);
1201     ierr = PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr);
1202     for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1;
1203     ierr = PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
1204     ierr = PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
1205     ierr = PetscSFDestroy(&tmpsf);CHKERRQ(ierr);
1206 
1207     /* Build newsf with leaves that are still connected */
1208     connected_leaves = 0;
1209     for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i];
1210     ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
1211     ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
1212     for (i=0, n=0; i<nleaves; ++i) {
1213       if (leafdata[i]) {
1214         new_ilocal[n]        = sf->mine ? sf->mine[i] : i;
1215         new_iremote[n].rank  = sf->remote[i].rank;
1216         new_iremote[n].index = sf->remote[i].index;
1217         ++n;
1218       }
1219     }
1220 
1221     if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves);
1222     ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr);
1223     ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr);
1224     ierr = PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1225     ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
1226   }
1227   ierr = PetscFree(roots);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 /*@C
1232   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1233 
1234   Collective
1235 
1236   Input Arguments:
1237 + sf - original star forest
1238 . nselected  - number of selected leaves on this process
1239 - selected   - indices of the selected leaves on this process
1240 
1241   Output Arguments:
1242 .  newsf - new star forest
1243 
1244   Level: advanced
1245 
1246 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1247 @*/
1248 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1249 {
1250   const PetscSFNode *iremote;
1251   PetscSFNode       *new_iremote;
1252   const PetscInt    *ilocal;
1253   PetscInt          i,nroots,*leaves,*new_ilocal;
1254   MPI_Comm          comm;
1255   PetscErrorCode    ierr;
1256 
1257   PetscFunctionBegin;
1258   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1259   PetscSFCheckGraphSet(sf,1);
1260   if (nselected) PetscValidPointer(selected,3);
1261   PetscValidPointer(newsf,4);
1262 
1263   /* Uniq selected[] and put results in leaves[] */
1264   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1265   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1266   ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1267   ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1268   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1269 
1270   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1271   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1272     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1273   } else {
1274     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1275     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1276     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1277     for (i=0; i<nselected; ++i) {
1278       const PetscInt l     = leaves[i];
1279       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1280       new_iremote[i].rank  = iremote[l].rank;
1281       new_iremote[i].index = iremote[l].index;
1282     }
1283     ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr);
1284     ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr);
1285     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1286   }
1287   ierr = PetscFree(leaves);CHKERRQ(ierr);
1288   PetscFunctionReturn(0);
1289 }
1290 
/*@C
   PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()

   Collective on PetscSF

   Input Arguments:
+  sf - star forest on which to communicate
.  unit - data type associated with each node
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Determine whether each buffer lives in host or device memory so the implementation can
     choose matching pack/unpack paths */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /*  Shall we assume rootdata, leafdata are ready to use (instead of being computed by some asynchronous kernels)?
    To be similar to MPI, I'd like to have this assumption, since MPI does not have a concept of stream.
    But currently this assumption is not enforced in Petsc. To be safe, I do synchronization here. Otherwise, if
    we do not sync now and call the Pack kernel directly on the default NULL stream (assume petsc objects are also
    computed on it), we have to sync the NULL stream before calling MPI routines. So, it looks a cudaDeviceSynchronize
    is inevitable. We do it now and put pack/unpack kernels to non-NULL streams.
   */
  if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
#endif
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1334 
/*@C
   PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  rootdata - buffer to broadcast
-  op - operation to use for reduction

   Output Arguments:
.  leafdata - buffer to be reduced with values from each leaf's respective root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFReduceEnd()
@*/
PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Determine whether each buffer lives in host or device memory; must match the buffers
     passed to the corresponding PetscSFBcastAndOpBegin() */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1368 
1369 /*@C
1370    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1371 
1372    Collective
1373 
1374    Input Arguments:
1375 +  sf - star forest
1376 .  unit - data type
1377 .  leafdata - values to reduce
1378 -  op - reduction operation
1379 
1380    Output Arguments:
1381 .  rootdata - result of reduction of values from all leaves of each root
1382 
1383    Level: intermediate
1384 
1385 .seealso: PetscSFBcastBegin()
1386 @*/
1387 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1388 {
1389   PetscErrorCode ierr;
1390   PetscMemType   rootmtype,leafmtype;
1391 
1392   PetscFunctionBegin;
1393   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1394   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1395   ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1396   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1397   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1398 #if defined(PETSC_HAVE_CUDA)
1399   if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
1400 #endif
1401   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1402   ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1403   PetscFunctionReturn(0);
1404 }
1405 
/*@C
   PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - values to reduce
-  op - reduction operation

   Output Arguments:
.  rootdata - result of reduction of values from all leaves of each root

   Level: intermediate

.seealso: PetscSFSetGraph(), PetscSFBcastEnd()
@*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Determine whether each buffer lives in host or device memory; must match the buffers
     passed to the corresponding PetscSFReduceBegin() */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1439 
/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Determine whether each buffer lives in host or device memory */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  /* The implementations only handle leafdata and leafupdate residing in the same memory space */
  if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
#if defined(PETSC_HAVE_CUDA)
  /* Ensure any asynchronous kernels producing the buffers are done before communication starts;
     see the rationale in PetscSFBcastAndOpBegin() */
  if (rootmtype == PETSC_MEMTYPE_DEVICE || leafmtype == PETSC_MEMTYPE_DEVICE) {cudaError_t err = cudaDeviceSynchronize();CHKERRCUDA(err);}
#endif
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1485 
/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Determine whether each buffer lives in host or device memory; must match the buffers
     passed to the corresponding PetscSFFetchAndOpBegin() */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  /* The implementations only handle leafdata and leafupdate residing in the same memory space */
  if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1522 
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  /* Degrees are computed lazily and cached on the SF; skip everything if a
     previous Begin/End pair already filled sf->degree */
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1;  /* TODO: We should use PetscSFGetLeafRange() */
    /* sf->degree non-NULL here means a Begin was issued without a matching End */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Summing a 1 from every leaf into its root yields the root's degree */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  /* Result is only available after PetscSFComputeDegreeEnd() */
  *degree = NULL;
  PetscFunctionReturn(0);
}
1561 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means End was called without Begin */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
    /* The all-ones leaf buffer is only needed during the reduction */
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE;
  }
  /* Hand back the cached array; ownership stays with the SF */
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1597 
1598 
1599 /*@C
1600    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1601    Each multi-root is assigned index of the corresponding original root.
1602 
1603    Collective
1604 
1605    Input Arguments:
1606 +  sf - star forest
1607 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1608 
1609    Output Arguments:
1610 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1611 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1612 
1613    Level: developer
1614 
1615    Notes:
1616    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1617 
1618 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1619 @*/
1620 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1621 {
1622   PetscSF             msf;
1623   PetscInt            i, j, k, nroots, nmroots;
1624   PetscErrorCode      ierr;
1625 
1626   PetscFunctionBegin;
1627   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1628   ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
1629   if (nroots) PetscValidIntPointer(degree,2);
1630   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1631   PetscValidPointer(multiRootsOrigNumbering,4);
1632   ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
1633   ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
1634   ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
1635   for (i=0,j=0,k=0; i<nroots; i++) {
1636     if (!degree[i]) continue;
1637     for (j=0; j<degree[i]; j++,k++) {
1638       (*multiRootsOrigNumbering)[k] = i;
1639     }
1640   }
1641 #if defined(PETSC_USE_DEBUG)
1642   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1643 #endif
1644   if (nMultiRoots) *nMultiRoots = nmroots;
1645   PetscFunctionReturn(0);
1646 }
1647 
/*@C
   PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Gather is a reduction with MPIU_REPLACE over the multi-SF, where each root
     provides one slot per incoming leaf so contributions do not overwrite each other */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1677 
/*@C
   PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  leafdata - leaf data to gather to roots

   Output Argument:
.  multirootdata - root buffer to gather into, amount of space per root is equal to its degree

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Complete the MPIU_REPLACE reduction on the same multi-SF used by PetscSFGatherBegin() */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1707 
/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be update with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Scatter is a broadcast over the multi-SF: each leaf receives the entry of
     its personal slot at the corresponding multi-root */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1737 
/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be update with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Complete the broadcast on the same multi-SF used by PetscSFScatterBegin() */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1767 
1768 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1769 {
1770 #if defined(PETSC_USE_DEBUG)
1771   PetscInt        i, n, nleaves;
1772   const PetscInt *ilocal = NULL;
1773   PetscHSetI      seen;
1774   PetscErrorCode  ierr;
1775 
1776   PetscFunctionBegin;
1777   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
1778   ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
1779   for (i = 0; i < nleaves; i++) {
1780     const PetscInt leaf = ilocal ? ilocal[i] : i;
1781     ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
1782   }
1783   ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
1784   if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1785   ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
1786   PetscFunctionReturn(0);
1787 #else
1788   PetscFunctionBegin;
1789   PetscFunctionReturn(0);
1790 #endif
1791 }
1792 
/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  /* Re-index sfA's remote roots by sfA leaf number so they can serve as root data of sfB.
     Slots that no sfA leaf maps to keep the sentinel rank/index of -1, marking sfB roots
     with no incoming path from sfA. sfA leaves outside sfB's root space are dropped. */
  if (localPointsA) {
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue;
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  /* Push each sfB leaf's associated sfA root through sfB; leafdataB-minleaf lets the
     buffer be indexed directly by absolute sfB leaf numbers in [minleaf,maxleaf] */
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: sfB uses contiguous leaf storage and every leaf received a valid root,
     so leafdataB can be handed to the new SF as-is (ownership transferred below) */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    /* Compact: keep only the sfB leaves whose received root is valid (rank != -1) */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1881 
/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
  /* TODO: Check roots of sfB have degree of 1 */

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
  /* Re-index sfA's remote roots by leaf number over sfB's leaf range [minleaf,maxleaf],
     so the array can serve as sfB leaf data. Unfilled slots keep the rank/index -1
     sentinel, marking positions with no sfA edge. */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* localPointsA == NULL means sfA's leaves are the contiguous range 0..numLeavesA-1 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  /* Initialize with the -1 sentinel so sfB roots receiving no contribution can be dropped below */
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Once we implement the TODO above (check all roots of sfB have degree of 1), we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
   */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,MPI_MAXLOC);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,MPI_MAXLOC);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only sfB roots that received a valid sfA root (rank != -1);
     safe because numLeavesBA <= i throughout the scan */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1970 
1971 /*
1972   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
1973 
1974   Input Parameters:
1975 . sf - The global PetscSF
1976 
1977   Output Parameters:
1978 . out - The local PetscSF
1979  */
1980 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
1981 {
1982   MPI_Comm           comm;
1983   PetscMPIInt        myrank;
1984   const PetscInt     *ilocal;
1985   const PetscSFNode  *iremote;
1986   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
1987   PetscSFNode        *liremote;
1988   PetscSF            lsf;
1989   PetscErrorCode     ierr;
1990 
1991   PetscFunctionBegin;
1992   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1993   if (sf->ops->CreateLocalSF) {
1994     ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
1995   } else {
1996     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
1997     ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1998     ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);
1999 
2000     /* Find out local edges and build a local SF */
2001     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
2002     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2003     ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
2004     ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);
2005 
2006     for (i=j=0; i<nleaves; i++) {
2007       if (iremote[i].rank == (PetscInt)myrank) {
2008         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2009         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2010         liremote[j].index = iremote[i].index;
2011         j++;
2012       }
2013     }
2014     ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
2015     ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
2016     ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
2017     *out = lsf;
2018   }
2019   PetscFunctionReturn(0);
2020 }
2021 
/* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode     ierr;
  PetscMemType       rootmtype,leafmtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* NOTE(review): logged under PETSCSF_BcastAndOpBegin since there is no dedicated event for BcastToZero */
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  /* Unlike the public Bcast routines, this op is optional for SF implementations */
  if (sf->ops->BcastToZero) {
    ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2040 
2041