xref: /petsc/src/vec/is/sf/interface/sf.c (revision b8dee149cebb0df1d9ad7b1e4169e24a3c108940)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petscctable.h>
3 
#if defined(PETSC_USE_DEBUG)
/* Debug-only guard: raise an error if PetscSFSetGraph() has not yet been called on sf
   (arg is the argument number of sf in the calling function, used in the message). */
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
/* In optimized builds the check compiles to nothing */
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
12 
13 const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};
14 
15 /*@
16    PetscSFCreate - create a star forest communication context
17 
18    Collective
19 
20    Input Arguments:
21 .  comm - communicator on which the star forest will operate
22 
23    Output Arguments:
24 .  sf - new star forest context
25 
26    Level: intermediate
27 
28 .seealso: PetscSFSetGraph(), PetscSFDestroy()
29 @*/
30 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
31 {
32   PetscErrorCode ierr;
33   PetscSF        b;
34 
35   PetscFunctionBegin;
36   PetscValidPointer(sf,2);
37   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
38 
39   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
40 
41   b->nroots    = -1;
42   b->nleaves   = -1;
43   b->minleaf   = PETSC_MAX_INT;
44   b->maxleaf   = PETSC_MIN_INT;
45   b->nranks    = -1;
46   b->rankorder = PETSC_TRUE;
47   b->ingroup   = MPI_GROUP_NULL;
48   b->outgroup  = MPI_GROUP_NULL;
49   b->graphset  = PETSC_FALSE;
50 
51   *sf = b;
52   PetscFunctionReturn(0);
53 }
54 
/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the type-specific implementation release its own resources first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Restore the sentinel values used by PetscSFCreate() */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  /* mine/remote may alias mine_alloc/remote_alloc or caller memory (PETSC_USE_POINTER);
     only the _alloc pointers are owned by the SF and freed here */
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  /* Rank tables built by PetscSFSetUpRanks() are allocated as one PetscMalloc4 */
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  /* Cached MPI groups from PetscSFGetGroups() */
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  /* The multi-SF is derived from the graph, so it must be rebuilt after a reset */
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
93 
/*@C
   PetscSFSetType - Set the PetscSF communication implementation

   Collective on PetscSF

   Input Parameters:
+  sf - the PetscSF context
-  type - a known method

   Options Database Key:
.  -sf_type <type> - Sets the method; use -help for a list
   of available methods (for instance, window, pt2pt, neighbor)

   Notes:
   See "include/petscsf.h" for available methods (for instance)
+    PETSCSFWINDOW - MPI-2/3 one-sided
-    PETSCSFBASIC - basic implementation using MPI-1 two-sided

  Level: intermediate

.seealso: PetscSFType, PetscSFCreate()
@*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* No-op if the SF already has the requested type */
  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  /* Look up the constructor registered under this type name */
  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  /* Clear the vtable so the new constructor installs its function pointers on a clean slate */
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
137 
138 /*@C
139   PetscSFGetType - Get the PetscSF communication implementation
140 
141   Not Collective
142 
143   Input Parameter:
144 . sf  - the PetscSF context
145 
146   Output Parameter:
147 . type - the PetscSF type name
148 
149   Level: intermediate
150 
151 .seealso: PetscSFSetType(), PetscSFCreate()
152 @*/
153 PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
154 {
155   PetscFunctionBegin;
156   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
157   PetscValidPointer(type,2);
158   *type = ((PetscObject)sf)->type_name;
159   PetscFunctionReturn(0);
160 }
161 
/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Destroying a NULL object is a no-op, matching other PETSc destroy routines */
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Drop one reference; only the last reference actually frees the object */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  /* Release graph data first, then the implementation context, then the header itself */
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
187 
/*@
   PetscSFSetUp - set up communication structures

   Collective

   Input Arguments:
.  sf - star forest communication object

   Level: beginner

.seealso: PetscSFSetFromOptions(), PetscSFSetType()
@*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  /* Idempotent: repeated calls after the first are no-ops until the SF is reset */
  if (sf->setupcalled) PetscFunctionReturn(0);
  /* Default to the basic (MPI-1 two-sided) implementation if no type was chosen */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
215 
/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
+  -sf_type - implementation type, see PetscSFSetType()
-  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise

   Level: intermediate

.seealso: PetscSFWindowSetSyncType()
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Use the current type (if any) as the default shown in -help */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
  /* PetscOptionsObject is declared by the PetscObjectOptionsBegin() macro above */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
250 
/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Arguments:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF bakes in the ordering when it is first built, so it is too late to change afterwards */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}
274 
/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  /* Discard any previous graph and derived data before installing the new one */
  ierr = PetscSFReset(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    /* One pass computes the leaf range and detects the identity permutation */
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* Identity index list is redundant: represent contiguous storage as ilocal == NULL.
         With PETSC_OWN_POINTER the SF owns the array, so it must be freed here. */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    /* Contiguous leaves [0,nleaves); note maxleaf = -1 when nleaves == 0 */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  if (ilocal) {
    /* mine_alloc is the owned buffer (or NULL); mine is what the SF reads from */
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Caller retains ownership; the SF merely borrows the array */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  /* Same ownership scheme for the remote (rank,index) pairs */
  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
380 
381 /*@
382    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
383 
384    Collective
385 
386    Input Arguments:
387 .  sf - star forest to invert
388 
389    Output Arguments:
390 .  isf - inverse of sf
391 
392    Level: advanced
393 
394    Notes:
395    All roots must have degree 1.
396 
397    The local space may be a permutation, but cannot be sparse.
398 
399 .seealso: PetscSFSetGraph()
400 @*/
401 PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
402 {
403   PetscErrorCode ierr;
404   PetscMPIInt    rank;
405   PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
406   const PetscInt *ilocal;
407   PetscSFNode    *roots,*leaves;
408 
409   PetscFunctionBegin;
410   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
411   PetscSFCheckGraphSet(sf,1);
412   PetscValidPointer(isf,2);
413 
414   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
415   maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
416 
417   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
418   ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
419   for (i=0; i<maxlocal; i++) {
420     leaves[i].rank  = rank;
421     leaves[i].index = i;
422   }
423   for (i=0; i <nroots; i++) {
424     roots[i].rank  = -1;
425     roots[i].index = -1;
426   }
427   ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
428   ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
429 
430   /* Check whether our leaves are sparse */
431   for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
432   if (count == nroots) newilocal = NULL;
433   else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
434     ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
435     for (i=0,count=0; i<nroots; i++) {
436       if (roots[i].rank >= 0) {
437         newilocal[count]   = i;
438         roots[count].rank  = roots[i].rank;
439         roots[count].index = roots[i].index;
440         count++;
441       }
442     }
443   }
444 
445   ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
446   ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
447   ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
448   PetscFunctionReturn(0);
449 }
450 
/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  /* New SF on the same communicator, with the same implementation type if one was set */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscInt          nroots,nleaves;
    const PetscInt    *ilocal;
    const PetscSFNode *iremote;
    PetscSFCheckGraphSet(sf,1);
    /* Deep-copy the graph so the duplicate is independent of the original's arrays */
    ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
  }
  /* Give the implementation a chance to copy its private data (e.g. for PETSCSF_DUPLICATE_RANKS) */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
490 
/*@C
   PetscSFGetGraph - Get the graph specifying a parallel star forest

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers
-  iremote - remote locations of root vertices for each leaf on the current process

   Notes:
   We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* An implementation may store the graph itself; otherwise return the cached base-class fields */
  if (sf->ops->GetGraph) {
    ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
  } else {
    /* Each output is optional: pass NULL for any value not needed */
    if (nroots) *nroots = sf->nroots;
    if (nleaves) *nleaves = sf->nleaves;
    if (ilocal) *ilocal = sf->mine;
    if (iremote) *iremote = sf->remote;
  }
  PetscFunctionReturn(0);
}
528 
/*@
   PetscSFGetLeafRange - Get the active leaf ranges

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  minleaf - minimum active leaf on this process
-  maxleaf - maximum active leaf on this process

   Level: developer

.seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  /* Ranges are computed by PetscSFSetGraph(); either output may be NULL */
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}
555 
/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is given */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Setup is needed so rank tables (nranks/roffset/rmine/rremote) are available for detailed output */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (!sf->graphset) {
      /* Nothing more to show; pop the tab pushed above before returning */
      ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    /* Synchronized printing interleaves per-rank output in rank order */
    ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      /* NULL sf->mine means contiguous leaves: local index equals i */
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* Sort a copy of the rank list so the detailed edge listing appears in ascending rank order */
      PetscMPIInt *tmpranks,*perm;
      ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
      ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
      for (i=0; i<sf->nranks; i++) perm[i] = i;
      ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
      for (ii=0; ii<sf->nranks; ii++) {
        i = perm[ii];
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
        for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
        }
      }
      ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
624 
/*@C
   PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  nranks - number of ranks referenced by local part
.  ranks - array of ranks
.  roffset - offset in rmine/rremote for each rank (length nranks+1)
.  rmine - concatenated array holding local indices referencing each remote rank
-  rremote - concatenated array holding remote indices referenced for each remote rank

   Level: developer

.seealso: PetscSFGetLeafRanks()
@*/
PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Rank tables are built during setup (see PetscSFSetUpRanks()) */
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  if (sf->ops->GetRootRanks) {
    ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
  } else {
    /* The generic implementation */
    if (nranks)  *nranks  = sf->nranks;
    if (ranks)   *ranks   = sf->ranks;
    if (roffset) *roffset = sf->roffset;
    if (rmine)   *rmine   = sf->rmine;
    if (rremote) *rremote = sf->rremote;
  }
  PetscFunctionReturn(0);
}
663 
/*@C
   PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

   Not Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
+  niranks - number of leaf ranks referencing roots on this process
.  iranks - array of ranks
.  ioffset - offset in irootloc for each rank (length niranks+1)
-  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
  /* Unlike PetscSFGetRootRanks(), there is no generic fallback: the implementation must provide this */
  if (sf->ops->GetLeafRanks) {
    ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
  } else {
    PetscSFType type;
    ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
  }
  PetscFunctionReturn(0);
}
698 
699 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
700   PetscInt i;
701   for (i=0; i<n; i++) {
702     if (needle == list[i]) return PETSC_TRUE;
703   }
704   return PETSC_FALSE;
705 }
706 
/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  /* Count, per remote rank, how many of our leaves reference it, using a hash table keyed by rank */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    /* keys are shifted by one — presumably because PetscTable cannot store a zero key; TODO confirm */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  /* ranks/roffset/rmine/rremote are freed together in PetscSFReset() via PetscFree4 */
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    /* Translate the members of dgroup into rank numbers on the SF's communicator */
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt    tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition independently so lookups below can binary-search within a partition */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR-style offsets: roffset[i]..roffset[i+1] indexes the edges going to sf->ranks[i] */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0; /* reused below as a per-rank fill cursor */
  }
  /* Scatter each leaf's local and remote indices into its rank's segment */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
810 
811 /*@C
812    PetscSFGetGroups - gets incoming and outgoing process groups
813 
814    Collective
815 
816    Input Argument:
817 .  sf - star forest
818 
819    Output Arguments:
820 +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
821 -  outgoing - group of destination processes for outgoing edges (roots that I reference)
822 
823    Level: developer
824 
825 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
826 @*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups");
  /* Both groups are computed lazily on first request and cached on the SF (freed by PetscSFDestroy()/PetscSFReset()) */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    /* Build an auxiliary SF with one root per process; each rank we reference becomes a leaf
       pointing at that rank's single root, so the root degree counts the processes that reference us */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);

    /* Enumerate the incoming ranks */
    /* Every process sends its own rank to the roots it references; the gather collects, at our
       root, the ranks of all processes whose leaves point here (indegree[0] of them) */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  /* The outgoing group is simply the (already known) list of ranks our leaves reference */
  if (sf->outgroup == MPI_GROUP_NULL) {
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
874 
875 /*@
   PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
877 
878    Collective
879 
880    Input Argument:
881 .  sf - star forest that may contain roots with 0 or with more than 1 vertex
882 
883    Output Arguments:
884 .  multi - star forest with split roots, such that each root has degree exactly 1
885 
886    Level: developer
887 
888    Notes:
889 
890    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
891    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
892    edge, it is a candidate for future optimization that might involve its removal.
893 
894 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
895 @*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {             /* Build lazily on first request and cache on the SF */
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* inoffset[] is the prefix sum of root degrees: inoffset[i] is the first multi-root slot owned by root i */
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add 1 at each root: every leaf fetches a distinct slot index (outoffset) within its root's range */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
#if 0
#if defined(PETSC_USE_DEBUG)                                 /* Check that the expected number of increments occurred */
    for (i=0; i<sf->nroots; i++) {
      if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
    }
#endif
#endif
    /* Each leaf of the multi-SF points at its private slot rather than at the shared root */
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      /* The fetch-and-op assigned slots in arrival order; renumber so that, within each root,
         slots are ordered by the rank of the contributing process (deterministic layout) */
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      /* Collect, at each multi-root slot, the rank of the process whose leaf occupies it */
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell each leaf its new, rank-sorted slot index, then rebuild the multi-SF graph with it */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
968 
969 /*@C
970    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
971 
972    Collective
973 
974    Input Arguments:
975 +  sf - original star forest
976 .  nselected  - number of selected roots on this process
977 -  selected   - indices of the selected roots on this process
978 
979    Output Arguments:
980 .  newsf - new star forest
981 
982    Level: advanced
983 
984    Note:
985    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
986    be done by calling PetscSFGetGraph().
987 
988 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
989 @*/
990 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
991 {
992   PetscInt          i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal;
993   const PetscSFNode *iremote;
994   PetscSFNode       *new_iremote;
995   PetscSF           tmpsf;
996   MPI_Comm          comm;
997   PetscErrorCode    ierr;
998 
999   PetscFunctionBegin;
1000   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1001   PetscSFCheckGraphSet(sf,1);
1002   if (nselected) PetscValidPointer(selected,3);
1003   PetscValidPointer(newsf,4);
1004 
1005   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1006 
1007   /* Uniq selected[] and put results in roots[] */
1008   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1009   ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr);
1010   ierr = PetscMemcpy(roots,selected,sizeof(PetscInt)*nselected);CHKERRQ(ierr);
1011   ierr = PetscSortedRemoveDupsInt(&nselected,roots);CHKERRQ(ierr);
1012   if (nselected && (roots[0] < 0 || roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots);
1013 
1014   if (sf->ops->CreateEmbeddedSF) {
1015     ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);CHKERRQ(ierr);
1016   } else {
1017     /* A generic version of creating embedded sf. Note that we called PetscSFSetGraph() twice, which is certainly expensive */
1018     /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */
1019     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);CHKERRQ(ierr);
1020     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);CHKERRQ(ierr);
1021     ierr = PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);CHKERRQ(ierr);
1022     ierr = PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr);
1023     for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1;
1024     ierr = PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
1025     ierr = PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
1026     ierr = PetscSFDestroy(&tmpsf);CHKERRQ(ierr);
1027 
1028     /* Build newsf with leaves that are still connected */
1029     connected_leaves = 0;
1030     for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i];
1031     ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
1032     ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
1033     for (i=0, n=0; i<nleaves; ++i) {
1034       if (leafdata[i]) {
1035         new_ilocal[n]        = sf->mine ? sf->mine[i] : i;
1036         new_iremote[n].rank  = sf->remote[i].rank;
1037         new_iremote[n].index = sf->remote[i].index;
1038         ++n;
1039       }
1040     }
1041 
1042     if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves);
1043     ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr);
1044     ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr);
1045     ierr = PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1046     ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
1047   }
1048   ierr = PetscFree(roots);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 /*@C
1053   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1054 
1055   Collective
1056 
1057   Input Arguments:
1058 + sf - original star forest
1059 . nselected  - number of selected leaves on this process
1060 - selected   - indices of the selected leaves on this process
1061 
1062   Output Arguments:
1063 .  newsf - new star forest
1064 
1065   Level: advanced
1066 
1067 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1068 @*/
1069 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1070 {
1071   const PetscSFNode *iremote;
1072   PetscSFNode       *new_iremote;
1073   const PetscInt    *ilocal;
1074   PetscInt          i,nroots,*leaves,*new_ilocal;
1075   MPI_Comm          comm;
1076   PetscErrorCode    ierr;
1077 
1078   PetscFunctionBegin;
1079   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1080   PetscSFCheckGraphSet(sf,1);
1081   if (nselected) PetscValidPointer(selected,3);
1082   PetscValidPointer(newsf,4);
1083 
1084   /* Uniq selected[] and put results in leaves[] */
1085   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1086   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1087   ierr = PetscMemcpy(leaves,selected,sizeof(PetscInt)*nselected);CHKERRQ(ierr);
1088   ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1089   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1090 
1091   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1092   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1093     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1094   } else {
1095     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1096     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1097     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1098     for (i=0; i<nselected; ++i) {
1099       const PetscInt l     = leaves[i];
1100       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1101       new_iremote[i].rank  = iremote[l].rank;
1102       new_iremote[i].index = iremote[l].index;
1103     }
1104     ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr);
1105     ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr);
1106     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1107   }
1108   ierr = PetscFree(leaves);CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 /*@C
1113    PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()
1114 
1115    Collective on PetscSF
1116 
1117    Input Arguments:
1118 +  sf - star forest on which to communicate
1119 .  unit - data type associated with each node
1120 .  rootdata - buffer to broadcast
1121 -  op - operation to use for reduction
1122 
1123    Output Arguments:
1124 .  leafdata - buffer to be reduced with values from each leaf's respective root
1125 
1126    Level: intermediate
1127 
1128 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
1129 @*/
PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Lazily finish setup; a no-op when PetscSFSetUp() has already been called */
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Dispatch to the type-specific implementation; buffers must remain valid until PetscSFBcastAndOpEnd() */
  ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1142 
1143 /*@C
1144    PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()
1145 
1146    Collective
1147 
1148    Input Arguments:
1149 +  sf - star forest
1150 .  unit - data type
1151 .  rootdata - buffer to broadcast
1152 -  op - operation to use for reduction
1153 
1154    Output Arguments:
1155 .  leafdata - buffer to be reduced with values from each leaf's respective root
1156 
1157    Level: intermediate
1158 
1159 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1160 @*/
PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Complete the communication started by the matching PetscSFBcastAndOpBegin() */
  ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1173 
1174 /*@C
1175    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1176 
1177    Collective
1178 
1179    Input Arguments:
1180 +  sf - star forest
1181 .  unit - data type
1182 .  leafdata - values to reduce
1183 -  op - reduction operation
1184 
1185    Output Arguments:
1186 .  rootdata - result of reduction of values from all leaves of each root
1187 
1188    Level: intermediate
1189 
1190 .seealso: PetscSFBcastBegin()
1191 @*/
1192 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1193 {
1194   PetscErrorCode ierr;
1195 
1196   PetscFunctionBegin;
1197   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1198   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1199   ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1200   ierr = (sf->ops->ReduceBegin)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1201   ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 /*@C
1206    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1207 
1208    Collective
1209 
1210    Input Arguments:
1211 +  sf - star forest
1212 .  unit - data type
1213 .  leafdata - values to reduce
1214 -  op - reduction operation
1215 
1216    Output Arguments:
1217 .  rootdata - result of reduction of values from all leaves of each root
1218 
1219    Level: intermediate
1220 
1221 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1222 @*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Complete the communication started by the matching PetscSFReduceBegin() */
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1235 
1236 /*@C
1237    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1238 
1239    Collective
1240 
1241    Input Arguments:
1242 .  sf - star forest
1243 
1244    Output Arguments:
1245 .  degree - degree of each root vertex
1246 
1247    Level: advanced
1248 
1249    Notes:
1250    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1251 
1252 .seealso: PetscSFGatherBegin()
1253 @*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1;  /* TODO: We should use PetscSFGetLeafRange() */
    /* sf->degree non-NULL while !degreeknown means a Begin without its matching End */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    /* Reduce a vector of ones from the leaves: each root sums up how many leaves point at it */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  /* The result is only valid after PetscSFComputeDegreeEnd(); return NULL so premature use fails fast */
  *degree = NULL;
  PetscFunctionReturn(0);
}
1274 
1275 /*@C
1276    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1277 
1278    Collective
1279 
1280    Input Arguments:
1281 .  sf - star forest
1282 
1283    Output Arguments:
1284 .  degree - degree of each root vertex
1285 
1286    Level: developer
1287 
1288    Notes:
1289    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1290 
.seealso: PetscSFComputeDegreeBegin()
1292 @*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* sf->degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    /* Cache the result; subsequent Begin/End pairs return it without communication */
    sf->degreeknown = PETSC_TRUE;
  }
  /* sf->degree is owned by the SF and freed by PetscSFDestroy()/PetscSFReset() */
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1310 
1311 
1312 /*@C
1313    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1314    Each multi-root is assigned index of the corresponding original root.
1315 
1316    Collective
1317 
1318    Input Arguments:
1319 +  sf - star forest
1320 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1321 
1322    Output Arguments:
1323 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1324 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1325 
1326    Level: developer
1327 
1328    Notes:
1329    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1330 
1331 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1332 @*/
1333 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1334 {
1335   PetscSF             msf;
1336   PetscInt            i, j, k, nroots, nmroots;
1337   PetscErrorCode      ierr;
1338 
1339   PetscFunctionBegin;
1340   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1341   ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
1342   if (nroots) PetscValidIntPointer(degree,2);
1343   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1344   PetscValidPointer(multiRootsOrigNumbering,4);
1345   ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
1346   ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
1347   ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
1348   for (i=0,j=0,k=0; i<nroots; i++) {
1349     if (!degree[i]) continue;
1350     for (j=0; j<degree[i]; j++,k++) {
1351       (*multiRootsOrigNumbering)[k] = i;
1352     }
1353   }
1354 #if defined(PETSC_USE_DEBUG)
1355   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1356 #endif
1357   if (nMultiRoots) *nMultiRoots = nmroots;
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 /*@C
1362    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1363 
1364    Collective
1365 
1366    Input Arguments:
1367 +  sf - star forest
1368 .  unit - data type
1369 .  leafdata - leaf values to use in reduction
1370 -  op - operation to use for reduction
1371 
1372    Output Arguments:
1373 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1374 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1375 
1376    Level: advanced
1377 
1378    Note:
1379    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1380    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1381    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1382    integers.
1383 
1384 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1385 @*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Lazily finish setup; a no-op when PetscSFSetUp() has already been called */
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Dispatch to the type-specific implementation; all three buffers must remain valid until PetscSFFetchAndOpEnd() */
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1398 
1399 /*@C
1400    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1401 
1402    Collective
1403 
1404    Input Arguments:
1405 +  sf - star forest
1406 .  unit - data type
1407 .  leafdata - leaf values to use in reduction
1408 -  op - operation to use for reduction
1409 
1410    Output Arguments:
1411 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1412 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1413 
1414    Level: advanced
1415 
1416 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1417 @*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Complete the communication started by the matching PetscSFFetchAndOpBegin() */
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1430 
1431 /*@C
1432    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1433 
1434    Collective
1435 
1436    Input Arguments:
1437 +  sf - star forest
1438 .  unit - data type
1439 -  leafdata - leaf data to gather to roots
1440 
1441    Output Argument:
1442 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1443 
1444    Level: intermediate
1445 
1446 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1447 @*/
1448 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1449 {
1450   PetscErrorCode ierr;
1451   PetscSF        multi;
1452 
1453   PetscFunctionBegin;
1454   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1455   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1456   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1457   ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1458   PetscFunctionReturn(0);
1459 }
1460 
1461 /*@C
1462    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1463 
1464    Collective
1465 
1466    Input Arguments:
1467 +  sf - star forest
1468 .  unit - data type
1469 -  leafdata - leaf data to gather to roots
1470 
1471    Output Argument:
1472 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1473 
1474    Level: intermediate
1475 
1476 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1477 @*/
1478 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1479 {
1480   PetscErrorCode ierr;
1481   PetscSF        multi;
1482 
1483   PetscFunctionBegin;
1484   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1485   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1486   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1487   ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 /*@C
1492    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1493 
1494    Collective
1495 
1496    Input Arguments:
1497 +  sf - star forest
1498 .  unit - data type
1499 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1500 
1501    Output Argument:
1502 .  leafdata - leaf data to be update with personal data from each respective root
1503 
1504    Level: intermediate
1505 
.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd(), PetscSFGatherBegin()
1507 @*/
1508 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1509 {
1510   PetscErrorCode ierr;
1511   PetscSF        multi;
1512 
1513   PetscFunctionBegin;
1514   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1515   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1516   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1517   ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
1518   PetscFunctionReturn(0);
1519 }
1520 
1521 /*@C
1522    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1523 
1524    Collective
1525 
1526    Input Arguments:
1527 +  sf - star forest
1528 .  unit - data type
1529 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1530 
1531    Output Argument:
1532 .  leafdata - leaf data to be update with personal data from each respective root
1533 
1534    Level: intermediate
1535 
.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin(), PetscSFGatherEnd()
1537 @*/
1538 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1539 {
1540   PetscErrorCode ierr;
1541   PetscSF        multi;
1542 
1543   PetscFunctionBegin;
1544   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1545   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1546   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1547   ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 /*@
  PetscSFCompose - Compose a new PetscSF equivalent to the combined action of two PetscSFs
1553 
1554   Input Parameters:
1555 + sfA - The first PetscSF
1556 - sfB - The second PetscSF
1557 
1558   Output Parameters:
. sfBA - equivalent PetscSF for applying A then B
1560 
1561   Level: developer
1562 
1563 .seealso: PetscSF, PetscSFGetGraph(), PetscSFSetGraph()
1564 @*/
1565 PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
1566 {
1567   MPI_Comm           comm;
1568   const PetscSFNode *remotePointsA, *remotePointsB;
1569   PetscSFNode       *remotePointsBA;
1570   const PetscInt    *localPointsA, *localPointsB;
1571   PetscInt           numRootsA, numLeavesA, numRootsB, numLeavesB;
1572   PetscErrorCode     ierr;
1573 
1574   PetscFunctionBegin;
1575   PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
1576   PetscSFCheckGraphSet(sfA, 1);
1577   PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
1578   PetscSFCheckGraphSet(sfB, 2);
1579   PetscValidPointer(sfBA, 3);
1580   ierr = PetscObjectGetComm((PetscObject) sfA, &comm);CHKERRQ(ierr);
1581   ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
1582   ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
1583   ierr = PetscMalloc1(numLeavesB, &remotePointsBA);CHKERRQ(ierr);
1584   ierr = PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, remotePointsBA);CHKERRQ(ierr);
1585   ierr = PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, remotePointsBA);CHKERRQ(ierr);
1586   ierr = PetscSFCreate(comm, sfBA);CHKERRQ(ierr);
1587   ierr = PetscSFSetGraph(*sfBA, numRootsA, numLeavesB, localPointsB, PETSC_COPY_VALUES, remotePointsBA, PETSC_OWN_POINTER);CHKERRQ(ierr);
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 /*
1592   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
1593 
1594   Input Parameters:
1595 . sf - The global PetscSF
1596 
1597   Output Parameters:
1598 . out - The local PetscSF
1599  */
1600 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
1601 {
1602   MPI_Comm           comm;
1603   PetscMPIInt        myrank;
1604   const PetscInt     *ilocal;
1605   const PetscSFNode  *iremote;
1606   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
1607   PetscSFNode        *liremote;
1608   PetscSF            lsf;
1609   PetscErrorCode     ierr;
1610 
1611   PetscFunctionBegin;
1612   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1613   if (sf->ops->CreateLocalSF) {
1614     ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
1615   } else {
1616     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
1617     ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1618     ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);
1619 
1620     /* Find out local edges and build a local SF */
1621     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
1622     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
1623     ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
1624     ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);
1625 
1626     for (i=j=0; i<nleaves; i++) {
1627       if (iremote[i].rank == (PetscInt)myrank) {
1628         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
1629         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
1630         liremote[j].index = iremote[i].index;
1631         j++;
1632       }
1633     }
1634     ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
1635     ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1636     ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
1637     *out = lsf;
1638   }
1639   PetscFunctionReturn(0);
1640 }
1641