xref: /petsc/src/vec/is/sf/interface/sf.c (revision 58c0e5077dcf40d6a880c19c87f1075ae1d22c8e)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petscctable.h>
3 
/* PetscSFCheckGraphSet(sf,arg): in debug builds, raise PETSC_ERR_ARG_WRONGSTATE if the
   graph of sf has not yet been set with PetscSFSetGraph()/PetscSFSetGraphWithPattern();
   compiles to a no-op in optimized builds. */
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

/* String names for the PetscSFDuplicateOption enum, in enum order, used by options processing */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};
14 
15 /*@
16    PetscSFCreate - create a star forest communication context
17 
18    Collective
19 
20    Input Arguments:
21 .  comm - communicator on which the star forest will operate
22 
23    Output Arguments:
24 .  sf - new star forest context
25 
26    Options Database Keys:
27 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
28 .  -sf_type window    -Use MPI-3 one-sided window for communication
29 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
30 
31    Level: intermediate
32 
33    Notes:
34    When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
35    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
36    SFs are optimized and they have better performance than general SFs.
37 
38 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
39 @*/
40 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
41 {
42   PetscErrorCode ierr;
43   PetscSF        b;
44 
45   PetscFunctionBegin;
46   PetscValidPointer(sf,2);
47   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
48 
49   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
50 
51   b->nroots    = -1;
52   b->nleaves   = -1;
53   b->minleaf   = PETSC_MAX_INT;
54   b->maxleaf   = PETSC_MIN_INT;
55   b->nranks    = -1;
56   b->rankorder = PETSC_TRUE;
57   b->ingroup   = MPI_GROUP_NULL;
58   b->outgroup  = MPI_GROUP_NULL;
59   b->graphset  = PETSC_FALSE;
60 
61   *sf = b;
62   PetscFunctionReturn(0);
63 }
64 
65 /*@
66    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
67 
68    Collective
69 
70    Input Arguments:
71 .  sf - star forest
72 
73    Level: advanced
74 
75 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
76 @*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Give the implementation a chance to release its own communication state first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Restore the sentinel values of a freshly created SF (see PetscSFCreate()) */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine/remote may alias mine_alloc/remote_alloc; only the _alloc pointers are owned here */
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  /* The multi-SF and the root layout are derived objects; drop them so they are rebuilt on demand */
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
104 
105 /*@C
106    PetscSFSetType - Set the PetscSF communication implementation
107 
108    Collective on PetscSF
109 
110    Input Parameters:
111 +  sf - the PetscSF context
112 -  type - a known method
113 
114    Options Database Key:
115 .  -sf_type <type> - Sets the method; use -help for a list
116    of available methods (for instance, window, pt2pt, neighbor)
117 
118    Notes:
119    See "include/petscsf.h" for available methods (for instance)
120 +    PETSCSFWINDOW - MPI-2/3 one-sided
121 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
122 
123   Level: intermediate
124 
125 .seealso: PetscSFType, PetscSFCreate()
126 @*/
PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
{
  PetscErrorCode ierr,(*r)(PetscSF);
  PetscBool      match;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidCharPointer(type,2);

  /* Setting the current type again is a cheap no-op */
  ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
  if (match) PetscFunctionReturn(0);

  /* Look up the registered constructor for the requested type */
  ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
  if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
  /* Destroy the previous PetscSF implementation context */
  if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
  /* Clear the ops table so the constructor r() starts from a clean slate */
  ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
  ierr = (*r)(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
148 
149 /*@C
150   PetscSFGetType - Get the PetscSF communication implementation
151 
152   Not Collective
153 
154   Input Parameter:
155 . sf  - the PetscSF context
156 
157   Output Parameter:
158 . type - the PetscSF type name
159 
160   Level: intermediate
161 
162 .seealso: PetscSFSetType(), PetscSFCreate()
163 @*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  /* May be NULL until PetscSFSetType() (or PetscSFSetUp(), which applies a default) is called */
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}
172 
173 /*@
174    PetscSFDestroy - destroy star forest
175 
176    Collective
177 
178    Input Arguments:
179 .  sf - address of star forest
180 
181    Level: intermediate
182 
183 .seealso: PetscSFCreate(), PetscSFReset()
184 @*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually destroys the object; earlier callers just get *sf nulled */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  /* Release the graph/rank data, then the implementation context, then the header itself */
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
198 
199 /*@
200    PetscSFSetUp - set up communication structures
201 
202    Collective
203 
204    Input Arguments:
205 .  sf - star forest communication object
206 
207    Level: beginner
208 
209 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
210 @*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (sf->setupcalled) PetscFunctionReturn(0); /* idempotent */
  /* Default to the basic implementation (MPI persistent Isend/Irecv, per the class docs) */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
226 
227 /*@
228    PetscSFSetFromOptions - set PetscSF options using the options database
229 
230    Logically Collective
231 
232    Input Arguments:
233 .  sf - star forest
234 
235    Options Database Keys:
236 +  -sf_type - implementation type, see PetscSFSetType()
237 -  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
238 
239    Level: intermediate
240 
241 .seealso: PetscSFWindowSetSyncType()
242 @*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* NOTE: PetscObjectOptionsBegin() is a macro that declares the PetscOptionsObject
     variable referenced below; PetscOptionsEnd() closes that scope */
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Keep the current type as the default if one is already set */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
  /* Let the implementation process its own options (e.g. window- or neighbor-specific keys) */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
261 
262 /*@
263    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
264 
265    Logically Collective
266 
267    Input Arguments:
268 +  sf - star forest
269 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
270 
271    Level: advanced
272 
273 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
274 @*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF bakes in the ordering, so the flag must be set before it is built */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}
285 
286 /*@
287    PetscSFSetGraph - Set a parallel star forest
288 
289    Collective
290 
291    Input Arguments:
292 +  sf - star forest
293 .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
294 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
295 .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage
296 .  localmode - copy mode for ilocal
297 .  iremote - remote locations of root vertices for each leaf on the current process
298 -  remotemode - copy mode for iremote
299 
300    Level: intermediate
301 
302    Notes:
303     In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode
304 
305    Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
306    encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
307    needed
308 
309 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
310 @*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  /* Drop any previously set graph and all derived setup state */
  ierr = PetscSFReset(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    /* One pass: compute the leaf range and detect the identity permutation [0,nleaves) */
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* Contiguous leaves are encoded as ilocal == NULL; free the caller's array if ownership
         was transferred to us (see Developers Note in the manual page above) */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    /* NULL ilocal means contiguous leaves; for nleaves == 0 this yields the empty range [0,-1] */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  /* Take over ilocal according to localmode; mine_alloc is the owned pointer freed by PetscSFReset() */
  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Caller retains ownership; mine_alloc stays NULL so PetscSFReset() will not free it */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  /* Same ownership handling for iremote */
  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
391 
392 /*@
393   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
394 
395   Collective
396 
397   Input Parameters:
398 + sf      - The PetscSF
399 . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
400 - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
401 
402   Notes:
403   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
404   n and N are local and global sizes of x respectively.
405 
406   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
407   sequential vectors y on all ranks.
408 
409   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
410   sequential vector y on rank 0.
411 
412   In above cases, entries of x are roots and entries of y are leaves.
413 
414   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
415   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
416   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
417   not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
418   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
419 
420   In this case, roots and leaves are symmetric.
421 
422   Level: intermediate
423  @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    type = PETSCSFALLTOALL;
    /* map is ignored for ALLTOALL; build the size-per-rank layout ourselves */
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr); /* cast avoids int overflow in size*size */
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it: MPI_MAX of (n,-n)
       gives (max n, -min n), so res[0] == -res[1] iff n is identical everywhere */
    ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n: use the fixed-count (non-"v") collectives */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    /* Every rank receives all N roots as leaves */
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* One leaf and one root per peer rank */
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
489 
490 /*@
491    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
492 
493    Collective
494 
495    Input Arguments:
496 
497 .  sf - star forest to invert
498 
499    Output Arguments:
500 .  isf - inverse of sf
501    Level: advanced
502 
503    Notes:
504    All roots must have degree 1.
505 
506    The local space may be a permutation, but cannot be sparse.
507 
508 .seealso: PetscSFSetGraph()
509 @*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf slot records its own location (this rank, local index i) */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Roots start at (-1,-1) so roots never written by the reduction can be recognized below */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's location to its root; with all roots of degree 1 (precondition of
     this routine) MPIU_REPLACE receives exactly one value per root */
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* dense: contiguous leaves, no index array needed */
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* The inverse SF swaps the roles: old leaf space becomes the root space */
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
559 
560 /*@
561    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
562 
563    Collective
564 
565    Input Arguments:
566 +  sf - communication object to duplicate
567 -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
568 
569    Output Arguments:
570 .  newsf - new communication object
571 
572    Level: beginner
573 
574 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
575 @*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  /* Copy the type only if one has been set on the source */
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graphs are deep-copied */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graphs are reconstructed from the stored layout and pattern */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Let the implementation copy its type-specific state */
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
603 
604 /*@C
605    PetscSFGetGraph - Get the graph specifying a parallel star forest
606 
607    Not Collective
608 
609    Input Arguments:
610 .  sf - star forest
611 
612    Output Arguments:
613 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
614 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
615 .  ilocal - locations of leaves in leafdata buffers
616 -  iremote - remote locations of root vertices for each leaf on the current process
617 
618    Notes:
619    We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
620 
621    Level: intermediate
622 
623 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
624 @*/
625 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
626 {
627   PetscErrorCode ierr;
628 
629   PetscFunctionBegin;
630   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
631   if (sf->ops->GetGraph) {
632     ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
633   } else {
634     if (nroots) *nroots = sf->nroots;
635     if (nleaves) *nleaves = sf->nleaves;
636     if (ilocal) *ilocal = sf->mine;
637     if (iremote) *iremote = sf->remote;
638   }
639   PetscFunctionReturn(0);
640 }
641 
642 /*@
643    PetscSFGetLeafRange - Get the active leaf ranges
644 
645    Not Collective
646 
647    Input Arguments:
648 .  sf - star forest
649 
650    Output Arguments:
651 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
652 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
653 
654    Level: developer
655 
656 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
657 @*/
PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  /* Either output pointer may be NULL; with no leaves the range is the empty interval [0,-1] */
  if (minleaf) *minleaf = sf->minleaf;
  if (maxleaf) *maxleaf = sf->maxleaf;
  PetscFunctionReturn(0);
}
668 
669 /*@C
670    PetscSFView - view a star forest
671 
672    Collective
673 
674    Input Arguments:
675 +  sf - star forest
676 -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
677 
678    Level: beginner
679 
680 .seealso: PetscSFCreate(), PetscSFSetGraph()
681 @*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is given */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Finish setup first so implementation-specific data is available for viewing */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        /* Pop the tab before the early return to keep the viewer state balanced */
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      /* Synchronized printing so per-rank output appears in rank order */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      /* One line per leaf: local index <- (remote rank, remote index) */
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list so edges are printed in ascending rank order */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
739 
740 /*@C
741    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
742 
743    Not Collective
744 
745    Input Arguments:
746 .  sf - star forest
747 
748    Output Arguments:
749 +  nranks - number of ranks referenced by local part
750 .  ranks - array of ranks
751 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
752 .  rmine - concatenated array holding local indices referencing each remote rank
753 -  rremote - concatenated array holding remote indices referenced for each remote rank
754 
755    Level: developer
756 
757 .seealso: PetscSFGetLeafRanks()
758 @*/
759 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
760 {
761   PetscErrorCode ierr;
762 
763   PetscFunctionBegin;
764   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
765   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
766   if (sf->ops->GetRootRanks) {
767     ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
768   } else {
769     /* The generic implementation */
770     if (nranks)  *nranks  = sf->nranks;
771     if (ranks)   *ranks   = sf->ranks;
772     if (roffset) *roffset = sf->roffset;
773     if (rmine)   *rmine   = sf->rmine;
774     if (rremote) *rremote = sf->rremote;
775   }
776   PetscFunctionReturn(0);
777 }
778 
779 /*@C
780    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
781 
782    Not Collective
783 
784    Input Arguments:
785 .  sf - star forest
786 
787    Output Arguments:
788 +  niranks - number of leaf ranks referencing roots on this process
789 .  iranks - array of ranks
790 .  ioffset - offset in irootloc for each rank (length niranks+1)
791 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
792 
793    Level: developer
794 
795 .seealso: PetscSFGetRootRanks()
796 @*/
797 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
798 {
799   PetscErrorCode ierr;
800 
801   PetscFunctionBegin;
802   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
803   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
804   if (sf->ops->GetLeafRanks) {
805     ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
806   } else {
807     PetscSFType type;
808     ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
809     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
810   }
811   PetscFunctionReturn(0);
812 }
813 
814 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
815   PetscInt i;
816   for (i=0; i<n; i++) {
817     if (needle == list[i]) return PETSC_TRUE;
818   }
819   return PETSC_FALSE;
820 }
821 
/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

   Notes:
   Fills in sf->nranks, sf->ranks, sf->roffset, sf->rmine and sf->rremote by grouping this
   process's leaves by the remote rank they reference. Ranks appearing in dgroup are placed
   first (sf->ndranks of them) so implementations can treat them specially.

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count how many of this process's leaves reference each remote rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate every member of dgroup into its rank in sf's communicator */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  /* Two-pointer scheme: sf->ndranks advances over a prefix of distinguished ranks, i retreats
     over a suffix of non-distinguished ranks; out-of-place entries meeting in the middle are
     swapped (together with their counts).  Loop terminates when the pointers cross. */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt    tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition independently so binary search (PetscFindMPIInt below) works per-partition */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR-style offsets; rcount[] is reset to serve as per-rank insertion cursors below */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Bucket each leaf into the row of its remote rank; orank caches the last rank looked up */
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
925 
/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

   Notes:
   Both groups are computed lazily on first call and cached on the SF (sf->ingroup,
   sf->outgroup); the returned handles are owned by the SF and must not be freed by the caller.

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups");
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks: build a one-root SF whose leaves are my
       connected ranks, so the degree of that root is the number of processes whose
       leaves reference my roots */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);

    /* Enumerate the incoming ranks: each process gathers its own rank to the roots it references */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* Outgoing group is simply the ranks this process references, already known from setup */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
989 
/*@
   PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters

   Collective

   Input Argument:
.  sf - star forest that may contain roots with 0 or with more than 1 vertex

   Output Arguments:
.  multi - star forest with split roots, such that each root has degree exactly 1

   Level: developer

   Notes:

   In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
   directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
   edge, it is a candidate for future optimization that might involve its removal.

   The multi-SF is built lazily on first call and cached in sf->multi; the returned SF is
   owned by sf and destroyed with it.

.seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
@*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    /* inoffset[] is the prefix sum of root degrees: the range of multi-root indices owned by each root */
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Fetch-and-add of 1 hands each leaf a unique slot (outoffset) within its root's multi-root range */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
#if 0
#if defined(PETSC_USE_DEBUG)                                 /* Check that the expected number of increments occurred */
    for (i=0; i<sf->nroots; i++) {
      if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
    }
#endif
#endif
    /* Each leaf of multi points at its private slot on the owning rank */
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Collect, at each multi-root, the rank of the leaf process that was assigned to it */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Tell every leaf its slot under the new rank-sorted ordering, then rebuild the graph */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
1083 
1084 /*@C
1085    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
1086 
1087    Collective
1088 
1089    Input Arguments:
1090 +  sf - original star forest
1091 .  nselected  - number of selected roots on this process
1092 -  selected   - indices of the selected roots on this process
1093 
1094    Output Arguments:
1095 .  newsf - new star forest
1096 
1097    Level: advanced
1098 
1099    Note:
1100    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1101    be done by calling PetscSFGetGraph().
1102 
1103 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1104 @*/
1105 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1106 {
1107   PetscInt          i,n,*roots,*rootdata,*leafdata,nroots,nleaves,connected_leaves,*new_ilocal;
1108   const PetscSFNode *iremote;
1109   PetscSFNode       *new_iremote;
1110   PetscSF           tmpsf;
1111   MPI_Comm          comm;
1112   PetscErrorCode    ierr;
1113 
1114   PetscFunctionBegin;
1115   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1116   PetscSFCheckGraphSet(sf,1);
1117   if (nselected) PetscValidPointer(selected,3);
1118   PetscValidPointer(newsf,4);
1119 
1120   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1121 
1122   /* Uniq selected[] and put results in roots[] */
1123   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1124   ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr);
1125   ierr = PetscArraycpy(roots,selected,nselected);CHKERRQ(ierr);
1126   ierr = PetscSortedRemoveDupsInt(&nselected,roots);CHKERRQ(ierr);
1127   if (nselected && (roots[0] < 0 || roots[nselected-1] >= sf->nroots)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",roots[0],roots[nselected-1],sf->nroots);
1128 
1129   if (sf->ops->CreateEmbeddedSF) {
1130     ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,roots,newsf);CHKERRQ(ierr);
1131   } else {
1132     /* A generic version of creating embedded sf. Note that we called PetscSFSetGraph() twice, which is certainly expensive */
1133     /* Find out which leaves (not leaf data items) are still connected to roots in the embedded sf */
1134     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);CHKERRQ(ierr);
1135     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&tmpsf);CHKERRQ(ierr);
1136     ierr = PetscSFSetGraph(tmpsf,nroots,nleaves,NULL/*contiguous*/,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);CHKERRQ(ierr);
1137     ierr = PetscCalloc2(nroots,&rootdata,nleaves,&leafdata);CHKERRQ(ierr);
1138     for (i=0; i<nselected; ++i) rootdata[roots[i]] = 1;
1139     ierr = PetscSFBcastBegin(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
1140     ierr = PetscSFBcastEnd(tmpsf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
1141     ierr = PetscSFDestroy(&tmpsf);CHKERRQ(ierr);
1142 
1143     /* Build newsf with leaves that are still connected */
1144     connected_leaves = 0;
1145     for (i=0; i<nleaves; ++i) connected_leaves += leafdata[i];
1146     ierr = PetscMalloc1(connected_leaves,&new_ilocal);CHKERRQ(ierr);
1147     ierr = PetscMalloc1(connected_leaves,&new_iremote);CHKERRQ(ierr);
1148     for (i=0, n=0; i<nleaves; ++i) {
1149       if (leafdata[i]) {
1150         new_ilocal[n]        = sf->mine ? sf->mine[i] : i;
1151         new_iremote[n].rank  = sf->remote[i].rank;
1152         new_iremote[n].index = sf->remote[i].index;
1153         ++n;
1154       }
1155     }
1156 
1157     if (n != connected_leaves) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"There is a size mismatch in the SF embedding, %d != %d",n,connected_leaves);
1158     ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr);
1159     ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr);
1160     ierr = PetscSFSetGraph(*newsf,nroots,connected_leaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1161     ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
1162   }
1163   ierr = PetscFree(roots);CHKERRQ(ierr);
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 /*@C
1168   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1169 
1170   Collective
1171 
1172   Input Arguments:
1173 + sf - original star forest
1174 . nselected  - number of selected leaves on this process
1175 - selected   - indices of the selected leaves on this process
1176 
1177   Output Arguments:
1178 .  newsf - new star forest
1179 
1180   Level: advanced
1181 
1182 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1183 @*/
1184 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1185 {
1186   const PetscSFNode *iremote;
1187   PetscSFNode       *new_iremote;
1188   const PetscInt    *ilocal;
1189   PetscInt          i,nroots,*leaves,*new_ilocal;
1190   MPI_Comm          comm;
1191   PetscErrorCode    ierr;
1192 
1193   PetscFunctionBegin;
1194   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1195   PetscSFCheckGraphSet(sf,1);
1196   if (nselected) PetscValidPointer(selected,3);
1197   PetscValidPointer(newsf,4);
1198 
1199   /* Uniq selected[] and put results in leaves[] */
1200   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1201   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1202   ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1203   ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1204   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1205 
1206   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1207   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1208     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1209   } else {
1210     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1211     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1212     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1213     for (i=0; i<nselected; ++i) {
1214       const PetscInt l     = leaves[i];
1215       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1216       new_iremote[i].rank  = iremote[l].rank;
1217       new_iremote[i].index = iremote[l].index;
1218     }
1219     ierr = PetscSFCreate(comm,newsf);CHKERRQ(ierr);
1220     ierr = PetscSFSetFromOptions(*newsf);CHKERRQ(ierr);
1221     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1222   }
1223   ierr = PetscFree(leaves);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 /*@C
1228    PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()
1229 
1230    Collective on PetscSF
1231 
1232    Input Arguments:
1233 +  sf - star forest on which to communicate
1234 .  unit - data type associated with each node
1235 .  rootdata - buffer to broadcast
1236 -  op - operation to use for reduction
1237 
1238    Output Arguments:
1239 .  leafdata - buffer to be reduced with values from each leaf's respective root
1240 
1241    Level: intermediate
1242 
1243 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd()
1244 @*/
1245 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1246 {
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1251   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1252   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1253   ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
1254   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 /*@C
1259    PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()
1260 
1261    Collective
1262 
1263    Input Arguments:
1264 +  sf - star forest
1265 .  unit - data type
1266 .  rootdata - buffer to broadcast
1267 -  op - operation to use for reduction
1268 
1269    Output Arguments:
1270 .  leafdata - buffer to be reduced with values from each leaf's respective root
1271 
1272    Level: intermediate
1273 
1274 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1275 @*/
1276 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1277 {
1278   PetscErrorCode ierr;
1279 
1280   PetscFunctionBegin;
1281   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1282   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1283   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1284   ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
1285   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 /*@C
1290    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1291 
1292    Collective
1293 
1294    Input Arguments:
1295 +  sf - star forest
1296 .  unit - data type
1297 .  leafdata - values to reduce
1298 -  op - reduction operation
1299 
1300    Output Arguments:
1301 .  rootdata - result of reduction of values from all leaves of each root
1302 
1303    Level: intermediate
1304 
1305 .seealso: PetscSFBcastBegin()
1306 @*/
1307 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1308 {
1309   PetscErrorCode ierr;
1310 
1311   PetscFunctionBegin;
1312   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1313   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1314   ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1315   ierr = (sf->ops->ReduceBegin)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1316   ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 /*@C
1321    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1322 
1323    Collective
1324 
1325    Input Arguments:
1326 +  sf - star forest
1327 .  unit - data type
1328 .  leafdata - values to reduce
1329 -  op - reduction operation
1330 
1331    Output Arguments:
1332 .  rootdata - result of reduction of values from all leaves of each root
1333 
1334    Level: intermediate
1335 
1336 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1337 @*/
1338 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1339 {
1340   PetscErrorCode ierr;
1341 
1342   PetscFunctionBegin;
1343   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1344   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1345   ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1346   ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1347   ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
1348   PetscFunctionReturn(0);
1349 }
1350 
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

   The result is cached on the SF (sf->degree/sf->degreeknown), so repeated Begin/End pairs
   after the first are cheap. *degree is set to NULL here; the actual array only becomes
   available from PetscSFComputeDegreeEnd().

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1;  /* TODO: We should use PetscSFGetLeafRange() */
    /* A non-NULL sf->degreetmp means a Begin without a matching End is in flight */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Summing 1 from every leaf into its root yields the root's degree */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  *degree = NULL;
  PetscFunctionReturn(0);
}
1389 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* sf->degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means no Begin is pending */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE; /* cache; subsequent Begin/End pairs skip the reduction */
  }
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1425 
1426 
1427 /*@C
1428    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1429    Each multi-root is assigned index of the corresponding original root.
1430 
1431    Collective
1432 
1433    Input Arguments:
1434 +  sf - star forest
1435 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1436 
1437    Output Arguments:
1438 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1439 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1440 
1441    Level: developer
1442 
1443    Notes:
1444    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1445 
1446 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1447 @*/
1448 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1449 {
1450   PetscSF             msf;
1451   PetscInt            i, j, k, nroots, nmroots;
1452   PetscErrorCode      ierr;
1453 
1454   PetscFunctionBegin;
1455   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1456   ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
1457   if (nroots) PetscValidIntPointer(degree,2);
1458   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1459   PetscValidPointer(multiRootsOrigNumbering,4);
1460   ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
1461   ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
1462   ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
1463   for (i=0,j=0,k=0; i<nroots; i++) {
1464     if (!degree[i]) continue;
1465     for (j=0; j<degree[i]; j++,k++) {
1466       (*multiRootsOrigNumbering)[k] = i;
1467     }
1468   }
1469 #if defined(PETSC_USE_DEBUG)
1470   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1471 #endif
1472   if (nMultiRoots) *nMultiRoots = nmroots;
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 /*@C
1477    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1478 
1479    Collective
1480 
1481    Input Arguments:
1482 +  sf - star forest
1483 .  unit - data type
1484 .  leafdata - leaf values to use in reduction
1485 -  op - operation to use for reduction
1486 
1487    Output Arguments:
1488 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1489 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1490 
1491    Level: advanced
1492 
1493    Note:
1494    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1495    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1496    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1497    integers.
1498 
1499 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1500 @*/
1501 PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1502 {
1503   PetscErrorCode ierr;
1504 
1505   PetscFunctionBegin;
1506   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1507   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1508   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1509   ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
1510   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 /*@C
1515    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1516 
1517    Collective
1518 
1519    Input Arguments:
1520 +  sf - star forest
1521 .  unit - data type
1522 .  leafdata - leaf values to use in reduction
1523 -  op - operation to use for reduction
1524 
1525    Output Arguments:
1526 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1527 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1528 
1529    Level: advanced
1530 
1531 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1532 @*/
1533 PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1534 {
1535   PetscErrorCode ierr;
1536 
1537   PetscFunctionBegin;
1538   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1539   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1540   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1541   ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
1542   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 /*@C
1547    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1548 
1549    Collective
1550 
1551    Input Arguments:
1552 +  sf - star forest
1553 .  unit - data type
1554 -  leafdata - leaf data to gather to roots
1555 
1556    Output Argument:
1557 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1558 
1559    Level: intermediate
1560 
1561 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1562 @*/
1563 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1564 {
1565   PetscErrorCode ierr;
1566   PetscSF        multi;
1567 
1568   PetscFunctionBegin;
1569   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1570   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1571   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1572   ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 /*@C
1577    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1578 
1579    Collective
1580 
1581    Input Arguments:
1582 +  sf - star forest
1583 .  unit - data type
1584 -  leafdata - leaf data to gather to roots
1585 
1586    Output Argument:
1587 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1588 
1589    Level: intermediate
1590 
1591 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1592 @*/
1593 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1594 {
1595   PetscErrorCode ierr;
1596   PetscSF        multi;
1597 
1598   PetscFunctionBegin;
1599   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1600   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1601   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1602   ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 /*@C
1607    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1608 
1609    Collective
1610 
1611    Input Arguments:
1612 +  sf - star forest
1613 .  unit - data type
1614 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1615 
1616    Output Argument:
1617 .  leafdata - leaf data to be update with personal data from each respective root
1618 
1619    Level: intermediate
1620 
1621 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1622 @*/
1623 PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1624 {
1625   PetscErrorCode ierr;
1626   PetscSF        multi;
1627 
1628   PetscFunctionBegin;
1629   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1630   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1631   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1632   ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
1633   PetscFunctionReturn(0);
1634 }
1635 
1636 /*@C
1637    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1638 
1639    Collective
1640 
1641    Input Arguments:
1642 +  sf - star forest
1643 .  unit - data type
1644 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1645 
1646    Output Argument:
1647 .  leafdata - leaf data to be update with personal data from each respective root
1648 
1649    Level: intermediate
1650 
1651 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1652 @*/
1653 PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1654 {
1655   PetscErrorCode ierr;
1656   PetscSF        multi;
1657 
1658   PetscFunctionBegin;
1659   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1660   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1661   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1662   ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
1663   PetscFunctionReturn(0);
1664 }
1665 
/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF, whose local space may be a permutation, but can not be sparse.
- sfB - The second PetscSF, whose number of roots must be equal to number of leaves of sfA on each processor

  Output Parameters:
. sfBA - The composite SF. Doing a Bcast on the new SF is equivalent to doing Bcast on sfA, then Bcast on sfB

  Level: developer

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  MPI_Comm          comm;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscObjectGetComm((PetscObject)sfA,&comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  /* The leaf range of sfA must be contiguous (non-sparse), so that remotePointsA can
     serve directly as the root data array of sfB below */
  ierr = PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);CHKERRQ(ierr);
  if (maxleaf+1-minleaf != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF can not have sparse local space");
  if (numRootsB != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of roots must be equal to the first SF's number of leaves");
  ierr = PetscMalloc1(numLeavesB,&remotePointsBA);CHKERRQ(ierr);
  /* Bcast sfA's remote (rank,index) records through sfB: each leaf of sfB then holds the
     root of sfA that the composed root->leaf path ends at. A PetscSFNode is a pair of
     PetscInts, hence the MPIU_2INT unit. NOTE(review): this treats remotePointsA (stored
     in sfA leaf order) as sfB root data; presumably correct when sfA's local space is a
     permutation only in the sense documented above -- TODO confirm against callers. */
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,remotePointsBA);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,remotePointsBA);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm, sfBA);CHKERRQ(ierr);
  /* sfBA takes ownership of remotePointsBA (PETSC_OWN_POINTER): do not free it here */
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesB,localPointsB,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1708 
/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF, whose local space may be a permutation, but can not be sparse.
- sfB - The second PetscSF, whose number of leaves must be equal to number of leaves of sfA on each processor. All roots must have degree 1.

  Output Parameters:
. sfBA - The composite SF. Doing a Bcast on the new SF is equivalent to doing Bcast on sfA, then Bcast on inverse of sfB

  Level: developer

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  MPI_Comm          comm;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscObjectGetComm((PetscObject) sfA, &comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
  /* The leaf range of sfA must be contiguous (non-sparse), so that remotePointsA can
     serve directly as leaf data of sfB in the reduce below */
  ierr = PetscSFGetLeafRange(sfA,&minleaf,&maxleaf);CHKERRQ(ierr);
  if (maxleaf+1-minleaf != numLeavesA) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The first SF can not have sparse local space");
  if (numLeavesA != numLeavesB) SETERRQ(comm,PETSC_ERR_ARG_INCOMP,"The second SF's number of leaves must be equal to the first SF's number of leaves");
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  /* Reduce sfA's remote (rank,index) records to the roots of sfB; because every root of
     sfB has degree 1 (documented requirement), MPIU_REPLACE writes each entry exactly
     once and every slot of remotePointsBA is presumably filled -- relies on the caller
     honoring the degree-1 contract. A PetscSFNode is a pair of PetscInts (MPIU_2INT). */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,remotePointsA,remotePointsBA,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm,sfBA);CHKERRQ(ierr);
  /* NULL ilocal means contiguous leaf numbering; sfBA takes ownership of remotePointsBA */
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numRootsB,NULL,PETSC_COPY_VALUES,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1751 
1752 /*
1753   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
1754 
1755   Input Parameters:
1756 . sf - The global PetscSF
1757 
1758   Output Parameters:
1759 . out - The local PetscSF
1760  */
1761 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
1762 {
1763   MPI_Comm           comm;
1764   PetscMPIInt        myrank;
1765   const PetscInt     *ilocal;
1766   const PetscSFNode  *iremote;
1767   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
1768   PetscSFNode        *liremote;
1769   PetscSF            lsf;
1770   PetscErrorCode     ierr;
1771 
1772   PetscFunctionBegin;
1773   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1774   if (sf->ops->CreateLocalSF) {
1775     ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
1776   } else {
1777     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
1778     ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1779     ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);
1780 
1781     /* Find out local edges and build a local SF */
1782     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
1783     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
1784     ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
1785     ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);
1786 
1787     for (i=j=0; i<nleaves; i++) {
1788       if (iremote[i].rank == (PetscInt)myrank) {
1789         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
1790         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
1791         liremote[j].index = iremote[i].index;
1792         j++;
1793       }
1794     }
1795     ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
1796     ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1797     ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
1798     *out = lsf;
1799   }
1800   PetscFunctionReturn(0);
1801 }
1802 
1803 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
1804 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
1805 {
1806   PetscErrorCode     ierr;
1807 
1808   PetscFunctionBegin;
1809   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1810   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1811   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1812   if (sf->ops->BcastToZero) {
1813     ierr = (*sf->ops->BcastToZero)(sf,unit,rootdata,leafdata);CHKERRQ(ierr);
1814   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
1815   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1816   PetscFunctionReturn(0);
1817 }
1818 
1819