xref: /petsc/src/vec/is/sf/interface/sf.c (revision 373e0d914d8d7bc4efff479c8f6ebb4174f33a0f)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petscctable.h>
3 
/* PetscSFCheckGraphSet(sf,arg): in debug builds, raise an error if PetscSFSetGraph()
   has not yet been called on the PetscSF passed as argument number 'arg' of the
   current function; compiles to a no-op in optimized builds. */
#if defined(PETSC_USE_DEBUG)
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif

/* String names for the PetscSFDuplicateOption enum, used by options processing and viewing */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",0};
14 
15 /*@
16    PetscSFCreate - create a star forest communication context
17 
18    Not Collective
19 
20    Input Arguments:
21 .  comm - communicator on which the star forest will operate
22 
23    Output Arguments:
24 .  sf - new star forest context
25 
26    Level: intermediate
27 
28 .seealso: PetscSFSetGraph(), PetscSFDestroy()
29 @*/
30 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
31 {
32   PetscErrorCode ierr;
33   PetscSF        b;
34 
35   PetscFunctionBegin;
36   PetscValidPointer(sf,2);
37   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
38 
39   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
40 
41   b->nroots    = -1;
42   b->nleaves   = -1;
43   b->minleaf   = PETSC_MAX_INT;
44   b->maxleaf   = PETSC_MIN_INT;
45   b->nranks    = -1;
46   b->rankorder = PETSC_TRUE;
47   b->ingroup   = MPI_GROUP_NULL;
48   b->outgroup  = MPI_GROUP_NULL;
49   b->graphset  = PETSC_FALSE;
50 
51   *sf = b;
52   PetscFunctionReturn(0);
53 }
54 
55 /*@
56    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
57 
58    Collective
59 
60    Input Arguments:
61 .  sf - star forest
62 
63    Level: advanced
64 
65 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
66 @*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the type-specific implementation release its communication state first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Restore the sentinel values PetscSFCreate() uses for "no graph set" */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  /* mine/remote may alias the *_alloc buffers (see PetscSFSetGraph() copy modes);
     only the *_alloc pointers are owned by the SF and freed here */
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  /* Rank tables built by PetscSFSetUpRanks() */
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  /* MPI groups are cached lazily by PetscSFGetGroups() */
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  /* Inner multi-SF created lazily by PetscSFGetMultiSF(); destroy handles NULL */
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
93 
94 /*@C
95    PetscSFSetType - Set the PetscSF communication implementation
96 
97    Collective on PetscSF
98 
99    Input Parameters:
100 +  sf - the PetscSF context
101 -  type - a known method
102 
103    Options Database Key:
104 .  -sf_type <type> - Sets the method; use -help for a list
105    of available methods (for instance, window, pt2pt, neighbor)
106 
107    Notes:
108    See "include/petscsf.h" for available methods (for instance)
109 +    PETSCSFWINDOW - MPI-2/3 one-sided
110 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
111 
112   Level: intermediate
113 
114 .keywords: PetscSF, set, type
115 
116 .seealso: PetscSFType, PetscSFCreate()
117 @*/
118 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
119 {
120   PetscErrorCode ierr,(*r)(PetscSF);
121   PetscBool      match;
122 
123   PetscFunctionBegin;
124   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
125   PetscValidCharPointer(type,2);
126 
127   ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
128   if (match) PetscFunctionReturn(0);
129 
130   ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
131   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
132   /* Destroy the previous PetscSF implementation context */
133   if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
134   ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
135   ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
136   ierr = (*r)(sf);CHKERRQ(ierr);
137   PetscFunctionReturn(0);
138 }
139 
140 /*@C
141   PetscSFGetType - Get the PetscSF communication implementation
142 
143   Not Collective
144 
145   Input Parameter:
146 . sf  - the PetscSF context
147 
148   Output Parameter:
149 . type - the PetscSF type name
150 
151   Level: intermediate
152 
153 .keywords: PetscSF, get, type
154 .seealso: PetscSFSetType(), PetscSFCreate()
155 @*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  /* Borrowed reference to the name stored on the base object; NULL if no type
     has been set yet (callers such as PetscSFDuplicate() check for that) */
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}
164 
165 /*@
166    PetscSFDestroy - destroy star forest
167 
168    Collective
169 
170    Input Arguments:
171 .  sf - address of star forest
172 
173    Level: intermediate
174 
175 .seealso: PetscSFCreate(), PetscSFReset()
176 @*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Destroying a NULL handle is a no-op */
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the final reference actually tears the object down */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  /* Generic reset first (frees graph/rank data), then type-specific teardown */
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
190 
191 /*@
192    PetscSFSetUp - set up communication structures
193 
194    Collective
195 
196    Input Arguments:
197 .  sf - star forest communication object
198 
199    Level: beginner
200 
201 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
202 @*/
PetscErrorCode PetscSFSetUp(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  /* Setup is idempotent */
  if (sf->setupcalled) PetscFunctionReturn(0);
  /* Fall back to the basic implementation when no type was explicitly chosen */
  if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);}
  ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
  ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
  sf->setupcalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
218 
219 /*@
220    PetscSFSetFromOptions - set PetscSF options using the options database
221 
222    Logically Collective
223 
224    Input Arguments:
225 .  sf - star forest
226 
227    Options Database Keys:
228 +  -sf_type - implementation type, see PetscSFSetType()
229 -  -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
230 
231    Level: intermediate
232 
233 .keywords: KSP, set, from, options, database
234 
235 .seealso: PetscSFWindowSetSyncType()
236 @*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* NOTE: PetscOptionsObject used below is brought into scope by the
     PetscObjectOptionsBegin macro */
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Default to the current type if one is set, otherwise PETSCSFBASIC */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
  /* Let the implementation add its own options */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
255 
256 /*@
257    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
258 
259    Logically Collective
260 
261    Input Arguments:
262 +  sf - star forest
263 -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
264 
265    Level: advanced
266 
267 .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
268 @*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* The multi-SF (built lazily for gathers/scatters) bakes in the ordering,
     so the flag can only be changed before it exists */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}
279 
280 /*@
281    PetscSFSetGraph - Set a parallel star forest
282 
283    Collective
284 
285    Input Arguments:
286 +  sf - star forest
287 .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
288 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
289 .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage
290 .  localmode - copy mode for ilocal
291 .  iremote - remote locations of root vertices for each leaf on the current process
292 -  remotemode - copy mode for iremote
293 
294    Level: intermediate
295 
296    Notes: In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode
297 
298 .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
299 @*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* ilocal may legitimately be NULL (contiguous leaves); iremote is required
     whenever there are leaves */
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  /* Discard any previously set graph and all data derived from it */
  ierr = PetscSFReset(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  /* Cache the leaf range: scan ilocal when given, otherwise leaves are the
     contiguous range 0..nleaves-1 */
  if (nleaves && ilocal) {
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
  } else {
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1; /* empty range (0,-1) when nleaves==0 */
  }

  /* Take ownership of the leaf index array per localmode: sf->mine_alloc is
     what PetscSFReset() frees, sf->mine is what the SF reads */
  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscMemcpy(sf->mine_alloc,ilocal,nleaves*sizeof(*ilocal));CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      /* SF assumes ownership of the caller's array */
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Borrowed pointer: caller keeps ownership and must outlive the SF */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  /* Same ownership handling for the remote (rank,index) array */
  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscMemcpy(sf->remote_alloc,iremote,nleaves*sizeof(*iremote));CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
372 
373 /*@
374    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
375 
376    Collective
377 
378    Input Arguments:
379 .  sf - star forest to invert
380 
381    Output Arguments:
382 .  isf - inverse of sf
383 
384    Level: advanced
385 
386    Notes:
387    All roots must have degree 1.
388 
389    The local space may be a permutation, but cannot be sparse.
390 
391 .seealso: PetscSFSetGraph()
392 @*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  /* Every local leaf slot advertises its own (rank,index); roots start at
     (-1,-1) so roots reached by no leaf can be recognized afterwards */
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  /* Push each leaf's identity onto its root (degree 1 assumed, see Notes) */
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL; /* dense: inverse uses contiguous leaf storage */
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* Inverse has maxlocal roots (old leaf space) and count leaves (old reached roots);
     newilocal ownership transfers to the new SF */
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
442 
443 /*@
444    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
445 
446    Collective
447 
448    Input Arguments:
449 +  sf - communication object to duplicate
450 -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
451 
452    Output Arguments:
453 .  newsf - new communication object
454 
455    Level: beginner
456 
457 .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
458 @*/
459 PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
460 {
461   PetscSFType    type;
462   PetscErrorCode ierr;
463 
464   PetscFunctionBegin;
465   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
466   PetscValidLogicalCollectiveEnum(sf,opt,2);
467   PetscValidPointer(newsf,3);
468   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
469   ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
470   if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
471   if (opt == PETSCSF_DUPLICATE_GRAPH) {
472     PetscInt          nroots,nleaves;
473     const PetscInt    *ilocal;
474     const PetscSFNode *iremote;
475     PetscSFCheckGraphSet(sf,1);
476     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
477     ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
478   }
479   if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
480   PetscFunctionReturn(0);
481 }
482 
483 /*@C
484    PetscSFGetGraph - Get the graph specifying a parallel star forest
485 
486    Not Collective
487 
488    Input Arguments:
489 .  sf - star forest
490 
491    Output Arguments:
492 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
493 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
494 .  ilocal - locations of leaves in leafdata buffers
495 -  iremote - remote locations of root vertices for each leaf on the current process
496 
497    Notes:
498    We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
499 
500    Level: intermediate
501 
502 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
503 @*/
504 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
505 {
506 
507   PetscFunctionBegin;
508   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
509   if (nroots) *nroots = sf->nroots;
510   if (nleaves) *nleaves = sf->nleaves;
511   if (ilocal) *ilocal = sf->mine;
512   if (iremote) *iremote = sf->remote;
513   PetscFunctionReturn(0);
514 }
515 
516 /*@
517    PetscSFGetLeafRange - Get the active leaf ranges
518 
519    Not Collective
520 
521    Input Arguments:
522 .  sf - star forest
523 
524    Output Arguments:
525 +  minleaf - minimum active leaf on this process
526 -  maxleaf - maximum active leaf on this process
527 
528    Level: developer
529 
530 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
531 @*/
532 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
533 {
534 
535   PetscFunctionBegin;
536   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
537   PetscSFCheckGraphSet(sf,1);
538   if (minleaf) *minleaf = sf->minleaf;
539   if (maxleaf) *maxleaf = sf->maxleaf;
540   PetscFunctionReturn(0);
541 }
542 
543 /*@C
544    PetscSFView - view a star forest
545 
546    Collective
547 
548    Input Arguments:
549 +  sf - star forest
550 -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
551 
552    Level: beginner
553 
554 .seealso: PetscSFCreate(), PetscSFSetGraph()
555 @*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is supplied */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Ensure rank tables exist so the detailed listing below is valid */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (!sf->graphset) {
      /* Early return: must undo the PushTab above before leaving */
      ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
    /* Per-process summary, then one line per leaf edge: leaf <- (rank,rootindex) */
    ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
    for (i=0; i<sf->nleaves; i++) {
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      PetscMPIInt *tmpranks,*perm;
      /* Sort a copy of the rank list so edges print in ascending rank order
         (sf->ranks itself keeps distinguished ranks first; see PetscSFSetUpRanks()) */
      ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
      ierr = PetscMemcpy(tmpranks,sf->ranks,sf->nranks*sizeof(tmpranks[0]));CHKERRQ(ierr);
      for (i=0; i<sf->nranks; i++) perm[i] = i;
      ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
      for (ii=0; ii<sf->nranks; ii++) {
        i = perm[ii];
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
        for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
        }
      }
      ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
611 
612 /*@C
613    PetscSFGetRanks - Get ranks and number of vertices referenced by leaves on this process
614 
615    Not Collective
616 
617    Input Arguments:
618 .  sf - star forest
619 
620    Output Arguments:
621 +  nranks - number of ranks referenced by local part
622 .  ranks - array of ranks
623 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
624 .  rmine - concatenated array holding local indices referencing each remote rank
625 -  rremote - concatenated array holding remote indices referenced for each remote rank
626 
627    Level: developer
628 
629 .seealso: PetscSFSetGraph()
630 @*/
631 PetscErrorCode PetscSFGetRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
632 {
633 
634   PetscFunctionBegin;
635   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
636   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
637   if (nranks)  *nranks  = sf->nranks;
638   if (ranks)   *ranks   = sf->ranks;
639   if (roffset) *roffset = sf->roffset;
640   if (rmine)   *rmine   = sf->rmine;
641   if (rremote) *rremote = sf->rremote;
642   PetscFunctionReturn(0);
643 }
644 
645 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
646   PetscInt i;
647   for (i=0; i<n; i++) {
648     if (needle == list[i]) return PETSC_TRUE;
649   }
650   return PETSC_FALSE;
651 }
652 
653 /*@C
654    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
655 
656    Collective
657 
658    Input Arguments:
659 +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
660 -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
661 
662    Level: developer
663 
664 .seealso: PetscSFGetRanks()
665 @*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           i,*rcount,*ranks;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count leaves per referenced remote rank with a hash table */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  /* rmine/rremote are sized nleaves: every leaf lands in exactly one rank bucket */
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's members into the SF communicator's
       rank numbering so they can be compared against ranks[] below */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  /* Two-pointer in-place partition: advance sf->ndranks over distinguished
     entries, retreat i over non-distinguished ones, swap when both stall */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i; ) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for ( ; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt    tmprank,tmpcount;
      tmprank = ranks[i];
      tmpcount = rcount[i];
      ranks[i] = ranks[sf->ndranks];
      rcount[i] = rcount[sf->ndranks];
      ranks[sf->ndranks] = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition by rank; PetscFindMPIInt() below searches these sorted runs */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR-style offsets; rcount is then reused as a per-rank fill cursor */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0;
  }
  /* Bucket each leaf edge by owning rank: rmine holds local leaf indices,
     rremote the corresponding root indices on that rank */
  for (i=0; i<sf->nleaves; i++) {
    PetscInt irank;
    /* Search for index of iremote[i].rank in sf->ranks */
    ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
    if (irank < 0) {
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
      if (irank >= 0) irank += sf->ndranks;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
751 
752 /*@C
753    PetscSFGetGroups - gets incoming and outgoing process groups
754 
755    Collective
756 
757    Input Argument:
758 .  sf - star forest
759 
760    Output Arguments:
761 +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
762 -  outgoing - group of destination processes for outgoing edges (roots that I reference)
763 
764    Level: developer
765 
766 .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
767 @*/
768 PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
769 {
770   PetscErrorCode ierr;
771   MPI_Group      group = MPI_GROUP_NULL;
772 
773   PetscFunctionBegin;
774   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining groups");
775   if (sf->ingroup == MPI_GROUP_NULL) {
776     PetscInt       i;
777     const PetscInt *indegree;
778     PetscMPIInt    rank,*outranks,*inranks;
779     PetscSFNode    *remote;
780     PetscSF        bgcount;
781 
782     /* Compute the number of incoming ranks */
783     ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
784     for (i=0; i<sf->nranks; i++) {
785       remote[i].rank  = sf->ranks[i];
786       remote[i].index = 0;
787     }
788     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
789     ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
790     ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
791     ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
792 
793     /* Enumerate the incoming ranks */
794     ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
795     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
796     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
797     ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
798     ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
799     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
800     ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
801     ierr = MPI_Group_free(&group);CHKERRQ(ierr);
802     ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
803     ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
804   }
805   *incoming = sf->ingroup;
806 
807   if (sf->outgroup == MPI_GROUP_NULL) {
808     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
809     ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
810     ierr = MPI_Group_free(&group);CHKERRQ(ierr);
811   }
812   *outgoing = sf->outgroup;
813   PetscFunctionReturn(0);
814 }
815 
816 /*@
817    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
818 
819    Collective
820 
821    Input Argument:
822 .  sf - star forest that may contain roots with 0 or with more than 1 vertex
823 
824    Output Arguments:
825 .  multi - star forest with split roots, such that each root has degree exactly 1
826 
827    Level: developer
828 
829    Notes:
830 
831    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
832    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
833    edge, it is a candidate for future optimization that might involve its removal.
834 
835 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin()
836 @*/
PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidPointer(multi,2);
  if (sf->nroots < 0) {         /* Graph has not been set yet; hand back a bare duplicate so the caller gets a valid object. Why this path is needed is unclear -- TODO confirm */
    ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    *multi = sf->multi;
    PetscFunctionReturn(0);
  }
  if (!sf->multi) {             /* Build the multi-SF lazily on first request and cache it on the SF */
    const PetscInt *indegree;
    PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
    PetscSFNode    *remote;
    maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
    ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
    ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
    /* inoffset[i] = first multi-root slot belonging to root i (prefix sum of the root degrees) */
    inoffset[0] = 0;
    for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
    for (i=0; i<maxlocal; i++) outones[i] = 1;
    /* Hand out consecutive slot numbers: each leaf fetches its root's current offset and bumps it by one,
       so after this exchange outoffset[leaf] is the leaf's private slot in the split root space */
    ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
    for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
#if 0
#if defined(PETSC_USE_DEBUG)                                 /* Check that the expected number of increments occurred */
    for (i=0; i<sf->nroots; i++) {
      if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
    }
#endif
#endif
    ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
    /* Re-point each leaf at its private multi-root slot instead of the shared root */
    for (i=0; i<sf->nleaves; i++) {
      remote[i].rank  = sf->remote[i].rank;
      remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    if (sf->rankorder) {        /* Sort the ranks so slot order at each root is deterministic */
      PetscMPIInt rank;
      PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
      PetscSFNode *newremote;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
      ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
      /* Deposit each contributing leaf's owning rank into its multi-root slot */
      for (i=0; i<maxlocal; i++) outranks[i] = rank;
      ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
      /* Sort the incoming ranks at each vertex, build the inverse map */
      for (i=0; i<sf->nroots; i++) {
        PetscInt j;
        for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
        ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
        for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
      }
      /* Inform each leaf of its slot's new (rank-sorted) position */
      ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
      ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
      for (i=0; i<sf->nleaves; i++) {
        newremote[i].rank  = sf->remote[i].rank;
        newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
      }
      ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
    }
    ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
  }
  *multi = sf->multi;
  PetscFunctionReturn(0);
}
909 
910 /*@C
911    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
912 
913    Collective
914 
915    Input Arguments:
916 +  sf - original star forest
917 .  nroots - number of roots to select on this process
918 -  selected - selected roots on this process
919 
920    Output Arguments:
921 .  newsf - new star forest
922 
923    Level: advanced
924 
925    Note:
926    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
927    be done by calling PetscSFGetGraph().
928 
929 .seealso: PetscSFSetGraph(), PetscSFGetGraph()
930 @*/
931 PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nroots,const PetscInt *selected,PetscSF *newsf)
932 {
933   PetscInt      *rootdata, *leafdata, *ilocal;
934   PetscSFNode   *iremote;
935   PetscInt       leafsize = 0, nleaves = 0, n, i;
936   PetscErrorCode ierr;
937 
938   PetscFunctionBegin;
939   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
940   PetscSFCheckGraphSet(sf,1);
941   if (nroots) PetscValidPointer(selected,3);
942   PetscValidPointer(newsf,4);
943   if (sf->mine) for (i = 0; i < sf->nleaves; ++i) {leafsize = PetscMax(leafsize, sf->mine[i]+1);}
944   else leafsize = sf->nleaves;
945   ierr = PetscCalloc2(sf->nroots,&rootdata,leafsize,&leafdata);CHKERRQ(ierr);
946   for (i=0; i<nroots; ++i) rootdata[selected[i]] = 1;
947   ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
948   ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);CHKERRQ(ierr);
949   for (i = 0; i < leafsize; ++i) nleaves += leafdata[i];
950   ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr);
951   ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
952   for (i = 0, n = 0; i < sf->nleaves; ++i) {
953     const PetscInt lidx = sf->mine ? sf->mine[i] : i;
954 
955     if (leafdata[lidx]) {
956       ilocal[n]        = lidx;
957       iremote[n].rank  = sf->remote[i].rank;
958       iremote[n].index = sf->remote[i].index;
959       ++n;
960     }
961   }
962   if (n != nleaves) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_PLIB, "There is a size mismatch in the SF embedding, %d != %d", n, nleaves);
963   ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,newsf);CHKERRQ(ierr);
964   ierr = PetscSFSetGraph(*newsf,sf->nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
965   ierr = PetscFree2(rootdata,leafdata);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 /*@C
970   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
971 
972   Collective
973 
974   Input Arguments:
975 + sf - original star forest
976 . nleaves - number of leaves to select on this process
977 - selected - selected leaves on this process
978 
979   Output Arguments:
980 .  newsf - new star forest
981 
982   Level: advanced
983 
984 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
985 @*/
986 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf, PetscInt nleaves, const PetscInt *selected, PetscSF *newsf)
987 {
988   PetscSFNode   *iremote;
989   PetscInt      *ilocal;
990   PetscInt       i;
991   PetscErrorCode ierr;
992 
993   PetscFunctionBegin;
994   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID, 1);
995   PetscSFCheckGraphSet(sf, 1);
996   if (nleaves) PetscValidPointer(selected, 3);
997   PetscValidPointer(newsf, 4);
998   ierr = PetscMalloc1(nleaves, &ilocal);CHKERRQ(ierr);
999   ierr = PetscMalloc1(nleaves, &iremote);CHKERRQ(ierr);
1000   for (i = 0; i < nleaves; ++i) {
1001     const PetscInt l = selected[i];
1002 
1003     ilocal[i]        = sf->mine ? sf->mine[l] : l;
1004     iremote[i].rank  = sf->remote[l].rank;
1005     iremote[i].index = sf->remote[l].index;
1006   }
1007   ierr = PetscSFDuplicate(sf, PETSCSF_DUPLICATE_RANKS, newsf);CHKERRQ(ierr);
1008   ierr = PetscSFSetGraph(*newsf, sf->nroots, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 /*@C
1013    PetscSFBcastBegin - begin pointwise broadcast to be concluded with call to PetscSFBcastEnd()
1014 
1015    Collective on PetscSF
1016 
1017    Input Arguments:
1018 +  sf - star forest on which to communicate
1019 .  unit - data type associated with each node
1020 -  rootdata - buffer to broadcast
1021 
1022    Output Arguments:
1023 .  leafdata - buffer to update with values from each leaf's respective root
1024 
1025    Level: intermediate
1026 
1027 .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFView(), PetscSFBcastEnd(), PetscSFReduceBegin()
1028 @*/
PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Ensure the communication structures exist before dispatching */
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Dispatch to the type-specific implementation; leafdata is not valid until PetscSFBcastEnd() */
  ierr = (*sf->ops->BcastBegin)(sf,unit,rootdata,leafdata);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1041 
1042 /*@C
1043    PetscSFBcastEnd - end a broadcast operation started with PetscSFBcastBegin()
1044 
1045    Collective
1046 
1047    Input Arguments:
1048 +  sf - star forest
1049 .  unit - data type
1050 -  rootdata - buffer to broadcast
1051 
1052    Output Arguments:
1053 .  leafdata - buffer to update with values from each leaf's respective root
1054 
1055    Level: intermediate
1056 
1057 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1058 @*/
PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Complete the communication started by the matching PetscSFBcastBegin(); leafdata is usable afterwards */
  ierr = (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1071 
1072 /*@C
1073    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1074 
1075    Collective
1076 
1077    Input Arguments:
1078 +  sf - star forest
1079 .  unit - data type
1080 .  leafdata - values to reduce
1081 -  op - reduction operation
1082 
1083    Output Arguments:
1084 .  rootdata - result of reduction of values from all leaves of each root
1085 
1086    Level: intermediate
1087 
1088 .seealso: PetscSFBcastBegin()
1089 @*/
1090 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1091 {
1092   PetscErrorCode ierr;
1093 
1094   PetscFunctionBegin;
1095   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1096   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1097   ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1098   ierr = (sf->ops->ReduceBegin)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1099   ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 /*@C
1104    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1105 
1106    Collective
1107 
1108    Input Arguments:
1109 +  sf - star forest
1110 .  unit - data type
1111 .  leafdata - values to reduce
1112 -  op - reduction operation
1113 
1114    Output Arguments:
1115 .  rootdata - result of reduction of values from all leaves of each root
1116 
1117    Level: intermediate
1118 
1119 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1120 @*/
PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Complete the reduction started by the matching PetscSFReduceBegin(); rootdata is usable afterwards */
  ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1133 
1134 /*@C
1135    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1136 
1137    Collective
1138 
1139    Input Arguments:
1140 .  sf - star forest
1141 
1142    Output Arguments:
1143 .  degree - degree of each root vertex
1144 
1145    Level: advanced
1146 
1147 .seealso: PetscSFGatherBegin()
1148 @*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {       /* Degrees are computed once and cached on the SF */
    PetscInt i, nroots = sf->nroots, maxlocal = sf->maxleaf+1;  /* TODO: We should use PetscSFGetLeafRange() */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    /* Each leaf contributes 1; the MPI_SUM reduction at the roots yields each root's degree */
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  /* The result is only available from PetscSFComputeDegreeEnd(); return NULL in the meantime */
  *degree = NULL;
  PetscFunctionReturn(0);
}
1169 
1170 /*@C
1171    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1172 
1173    Collective
1174 
1175    Input Arguments:
1176 .  sf - star forest
1177 
1178    Output Arguments:
1179 .  degree - degree of each root vertex
1180 
1181    Level: developer
1182 
1183 .seealso:
1184 @*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    /* degreetmp is allocated by PetscSFComputeDegreeBegin(); its absence means Begin was never called */
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE; /* cache the result; later Begin/End pairs return it immediately */
  }
  *degree = sf->degree; /* owned by the SF; callers must not free it */
  PetscFunctionReturn(0);
}
1202 
1203 /*@C
1204    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1205 
1206    Collective
1207 
1208    Input Arguments:
1209 +  sf - star forest
1210 .  unit - data type
1211 .  leafdata - leaf values to use in reduction
1212 -  op - operation to use for reduction
1213 
1214    Output Arguments:
1215 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1216 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1217 
1218    Level: advanced
1219 
1220    Note:
1221    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1222    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1223    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1224    integers.
1225 
1226 .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1227 @*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Ensure the communication structures exist before dispatching */
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Dispatch to the type-specific implementation; leafupdate is not valid until PetscSFFetchAndOpEnd() */
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1240 
1241 /*@C
1242    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1243 
1244    Collective
1245 
1246    Input Arguments:
1247 +  sf - star forest
1248 .  unit - data type
1249 .  leafdata - leaf values to use in reduction
1250 -  op - operation to use for reduction
1251 
1252    Output Arguments:
1253 +  rootdata - root values to be updated, input state is seen by first process to perform an update
1254 -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1255 
1256    Level: advanced
1257 
1258 .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1259 @*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Complete the fetch-and-op started by the matching PetscSFFetchAndOpBegin();
     rootdata and leafupdate are usable afterwards */
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1272 
1273 /*@C
1274    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1275 
1276    Collective
1277 
1278    Input Arguments:
1279 +  sf - star forest
1280 .  unit - data type
1281 -  leafdata - leaf data to gather to roots
1282 
1283    Output Argument:
1284 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1285 
1286    Level: intermediate
1287 
1288 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1289 @*/
PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Gather is a reduction over the multi-SF: every multi-root has degree exactly one,
     so MPIU_REPLACE deposits each leaf's value into its own slot without conflicts */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1302 
1303 /*@C
1304    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1305 
1306    Collective
1307 
1308    Input Arguments:
1309 +  sf - star forest
1310 .  unit - data type
1311 -  leafdata - leaf data to gather to roots
1312 
1313    Output Argument:
1314 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1315 
1316    Level: intermediate
1317 
1318 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1319 @*/
PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Finish the multi-SF reduction started by PetscSFGatherBegin() */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1332 
1333 /*@C
1334    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1335 
1336    Collective
1337 
1338    Input Arguments:
1339 +  sf - star forest
1340 .  unit - data type
1341 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1342 
1343    Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root
1345 
1346    Level: intermediate
1347 
.seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin(), PetscSFScatterEnd()
1349 @*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Scatter is a broadcast over the multi-SF: each leaf receives the value stored
     in its private multi-root slot */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1362 
1363 /*@C
1364    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1365 
1366    Collective
1367 
1368    Input Arguments:
1369 +  sf - star forest
1370 .  unit - data type
1371 -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
1372 
1373    Output Argument:
.  leafdata - leaf data to be updated with personal data from each respective root
1375 
1376    Level: intermediate
1377 
.seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd(), PetscSFScatterBegin()
1379 @*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* Finish the multi-SF broadcast started by PetscSFScatterBegin() */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1392 
1393 /*@
  PetscSFCompose - Compose a new PetscSF equivalent to the composition of two PetscSFs
1395 
1396   Input Parameters:
1397 + sfA - The first PetscSF
1398 - sfB - The second PetscSF
1399 
1400   Output Parameters:
. sfBA - equivalent PetscSF for applying A then B
1402 
1403   Level: developer
1404 
1405 .seealso: PetscSF, PetscSFGetGraph(), PetscSFSetGraph()
1406 @*/
PetscErrorCode PetscSFCompose(PetscSF sfA, PetscSF sfB, PetscSF *sfBA)
{
  MPI_Comm           comm;
  const PetscSFNode *remotePointsA, *remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA, *localPointsB;
  PetscInt           numRootsA, numLeavesA, numRootsB, numLeavesB;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA, PETSCSF_CLASSID, 1);
  PetscSFCheckGraphSet(sfA, 1);
  PetscValidHeaderSpecific(sfB, PETSCSF_CLASSID, 2);
  PetscSFCheckGraphSet(sfB, 2);
  PetscValidPointer(sfBA, 3);
  ierr = PetscObjectGetComm((PetscObject) sfA, &comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
  ierr = PetscMalloc1(numLeavesB, &remotePointsBA);CHKERRQ(ierr);
  /* Push A's (rank,index) root locations through B so each leaf of B learns which root of A it
     ultimately points at. NOTE(review): this uses A's remote array as root data for B, which
     presumes B's root space coincides with A's leaf space -- TODO confirm with callers.
     MPIU_2INT is assumed to match PetscSFNode's two PetscInt members -- verify for 64-bit indices */
  ierr = PetscSFBcastBegin(sfB, MPIU_2INT, remotePointsA, remotePointsBA);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB, MPIU_2INT, remotePointsA, remotePointsBA);CHKERRQ(ierr);
  ierr = PetscSFCreate(comm, sfBA);CHKERRQ(ierr);
  /* The composed SF keeps A's roots and B's leaves; remotePointsBA ownership transfers (PETSC_OWN_POINTER) */
  ierr = PetscSFSetGraph(*sfBA, numRootsA, numLeavesB, localPointsB, PETSC_COPY_VALUES, remotePointsBA, PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1432