xref: /petsc/src/vec/is/sf/interface/sf.c (revision 59af0bd3658d6c64d35e37f76ad6a8a026fa611f)
1 #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2 #include <petsc/private/hashseti.h>
3 #include <petscctable.h>
4 
5 #if defined(PETSC_HAVE_CUDA)
6   #include <cuda_runtime.h>
7 #endif
8 
9 #if defined(PETSC_HAVE_HIP)
10   #include <hip/hip_runtime.h>
11 #endif
12 
#if defined(PETSC_USE_DEBUG)
/* In debug builds, raise an error unless PetscSFSetGraph()/PetscSFSetGraphWithPattern() has been
   called on the given SF argument; compiles to a no-op in optimized builds */
#  define PetscSFCheckGraphSet(sf,arg) do {                          \
    if (PetscUnlikely(!(sf)->graphset))                              \
      SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
  } while (0)
#else
#  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
#endif
21 
/* Names for PetscSFDuplicateOption values; the trailing entries (enum type name, value prefix, NULL terminator) follow the PETSc options-enum array convention */
const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
23 
/* Classify where the buffer 'data' resides: host or device (GPU) memory.
   Defaults to PETSC_MEMTYPE_HOST; the CUDA/HIP runtime is only consulted when the
   corresponding backend has been initialized and 'data' is non-NULL. */
PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
{
  PetscFunctionBegin;
  PetscValidPointer(type,2);
  *type = PETSC_MEMTYPE_HOST;
#if defined(PETSC_HAVE_CUDA)
  if (PetscCUDAInitialized && data) {
    cudaError_t                  cerr;
    struct cudaPointerAttributes attr;
    enum cudaMemoryType          mtype;
    cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
    cudaGetLastError(); /* Reset the last error */
    #if (CUDART_VERSION < 10000)
      mtype = attr.memoryType; /* Old field name; renamed to 'type' in CUDA 10 */
    #else
      mtype = attr.type;
    #endif
    if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif

#if defined(PETSC_HAVE_HIP)
  if (PetscHIPInitialized && data) {
    hipError_t                   cerr;
    struct hipPointerAttribute_t attr;
    enum hipMemoryType           mtype;
    cerr = hipPointerGetAttributes(&attr,data); /* A host pointer may be reported as an error; handled by checking cerr below */
    hipGetLastError(); /* Reset the last error */
    mtype = attr.memoryType;
    if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
  }
#endif
  PetscFunctionReturn(0);
}
58 
59 /*@
60    PetscSFCreate - create a star forest communication context
61 
62    Collective
63 
64    Input Arguments:
65 .  comm - communicator on which the star forest will operate
66 
67    Output Arguments:
68 .  sf - new star forest context
69 
70    Options Database Keys:
71 +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
72 .  -sf_type window    -Use MPI-3 one-sided window for communication
73 -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
74 
75    Level: intermediate
76 
77    Notes:
78    When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
79    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
80    SFs are optimized and they have better performance than general SFs.
81 
82 .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
83 @*/
84 PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
85 {
86   PetscErrorCode ierr;
87   PetscSF        b;
88 
89   PetscFunctionBegin;
90   PetscValidPointer(sf,2);
91   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
92 
93   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
94 
95   b->nroots    = -1;
96   b->nleaves   = -1;
97   b->minleaf   = PETSC_MAX_INT;
98   b->maxleaf   = PETSC_MIN_INT;
99   b->nranks    = -1;
100   b->rankorder = PETSC_TRUE;
101   b->ingroup   = MPI_GROUP_NULL;
102   b->outgroup  = MPI_GROUP_NULL;
103   b->graphset  = PETSC_FALSE;
104 #if defined(PETSC_HAVE_DEVICE)
105   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
106   b->use_stream_aware_mpi = PETSC_FALSE;
107   b->use_default_stream   = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
108   /* Set the default */
109   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
110     b->backend = PETSCSF_BACKEND_KOKKOS;
111   #elif defined(PETSC_HAVE_CUDA)
112     b->backend = PETSCSF_BACKEND_CUDA;
113   #elif defined(PETSC_HAVE_HIP)
114     b->backend = PETSCSF_BACKEND_HIP;
115   #endif
116 #endif
117   *sf = b;
118   PetscFunctionReturn(0);
119 }
120 
/*@
   PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

   Collective

   Input Arguments:
.  sf - star forest

   Level: advanced

.seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
@*/
PetscErrorCode PetscSFReset(PetscSF sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Let the implementation release its own data first */
  if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
  /* Restore the graph-related fields to their PetscSFCreate() defaults */
  sf->nroots   = -1;
  sf->nleaves  = -1;
  sf->minleaf  = PETSC_MAX_INT;
  sf->maxleaf  = PETSC_MIN_INT;
  sf->mine     = NULL;
  sf->remote   = NULL;
  sf->graphset = PETSC_FALSE;
  ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
  ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
  sf->nranks = -1;
  ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  /* Free the device-side copies of the rmine arrays */
  for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
#endif
  sf->degreeknown = PETSC_FALSE;
  ierr = PetscFree(sf->degree);CHKERRQ(ierr);
  if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
  if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
  /* The multi-SF and the root layout are derived objects; drop them as well */
  ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
  ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
  sf->setupcalled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
163 
164 /*@C
165    PetscSFSetType - Set the PetscSF communication implementation
166 
167    Collective on PetscSF
168 
169    Input Parameters:
170 +  sf - the PetscSF context
171 -  type - a known method
172 
173    Options Database Key:
174 .  -sf_type <type> - Sets the method; use -help for a list
175    of available methods (for instance, window, basic, neighbor)
176 
177    Notes:
178    See "include/petscsf.h" for available methods (for instance)
179 +    PETSCSFWINDOW - MPI-2/3 one-sided
180 -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
181 
182   Level: intermediate
183 
184 .seealso: PetscSFType, PetscSFCreate()
185 @*/
186 PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
187 {
188   PetscErrorCode ierr,(*r)(PetscSF);
189   PetscBool      match;
190 
191   PetscFunctionBegin;
192   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
193   PetscValidCharPointer(type,2);
194 
195   ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
196   if (match) PetscFunctionReturn(0);
197 
198   ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
199   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
200   /* Destroy the previous PetscSF implementation context */
201   if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
202   ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
203   ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
204   ierr = (*r)(sf);CHKERRQ(ierr);
205   PetscFunctionReturn(0);
206 }
207 
/*@C
  PetscSFGetType - Get the PetscSF communication implementation

  Not Collective

  Input Parameter:
. sf  - the PetscSF context

  Output Parameter:
. type - the PetscSF type name

  Level: intermediate

.seealso: PetscSFSetType(), PetscSFCreate()
@*/
PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
  PetscValidPointer(type,2);
  /* Return the type name stored on the base object; NULL when no type has been set yet */
  *type = ((PetscObject)sf)->type_name;
  PetscFunctionReturn(0);
}
231 
/*@
   PetscSFDestroy - destroy star forest

   Collective

   Input Arguments:
.  sf - address of star forest

   Level: intermediate

.seealso: PetscSFCreate(), PetscSFReset()
@*/
PetscErrorCode PetscSFDestroy(PetscSF *sf)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!*sf) PetscFunctionReturn(0);
  PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
  /* Only the last reference actually tears the object down */
  if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
  ierr = PetscSFReset(*sf);CHKERRQ(ierr);
  if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
  /* Release embedded vecscatter resources: the local SF and, when bs > 1, the derived MPI datatype */
  ierr = PetscSFDestroy(&(*sf)->vscat.lsf);CHKERRQ(ierr);
  if ((*sf)->vscat.bs > 1) {ierr = MPI_Type_free(&(*sf)->vscat.unit);CHKERRQ(ierr);}
  ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
259 
260 static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
261 {
262   PetscInt           i, nleaves;
263   PetscMPIInt        size;
264   const PetscInt    *ilocal;
265   const PetscSFNode *iremote;
266   PetscErrorCode     ierr;
267 
268   PetscFunctionBegin;
269   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
270   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
271   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
272   for (i = 0; i < nleaves; i++) {
273     const PetscInt rank = iremote[i].rank;
274     const PetscInt remote = iremote[i].index;
275     const PetscInt leaf = ilocal ? ilocal[i] : i;
276     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
277     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
278     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
279   }
280   PetscFunctionReturn(0);
281 }
282 
283 
284 
285 
286 /*@
287    PetscSFSetUp - set up communication structures
288 
289    Collective
290 
291    Input Arguments:
292 .  sf - star forest communication object
293 
294    Level: beginner
295 
296 .seealso: PetscSFSetFromOptions(), PetscSFSetType()
297 @*/
298 PetscErrorCode PetscSFSetUp(PetscSF sf)
299 {
300   PetscErrorCode ierr;
301 
302   PetscFunctionBegin;
303   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
304   PetscSFCheckGraphSet(sf,1);
305   if (sf->setupcalled) PetscFunctionReturn(0);
306   ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
307   ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
308   if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */
309   if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
310 #if defined(PETSC_HAVE_CUDA)
311   if (sf->backend == PETSCSF_BACKEND_CUDA) {
312     sf->ops->Malloc = PetscSFMalloc_Cuda;
313     sf->ops->Free   = PetscSFFree_Cuda;
314   }
315 #endif
316 #if defined(PETSC_HAVE_HIP)
317   if (sf->backend == PETSCSF_BACKEND_HIP) {
318     sf->ops->Malloc = PetscSFMalloc_HIP;
319     sf->ops->Free   = PetscSFFree_HIP;
320   }
321 #endif
322 
323 #
324 #if defined(PETSC_HAVE_KOKKOS)
325   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
326     sf->ops->Malloc = PetscSFMalloc_Kokkos;
327     sf->ops->Free   = PetscSFFree_Kokkos;
328   }
329 #endif
330   ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
331   sf->setupcalled = PETSC_TRUE;
332   PetscFunctionReturn(0);
333 }
334 
/*@
   PetscSFSetFromOptions - set PetscSF options using the options database

   Logically Collective

   Input Arguments:
.  sf - star forest

   Options Database Keys:
+  -sf_type               - implementation type, see PetscSFSetType()
.  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
.  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
                            use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
                            If true, this option only works with -use_gpu_aware_mpi 1.
.  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
                               If true, this option only works with -use_gpu_aware_mpi 1.
-  -sf_backend cuda | hip | kokkos -Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
                              On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
                              the only available is kokkos.

   Level: intermediate
@*/
PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
{
  PetscSFType    deft;
  char           type[256];
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
  /* Keep the current type as the default shown in -help; fall back to basic */
  deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
  ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
  ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DEVICE)
  {
    char        backendstr[32] = {0};
    PetscBool   isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
    /* Change the defaults set in PetscSFCreate() with command line options */
    ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
    ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr);
    ierr = PetscStrcasecmp("hip",backendstr,&isHip);CHKERRQ(ierr);
  #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
    /* Any of the three backends may be requested; reject unrecognized strings */
    if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
    else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
    else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
    else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You may choose cuda, hip or kokkos (if installed)", backendstr);
  #elif defined(PETSC_HAVE_KOKKOS)
    /* Kokkos is the only backend built in; anything else is an error */
    if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
  #endif
  }
#endif
  /* PetscOptionsObject is provided by the PetscObjectOptionsBegin() macro scope */
  if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
397 
/*@
   PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

   Logically Collective

   Input Arguments:
+  sf - star forest
-  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

   Level: advanced

.seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveBool(sf,flg,2);
  /* Once the multi-SF has been built the ordering is baked in and cannot change */
  if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
  sf->rankorder = flg;
  PetscFunctionReturn(0);
}
420 
/*@
   PetscSFSetGraph - Set a parallel star forest

   Collective

   Input Arguments:
+  sf - star forest
.  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
.  nleaves - number of leaf vertices on the current process, each of these references a root on any process
.  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
during setup in debug mode)
.  localmode - copy mode for ilocal
.  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
during setup in debug mode)
-  remotemode - copy mode for iremote

   Level: intermediate

   Notes:
    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode

   Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
   encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
   needed

   Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
   indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).

.seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
  if (nleaves > 0) PetscValidPointer(iremote,6);
  if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
  if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);

  if (sf->nroots >= 0) { /* Reset only if graph already set */
    ierr = PetscSFReset(sf);CHKERRQ(ierr);
  }

  ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);

  sf->nroots  = nroots;
  sf->nleaves = nleaves;

  if (nleaves && ilocal) {
    PetscInt i;
    PetscInt minleaf = PETSC_MAX_INT;
    PetscInt maxleaf = PETSC_MIN_INT;
    int      contiguous = 1;
    /* Compute the leaf range and detect the identity permutation in one pass */
    for (i=0; i<nleaves; i++) {
      minleaf = PetscMin(minleaf,ilocal[i]);
      maxleaf = PetscMax(maxleaf,ilocal[i]);
      contiguous &= (ilocal[i] == i);
    }
    sf->minleaf = minleaf;
    sf->maxleaf = maxleaf;
    if (contiguous) {
      /* Identity ilocal just encodes contiguous storage; drop it (freeing when we were given ownership) */
      if (localmode == PETSC_OWN_POINTER) {
        ierr = PetscFree(ilocal);CHKERRQ(ierr);
      }
      ilocal = NULL;
    }
  } else {
    /* No ilocal: leaves are stored contiguously at [0,nleaves) */
    sf->minleaf = 0;
    sf->maxleaf = nleaves - 1;
  }

  /* Take over ilocal according to the requested copy mode */
  if (ilocal) {
    switch (localmode) {
    case PETSC_COPY_VALUES:
      ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
      ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
      sf->mine = sf->mine_alloc;
      break;
    case PETSC_OWN_POINTER:
      sf->mine_alloc = (PetscInt*)ilocal;
      sf->mine       = sf->mine_alloc;
      break;
    case PETSC_USE_POINTER:
      /* Caller retains ownership; mine_alloc stays NULL so PetscSFReset() will not free it */
      sf->mine_alloc = NULL;
      sf->mine       = (PetscInt*)ilocal;
      break;
    default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
    }
  }

  /* Take over iremote according to the requested copy mode */
  switch (remotemode) {
  case PETSC_COPY_VALUES:
    ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
    ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
    sf->remote = sf->remote_alloc;
    break;
  case PETSC_OWN_POINTER:
    sf->remote_alloc = (PetscSFNode*)iremote;
    sf->remote       = sf->remote_alloc;
    break;
  case PETSC_USE_POINTER:
    /* Caller retains ownership; remote_alloc stays NULL so PetscSFReset() will not free it */
    sf->remote_alloc = NULL;
    sf->remote       = (PetscSFNode*)iremote;
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
  }

  ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
534 
/*@
  PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

  Collective

  Input Parameters:
+ sf      - The PetscSF
. map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
- pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

  Notes:
  It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
  n and N are local and global sizes of x respectively.

  With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
  sequential vectors y on all ranks.

  With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
  sequential vector y on rank 0.

  In above cases, entries of x are roots and entries of y are leaves.

  With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
  creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
  of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
  not mean one can not send multiple items. One just needs to create a new MPI datatype for the multiple data
  items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

  In this case, roots and leaves are symmetric.

  Level: intermediate
 @*/
PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
{
  MPI_Comm       comm;
  PetscInt       n,N,res[2];
  PetscMPIInt    rank,size;
  PetscSFType    type;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
  if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    /* The user-provided map is ignored; build a layout with 'size' roots per rank */
    type = PETSCSFALLTOALL;
    ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
    ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
  } else {
    ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
    ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
    res[0] = n;
    res[1] = -n;
    /* Check if n are same over all ranks so that we can optimize it */
    ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
    if (res[0] == -res[1]) { /* same n */
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
    } else {
      type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
    }
    ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
  }
  ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);

  sf->pattern = pattern;
  sf->mine    = NULL; /* Contiguous */

  /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
     Also set other easy stuff.
   */
  if (pattern == PETSCSF_PATTERN_ALLGATHER) {
    sf->nleaves      = N;
    sf->nroots       = n;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = N - 1;
  } else if (pattern == PETSCSF_PATTERN_GATHER) {
    /* Only rank 0 has leaves in a gather */
    sf->nleaves      = rank ? 0 : N;
    sf->nroots       = n;
    sf->nranks       = rank ? 0 : size;
    sf->minleaf      = 0;
    sf->maxleaf      = rank ? -1 : N - 1;
  } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
    sf->nleaves      = size;
    sf->nroots       = size;
    sf->nranks       = size;
    sf->minleaf      = 0;
    sf->maxleaf      = size - 1;
  }
  sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
  sf->graphset = PETSC_TRUE;
  PetscFunctionReturn(0);
}
632 
/*@
   PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

   Collective

   Input Arguments:
.  sf - star forest to invert

   Output Arguments:
.  isf - inverse of sf

   Level: advanced

   Notes:
   All roots must have degree 1.

   The local space may be a permutation, but cannot be sparse.

.seealso: PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
  const PetscInt *ilocal;
  PetscSFNode    *roots,*leaves;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(isf,2);

  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
  maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
  ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
  /* Each leaf advertises its own (rank, index) so roots learn who points at them */
  for (i=0; i<maxlocal; i++) {
    leaves[i].rank  = rank;
    leaves[i].index = i;
  }
  /* Roots start as (-1,-1); entries still negative after the reduce received no leaf */
  for (i=0; i <nroots; i++) {
    roots[i].rank  = -1;
    roots[i].index = -1;
  }
  ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);

  /* Check whether our leaves are sparse */
  for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
  if (count == nroots) newilocal = NULL;
  else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
    ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
    for (i=0,count=0; i<nroots; i++) {
      if (roots[i].rank >= 0) {
        newilocal[count]   = i;
        roots[count].rank  = roots[i].rank;
        roots[count].index = roots[i].index;
        count++;
      }
    }
  }

  /* newilocal ownership transfers to the new SF; roots is copied so we can free it below */
  ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
  ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
702 
/*@
   PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

   Collective

   Input Arguments:
+  sf - communication object to duplicate
-  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

   Output Arguments:
.  newsf - new communication object

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
{
  PetscSFType    type;
  PetscErrorCode ierr;
  MPI_Datatype   dtype=MPIU_SCALAR;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscValidLogicalCollectiveEnum(sf,opt,2);
  PetscValidPointer(newsf,3);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
  ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
  if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
  if (opt == PETSCSF_DUPLICATE_GRAPH) {
    PetscSFCheckGraphSet(sf,1);
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      /* General graph: deep-copy the connectivity */
      PetscInt          nroots,nleaves;
      const PetscInt    *ilocal;
      const PetscSFNode *iremote;
      ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
    } else {
      /* Patterned graph: rebuild from the layout and pattern */
      ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
    }
  }
  /* Since oldtype is committed, so is newtype, according to MPI */
  if (sf->vscat.bs > 1) {ierr = MPI_Type_dup(sf->vscat.unit,&dtype);CHKERRQ(ierr);}
  (*newsf)->vscat.bs     = sf->vscat.bs;
  (*newsf)->vscat.unit   = dtype;
  (*newsf)->vscat.to_n   = sf->vscat.to_n;
  (*newsf)->vscat.from_n = sf->vscat.from_n;
  /* Do not copy lsf. Build it on demand since it is rarely used */

#if defined(PETSC_HAVE_DEVICE)
  /* Carry over device-related configuration */
  (*newsf)->backend              = sf->backend;
  (*newsf)->use_default_stream   = sf->use_default_stream;
  (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
  (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
#endif
  if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
  /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
  PetscFunctionReturn(0);
}
762 
763 /*@C
764    PetscSFGetGraph - Get the graph specifying a parallel star forest
765 
766    Not Collective
767 
768    Input Arguments:
769 .  sf - star forest
770 
771    Output Arguments:
772 +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
773 .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
774 .  ilocal - locations of leaves in leafdata buffers
775 -  iremote - remote locations of root vertices for each leaf on the current process
776 
777    Notes:
778    We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
779 
780    When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
781    want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.
782 
783    Level: intermediate
784 
785 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
786 @*/
787 PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
788 {
789   PetscErrorCode ierr;
790 
791   PetscFunctionBegin;
792   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
793   if (sf->ops->GetGraph) {
794     ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
795   } else {
796     if (nroots) *nroots = sf->nroots;
797     if (nleaves) *nleaves = sf->nleaves;
798     if (ilocal) *ilocal = sf->mine;
799     if (iremote) *iremote = sf->remote;
800   }
801   PetscFunctionReturn(0);
802 }
803 
804 /*@
805    PetscSFGetLeafRange - Get the active leaf ranges
806 
807    Not Collective
808 
809    Input Arguments:
810 .  sf - star forest
811 
812    Output Arguments:
813 +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
814 -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
815 
816    Level: developer
817 
818 .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
819 @*/
820 PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
821 {
822   PetscFunctionBegin;
823   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
824   PetscSFCheckGraphSet(sf,1);
825   if (minleaf) *minleaf = sf->minleaf;
826   if (maxleaf) *maxleaf = sf->maxleaf;
827   PetscFunctionReturn(0);
828 }
829 
830 /*@C
831    PetscSFViewFromOptions - View from Options
832 
833    Collective on PetscSF
834 
835    Input Parameters:
836 +  A - the star forest
837 .  obj - Optional object
838 -  name - command line option
839 
840    Level: intermediate
841 .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
842 @*/
843 PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
844 {
845   PetscErrorCode ierr;
846 
847   PetscFunctionBegin;
848   PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
849   ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
/*@C
   PetscSFView - view a star forest

   Collective

   Input Arguments:
+  sf - star forest
-  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

   Level: beginner

.seealso: PetscSFCreate(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Default to stdout on the SF's communicator when no viewer is supplied */
  if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  PetscCheckSameComm(sf,1,viewer,2);
  /* Set up first (if a graph exists) so the per-rank tables printed below are populated */
  if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    PetscMPIInt rank;
    PetscInt    ii,i,j;

    ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    /* Let the SF implementation print its type-specific information first */
    if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
    if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
      if (!sf->graphset) {
        ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
      /* Synchronized printing so output from all ranks appears in rank order */
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
      /* One line per leaf edge: local leaf index <- (owner rank, root index) */
      for (i=0; i<sf->nleaves; i++) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
      if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
        PetscMPIInt *tmpranks,*perm;
        /* Sort a copy of the rank list so edges are printed ordered by remote rank;
           perm[] maps sorted positions back to the original rank-table slots */
        ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
        ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
        for (i=0; i<sf->nranks; i++) perm[i] = i;
        ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
        for (ii=0; ii<sf->nranks; ii++) {
          i = perm[ii];
          /* roffset[] delimits each rank's contiguous segment in rmine/rremote */
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
          for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
            ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
          }
        }
        ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
      }
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
923 
924 /*@C
925    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
926 
927    Not Collective
928 
929    Input Arguments:
930 .  sf - star forest
931 
932    Output Arguments:
933 +  nranks - number of ranks referenced by local part
934 .  ranks - array of ranks
935 .  roffset - offset in rmine/rremote for each rank (length nranks+1)
936 .  rmine - concatenated array holding local indices referencing each remote rank
937 -  rremote - concatenated array holding remote indices referenced for each remote rank
938 
939    Level: developer
940 
941 .seealso: PetscSFGetLeafRanks()
942 @*/
943 PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
944 {
945   PetscErrorCode ierr;
946 
947   PetscFunctionBegin;
948   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
949   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
950   if (sf->ops->GetRootRanks) {
951     ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
952   } else {
953     /* The generic implementation */
954     if (nranks)  *nranks  = sf->nranks;
955     if (ranks)   *ranks   = sf->ranks;
956     if (roffset) *roffset = sf->roffset;
957     if (rmine)   *rmine   = sf->rmine;
958     if (rremote) *rremote = sf->rremote;
959   }
960   PetscFunctionReturn(0);
961 }
962 
963 /*@C
964    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
965 
966    Not Collective
967 
968    Input Arguments:
969 .  sf - star forest
970 
971    Output Arguments:
972 +  niranks - number of leaf ranks referencing roots on this process
973 .  iranks - array of ranks
974 .  ioffset - offset in irootloc for each rank (length niranks+1)
975 -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
976 
977    Level: developer
978 
979 .seealso: PetscSFGetRootRanks()
980 @*/
981 PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
982 {
983   PetscErrorCode ierr;
984 
985   PetscFunctionBegin;
986   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
987   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
988   if (sf->ops->GetLeafRanks) {
989     ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
990   } else {
991     PetscSFType type;
992     ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
993     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
994   }
995   PetscFunctionReturn(0);
996 }
997 
998 static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
999   PetscInt i;
1000   for (i=0; i<n; i++) {
1001     if (needle == list[i]) return PETSC_TRUE;
1002   }
1003   return PETSC_FALSE;
1004 }
1005 
/*@C
   PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

   Collective

   Input Arguments:
+  sf - PetscSF to set up; PetscSFSetGraph() must have been called
-  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

   Level: developer

.seealso: PetscSFGetRootRanks()
@*/
PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
{
  PetscErrorCode     ierr;
  PetscTable         table;
  PetscTablePosition pos;
  PetscMPIInt        size,groupsize,*groupranks;
  PetscInt           *rcount,*ranks;
  PetscInt           i, irank = -1,orank = -1;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
  /* Count, via a hash table, how many leaves reference each remote rank */
  ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
  for (i=0; i<sf->nleaves; i++) {
    /* Log 1-based rank */
    ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
  ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
  ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
  /* Flatten the table into parallel (rank, leaf-count) arrays */
  ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
    ranks[i]--;             /* Convert back to 0-based */
  }
  ierr = PetscTableDestroy(&table);CHKERRQ(ierr);

  /* We expect that dgroup is reliably "small" while nranks could be large */
  {
    MPI_Group group = MPI_GROUP_NULL;
    PetscMPIInt *dgroupranks;
    /* Translate the distinguished group's members into this communicator's rank numbering */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
    ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
    for (i=0; i<groupsize; i++) dgroupranks[i] = i;
    if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
  }

  /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
  for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
    for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
      if (InList(ranks[i],groupsize,groupranks)) break;
    }
    for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
      if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
    }
    if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
      PetscInt tmprank,tmpcount;

      tmprank             = ranks[i];
      tmpcount            = rcount[i];
      ranks[i]            = ranks[sf->ndranks];
      rcount[i]           = rcount[sf->ndranks];
      ranks[sf->ndranks]  = tmprank;
      rcount[sf->ndranks] = tmpcount;
      sf->ndranks++;
    }
  }
  ierr = PetscFree(groupranks);CHKERRQ(ierr);
  /* Sort each partition by rank so PetscFindMPIInt (binary search) works below */
  ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
  ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
  /* Build CSR-style offsets: rmine/rremote hold one contiguous segment per remote rank */
  sf->roffset[0] = 0;
  for (i=0; i<sf->nranks; i++) {
    ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
    sf->roffset[i+1] = sf->roffset[i] + rcount[i];
    rcount[i]        = 0; /* reused below as a fill cursor within each rank's segment */
  }
  for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
    /* short circuit */
    if (orank != sf->remote[i].rank) {
      /* Search for index of iremote[i].rank in sf->ranks */
      ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
      if (irank < 0) {
        ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
        if (irank >= 0) irank += sf->ndranks;
      }
      orank = sf->remote[i].rank;
    }
    if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
    sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
    sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
    rcount[irank]++;
  }
  ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1109 
/*@C
   PetscSFGetGroups - gets incoming and outgoing process groups

   Collective

   Input Argument:
.  sf - star forest

   Output Arguments:
+  incoming - group of origin processes for incoming edges (leaves that reference my roots)
-  outgoing - group of destination processes for outgoing edges (roots that I reference)

   Level: developer

.seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
@*/
PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
{
  PetscErrorCode ierr;
  MPI_Group      group = MPI_GROUP_NULL;

  PetscFunctionBegin;
  if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
  /* Both groups are built lazily and cached on the SF object */
  if (sf->ingroup == MPI_GROUP_NULL) {
    PetscInt       i;
    const PetscInt *indegree;
    PetscMPIInt    rank,*outranks,*inranks;
    PetscSFNode    *remote;
    PetscSF        bgcount;

    /* Compute the number of incoming ranks */
    /* bgcount is an auxiliary SF: one leaf per referenced rank, all attached to root 0,
       so root 0's degree equals the number of processes whose leaves reference my roots */
    ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) {
      remote[i].rank  = sf->ranks[i];
      remote[i].index = 0;
    }
    ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
    ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
    /* Enumerate the incoming ranks */
    ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
    ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
    for (i=0; i<sf->nranks; i++) outranks[i] = rank;
    /* Gather every referencing process's rank onto root 0, yielding the incoming rank list */
    ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
    ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
  }
  *incoming = sf->ingroup;

  if (sf->outgroup == MPI_GROUP_NULL) {
    /* The outgoing group is simply the cached list of root ranks referenced by my leaves */
    ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
    ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
    ierr = MPI_Group_free(&group);CHKERRQ(ierr);
  }
  *outgoing = sf->outgroup;
  PetscFunctionReturn(0);
}
1172 
1173 /*@
1174    PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters
1175 
1176    Collective
1177 
1178    Input Argument:
1179 .  sf - star forest that may contain roots with 0 or with more than 1 vertex
1180 
1181    Output Arguments:
1182 .  multi - star forest with split roots, such that each root has degree exactly 1
1183 
1184    Level: developer
1185 
1186    Notes:
1187 
1188    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1189    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1190    edge, it is a candidate for future optimization that might involve its removal.
1191 
1192 .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1193 @*/
1194 PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1195 {
1196   PetscErrorCode ierr;
1197 
1198   PetscFunctionBegin;
1199   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1200   PetscValidPointer(multi,2);
1201   if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
1202     ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
1203     *multi = sf->multi;
1204     PetscFunctionReturn(0);
1205   }
1206   if (!sf->multi) {
1207     const PetscInt *indegree;
1208     PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
1209     PetscSFNode    *remote;
1210     maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1211     ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
1212     ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
1213     ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
1214     inoffset[0] = 0;
1215     for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1216     for (i=0; i<maxlocal; i++) outones[i] = 1;
1217     ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
1218     ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
1219     for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1220     if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1221       for (i=0; i<sf->nroots; i++) {
1222         if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
1223       }
1224     }
1225     ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
1226     for (i=0; i<sf->nleaves; i++) {
1227       remote[i].rank  = sf->remote[i].rank;
1228       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1229     }
1230     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
1231     ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1232     if (sf->rankorder) {        /* Sort the ranks */
1233       PetscMPIInt rank;
1234       PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1235       PetscSFNode *newremote;
1236       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
1237       for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1238       ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
1239       for (i=0; i<maxlocal; i++) outranks[i] = rank;
1240       ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
1241       ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
1242       /* Sort the incoming ranks at each vertex, build the inverse map */
1243       for (i=0; i<sf->nroots; i++) {
1244         PetscInt j;
1245         for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1246         ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
1247         for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1248       }
1249       ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
1250       ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
1251       ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
1252       for (i=0; i<sf->nleaves; i++) {
1253         newremote[i].rank  = sf->remote[i].rank;
1254         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1255       }
1256       ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1257       ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
1258     }
1259     ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
1260   }
1261   *multi = sf->multi;
1262   PetscFunctionReturn(0);
1263 }
1264 
/*@C
   PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices

   Collective

   Input Arguments:
+  sf - original star forest
.  nselected  - number of selected roots on this process
-  selected   - indices of the selected roots on this process

   Output Arguments:
.  esf - new star forest

   Level: advanced

   Note:
   To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
   be done by calling PetscSFGetGraph().

.seealso: PetscSFSetGraph(), PetscSFGetGraph()
@*/
PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
{
  PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
  const PetscInt    *ilocal;
  signed char       *rootdata,*leafdata,*leafmem;
  const PetscSFNode *iremote;
  PetscSFNode       *new_iremote;
  MPI_Comm          comm;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  if (nselected) PetscValidPointer(selected,3);
  PetscValidPointer(esf,4);

  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);

  if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or out of range indices */
    PetscBool dups;
    ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr);
    if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
    for (i=0; i<nselected; i++)
      if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots);
  }

  if (sf->ops->CreateEmbeddedSF) {
    /* Let the implementation use its own (possibly communication-free) algorithm */
    ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr);
  } else {
    /* A generic version of creating embedded sf */
    ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
    maxlocal = maxleaf - minleaf + 1;
    ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
    /* leafdata is shifted so it can be indexed by the (possibly nonzero-based) leaf indices */
    leafdata = leafmem - minleaf;
    /* Tag selected roots and bcast to leaves */
    for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
    ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);

    /* Build esf with leaves that are still connected */
    esf_nleaves = 0;
    for (i=0; i<nleaves; i++) {
      j = ilocal ? ilocal[i] : i;
      /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
         with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
      */
      esf_nleaves += (leafdata[j] ? 1 : 0);
    }
    ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
    ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
    /* Second pass: copy surviving leaves' edges verbatim (indices are not remapped) */
    for (i=n=0; i<nleaves; i++) {
      j = ilocal ? ilocal[i] : i;
      if (leafdata[j]) {
        new_ilocal[n]        = j;
        new_iremote[n].rank  = iremote[i].rank;
        new_iremote[n].index = iremote[i].index;
        ++n;
      }
    }
    ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
    ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1356 
1357 /*@C
1358   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1359 
1360   Collective
1361 
1362   Input Arguments:
1363 + sf - original star forest
1364 . nselected  - number of selected leaves on this process
1365 - selected   - indices of the selected leaves on this process
1366 
1367   Output Arguments:
1368 .  newsf - new star forest
1369 
1370   Level: advanced
1371 
1372 .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
1373 @*/
1374 PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1375 {
1376   const PetscSFNode *iremote;
1377   PetscSFNode       *new_iremote;
1378   const PetscInt    *ilocal;
1379   PetscInt          i,nroots,*leaves,*new_ilocal;
1380   MPI_Comm          comm;
1381   PetscErrorCode    ierr;
1382 
1383   PetscFunctionBegin;
1384   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1385   PetscSFCheckGraphSet(sf,1);
1386   if (nselected) PetscValidPointer(selected,3);
1387   PetscValidPointer(newsf,4);
1388 
1389   /* Uniq selected[] and put results in leaves[] */
1390   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1391   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1392   ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1393   ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1394   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1395 
1396   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1397   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1398     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1399   } else {
1400     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1401     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1402     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1403     for (i=0; i<nselected; ++i) {
1404       const PetscInt l     = leaves[i];
1405       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1406       new_iremote[i].rank  = iremote[l].rank;
1407       new_iremote[i].index = iremote[l].index;
1408     }
1409     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
1410     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1411   }
1412   ierr = PetscFree(leaves);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 /*@C
1417    PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()
1418 
1419    Collective on PetscSF
1420 
1421    Input Arguments:
1422 +  sf - star forest on which to communicate
1423 .  unit - data type associated with each node
1424 .  rootdata - buffer to broadcast
1425 -  op - operation to use for reduction
1426 
1427    Output Arguments:
1428 .  leafdata - buffer to be reduced with values from each leaf's respective root
1429 
1430    Level: intermediate
1431 
1432    Notes:
1433     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1434     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1435     use PetscSFBcastAndOpWithMemTypeBegin() instead.
1436 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(), PetscSFBcastAndOpWithMemTypeBegin()
1437 @*/
1438 PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1439 {
1440   PetscErrorCode ierr;
1441   PetscMemType   rootmtype,leafmtype;
1442 
1443   PetscFunctionBegin;
1444   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1445   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1446   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);}
1447   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1448   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1449   ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1450   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);}
1451   PetscFunctionReturn(0);
1452 }
1453 
1454 /*@C
1455    PetscSFBcastAndOpWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastAndOpEnd()
1456 
1457    Collective on PetscSF
1458 
1459    Input Arguments:
1460 +  sf - star forest on which to communicate
1461 .  unit - data type associated with each node
1462 .  rootmtype - memory type of rootdata
1463 .  rootdata - buffer to broadcast
1464 .  leafmtype - memory type of leafdata
1465 -  op - operation to use for reduction
1466 
1467    Output Arguments:
1468 .  leafdata - buffer to be reduced with values from each leaf's respective root
1469 
1470    Level: intermediate
1471 
1472 .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(),PetscSFBcastAndOpBegin()
1473 @*/
1474 PetscErrorCode PetscSFBcastAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1475 {
1476   PetscErrorCode ierr;
1477 
1478   PetscFunctionBegin;
1479   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1480   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1481   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);}
1482   ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1483   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);}
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 /*@C
1488    PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()
1489 
1490    Collective
1491 
1492    Input Arguments:
1493 +  sf - star forest
1494 .  unit - data type
1495 .  rootdata - buffer to broadcast
1496 -  op - operation to use for reduction
1497 
1498    Output Arguments:
1499 .  leafdata - buffer to be reduced with values from each leaf's respective root
1500 
1501    Level: intermediate
1502 
1503 .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1504 @*/
1505 PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1506 {
1507   PetscErrorCode ierr;
1508 
1509   PetscFunctionBegin;
1510   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1511   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);}
1512   ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
1513   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);}
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 /*@C
1518    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1519 
1520    Collective
1521 
1522    Input Arguments:
1523 +  sf - star forest
1524 .  unit - data type
1525 .  leafdata - values to reduce
1526 -  op - reduction operation
1527 
1528    Output Arguments:
1529 .  rootdata - result of reduction of values from all leaves of each root
1530 
1531    Level: intermediate
1532 
1533    Notes:
1534     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1535     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1536     use PetscSFReduceWithMemTypeBegin() instead.
1537 
1538 .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1539 @*/
1540 PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1541 {
1542   PetscErrorCode ierr;
1543   PetscMemType   rootmtype,leafmtype;
1544 
1545   PetscFunctionBegin;
1546   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1547   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1548   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1549   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1550   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1551   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1552   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 /*@C
1557    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1558 
1559    Collective
1560 
1561    Input Arguments:
1562 +  sf - star forest
1563 .  unit - data type
1564 .  leafmtype - memory type of leafdata
1565 .  leafdata - values to reduce
1566 .  rootmtype - memory type of rootdata
1567 -  op - reduction operation
1568 
1569    Output Arguments:
1570 .  rootdata - result of reduction of values from all leaves of each root
1571 
1572    Level: intermediate
1573 
1574 .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1575 @*/
1576 PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1577 {
1578   PetscErrorCode ierr;
1579 
1580   PetscFunctionBegin;
1581   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1582   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1583   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1584   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1585   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);}
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 /*@C
1590    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1591 
1592    Collective
1593 
1594    Input Arguments:
1595 +  sf - star forest
1596 .  unit - data type
1597 .  leafdata - values to reduce
1598 -  op - reduction operation
1599 
1600    Output Arguments:
1601 .  rootdata - result of reduction of values from all leaves of each root
1602 
1603    Level: intermediate
1604 
1605 .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1606 @*/
1607 PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1608 {
1609   PetscErrorCode ierr;
1610 
1611   PetscFunctionBegin;
1612   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1613   if (!sf->vscat.logging) {ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
1614   ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1615   if (!sf->vscat.logging) {ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);}
1616   PetscFunctionReturn(0);
1617 }
1618 
/*@C
   PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

   Note:
   The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
   might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
   not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
   integers.

.seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;
  PetscMemType   rootmtype,leafmtype,leafupdatemtype;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  /* Determine at runtime where each of the three buffers lives (host vs. device) */
  ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
  ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
  /* The implementations require leafdata and leafupdate to reside in the same memory space;
     only leafmtype is forwarded to the backend below */
  if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
  ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1661 
/*@C
   PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
.  leafdata - leaf values to use in reduction
-  op - operation to use for reduction

   Output Arguments:
+  rootdata - root values to be updated, input state is seen by first process to perform an update
-  leafupdate - state at each leaf's respective root immediately prior to my atomic update

   Level: advanced

.seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  /* Let the implementation complete the communication started in PetscSFFetchAndOpBegin() */
  ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1692 
/*@C
   PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: advanced

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFGatherBegin()
@*/
PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    PetscInt i, nroots = sf->nroots, maxlocal;
    /* sf->degree being non-NULL means a Begin is already in flight (PetscSFComputeDegreeEnd() frees degreetmp but keeps degree) */
    if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
    maxlocal = sf->maxleaf-sf->minleaf+1;
    ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
    ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
    for (i=0; i<nroots; i++) sf->degree[i] = 0;
    for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
    /* Summing a 1 from every leaf into its root yields the root degrees. degreetmp[] covers leaves
       [minleaf,maxleaf], so shift the pointer by minleaf to let the SF index it by absolute leaf number. */
    ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
  }
  /* The result is not available until PetscSFComputeDegreeEnd(), which returns sf->degree */
  *degree = NULL;
  PetscFunctionReturn(0);
}
1732 
/*@C
   PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

   Collective

   Input Arguments:
.  sf - star forest

   Output Arguments:
.  degree - degree of each root vertex

   Level: developer

   Notes:
   The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.

.seealso: PetscSFComputeDegreeBegin()
@*/
PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sf,1);
  PetscValidPointer(degree,2);
  if (!sf->degreeknown) {
    if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
    /* Same minleaf pointer shift as in PetscSFComputeDegreeBegin(): degreetmp[] is indexed by leaf number minus minleaf */
    ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
    ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
    sf->degreeknown = PETSC_TRUE;
  }
  /* sf->degree is cached; subsequent Begin/End pairs return it without recomputation */
  *degree = sf->degree;
  PetscFunctionReturn(0);
}
1768 
1769 
1770 /*@C
1771    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1772    Each multi-root is assigned index of the corresponding original root.
1773 
1774    Collective
1775 
1776    Input Arguments:
1777 +  sf - star forest
1778 -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1779 
1780    Output Arguments:
1781 +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1782 -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1783 
1784    Level: developer
1785 
1786    Notes:
1787    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1788 
1789 .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1790 @*/
1791 PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1792 {
1793   PetscSF             msf;
1794   PetscInt            i, j, k, nroots, nmroots;
1795   PetscErrorCode      ierr;
1796 
1797   PetscFunctionBegin;
1798   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1799   ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
1800   if (nroots) PetscValidIntPointer(degree,2);
1801   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
1802   PetscValidPointer(multiRootsOrigNumbering,4);
1803   ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
1804   ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
1805   ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
1806   for (i=0,j=0,k=0; i<nroots; i++) {
1807     if (!degree[i]) continue;
1808     for (j=0; j<degree[i]; j++,k++) {
1809       (*multiRootsOrigNumbering)[k] = i;
1810     }
1811   }
1812   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
1813   if (nMultiRoots) *nMultiRoots = nmroots;
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 /*@C
1818    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1819 
1820    Collective
1821 
1822    Input Arguments:
1823 +  sf - star forest
1824 .  unit - data type
1825 -  leafdata - leaf data to gather to roots
1826 
1827    Output Argument:
1828 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1829 
1830    Level: intermediate
1831 
1832 .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1833 @*/
1834 PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1835 {
1836   PetscErrorCode ierr;
1837   PetscSF        multi = NULL;
1838 
1839   PetscFunctionBegin;
1840   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1841   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1842   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1843   ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1844   PetscFunctionReturn(0);
1845 }
1846 
1847 /*@C
1848    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1849 
1850    Collective
1851 
1852    Input Arguments:
1853 +  sf - star forest
1854 .  unit - data type
1855 -  leafdata - leaf data to gather to roots
1856 
1857    Output Argument:
1858 .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1859 
1860    Level: intermediate
1861 
1862 .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1863 @*/
1864 PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1865 {
1866   PetscErrorCode ierr;
1867   PetscSF        multi = NULL;
1868 
1869   PetscFunctionBegin;
1870   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1871   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
1872   ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
1873   PetscFunctionReturn(0);
1874 }
1875 
/*@C
   PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be update with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeBegin(), PetscSFScatterEnd()
@*/
PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
  /* A scatter is implemented as a broadcast over the multi-SF (one unit of root data per leaf) */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1905 
/*@C
   PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

   Collective

   Input Arguments:
+  sf - star forest
.  unit - data type
-  multirootdata - root buffer to send to each leaf, one unit of data per leaf

   Output Argument:
.  leafdata - leaf data to be update with personal data from each respective root

   Level: intermediate

.seealso: PetscSFComputeDegreeEnd(), PetscSFScatterBegin()
@*/
PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
{
  PetscErrorCode ierr;
  PetscSF        multi = NULL;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
  /* Complete the broadcast over the multi-SF started in PetscSFScatterBegin() */
  ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1934 
1935 static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1936 {
1937   PetscInt        i, n, nleaves;
1938   const PetscInt *ilocal = NULL;
1939   PetscHSetI      seen;
1940   PetscErrorCode  ierr;
1941 
1942   PetscFunctionBegin;
1943   if (PetscDefined(USE_DEBUG)) {
1944     ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
1945     ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
1946     for (i = 0; i < nleaves; i++) {
1947       const PetscInt leaf = ilocal ? ilocal[i] : i;
1948       ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
1949     }
1950     ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
1951     if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1952     ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
1953   }
1954   PetscFunctionReturn(0);
1955 }
1956 
/*@
  PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots.

  sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
  nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
  Bcast on sfA, then a Bcast on sfB, on connected nodes.

.seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
@*/
PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
  const PetscInt    *localPointsA,*localPointsB;
  PetscInt          *localPointsBA;
  PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
  PetscBool         denseB;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
  if (localPointsA) {
    /* Re-index sfA's remote (root) records by sfA leaf number so that entry l is the sfA root reached
       from leaf l; rank -1 marks sfB roots that no sfA leaf maps onto. sfA leaves >= numRootsB cannot
       feed any sfB root and are dropped. */
    ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
    for (i=0; i<numRootsB; i++) {
      reorderedRemotePointsA[i].rank = -1;
      reorderedRemotePointsA[i].index = -1;
    }
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] >= numRootsB) continue;
      reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
    }
    remotePointsA = reorderedRemotePointsA;
  }
  ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
  /* Push each sfB root's (rank,index) record (an MPIU_2INT) down to sfB's leaves; leafdataB is shifted
     by minleaf so it can be addressed with absolute leaf numbers */
  ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);

  /* denseB: sfB's leaves are contiguous (no ilocal) and every one of them received a valid root record */
  denseB = (PetscBool)!localPointsB;
  for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
    if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
    else numLeavesBA++;
  }
  if (denseB) {
    /* Every leaf is connected: reuse leafdataB directly as the composed remote points */
    localPointsBA  = NULL;
    remotePointsBA = leafdataB;
  } else {
    /* Compact: keep only leaves whose record was filled (rank != -1) */
    ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
    ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
    for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
      const PetscInt l = localPointsB ? localPointsB[i] : i;

      if (leafdataB[l-minleaf].rank == -1) continue;
      remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
      localPointsBA[numLeavesBA] = l;
      numLeavesBA++;
    }
    ierr = PetscFree(leafdataB);CHKERRQ(ierr);
  }
  /* Ownership of localPointsBA/remotePointsBA passes to the new SF (PETSC_OWN_POINTER) */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2046 
/*@
  PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

  Input Parameters:
+ sfA - The first PetscSF
- sfB - The second PetscSF

  Output Parameters:
. sfBA - The composite SF.

  Level: developer

  Notes:
  Currently, the two SFs must be defined on congruent communicators and they must be true star
  forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
  second SF must have a degree of 1, i.e., no roots have more than one leaf connected.

  sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
  a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
  roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
  a Reduce on sfB, on connected roots.

.seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
@*/
PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
{
  PetscErrorCode    ierr;
  const PetscSFNode *remotePointsA,*remotePointsB;
  PetscSFNode       *remotePointsBA;
  const PetscInt    *localPointsA,*localPointsB;
  PetscSFNode       *reorderedRemotePointsA = NULL;
  PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
  MPI_Op            op;
#if defined(PETSC_USE_64BIT_INDICES)
  PetscBool         iswin;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
  PetscSFCheckGraphSet(sfA,1);
  PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
  PetscSFCheckGraphSet(sfB,2);
  PetscCheckSameComm(sfA,1,sfB,2);
  PetscValidPointer(sfBA,3);
  ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
  ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);

  ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);

  /* TODO: Check roots of sfB have degree of 1 */
  /* Once we implement it, we can replace the MPI_MAXLOC
     with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
     We use MPI_MAXLOC only to have a deterministic output from this routine if
     the root condition is not met.
   */
  op = MPI_MAXLOC;
#if defined(PETSC_USE_64BIT_INDICES)
  /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
  ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
  if (iswin) op = MPIU_REPLACE;
#endif

  /* Lay out sfA's remote (root) records by leaf number over sfB's leaf range [minleaf,maxleaf];
     rank -1 marks positions not covered by any sfA leaf */
  ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
  ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
  for (i=0; i<maxleaf - minleaf + 1; i++) {
    reorderedRemotePointsA[i].rank = -1;
    reorderedRemotePointsA[i].index = -1;
  }
  if (localPointsA) {
    for (i=0; i<numLeavesA; i++) {
      if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
      reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
    }
  } else {
    /* ilocal==NULL: sfA's leaves are numbered 0..numLeavesA-1 */
    for (i=0; i<numLeavesA; i++) {
      if (i > maxleaf || i < minleaf) continue;
      reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
    }
  }

  ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
  ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
  for (i=0; i<numRootsB; i++) {
    remotePointsBA[i].rank = -1;
    remotePointsBA[i].index = -1;
  }

  /* Reduce each sfB leaf's (rank,index) record (an MPIU_2INT) up to its sfB root; the pointer is shifted
     by minleaf so the SF can index the buffer by absolute leaf number */
  ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
  ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
  /* Compact in place: keep only sfB roots that actually received a record (rank != -1) */
  for (i=0,numLeavesBA=0; i<numRootsB; i++) {
    if (remotePointsBA[i].rank == -1) continue;
    remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
    remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
    localPointsBA[numLeavesBA] = i;
    numLeavesBA++;
  }
  /* Ownership of localPointsBA/remotePointsBA passes to the new SF (PETSC_OWN_POINTER) */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2150 
2151 /*
2152   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
2153 
2154   Input Parameters:
2155 . sf - The global PetscSF
2156 
2157   Output Parameters:
2158 . out - The local PetscSF
2159  */
2160 PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2161 {
2162   MPI_Comm           comm;
2163   PetscMPIInt        myrank;
2164   const PetscInt     *ilocal;
2165   const PetscSFNode  *iremote;
2166   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
2167   PetscSFNode        *liremote;
2168   PetscSF            lsf;
2169   PetscErrorCode     ierr;
2170 
2171   PetscFunctionBegin;
2172   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2173   if (sf->ops->CreateLocalSF) {
2174     ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
2175   } else {
2176     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2177     ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
2178     ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);
2179 
2180     /* Find out local edges and build a local SF */
2181     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
2182     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2183     ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
2184     ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);
2185 
2186     for (i=j=0; i<nleaves; i++) {
2187       if (iremote[i].rank == (PetscInt)myrank) {
2188         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2189         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2190         liremote[j].index = iremote[i].index;
2191         j++;
2192       }
2193     }
2194     ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
2195     ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
2196     ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
2197     ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
2198     *out = lsf;
2199   }
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2204 PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2205 {
2206   PetscErrorCode     ierr;
2207   PetscMemType       rootmtype,leafmtype;
2208 
2209   PetscFunctionBegin;
2210   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2211   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
2212   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
2213   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
2214   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
2215   if (sf->ops->BcastToZero) {
2216     ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
2217   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2218   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
2219   PetscFunctionReturn(0);
2220 }
2221 
2222