xref: /petsc/src/vec/is/sf/interface/sf.c (revision d0295fc027abbea29f13fa82c19cf92da8e9ba99)
1af0996ceSBarry Smith #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
2c4e6a40aSLawrence Mitchell #include <petsc/private/hashseti.h>
395fce210SBarry Smith #include <petscctable.h>
495fce210SBarry Smith 
57fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
67fd2d3dbSJunchao Zhang   #include <cuda_runtime.h>
77fd2d3dbSJunchao Zhang #endif
87fd2d3dbSJunchao Zhang 
97fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_HIP)
107fd2d3dbSJunchao Zhang   #include <hip/hip_runtime.h>
117fd2d3dbSJunchao Zhang #endif
127fd2d3dbSJunchao Zhang 
1395fce210SBarry Smith #if defined(PETSC_USE_DEBUG)
1495fce210SBarry Smith #  define PetscSFCheckGraphSet(sf,arg) do {                          \
1595fce210SBarry Smith     if (PetscUnlikely(!(sf)->graphset))                              \
16dd5b3ca6SJunchao Zhang       SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
1795fce210SBarry Smith   } while (0)
1895fce210SBarry Smith #else
1995fce210SBarry Smith #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
2095fce210SBarry Smith #endif
2195fce210SBarry Smith 
224c8fdceaSLisandro Dalcin const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
2395fce210SBarry Smith 
247fd2d3dbSJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type)
257fd2d3dbSJunchao Zhang {
267fd2d3dbSJunchao Zhang   PetscFunctionBegin;
277fd2d3dbSJunchao Zhang   PetscValidPointer(type,2);
287fd2d3dbSJunchao Zhang   *type = PETSC_MEMTYPE_HOST;
297fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
307fd2d3dbSJunchao Zhang   if (PetscCUDAInitialized && data) {
317fd2d3dbSJunchao Zhang     cudaError_t                  cerr;
327fd2d3dbSJunchao Zhang     struct cudaPointerAttributes attr;
337fd2d3dbSJunchao Zhang     enum cudaMemoryType          mtype;
347fd2d3dbSJunchao Zhang     cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
357fd2d3dbSJunchao Zhang     cudaGetLastError(); /* Reset the last error */
367fd2d3dbSJunchao Zhang     #if (CUDART_VERSION < 10000)
377fd2d3dbSJunchao Zhang       mtype = attr.memoryType;
387fd2d3dbSJunchao Zhang     #else
397fd2d3dbSJunchao Zhang       mtype = attr.type;
407fd2d3dbSJunchao Zhang     #endif
417fd2d3dbSJunchao Zhang     if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
427fd2d3dbSJunchao Zhang   }
437fd2d3dbSJunchao Zhang #endif
447fd2d3dbSJunchao Zhang 
457fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_HIP)
467fd2d3dbSJunchao Zhang   if (PetscHIPInitialized && data) {
477fd2d3dbSJunchao Zhang     hipError_t                   cerr;
487fd2d3dbSJunchao Zhang     struct hipPointerAttribute_t attr;
497fd2d3dbSJunchao Zhang     enum hipMemoryType           mtype;
507fd2d3dbSJunchao Zhang     cerr = hipPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */
517fd2d3dbSJunchao Zhang     hipGetLastError(); /* Reset the last error */
527fd2d3dbSJunchao Zhang     mtype = attr.memoryType;
537fd2d3dbSJunchao Zhang     if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE;
547fd2d3dbSJunchao Zhang   }
557fd2d3dbSJunchao Zhang #endif
567fd2d3dbSJunchao Zhang   PetscFunctionReturn(0);
577fd2d3dbSJunchao Zhang }
587fd2d3dbSJunchao Zhang 
598af6ec1cSBarry Smith /*@
6095fce210SBarry Smith    PetscSFCreate - create a star forest communication context
6195fce210SBarry Smith 
62d083f849SBarry Smith    Collective
6395fce210SBarry Smith 
6495fce210SBarry Smith    Input Arguments:
6595fce210SBarry Smith .  comm - communicator on which the star forest will operate
6695fce210SBarry Smith 
6795fce210SBarry Smith    Output Arguments:
6895fce210SBarry Smith .  sf - new star forest context
6995fce210SBarry Smith 
70dd5b3ca6SJunchao Zhang    Options Database Keys:
71dd5b3ca6SJunchao Zhang +  -sf_type basic     -Use MPI persistent Isend/Irecv for communication (Default)
72dd5b3ca6SJunchao Zhang .  -sf_type window    -Use MPI-3 one-sided window for communication
73dd5b3ca6SJunchao Zhang -  -sf_type neighbor  -Use MPI-3 neighborhood collectives for communication
74dd5b3ca6SJunchao Zhang 
7595fce210SBarry Smith    Level: intermediate
7695fce210SBarry Smith 
77dd5b3ca6SJunchao Zhang    Notes:
78dd5b3ca6SJunchao Zhang    When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv,
79dd5b3ca6SJunchao Zhang    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
80dd5b3ca6SJunchao Zhang    SFs are optimized and they have better performance than general SFs.
81dd5b3ca6SJunchao Zhang 
82dd5b3ca6SJunchao Zhang .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
8395fce210SBarry Smith @*/
8495fce210SBarry Smith PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
8595fce210SBarry Smith {
8695fce210SBarry Smith   PetscErrorCode ierr;
8795fce210SBarry Smith   PetscSF        b;
8895fce210SBarry Smith 
8995fce210SBarry Smith   PetscFunctionBegin;
9095fce210SBarry Smith   PetscValidPointer(sf,2);
91607a6623SBarry Smith   ierr = PetscSFInitializePackage();CHKERRQ(ierr);
9295fce210SBarry Smith 
9373107ff1SLisandro Dalcin   ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr);
9495fce210SBarry Smith 
9595fce210SBarry Smith   b->nroots    = -1;
9695fce210SBarry Smith   b->nleaves   = -1;
9729046d53SLisandro Dalcin   b->minleaf   = PETSC_MAX_INT;
9829046d53SLisandro Dalcin   b->maxleaf   = PETSC_MIN_INT;
9995fce210SBarry Smith   b->nranks    = -1;
10095fce210SBarry Smith   b->rankorder = PETSC_TRUE;
10195fce210SBarry Smith   b->ingroup   = MPI_GROUP_NULL;
10295fce210SBarry Smith   b->outgroup  = MPI_GROUP_NULL;
10395fce210SBarry Smith   b->graphset  = PETSC_FALSE;
10420c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
10520c24465SJunchao Zhang   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
10620c24465SJunchao Zhang   b->use_stream_aware_mpi = PETSC_FALSE;
10720c24465SJunchao Zhang   b->use_default_stream   = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */
10827f636e8SJunchao Zhang   #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
10920c24465SJunchao Zhang     b->backend = PETSCSF_BACKEND_KOKKOS;
11027f636e8SJunchao Zhang   #elif defined(PETSC_HAVE_CUDA)
11127f636e8SJunchao Zhang     b->backend = PETSCSF_BACKEND_CUDA;
11220c24465SJunchao Zhang   #endif
11320c24465SJunchao Zhang #endif
11495fce210SBarry Smith   *sf = b;
11595fce210SBarry Smith   PetscFunctionReturn(0);
11695fce210SBarry Smith }
11795fce210SBarry Smith 
11829046d53SLisandro Dalcin /*@
11995fce210SBarry Smith    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
12095fce210SBarry Smith 
12195fce210SBarry Smith    Collective
12295fce210SBarry Smith 
12395fce210SBarry Smith    Input Arguments:
12495fce210SBarry Smith .  sf - star forest
12595fce210SBarry Smith 
12695fce210SBarry Smith    Level: advanced
12795fce210SBarry Smith 
12895fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
12995fce210SBarry Smith @*/
13095fce210SBarry Smith PetscErrorCode PetscSFReset(PetscSF sf)
13195fce210SBarry Smith {
13295fce210SBarry Smith   PetscErrorCode ierr;
13395fce210SBarry Smith 
13495fce210SBarry Smith   PetscFunctionBegin;
13595fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
13679715d56SJed Brown   if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);}
13729046d53SLisandro Dalcin   sf->nroots   = -1;
13829046d53SLisandro Dalcin   sf->nleaves  = -1;
13929046d53SLisandro Dalcin   sf->minleaf  = PETSC_MAX_INT;
14029046d53SLisandro Dalcin   sf->maxleaf  = PETSC_MIN_INT;
14195fce210SBarry Smith   sf->mine     = NULL;
14295fce210SBarry Smith   sf->remote   = NULL;
14329046d53SLisandro Dalcin   sf->graphset = PETSC_FALSE;
14429046d53SLisandro Dalcin   ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr);
14595fce210SBarry Smith   ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr);
14621c688dcSJed Brown   sf->nranks = -1;
14729046d53SLisandro Dalcin   ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr);
1487fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
14920c24465SJunchao Zhang   for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);}
150eb02082bSJunchao Zhang #endif
15129046d53SLisandro Dalcin   sf->degreeknown = PETSC_FALSE;
15295fce210SBarry Smith   ierr = PetscFree(sf->degree);CHKERRQ(ierr);
15395fce210SBarry Smith   if (sf->ingroup  != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);}
15495fce210SBarry Smith   if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);}
15595fce210SBarry Smith   ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr);
156dd5b3ca6SJunchao Zhang   ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr);
15795fce210SBarry Smith   sf->setupcalled = PETSC_FALSE;
15895fce210SBarry Smith   PetscFunctionReturn(0);
15995fce210SBarry Smith }
16095fce210SBarry Smith 
16195fce210SBarry Smith /*@C
16229046d53SLisandro Dalcin    PetscSFSetType - Set the PetscSF communication implementation
16395fce210SBarry Smith 
16495fce210SBarry Smith    Collective on PetscSF
16595fce210SBarry Smith 
16695fce210SBarry Smith    Input Parameters:
16795fce210SBarry Smith +  sf - the PetscSF context
16895fce210SBarry Smith -  type - a known method
16995fce210SBarry Smith 
17095fce210SBarry Smith    Options Database Key:
17195fce210SBarry Smith .  -sf_type <type> - Sets the method; use -help for a list
17270616304SStefano Zampini    of available methods (for instance, window, basic, neighbor)
17395fce210SBarry Smith 
17495fce210SBarry Smith    Notes:
17595fce210SBarry Smith    See "include/petscsf.h" for available methods (for instance)
17695fce210SBarry Smith +    PETSCSFWINDOW - MPI-2/3 one-sided
17795fce210SBarry Smith -    PETSCSFBASIC - basic implementation using MPI-1 two-sided
17895fce210SBarry Smith 
17995fce210SBarry Smith   Level: intermediate
18095fce210SBarry Smith 
18195fce210SBarry Smith .seealso: PetscSFType, PetscSFCreate()
18295fce210SBarry Smith @*/
18395fce210SBarry Smith PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
18495fce210SBarry Smith {
18595fce210SBarry Smith   PetscErrorCode ierr,(*r)(PetscSF);
18695fce210SBarry Smith   PetscBool      match;
18795fce210SBarry Smith 
18895fce210SBarry Smith   PetscFunctionBegin;
18995fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
19095fce210SBarry Smith   PetscValidCharPointer(type,2);
19195fce210SBarry Smith 
19295fce210SBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr);
19395fce210SBarry Smith   if (match) PetscFunctionReturn(0);
19495fce210SBarry Smith 
195adc40e5bSBarry Smith   ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr);
19695fce210SBarry Smith   if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type);
19729046d53SLisandro Dalcin   /* Destroy the previous PetscSF implementation context */
19829046d53SLisandro Dalcin   if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);}
19995fce210SBarry Smith   ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr);
20095fce210SBarry Smith   ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr);
20195fce210SBarry Smith   ierr = (*r)(sf);CHKERRQ(ierr);
20295fce210SBarry Smith   PetscFunctionReturn(0);
20395fce210SBarry Smith }
20495fce210SBarry Smith 
20529046d53SLisandro Dalcin /*@C
20629046d53SLisandro Dalcin   PetscSFGetType - Get the PetscSF communication implementation
20729046d53SLisandro Dalcin 
20829046d53SLisandro Dalcin   Not Collective
20929046d53SLisandro Dalcin 
21029046d53SLisandro Dalcin   Input Parameter:
21129046d53SLisandro Dalcin . sf  - the PetscSF context
21229046d53SLisandro Dalcin 
21329046d53SLisandro Dalcin   Output Parameter:
21429046d53SLisandro Dalcin . type - the PetscSF type name
21529046d53SLisandro Dalcin 
21629046d53SLisandro Dalcin   Level: intermediate
21729046d53SLisandro Dalcin 
21829046d53SLisandro Dalcin .seealso: PetscSFSetType(), PetscSFCreate()
21929046d53SLisandro Dalcin @*/
22029046d53SLisandro Dalcin PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
22129046d53SLisandro Dalcin {
22229046d53SLisandro Dalcin   PetscFunctionBegin;
22329046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1);
22429046d53SLisandro Dalcin   PetscValidPointer(type,2);
22529046d53SLisandro Dalcin   *type = ((PetscObject)sf)->type_name;
22629046d53SLisandro Dalcin   PetscFunctionReturn(0);
22729046d53SLisandro Dalcin }
22829046d53SLisandro Dalcin 
229d36d33e4SMatthew G. Knepley /*@
23095fce210SBarry Smith    PetscSFDestroy - destroy star forest
23195fce210SBarry Smith 
23295fce210SBarry Smith    Collective
23395fce210SBarry Smith 
23495fce210SBarry Smith    Input Arguments:
23595fce210SBarry Smith .  sf - address of star forest
23695fce210SBarry Smith 
23795fce210SBarry Smith    Level: intermediate
23895fce210SBarry Smith 
23995fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFReset()
24095fce210SBarry Smith @*/
24195fce210SBarry Smith PetscErrorCode PetscSFDestroy(PetscSF *sf)
24295fce210SBarry Smith {
24395fce210SBarry Smith   PetscErrorCode ierr;
24495fce210SBarry Smith 
24595fce210SBarry Smith   PetscFunctionBegin;
24695fce210SBarry Smith   if (!*sf) PetscFunctionReturn(0);
24795fce210SBarry Smith   PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1);
24829046d53SLisandro Dalcin   if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);}
24995fce210SBarry Smith   ierr = PetscSFReset(*sf);CHKERRQ(ierr);
25095fce210SBarry Smith   if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);}
25195fce210SBarry Smith   ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr);
25295fce210SBarry Smith   PetscFunctionReturn(0);
25395fce210SBarry Smith }
25495fce210SBarry Smith 
255c4e6a40aSLawrence Mitchell static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
256c4e6a40aSLawrence Mitchell {
257c4e6a40aSLawrence Mitchell   PetscInt           i, nleaves;
258c4e6a40aSLawrence Mitchell   PetscMPIInt        size;
259c4e6a40aSLawrence Mitchell   const PetscInt    *ilocal;
260c4e6a40aSLawrence Mitchell   const PetscSFNode *iremote;
261c4e6a40aSLawrence Mitchell   PetscErrorCode     ierr;
262c4e6a40aSLawrence Mitchell 
263c4e6a40aSLawrence Mitchell   PetscFunctionBegin;
26476bd3646SJed Brown   if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
265c4e6a40aSLawrence Mitchell   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
266c4e6a40aSLawrence Mitchell   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
267c4e6a40aSLawrence Mitchell   for (i = 0; i < nleaves; i++) {
268c4e6a40aSLawrence Mitchell     const PetscInt rank = iremote[i].rank;
269c4e6a40aSLawrence Mitchell     const PetscInt remote = iremote[i].index;
270c4e6a40aSLawrence Mitchell     const PetscInt leaf = ilocal ? ilocal[i] : i;
271c4e6a40aSLawrence Mitchell     if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size);
272c4e6a40aSLawrence Mitchell     if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i);
273c4e6a40aSLawrence Mitchell     if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i);
274c4e6a40aSLawrence Mitchell   }
275c4e6a40aSLawrence Mitchell   PetscFunctionReturn(0);
276c4e6a40aSLawrence Mitchell }
277c4e6a40aSLawrence Mitchell 
27820c24465SJunchao Zhang 
27920c24465SJunchao Zhang 
28020c24465SJunchao Zhang 
28195fce210SBarry Smith /*@
28295fce210SBarry Smith    PetscSFSetUp - set up communication structures
28395fce210SBarry Smith 
28495fce210SBarry Smith    Collective
28595fce210SBarry Smith 
28695fce210SBarry Smith    Input Arguments:
28795fce210SBarry Smith .  sf - star forest communication object
28895fce210SBarry Smith 
28995fce210SBarry Smith    Level: beginner
29095fce210SBarry Smith 
29195fce210SBarry Smith .seealso: PetscSFSetFromOptions(), PetscSFSetType()
29295fce210SBarry Smith @*/
29395fce210SBarry Smith PetscErrorCode PetscSFSetUp(PetscSF sf)
29495fce210SBarry Smith {
29595fce210SBarry Smith   PetscErrorCode ierr;
29695fce210SBarry Smith 
29795fce210SBarry Smith   PetscFunctionBegin;
29829046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
29929046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf,1);
30095fce210SBarry Smith   if (sf->setupcalled) PetscFunctionReturn(0);
30129046d53SLisandro Dalcin   ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
30220c24465SJunchao Zhang   ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr);
30320c24465SJunchao Zhang   if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */
30495fce210SBarry Smith   if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);}
30520c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA)
30620c24465SJunchao Zhang   if (sf->backend == PETSCSF_BACKEND_CUDA) {
30720c24465SJunchao Zhang     sf->ops->Malloc = PetscSFMalloc_Cuda;
30820c24465SJunchao Zhang     sf->ops->Free   = PetscSFFree_Cuda;
30920c24465SJunchao Zhang   }
31020c24465SJunchao Zhang #endif
31120c24465SJunchao Zhang 
31220c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS)
31320c24465SJunchao Zhang   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
31420c24465SJunchao Zhang     sf->ops->Malloc = PetscSFMalloc_Kokkos;
31520c24465SJunchao Zhang     sf->ops->Free   = PetscSFFree_Kokkos;
31620c24465SJunchao Zhang   }
31720c24465SJunchao Zhang #endif
31829046d53SLisandro Dalcin   ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr);
31995fce210SBarry Smith   sf->setupcalled = PETSC_TRUE;
32095fce210SBarry Smith   PetscFunctionReturn(0);
32195fce210SBarry Smith }
32295fce210SBarry Smith 
3238af6ec1cSBarry Smith /*@
32495fce210SBarry Smith    PetscSFSetFromOptions - set PetscSF options using the options database
32595fce210SBarry Smith 
32695fce210SBarry Smith    Logically Collective
32795fce210SBarry Smith 
32895fce210SBarry Smith    Input Arguments:
32995fce210SBarry Smith .  sf - star forest
33095fce210SBarry Smith 
33195fce210SBarry Smith    Options Database Keys:
33260263706SJed Brown +  -sf_type               - implementation type, see PetscSFSetType()
33351ccb202SJunchao Zhang .  -sf_rank_order         - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
334b85e67b7SJunchao Zhang .  -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
335c2a741eeSJunchao Zhang                             use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
336c06a8e02SRichard Tran Mills                             If true, this option only works with -use_gpu_aware_mpi 1.
33720c24465SJunchao Zhang .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
338c06a8e02SRichard Tran Mills                                If true, this option only works with -use_gpu_aware_mpi 1.
33995fce210SBarry Smith 
34020c24465SJunchao Zhang -  -sf_backend cuda | kokkos -Select the device backend SF uses. Currently SF has two backends: cuda and Kokkos.
34120c24465SJunchao Zhang                               On CUDA devices, one can choose cuda or kokkos with the default being cuda. On other devices,
34220c24465SJunchao Zhang                               the only available is kokkos.
34320c24465SJunchao Zhang 
34495fce210SBarry Smith    Level: intermediate
34595fce210SBarry Smith @*/
34695fce210SBarry Smith PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
34795fce210SBarry Smith {
34895fce210SBarry Smith   PetscSFType    deft;
34995fce210SBarry Smith   char           type[256];
35095fce210SBarry Smith   PetscErrorCode ierr;
35195fce210SBarry Smith   PetscBool      flg;
35295fce210SBarry Smith 
35395fce210SBarry Smith   PetscFunctionBegin;
35495fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
35595fce210SBarry Smith   ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr);
35695fce210SBarry Smith   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
35729046d53SLisandro Dalcin   ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr);
35895fce210SBarry Smith   ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr);
35995fce210SBarry Smith   ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr);
3607fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
36120c24465SJunchao Zhang   {
36220c24465SJunchao Zhang     char        backendstr[32] = {0};
36320c24465SJunchao Zhang     PetscBool   isCuda = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
36420c24465SJunchao Zhang     /* Change the defaults set in PetscSFCreate() with command line options */
365cd620004SJunchao Zhang     ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr);
366b85e67b7SJunchao Zhang     ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr);
36720c24465SJunchao Zhang     ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr);
36820c24465SJunchao Zhang     ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr);
36920c24465SJunchao Zhang     ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr);
37020c24465SJunchao Zhang   #if defined(PETSC_HAVE_CUDA)
37120c24465SJunchao Zhang     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
37220c24465SJunchao Zhang     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
37320c24465SJunchao Zhang     else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported on cuda devices. You may choose cuda or kokkos (if installed)", backendstr);
37420c24465SJunchao Zhang   #elif defined(PETSC_HAVE_KOKKOS)
37520c24465SJunchao Zhang     if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr);
37620c24465SJunchao Zhang   #endif
37720c24465SJunchao Zhang   }
378c2a741eeSJunchao Zhang #endif
379e55864a3SBarry Smith   if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);}
38095fce210SBarry Smith   ierr = PetscOptionsEnd();CHKERRQ(ierr);
38195fce210SBarry Smith   PetscFunctionReturn(0);
38295fce210SBarry Smith }
38395fce210SBarry Smith 
38429046d53SLisandro Dalcin /*@
38595fce210SBarry Smith    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
38695fce210SBarry Smith 
38795fce210SBarry Smith    Logically Collective
38895fce210SBarry Smith 
38995fce210SBarry Smith    Input Arguments:
39095fce210SBarry Smith +  sf - star forest
39195fce210SBarry Smith -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
39295fce210SBarry Smith 
39395fce210SBarry Smith    Level: advanced
39495fce210SBarry Smith 
39595fce210SBarry Smith .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
39695fce210SBarry Smith @*/
39795fce210SBarry Smith PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
39895fce210SBarry Smith {
39995fce210SBarry Smith   PetscFunctionBegin;
40095fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
40195fce210SBarry Smith   PetscValidLogicalCollectiveBool(sf,flg,2);
40295fce210SBarry Smith   if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()");
40395fce210SBarry Smith   sf->rankorder = flg;
40495fce210SBarry Smith   PetscFunctionReturn(0);
40595fce210SBarry Smith }
40695fce210SBarry Smith 
4078af6ec1cSBarry Smith /*@
40895fce210SBarry Smith    PetscSFSetGraph - Set a parallel star forest
40995fce210SBarry Smith 
41095fce210SBarry Smith    Collective
41195fce210SBarry Smith 
41295fce210SBarry Smith    Input Arguments:
41395fce210SBarry Smith +  sf - star forest
41495fce210SBarry Smith .  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
41595fce210SBarry Smith .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
416c4e6a40aSLawrence Mitchell .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
417c4e6a40aSLawrence Mitchell during setup in debug mode)
41895fce210SBarry Smith .  localmode - copy mode for ilocal
419c4e6a40aSLawrence Mitchell .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
420c4e6a40aSLawrence Mitchell during setup in debug mode)
42195fce210SBarry Smith -  remotemode - copy mode for iremote
42295fce210SBarry Smith 
42395fce210SBarry Smith    Level: intermediate
42495fce210SBarry Smith 
42595452b02SPatrick Sanan    Notes:
42695452b02SPatrick Sanan     In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode
42738ab3f8aSBarry Smith 
4282ad1e87fSLisandro Dalcin    Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they
4292ad1e87fSLisandro Dalcin    encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not
4302ad1e87fSLisandro Dalcin    needed
4312ad1e87fSLisandro Dalcin 
432c4e6a40aSLawrence Mitchell    Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf
433c4e6a40aSLawrence Mitchell    indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode).
434c4e6a40aSLawrence Mitchell 
43595fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
43695fce210SBarry Smith @*/
43795fce210SBarry Smith PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode)
43895fce210SBarry Smith {
43995fce210SBarry Smith   PetscErrorCode ierr;
44095fce210SBarry Smith 
44195fce210SBarry Smith   PetscFunctionBegin;
44295fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
44329046d53SLisandro Dalcin   if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4);
44429046d53SLisandro Dalcin   if (nleaves > 0) PetscValidPointer(iremote,6);
44529046d53SLisandro Dalcin   if (nroots  < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots);
44695fce210SBarry Smith   if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves);
44729046d53SLisandro Dalcin 
4482a67d2daSStefano Zampini   if (sf->nroots >= 0) { /* Reset only if graph already set */
44995fce210SBarry Smith     ierr = PetscSFReset(sf);CHKERRQ(ierr);
4502a67d2daSStefano Zampini   }
4512a67d2daSStefano Zampini 
45229046d53SLisandro Dalcin   ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
45329046d53SLisandro Dalcin 
45495fce210SBarry Smith   sf->nroots  = nroots;
45595fce210SBarry Smith   sf->nleaves = nleaves;
45629046d53SLisandro Dalcin 
45729046d53SLisandro Dalcin   if (nleaves && ilocal) {
45821c688dcSJed Brown     PetscInt i;
45929046d53SLisandro Dalcin     PetscInt minleaf = PETSC_MAX_INT;
46029046d53SLisandro Dalcin     PetscInt maxleaf = PETSC_MIN_INT;
4612ad1e87fSLisandro Dalcin     int      contiguous = 1;
46229046d53SLisandro Dalcin     for (i=0; i<nleaves; i++) {
46329046d53SLisandro Dalcin       minleaf = PetscMin(minleaf,ilocal[i]);
46429046d53SLisandro Dalcin       maxleaf = PetscMax(maxleaf,ilocal[i]);
4652ad1e87fSLisandro Dalcin       contiguous &= (ilocal[i] == i);
46629046d53SLisandro Dalcin     }
46729046d53SLisandro Dalcin     sf->minleaf = minleaf;
46829046d53SLisandro Dalcin     sf->maxleaf = maxleaf;
4692ad1e87fSLisandro Dalcin     if (contiguous) {
4702ad1e87fSLisandro Dalcin       if (localmode == PETSC_OWN_POINTER) {
4712ad1e87fSLisandro Dalcin         ierr = PetscFree(ilocal);CHKERRQ(ierr);
4722ad1e87fSLisandro Dalcin       }
4732ad1e87fSLisandro Dalcin       ilocal = NULL;
4742ad1e87fSLisandro Dalcin     }
47529046d53SLisandro Dalcin   } else {
47629046d53SLisandro Dalcin     sf->minleaf = 0;
47729046d53SLisandro Dalcin     sf->maxleaf = nleaves - 1;
47829046d53SLisandro Dalcin   }
47929046d53SLisandro Dalcin 
48029046d53SLisandro Dalcin   if (ilocal) {
48195fce210SBarry Smith     switch (localmode) {
48295fce210SBarry Smith     case PETSC_COPY_VALUES:
483785e854fSJed Brown       ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr);
484580bdb30SBarry Smith       ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr);
48595fce210SBarry Smith       sf->mine = sf->mine_alloc;
48695fce210SBarry Smith       break;
48795fce210SBarry Smith     case PETSC_OWN_POINTER:
48895fce210SBarry Smith       sf->mine_alloc = (PetscInt*)ilocal;
48995fce210SBarry Smith       sf->mine       = sf->mine_alloc;
49095fce210SBarry Smith       break;
49195fce210SBarry Smith     case PETSC_USE_POINTER:
49229046d53SLisandro Dalcin       sf->mine_alloc = NULL;
49395fce210SBarry Smith       sf->mine       = (PetscInt*)ilocal;
49495fce210SBarry Smith       break;
49595fce210SBarry Smith     default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode");
49695fce210SBarry Smith     }
49795fce210SBarry Smith   }
49829046d53SLisandro Dalcin 
49995fce210SBarry Smith   switch (remotemode) {
50095fce210SBarry Smith   case PETSC_COPY_VALUES:
501785e854fSJed Brown     ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr);
502580bdb30SBarry Smith     ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr);
50395fce210SBarry Smith     sf->remote = sf->remote_alloc;
50495fce210SBarry Smith     break;
50595fce210SBarry Smith   case PETSC_OWN_POINTER:
50695fce210SBarry Smith     sf->remote_alloc = (PetscSFNode*)iremote;
50795fce210SBarry Smith     sf->remote       = sf->remote_alloc;
50895fce210SBarry Smith     break;
50995fce210SBarry Smith   case PETSC_USE_POINTER:
51029046d53SLisandro Dalcin     sf->remote_alloc = NULL;
51195fce210SBarry Smith     sf->remote       = (PetscSFNode*)iremote;
51295fce210SBarry Smith     break;
51395fce210SBarry Smith   default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode");
51495fce210SBarry Smith   }
51595fce210SBarry Smith 
516563ffbabSMatthew G. Knepley   ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr);
51729046d53SLisandro Dalcin   sf->graphset = PETSC_TRUE;
51895fce210SBarry Smith   PetscFunctionReturn(0);
51995fce210SBarry Smith }
52095fce210SBarry Smith 
52129046d53SLisandro Dalcin /*@
522dd5b3ca6SJunchao Zhang   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
523dd5b3ca6SJunchao Zhang 
524dd5b3ca6SJunchao Zhang   Collective
525dd5b3ca6SJunchao Zhang 
526dd5b3ca6SJunchao Zhang   Input Parameters:
527dd5b3ca6SJunchao Zhang + sf      - The PetscSF
528dd5b3ca6SJunchao Zhang . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
529dd5b3ca6SJunchao Zhang - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
530dd5b3ca6SJunchao Zhang 
531dd5b3ca6SJunchao Zhang   Notes:
532dd5b3ca6SJunchao Zhang   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
533dd5b3ca6SJunchao Zhang   n and N are local and global sizes of x respectively.
534dd5b3ca6SJunchao Zhang 
535dd5b3ca6SJunchao Zhang   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to
536dd5b3ca6SJunchao Zhang   sequential vectors y on all ranks.
537dd5b3ca6SJunchao Zhang 
538dd5b3ca6SJunchao Zhang   With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a
539dd5b3ca6SJunchao Zhang   sequential vector y on rank 0.
540dd5b3ca6SJunchao Zhang 
541dd5b3ca6SJunchao Zhang   In above cases, entries of x are roots and entries of y are leaves.
542dd5b3ca6SJunchao Zhang 
543dd5b3ca6SJunchao Zhang   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine
544dd5b3ca6SJunchao Zhang   creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
545dd5b3ca6SJunchao Zhang   of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does
546dd5b3ca6SJunchao Zhang   not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data
547dd5b3ca6SJunchao Zhang   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
548dd5b3ca6SJunchao Zhang 
549dd5b3ca6SJunchao Zhang   In this case, roots and leaves are symmetric.
550dd5b3ca6SJunchao Zhang 
551dd5b3ca6SJunchao Zhang   Level: intermediate
552dd5b3ca6SJunchao Zhang  @*/
553dd5b3ca6SJunchao Zhang PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
554dd5b3ca6SJunchao Zhang {
555dd5b3ca6SJunchao Zhang   MPI_Comm       comm;
556dd5b3ca6SJunchao Zhang   PetscInt       n,N,res[2];
557dd5b3ca6SJunchao Zhang   PetscMPIInt    rank,size;
558dd5b3ca6SJunchao Zhang   PetscSFType    type;
559dd5b3ca6SJunchao Zhang   PetscErrorCode ierr;
560dd5b3ca6SJunchao Zhang 
561dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
562dd5b3ca6SJunchao Zhang   ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr);
563dd5b3ca6SJunchao Zhang   if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern);
564dd5b3ca6SJunchao Zhang   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
565dd5b3ca6SJunchao Zhang   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
566dd5b3ca6SJunchao Zhang 
567dd5b3ca6SJunchao Zhang   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
568dd5b3ca6SJunchao Zhang     type = PETSCSFALLTOALL;
569dd5b3ca6SJunchao Zhang     ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr);
570dd5b3ca6SJunchao Zhang     ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr);
571dd5b3ca6SJunchao Zhang     ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr);
572dd5b3ca6SJunchao Zhang     ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr);
573dd5b3ca6SJunchao Zhang   } else {
574dd5b3ca6SJunchao Zhang     ierr   = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr);
575dd5b3ca6SJunchao Zhang     ierr   = PetscLayoutGetSize(map,&N);CHKERRQ(ierr);
576dd5b3ca6SJunchao Zhang     res[0] = n;
577dd5b3ca6SJunchao Zhang     res[1] = -n;
578dd5b3ca6SJunchao Zhang     /* Check if n are same over all ranks so that we can optimize it */
579dd5b3ca6SJunchao Zhang     ierr   = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
580dd5b3ca6SJunchao Zhang     if (res[0] == -res[1]) { /* same n */
581dd5b3ca6SJunchao Zhang       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
582dd5b3ca6SJunchao Zhang     } else {
583dd5b3ca6SJunchao Zhang       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
584dd5b3ca6SJunchao Zhang     }
585dd5b3ca6SJunchao Zhang     ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr);
586dd5b3ca6SJunchao Zhang   }
587dd5b3ca6SJunchao Zhang   ierr = PetscSFSetType(sf,type);CHKERRQ(ierr);
588dd5b3ca6SJunchao Zhang 
589dd5b3ca6SJunchao Zhang   sf->pattern = pattern;
590dd5b3ca6SJunchao Zhang   sf->mine    = NULL; /* Contiguous */
591dd5b3ca6SJunchao Zhang 
592dd5b3ca6SJunchao Zhang   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
593dd5b3ca6SJunchao Zhang      Also set other easy stuff.
594dd5b3ca6SJunchao Zhang    */
595dd5b3ca6SJunchao Zhang   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
596dd5b3ca6SJunchao Zhang     sf->nleaves      = N;
597dd5b3ca6SJunchao Zhang     sf->nroots       = n;
598dd5b3ca6SJunchao Zhang     sf->nranks       = size;
599dd5b3ca6SJunchao Zhang     sf->minleaf      = 0;
600dd5b3ca6SJunchao Zhang     sf->maxleaf      = N - 1;
601dd5b3ca6SJunchao Zhang   } else if (pattern == PETSCSF_PATTERN_GATHER) {
602dd5b3ca6SJunchao Zhang     sf->nleaves      = rank ? 0 : N;
603dd5b3ca6SJunchao Zhang     sf->nroots       = n;
604dd5b3ca6SJunchao Zhang     sf->nranks       = rank ? 0 : size;
605dd5b3ca6SJunchao Zhang     sf->minleaf      = 0;
606dd5b3ca6SJunchao Zhang     sf->maxleaf      = rank ? -1 : N - 1;
607dd5b3ca6SJunchao Zhang   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
608dd5b3ca6SJunchao Zhang     sf->nleaves      = size;
609dd5b3ca6SJunchao Zhang     sf->nroots       = size;
610dd5b3ca6SJunchao Zhang     sf->nranks       = size;
611dd5b3ca6SJunchao Zhang     sf->minleaf      = 0;
612dd5b3ca6SJunchao Zhang     sf->maxleaf      = size - 1;
613dd5b3ca6SJunchao Zhang   }
614dd5b3ca6SJunchao Zhang   sf->ndranks  = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
615dd5b3ca6SJunchao Zhang   sf->graphset = PETSC_TRUE;
616dd5b3ca6SJunchao Zhang   PetscFunctionReturn(0);
617dd5b3ca6SJunchao Zhang }
618dd5b3ca6SJunchao Zhang 
619dd5b3ca6SJunchao Zhang /*@
62095fce210SBarry Smith    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
62195fce210SBarry Smith 
62295fce210SBarry Smith    Collective
62395fce210SBarry Smith 
62495fce210SBarry Smith    Input Arguments:
625dd5b3ca6SJunchao Zhang 
62695fce210SBarry Smith .  sf - star forest to invert
62795fce210SBarry Smith 
62895fce210SBarry Smith    Output Arguments:
62995fce210SBarry Smith .  isf - inverse of sf
63095fce210SBarry Smith    Level: advanced
63195fce210SBarry Smith 
63295fce210SBarry Smith    Notes:
63395fce210SBarry Smith    All roots must have degree 1.
63495fce210SBarry Smith 
63595fce210SBarry Smith    The local space may be a permutation, but cannot be sparse.
63695fce210SBarry Smith 
63795fce210SBarry Smith .seealso: PetscSFSetGraph()
63895fce210SBarry Smith @*/
63995fce210SBarry Smith PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
64095fce210SBarry Smith {
64195fce210SBarry Smith   PetscErrorCode ierr;
64295fce210SBarry Smith   PetscMPIInt    rank;
64395fce210SBarry Smith   PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
64495fce210SBarry Smith   const PetscInt *ilocal;
64595fce210SBarry Smith   PetscSFNode    *roots,*leaves;
64695fce210SBarry Smith 
64795fce210SBarry Smith   PetscFunctionBegin;
64829046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
64929046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf,1);
65029046d53SLisandro Dalcin   PetscValidPointer(isf,2);
65129046d53SLisandro Dalcin 
65295fce210SBarry Smith   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
65329046d53SLisandro Dalcin   maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
65429046d53SLisandro Dalcin 
65529046d53SLisandro Dalcin   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
656ae9aee6dSMatthew G. Knepley   ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr);
657ae9aee6dSMatthew G. Knepley   for (i=0; i<maxlocal; i++) {
65895fce210SBarry Smith     leaves[i].rank  = rank;
65995fce210SBarry Smith     leaves[i].index = i;
66095fce210SBarry Smith   }
66195fce210SBarry Smith   for (i=0; i <nroots; i++) {
66295fce210SBarry Smith     roots[i].rank  = -1;
66395fce210SBarry Smith     roots[i].index = -1;
66495fce210SBarry Smith   }
6658bfbc91cSJed Brown   ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
6668bfbc91cSJed Brown   ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr);
66795fce210SBarry Smith 
66895fce210SBarry Smith   /* Check whether our leaves are sparse */
66995fce210SBarry Smith   for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
67095fce210SBarry Smith   if (count == nroots) newilocal = NULL;
67195fce210SBarry Smith   else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
672785e854fSJed Brown     ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr);
67395fce210SBarry Smith     for (i=0,count=0; i<nroots; i++) {
67495fce210SBarry Smith       if (roots[i].rank >= 0) {
67595fce210SBarry Smith         newilocal[count]   = i;
67695fce210SBarry Smith         roots[count].rank  = roots[i].rank;
67795fce210SBarry Smith         roots[count].index = roots[i].index;
67895fce210SBarry Smith         count++;
67995fce210SBarry Smith       }
68095fce210SBarry Smith     }
68195fce210SBarry Smith   }
68295fce210SBarry Smith 
68395fce210SBarry Smith   ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr);
68495fce210SBarry Smith   ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr);
68595fce210SBarry Smith   ierr = PetscFree2(roots,leaves);CHKERRQ(ierr);
68695fce210SBarry Smith   PetscFunctionReturn(0);
68795fce210SBarry Smith }
68895fce210SBarry Smith 
68995fce210SBarry Smith /*@
69095fce210SBarry Smith    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
69195fce210SBarry Smith 
69295fce210SBarry Smith    Collective
69395fce210SBarry Smith 
69495fce210SBarry Smith    Input Arguments:
69595fce210SBarry Smith +  sf - communication object to duplicate
69695fce210SBarry Smith -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
69795fce210SBarry Smith 
69895fce210SBarry Smith    Output Arguments:
69995fce210SBarry Smith .  newsf - new communication object
70095fce210SBarry Smith 
70195fce210SBarry Smith    Level: beginner
70295fce210SBarry Smith 
70395fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
70495fce210SBarry Smith @*/
70595fce210SBarry Smith PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
70695fce210SBarry Smith {
70729046d53SLisandro Dalcin   PetscSFType    type;
70895fce210SBarry Smith   PetscErrorCode ierr;
70995fce210SBarry Smith 
71095fce210SBarry Smith   PetscFunctionBegin;
71129046d53SLisandro Dalcin   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
71229046d53SLisandro Dalcin   PetscValidLogicalCollectiveEnum(sf,opt,2);
71329046d53SLisandro Dalcin   PetscValidPointer(newsf,3);
71495fce210SBarry Smith   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr);
71529046d53SLisandro Dalcin   ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
71629046d53SLisandro Dalcin   if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);}
71795fce210SBarry Smith   if (opt == PETSCSF_DUPLICATE_GRAPH) {
718dd5b3ca6SJunchao Zhang     PetscSFCheckGraphSet(sf,1);
719dd5b3ca6SJunchao Zhang     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
72095fce210SBarry Smith       PetscInt          nroots,nleaves;
72195fce210SBarry Smith       const PetscInt    *ilocal;
72295fce210SBarry Smith       const PetscSFNode *iremote;
72395fce210SBarry Smith       ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
72495fce210SBarry Smith       ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr);
725dd5b3ca6SJunchao Zhang     } else {
726dd5b3ca6SJunchao Zhang       ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr);
727dd5b3ca6SJunchao Zhang     }
72895fce210SBarry Smith   }
72920c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE)
73020c24465SJunchao Zhang   (*newsf)->backend              = sf->backend;
73120c24465SJunchao Zhang   (*newsf)->use_default_stream   = sf->use_default_stream;
73220c24465SJunchao Zhang   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
73320c24465SJunchao Zhang   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
73420c24465SJunchao Zhang #endif
73529046d53SLisandro Dalcin   if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);}
73620c24465SJunchao Zhang   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
73795fce210SBarry Smith   PetscFunctionReturn(0);
73895fce210SBarry Smith }
73995fce210SBarry Smith 
74095fce210SBarry Smith /*@C
74195fce210SBarry Smith    PetscSFGetGraph - Get the graph specifying a parallel star forest
74295fce210SBarry Smith 
74395fce210SBarry Smith    Not Collective
74495fce210SBarry Smith 
74595fce210SBarry Smith    Input Arguments:
74695fce210SBarry Smith .  sf - star forest
74795fce210SBarry Smith 
74895fce210SBarry Smith    Output Arguments:
74995fce210SBarry Smith +  nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves)
75095fce210SBarry Smith .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
75195fce210SBarry Smith .  ilocal - locations of leaves in leafdata buffers
75295fce210SBarry Smith -  iremote - remote locations of root vertices for each leaf on the current process
75395fce210SBarry Smith 
754373e0d91SLisandro Dalcin    Notes:
755373e0d91SLisandro Dalcin    We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
756373e0d91SLisandro Dalcin 
757245d9833Sprj-    When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you
758ca797d7aSLawrence Mitchell    want to update the graph, you must call PetscSFSetGraph after modifying the iremote array.
759ca797d7aSLawrence Mitchell 
76095fce210SBarry Smith    Level: intermediate
76195fce210SBarry Smith 
76295fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
76395fce210SBarry Smith @*/
76495fce210SBarry Smith PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
76595fce210SBarry Smith {
766b8dee149SJunchao Zhang   PetscErrorCode ierr;
76795fce210SBarry Smith 
76895fce210SBarry Smith   PetscFunctionBegin;
76995fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
770b8dee149SJunchao Zhang   if (sf->ops->GetGraph) {
771b8dee149SJunchao Zhang     ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr);
772b8dee149SJunchao Zhang   } else {
77395fce210SBarry Smith     if (nroots) *nroots = sf->nroots;
77495fce210SBarry Smith     if (nleaves) *nleaves = sf->nleaves;
77595fce210SBarry Smith     if (ilocal) *ilocal = sf->mine;
77695fce210SBarry Smith     if (iremote) *iremote = sf->remote;
777b8dee149SJunchao Zhang   }
77895fce210SBarry Smith   PetscFunctionReturn(0);
77995fce210SBarry Smith }
78095fce210SBarry Smith 
78129046d53SLisandro Dalcin /*@
78295fce210SBarry Smith    PetscSFGetLeafRange - Get the active leaf ranges
78395fce210SBarry Smith 
78495fce210SBarry Smith    Not Collective
78595fce210SBarry Smith 
78695fce210SBarry Smith    Input Arguments:
78795fce210SBarry Smith .  sf - star forest
78895fce210SBarry Smith 
78995fce210SBarry Smith    Output Arguments:
790dd5b3ca6SJunchao Zhang +  minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
791dd5b3ca6SJunchao Zhang -  maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
79295fce210SBarry Smith 
79395fce210SBarry Smith    Level: developer
79495fce210SBarry Smith 
79595fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
79695fce210SBarry Smith @*/
79795fce210SBarry Smith PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
79895fce210SBarry Smith {
79995fce210SBarry Smith   PetscFunctionBegin;
80095fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
80129046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf,1);
80295fce210SBarry Smith   if (minleaf) *minleaf = sf->minleaf;
80395fce210SBarry Smith   if (maxleaf) *maxleaf = sf->maxleaf;
80495fce210SBarry Smith   PetscFunctionReturn(0);
80595fce210SBarry Smith }
80695fce210SBarry Smith 
80795fce210SBarry Smith /*@C
808fe2efc57SMark    PetscSFViewFromOptions - View from Options
809fe2efc57SMark 
810fe2efc57SMark    Collective on PetscSF
811fe2efc57SMark 
812fe2efc57SMark    Input Parameters:
813fe2efc57SMark +  A - the star forest
814736c3998SJose E. Roman .  obj - Optional object
815736c3998SJose E. Roman -  name - command line option
816fe2efc57SMark 
817fe2efc57SMark    Level: intermediate
818fe2efc57SMark .seealso:  PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
819fe2efc57SMark @*/
820fe2efc57SMark PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
821fe2efc57SMark {
822fe2efc57SMark   PetscErrorCode ierr;
823fe2efc57SMark 
824fe2efc57SMark   PetscFunctionBegin;
825fe2efc57SMark   PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1);
826fe2efc57SMark   ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr);
827fe2efc57SMark   PetscFunctionReturn(0);
828fe2efc57SMark }
829fe2efc57SMark 
830fe2efc57SMark /*@C
83195fce210SBarry Smith    PetscSFView - view a star forest
83295fce210SBarry Smith 
83395fce210SBarry Smith    Collective
83495fce210SBarry Smith 
83595fce210SBarry Smith    Input Arguments:
83695fce210SBarry Smith +  sf - star forest
83795fce210SBarry Smith -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
83895fce210SBarry Smith 
83995fce210SBarry Smith    Level: beginner
84095fce210SBarry Smith 
84195fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFSetGraph()
84295fce210SBarry Smith @*/
84395fce210SBarry Smith PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
84495fce210SBarry Smith {
84595fce210SBarry Smith   PetscErrorCode    ierr;
84695fce210SBarry Smith   PetscBool         iascii;
84795fce210SBarry Smith   PetscViewerFormat format;
84895fce210SBarry Smith 
84995fce210SBarry Smith   PetscFunctionBegin;
85095fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
85195fce210SBarry Smith   if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);}
85295fce210SBarry Smith   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
85395fce210SBarry Smith   PetscCheckSameComm(sf,1,viewer,2);
85480153354SVaclav Hapla   if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);}
85595fce210SBarry Smith   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
85695fce210SBarry Smith   if (iascii) {
85795fce210SBarry Smith     PetscMPIInt rank;
85881bfa7aaSJed Brown     PetscInt    ii,i,j;
85995fce210SBarry Smith 
860dae58748SBarry Smith     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr);
86195fce210SBarry Smith     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
86295fce210SBarry Smith     if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);}
863dd5b3ca6SJunchao Zhang     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
86480153354SVaclav Hapla       if (!sf->graphset) {
86580153354SVaclav Hapla         ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr);
86680153354SVaclav Hapla         ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
86780153354SVaclav Hapla         PetscFunctionReturn(0);
86880153354SVaclav Hapla       }
86995fce210SBarry Smith       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
8701575c14dSBarry Smith       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
87195fce210SBarry Smith       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr);
87295fce210SBarry Smith       for (i=0; i<sf->nleaves; i++) {
87395fce210SBarry Smith         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr);
87495fce210SBarry Smith       }
87595fce210SBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
87695fce210SBarry Smith       ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
87795fce210SBarry Smith       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
87881bfa7aaSJed Brown         PetscMPIInt *tmpranks,*perm;
87981bfa7aaSJed Brown         ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr);
880580bdb30SBarry Smith         ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr);
88181bfa7aaSJed Brown         for (i=0; i<sf->nranks; i++) perm[i] = i;
88281bfa7aaSJed Brown         ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr);
88395fce210SBarry Smith         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr);
88481bfa7aaSJed Brown         for (ii=0; ii<sf->nranks; ii++) {
88581bfa7aaSJed Brown           i = perm[ii];
8867904a332SBarry Smith           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr);
88795fce210SBarry Smith           for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
88895fce210SBarry Smith             ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr);
88995fce210SBarry Smith           }
89095fce210SBarry Smith         }
89181bfa7aaSJed Brown         ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr);
89295fce210SBarry Smith       }
89395fce210SBarry Smith       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
8941575c14dSBarry Smith       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
895dd5b3ca6SJunchao Zhang     }
89695fce210SBarry Smith     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
89795fce210SBarry Smith   }
89895fce210SBarry Smith   PetscFunctionReturn(0);
89995fce210SBarry Smith }
90095fce210SBarry Smith 
90195fce210SBarry Smith /*@C
902dec1416fSJunchao Zhang    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
90395fce210SBarry Smith 
90495fce210SBarry Smith    Not Collective
90595fce210SBarry Smith 
90695fce210SBarry Smith    Input Arguments:
90795fce210SBarry Smith .  sf - star forest
90895fce210SBarry Smith 
90995fce210SBarry Smith    Output Arguments:
91095fce210SBarry Smith +  nranks - number of ranks referenced by local part
91195fce210SBarry Smith .  ranks - array of ranks
91295fce210SBarry Smith .  roffset - offset in rmine/rremote for each rank (length nranks+1)
91395fce210SBarry Smith .  rmine - concatenated array holding local indices referencing each remote rank
91495fce210SBarry Smith -  rremote - concatenated array holding remote indices referenced for each remote rank
91595fce210SBarry Smith 
91695fce210SBarry Smith    Level: developer
91795fce210SBarry Smith 
918dec1416fSJunchao Zhang .seealso: PetscSFGetLeafRanks()
91995fce210SBarry Smith @*/
920dec1416fSJunchao Zhang PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
92195fce210SBarry Smith {
922dec1416fSJunchao Zhang   PetscErrorCode ierr;
92395fce210SBarry Smith 
92495fce210SBarry Smith   PetscFunctionBegin;
92595fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
92629046d53SLisandro Dalcin   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
927dec1416fSJunchao Zhang   if (sf->ops->GetRootRanks) {
928dec1416fSJunchao Zhang     ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr);
929dec1416fSJunchao Zhang   } else {
930dec1416fSJunchao Zhang     /* The generic implementation */
93195fce210SBarry Smith     if (nranks)  *nranks  = sf->nranks;
93295fce210SBarry Smith     if (ranks)   *ranks   = sf->ranks;
93395fce210SBarry Smith     if (roffset) *roffset = sf->roffset;
93495fce210SBarry Smith     if (rmine)   *rmine   = sf->rmine;
93595fce210SBarry Smith     if (rremote) *rremote = sf->rremote;
936dec1416fSJunchao Zhang   }
93795fce210SBarry Smith   PetscFunctionReturn(0);
93895fce210SBarry Smith }
93995fce210SBarry Smith 
9408750ddebSJunchao Zhang /*@C
9418750ddebSJunchao Zhang    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
9428750ddebSJunchao Zhang 
9438750ddebSJunchao Zhang    Not Collective
9448750ddebSJunchao Zhang 
9458750ddebSJunchao Zhang    Input Arguments:
9468750ddebSJunchao Zhang .  sf - star forest
9478750ddebSJunchao Zhang 
9488750ddebSJunchao Zhang    Output Arguments:
9498750ddebSJunchao Zhang +  niranks - number of leaf ranks referencing roots on this process
9508750ddebSJunchao Zhang .  iranks - array of ranks
9518750ddebSJunchao Zhang .  ioffset - offset in irootloc for each rank (length niranks+1)
9528750ddebSJunchao Zhang -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank
9538750ddebSJunchao Zhang 
9548750ddebSJunchao Zhang    Level: developer
9558750ddebSJunchao Zhang 
956dec1416fSJunchao Zhang .seealso: PetscSFGetRootRanks()
9578750ddebSJunchao Zhang @*/
9588750ddebSJunchao Zhang PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
9598750ddebSJunchao Zhang {
9608750ddebSJunchao Zhang   PetscErrorCode ierr;
9618750ddebSJunchao Zhang 
9628750ddebSJunchao Zhang   PetscFunctionBegin;
9638750ddebSJunchao Zhang   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
9648750ddebSJunchao Zhang   if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks");
9658750ddebSJunchao Zhang   if (sf->ops->GetLeafRanks) {
9668750ddebSJunchao Zhang     ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr);
9678750ddebSJunchao Zhang   } else {
9688750ddebSJunchao Zhang     PetscSFType type;
9698750ddebSJunchao Zhang     ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr);
9708750ddebSJunchao Zhang     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
9718750ddebSJunchao Zhang   }
9728750ddebSJunchao Zhang   PetscFunctionReturn(0);
9738750ddebSJunchao Zhang }
9748750ddebSJunchao Zhang 
975b5a8e515SJed Brown static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
976b5a8e515SJed Brown   PetscInt i;
977b5a8e515SJed Brown   for (i=0; i<n; i++) {
978b5a8e515SJed Brown     if (needle == list[i]) return PETSC_TRUE;
979b5a8e515SJed Brown   }
980b5a8e515SJed Brown   return PETSC_FALSE;
981b5a8e515SJed Brown }
982b5a8e515SJed Brown 
98395fce210SBarry Smith /*@C
98421c688dcSJed Brown    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
98521c688dcSJed Brown 
98621c688dcSJed Brown    Collective
98721c688dcSJed Brown 
98821c688dcSJed Brown    Input Arguments:
989b5a8e515SJed Brown +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
990b5a8e515SJed Brown -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
99121c688dcSJed Brown 
99221c688dcSJed Brown    Level: developer
99321c688dcSJed Brown 
994dec1416fSJunchao Zhang .seealso: PetscSFGetRootRanks()
99521c688dcSJed Brown @*/
996b5a8e515SJed Brown PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
99721c688dcSJed Brown {
99821c688dcSJed Brown   PetscErrorCode     ierr;
99921c688dcSJed Brown   PetscTable         table;
100021c688dcSJed Brown   PetscTablePosition pos;
1001b5a8e515SJed Brown   PetscMPIInt        size,groupsize,*groupranks;
1002247e8311SStefano Zampini   PetscInt           *rcount,*ranks;
1003247e8311SStefano Zampini   PetscInt           i, irank = -1,orank = -1;
100421c688dcSJed Brown 
100521c688dcSJed Brown   PetscFunctionBegin;
100621c688dcSJed Brown   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
100729046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf,1);
100821c688dcSJed Brown   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr);
100921c688dcSJed Brown   ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr);
101021c688dcSJed Brown   for (i=0; i<sf->nleaves; i++) {
101121c688dcSJed Brown     /* Log 1-based rank */
101221c688dcSJed Brown     ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr);
101321c688dcSJed Brown   }
101421c688dcSJed Brown   ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr);
101521c688dcSJed Brown   ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr);
101621c688dcSJed Brown   ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr);
101721c688dcSJed Brown   ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr);
101821c688dcSJed Brown   for (i=0; i<sf->nranks; i++) {
101921c688dcSJed Brown     ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr);
102021c688dcSJed Brown     ranks[i]--;             /* Convert back to 0-based */
102121c688dcSJed Brown   }
102221c688dcSJed Brown   ierr = PetscTableDestroy(&table);CHKERRQ(ierr);
1023b5a8e515SJed Brown 
1024b5a8e515SJed Brown   /* We expect that dgroup is reliably "small" while nranks could be large */
1025b5a8e515SJed Brown   {
10267fb8a5e4SKarl Rupp     MPI_Group group = MPI_GROUP_NULL;
1027b5a8e515SJed Brown     PetscMPIInt *dgroupranks;
1028b5a8e515SJed Brown     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
1029b5a8e515SJed Brown     ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr);
1030b5a8e515SJed Brown     ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr);
1031b5a8e515SJed Brown     ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr);
1032b5a8e515SJed Brown     for (i=0; i<groupsize; i++) dgroupranks[i] = i;
1033f3fc9a17SJed Brown     if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);}
1034b5a8e515SJed Brown     ierr = MPI_Group_free(&group);CHKERRQ(ierr);
1035b5a8e515SJed Brown     ierr = PetscFree(dgroupranks);CHKERRQ(ierr);
1036b5a8e515SJed Brown   }
1037b5a8e515SJed Brown 
1038b5a8e515SJed Brown   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1039b5a8e515SJed Brown   for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
1040b5a8e515SJed Brown     for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
1041b5a8e515SJed Brown       if (InList(ranks[i],groupsize,groupranks)) break;
1042b5a8e515SJed Brown     }
1043b5a8e515SJed Brown     for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1044b5a8e515SJed Brown       if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
1045b5a8e515SJed Brown     }
1046b5a8e515SJed Brown     if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
1047b5a8e515SJed Brown       PetscInt tmprank,tmpcount;
1048247e8311SStefano Zampini 
1049b5a8e515SJed Brown       tmprank             = ranks[i];
1050b5a8e515SJed Brown       tmpcount            = rcount[i];
1051b5a8e515SJed Brown       ranks[i]            = ranks[sf->ndranks];
1052b5a8e515SJed Brown       rcount[i]           = rcount[sf->ndranks];
1053b5a8e515SJed Brown       ranks[sf->ndranks]  = tmprank;
1054b5a8e515SJed Brown       rcount[sf->ndranks] = tmpcount;
1055b5a8e515SJed Brown       sf->ndranks++;
1056b5a8e515SJed Brown     }
1057b5a8e515SJed Brown   }
1058b5a8e515SJed Brown   ierr = PetscFree(groupranks);CHKERRQ(ierr);
1059b5a8e515SJed Brown   ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr);
1060b5a8e515SJed Brown   ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr);
106121c688dcSJed Brown   sf->roffset[0] = 0;
106221c688dcSJed Brown   for (i=0; i<sf->nranks; i++) {
106321c688dcSJed Brown     ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr);
106421c688dcSJed Brown     sf->roffset[i+1] = sf->roffset[i] + rcount[i];
106521c688dcSJed Brown     rcount[i]        = 0;
106621c688dcSJed Brown   }
1067247e8311SStefano Zampini   for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
1068247e8311SStefano Zampini     /* short circuit */
1069247e8311SStefano Zampini     if (orank != sf->remote[i].rank) {
107021c688dcSJed Brown       /* Search for index of iremote[i].rank in sf->ranks */
1071b5a8e515SJed Brown       ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr);
1072b5a8e515SJed Brown       if (irank < 0) {
1073b5a8e515SJed Brown         ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr);
1074b5a8e515SJed Brown         if (irank >= 0) irank += sf->ndranks;
107521c688dcSJed Brown       }
1076247e8311SStefano Zampini       orank = sf->remote[i].rank;
1077247e8311SStefano Zampini     }
1078b5a8e515SJed Brown     if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank);
107921c688dcSJed Brown     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
108021c688dcSJed Brown     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
108121c688dcSJed Brown     rcount[irank]++;
108221c688dcSJed Brown   }
108321c688dcSJed Brown   ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr);
108421c688dcSJed Brown   PetscFunctionReturn(0);
108521c688dcSJed Brown }
108621c688dcSJed Brown 
108721c688dcSJed Brown /*@C
108895fce210SBarry Smith    PetscSFGetGroups - gets incoming and outgoing process groups
108995fce210SBarry Smith 
109095fce210SBarry Smith    Collective
109195fce210SBarry Smith 
109295fce210SBarry Smith    Input Argument:
109395fce210SBarry Smith .  sf - star forest
109495fce210SBarry Smith 
109595fce210SBarry Smith    Output Arguments:
109695fce210SBarry Smith +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
109795fce210SBarry Smith -  outgoing - group of destination processes for outgoing edges (roots that I reference)
109895fce210SBarry Smith 
109995fce210SBarry Smith    Level: developer
110095fce210SBarry Smith 
110195fce210SBarry Smith .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
110295fce210SBarry Smith @*/
110395fce210SBarry Smith PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
110495fce210SBarry Smith {
110595fce210SBarry Smith   PetscErrorCode ierr;
11067fb8a5e4SKarl Rupp   MPI_Group      group = MPI_GROUP_NULL;
110795fce210SBarry Smith 
110895fce210SBarry Smith   PetscFunctionBegin;
110944ee17edSStefano Zampini   if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups");
111095fce210SBarry Smith   if (sf->ingroup == MPI_GROUP_NULL) {
111195fce210SBarry Smith     PetscInt       i;
111295fce210SBarry Smith     const PetscInt *indegree;
111395fce210SBarry Smith     PetscMPIInt    rank,*outranks,*inranks;
111495fce210SBarry Smith     PetscSFNode    *remote;
111595fce210SBarry Smith     PetscSF        bgcount;
111695fce210SBarry Smith 
111795fce210SBarry Smith     /* Compute the number of incoming ranks */
1118785e854fSJed Brown     ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr);
111995fce210SBarry Smith     for (i=0; i<sf->nranks; i++) {
112095fce210SBarry Smith       remote[i].rank  = sf->ranks[i];
112195fce210SBarry Smith       remote[i].index = 0;
112295fce210SBarry Smith     }
112395fce210SBarry Smith     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr);
112495fce210SBarry Smith     ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
112595fce210SBarry Smith     ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr);
112695fce210SBarry Smith     ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr);
112795fce210SBarry Smith     /* Enumerate the incoming ranks */
1128dcca6d9dSJed Brown     ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr);
112995fce210SBarry Smith     ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
113095fce210SBarry Smith     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
113195fce210SBarry Smith     ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
113295fce210SBarry Smith     ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr);
113395fce210SBarry Smith     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
113495fce210SBarry Smith     ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr);
113595fce210SBarry Smith     ierr = MPI_Group_free(&group);CHKERRQ(ierr);
113695fce210SBarry Smith     ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr);
113795fce210SBarry Smith     ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr);
113895fce210SBarry Smith   }
113995fce210SBarry Smith   *incoming = sf->ingroup;
114095fce210SBarry Smith 
114195fce210SBarry Smith   if (sf->outgroup == MPI_GROUP_NULL) {
114295fce210SBarry Smith     ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr);
114395fce210SBarry Smith     ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr);
114495fce210SBarry Smith     ierr = MPI_Group_free(&group);CHKERRQ(ierr);
114595fce210SBarry Smith   }
114695fce210SBarry Smith   *outgoing = sf->outgroup;
114795fce210SBarry Smith   PetscFunctionReturn(0);
114895fce210SBarry Smith }
114995fce210SBarry Smith 
115029046d53SLisandro Dalcin /*@
115195fce210SBarry Smith    PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters
115295fce210SBarry Smith 
115395fce210SBarry Smith    Collective
115495fce210SBarry Smith 
115595fce210SBarry Smith    Input Argument:
115695fce210SBarry Smith .  sf - star forest that may contain roots with 0 or with more than 1 vertex
115795fce210SBarry Smith 
115895fce210SBarry Smith    Output Arguments:
115995fce210SBarry Smith .  multi - star forest with split roots, such that each root has degree exactly 1
116095fce210SBarry Smith 
116195fce210SBarry Smith    Level: developer
116295fce210SBarry Smith 
116395fce210SBarry Smith    Notes:
116495fce210SBarry Smith 
116595fce210SBarry Smith    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
116695fce210SBarry Smith    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
116795fce210SBarry Smith    edge, it is a candidate for future optimization that might involve its removal.
116895fce210SBarry Smith 
1169673100f5SVaclav Hapla .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
117095fce210SBarry Smith @*/
117195fce210SBarry Smith PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
117295fce210SBarry Smith {
117395fce210SBarry Smith   PetscErrorCode ierr;
117495fce210SBarry Smith 
117595fce210SBarry Smith   PetscFunctionBegin;
117695fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
117795fce210SBarry Smith   PetscValidPointer(multi,2);
117895fce210SBarry Smith   if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
117995fce210SBarry Smith     ierr   = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
118095fce210SBarry Smith     *multi = sf->multi;
118195fce210SBarry Smith     PetscFunctionReturn(0);
118295fce210SBarry Smith   }
118395fce210SBarry Smith   if (!sf->multi) {
118495fce210SBarry Smith     const PetscInt *indegree;
11859837ea96SMatthew G. Knepley     PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
118695fce210SBarry Smith     PetscSFNode    *remote;
118729046d53SLisandro Dalcin     maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
118895fce210SBarry Smith     ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr);
118995fce210SBarry Smith     ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr);
11909837ea96SMatthew G. Knepley     ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr);
119195fce210SBarry Smith     inoffset[0] = 0;
119295fce210SBarry Smith     for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
11939837ea96SMatthew G. Knepley     for (i=0; i<maxlocal; i++) outones[i] = 1;
1194dbd2ff41SBarry Smith     ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
1195dbd2ff41SBarry Smith     ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr);
119695fce210SBarry Smith     for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
119776bd3646SJed Brown     if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
119895fce210SBarry Smith       for (i=0; i<sf->nroots; i++) {
119995fce210SBarry Smith         if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");
120095fce210SBarry Smith       }
120176bd3646SJed Brown     }
1202785e854fSJed Brown     ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr);
120395fce210SBarry Smith     for (i=0; i<sf->nleaves; i++) {
120495fce210SBarry Smith       remote[i].rank  = sf->remote[i].rank;
120538e7336fSToby Isaac       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
120695fce210SBarry Smith     }
120795fce210SBarry Smith     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr);
120801365b40SToby Isaac     ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
120995fce210SBarry Smith     if (sf->rankorder) {        /* Sort the ranks */
121095fce210SBarry Smith       PetscMPIInt rank;
121195fce210SBarry Smith       PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
121295fce210SBarry Smith       PetscSFNode *newremote;
121395fce210SBarry Smith       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
121495fce210SBarry Smith       for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
12159837ea96SMatthew G. Knepley       ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr);
12169837ea96SMatthew G. Knepley       for (i=0; i<maxlocal; i++) outranks[i] = rank;
12178bfbc91cSJed Brown       ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
12188bfbc91cSJed Brown       ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr);
121995fce210SBarry Smith       /* Sort the incoming ranks at each vertex, build the inverse map */
122095fce210SBarry Smith       for (i=0; i<sf->nroots; i++) {
122195fce210SBarry Smith         PetscInt j;
122295fce210SBarry Smith         for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
122395fce210SBarry Smith         ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr);
122495fce210SBarry Smith         for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
122595fce210SBarry Smith       }
122695fce210SBarry Smith       ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
122795fce210SBarry Smith       ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr);
1228785e854fSJed Brown       ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr);
122995fce210SBarry Smith       for (i=0; i<sf->nleaves; i++) {
123095fce210SBarry Smith         newremote[i].rank  = sf->remote[i].rank;
123101365b40SToby Isaac         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
123295fce210SBarry Smith       }
123301365b40SToby Isaac       ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
123495fce210SBarry Smith       ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr);
123595fce210SBarry Smith     }
123695fce210SBarry Smith     ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr);
123795fce210SBarry Smith   }
123895fce210SBarry Smith   *multi = sf->multi;
123995fce210SBarry Smith   PetscFunctionReturn(0);
124095fce210SBarry Smith }
124195fce210SBarry Smith 
124295fce210SBarry Smith /*@C
124395fce210SBarry Smith    PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices
124495fce210SBarry Smith 
124595fce210SBarry Smith    Collective
124695fce210SBarry Smith 
124795fce210SBarry Smith    Input Arguments:
124895fce210SBarry Smith +  sf - original star forest
1249ba2a7774SJunchao Zhang .  nselected  - number of selected roots on this process
1250ba2a7774SJunchao Zhang -  selected   - indices of the selected roots on this process
125195fce210SBarry Smith 
125295fce210SBarry Smith    Output Arguments:
1253cd620004SJunchao Zhang .  esf - new star forest
125495fce210SBarry Smith 
125595fce210SBarry Smith    Level: advanced
125695fce210SBarry Smith 
125795fce210SBarry Smith    Note:
125895fce210SBarry Smith    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
125995fce210SBarry Smith    be done by calling PetscSFGetGraph().
126095fce210SBarry Smith 
126195fce210SBarry Smith .seealso: PetscSFSetGraph(), PetscSFGetGraph()
126295fce210SBarry Smith @*/
1263cd620004SJunchao Zhang PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
126495fce210SBarry Smith {
1265cd620004SJunchao Zhang   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1266cd620004SJunchao Zhang   const PetscInt    *ilocal;
1267cd620004SJunchao Zhang   signed char       *rootdata,*leafdata,*leafmem;
1268ba2a7774SJunchao Zhang   const PetscSFNode *iremote;
1269f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
1270f659e5c7SJunchao Zhang   MPI_Comm          comm;
12710511a646SMatthew G. Knepley   PetscErrorCode    ierr;
127295fce210SBarry Smith 
127395fce210SBarry Smith   PetscFunctionBegin;
127495fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
127529046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf,1);
1276ba2a7774SJunchao Zhang   if (nselected) PetscValidPointer(selected,3);
1277cd620004SJunchao Zhang   PetscValidPointer(esf,4);
12780511a646SMatthew G. Knepley 
1279f659e5c7SJunchao Zhang   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1280140a1472SStefano Zampini   ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
1281f659e5c7SJunchao Zhang   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1282cd620004SJunchao Zhang   ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
1283cd620004SJunchao Zhang 
128476bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1285cd620004SJunchao Zhang     PetscBool dups;
1286cd620004SJunchao Zhang     ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr);
1287cd620004SJunchao Zhang     if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups");
1288cd620004SJunchao Zhang     for (i=0; i<nselected; i++)
1289cd620004SJunchao Zhang       if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots);
1290cd620004SJunchao Zhang   }
1291f659e5c7SJunchao Zhang 
1292f659e5c7SJunchao Zhang   if (sf->ops->CreateEmbeddedSF) {
1293cd620004SJunchao Zhang     ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr);
1294f659e5c7SJunchao Zhang   } else {
1295cd620004SJunchao Zhang     /* A generic version of creating embedded sf */
1296cd620004SJunchao Zhang     ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr);
1297cd620004SJunchao Zhang     maxlocal = maxleaf - minleaf + 1;
1298cd620004SJunchao Zhang     ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr);
1299cd620004SJunchao Zhang     leafdata = leafmem - minleaf;
1300cd620004SJunchao Zhang     /* Tag selected roots and bcast to leaves */
1301cd620004SJunchao Zhang     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1302cd620004SJunchao Zhang     ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
1303cd620004SJunchao Zhang     ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr);
1304ba2a7774SJunchao Zhang 
1305cd620004SJunchao Zhang     /* Build esf with leaves that are still connected */
1306cd620004SJunchao Zhang     esf_nleaves = 0;
1307cd620004SJunchao Zhang     for (i=0; i<nleaves; i++) {
1308cd620004SJunchao Zhang       j = ilocal ? ilocal[i] : i;
1309cd620004SJunchao Zhang       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1310cd620004SJunchao Zhang          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1311cd620004SJunchao Zhang       */
1312cd620004SJunchao Zhang       esf_nleaves += (leafdata[j] ? 1 : 0);
1313cd620004SJunchao Zhang     }
1314cd620004SJunchao Zhang     ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr);
1315cd620004SJunchao Zhang     ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr);
1316cd620004SJunchao Zhang     for (i=n=0; i<nleaves; i++) {
1317cd620004SJunchao Zhang       j = ilocal ? ilocal[i] : i;
1318cd620004SJunchao Zhang       if (leafdata[j]) {
1319cd620004SJunchao Zhang         new_ilocal[n]        = j;
1320cd620004SJunchao Zhang         new_iremote[n].rank  = iremote[i].rank;
1321cd620004SJunchao Zhang         new_iremote[n].index = iremote[i].index;
1322fc1ede2bSMatthew G. Knepley         ++n;
132395fce210SBarry Smith       }
132495fce210SBarry Smith     }
1325cd620004SJunchao Zhang     ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr);
1326cd620004SJunchao Zhang     ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr);
1327cd620004SJunchao Zhang     ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1328cd620004SJunchao Zhang     ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr);
1329f659e5c7SJunchao Zhang   }
1330140a1472SStefano Zampini   ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr);
133195fce210SBarry Smith   PetscFunctionReturn(0);
133295fce210SBarry Smith }
133395fce210SBarry Smith 
13342f5fb4c2SMatthew G. Knepley /*@C
13352f5fb4c2SMatthew G. Knepley   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
13362f5fb4c2SMatthew G. Knepley 
13372f5fb4c2SMatthew G. Knepley   Collective
13382f5fb4c2SMatthew G. Knepley 
13392f5fb4c2SMatthew G. Knepley   Input Arguments:
13402f5fb4c2SMatthew G. Knepley + sf - original star forest
1341f659e5c7SJunchao Zhang . nselected  - number of selected leaves on this process
1342f659e5c7SJunchao Zhang - selected   - indices of the selected leaves on this process
13432f5fb4c2SMatthew G. Knepley 
13442f5fb4c2SMatthew G. Knepley   Output Arguments:
13452f5fb4c2SMatthew G. Knepley .  newsf - new star forest
13462f5fb4c2SMatthew G. Knepley 
13472f5fb4c2SMatthew G. Knepley   Level: advanced
13482f5fb4c2SMatthew G. Knepley 
13492f5fb4c2SMatthew G. Knepley .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph()
13502f5fb4c2SMatthew G. Knepley @*/
1351f659e5c7SJunchao Zhang PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
13522f5fb4c2SMatthew G. Knepley {
1353f659e5c7SJunchao Zhang   const PetscSFNode *iremote;
1354f659e5c7SJunchao Zhang   PetscSFNode       *new_iremote;
1355f659e5c7SJunchao Zhang   const PetscInt    *ilocal;
1356f659e5c7SJunchao Zhang   PetscInt          i,nroots,*leaves,*new_ilocal;
1357f659e5c7SJunchao Zhang   MPI_Comm          comm;
13582f5fb4c2SMatthew G. Knepley   PetscErrorCode    ierr;
13592f5fb4c2SMatthew G. Knepley 
13602f5fb4c2SMatthew G. Knepley   PetscFunctionBegin;
13612f5fb4c2SMatthew G. Knepley   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
136229046d53SLisandro Dalcin   PetscSFCheckGraphSet(sf,1);
1363f659e5c7SJunchao Zhang   if (nselected) PetscValidPointer(selected,3);
13642f5fb4c2SMatthew G. Knepley   PetscValidPointer(newsf,4);
13652f5fb4c2SMatthew G. Knepley 
1366f659e5c7SJunchao Zhang   /* Uniq selected[] and put results in leaves[] */
1367f659e5c7SJunchao Zhang   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
1368f659e5c7SJunchao Zhang   ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr);
1369dd5b3ca6SJunchao Zhang   ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr);
1370f659e5c7SJunchao Zhang   ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr);
1371f659e5c7SJunchao Zhang   if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves);
1372f659e5c7SJunchao Zhang 
1373f659e5c7SJunchao Zhang   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1374f659e5c7SJunchao Zhang   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1375f659e5c7SJunchao Zhang     ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr);
1376f659e5c7SJunchao Zhang   } else {
1377f659e5c7SJunchao Zhang     ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr);
1378f659e5c7SJunchao Zhang     ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr);
1379f659e5c7SJunchao Zhang     ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr);
1380f659e5c7SJunchao Zhang     for (i=0; i<nselected; ++i) {
1381f659e5c7SJunchao Zhang       const PetscInt l     = leaves[i];
1382f659e5c7SJunchao Zhang       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1383f659e5c7SJunchao Zhang       new_iremote[i].rank  = iremote[l].rank;
1384f659e5c7SJunchao Zhang       new_iremote[i].index = iremote[l].index;
13852f5fb4c2SMatthew G. Knepley     }
13864cc19a0cSStefano Zampini     ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr);
1387f659e5c7SJunchao Zhang     ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
1388f659e5c7SJunchao Zhang   }
1389f659e5c7SJunchao Zhang   ierr = PetscFree(leaves);CHKERRQ(ierr);
13902f5fb4c2SMatthew G. Knepley   PetscFunctionReturn(0);
13912f5fb4c2SMatthew G. Knepley }
13922f5fb4c2SMatthew G. Knepley 
139395fce210SBarry Smith /*@C
13943482bfa8SJunchao Zhang    PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd()
13953482bfa8SJunchao Zhang 
13963482bfa8SJunchao Zhang    Collective on PetscSF
13973482bfa8SJunchao Zhang 
13983482bfa8SJunchao Zhang    Input Arguments:
13993482bfa8SJunchao Zhang +  sf - star forest on which to communicate
14003482bfa8SJunchao Zhang .  unit - data type associated with each node
14013482bfa8SJunchao Zhang .  rootdata - buffer to broadcast
14023482bfa8SJunchao Zhang -  op - operation to use for reduction
14033482bfa8SJunchao Zhang 
14043482bfa8SJunchao Zhang    Output Arguments:
14053482bfa8SJunchao Zhang .  leafdata - buffer to be reduced with values from each leaf's respective root
14063482bfa8SJunchao Zhang 
14073482bfa8SJunchao Zhang    Level: intermediate
14083482bfa8SJunchao Zhang 
1409*d0295fc0SJunchao Zhang    Notes:
1410*d0295fc0SJunchao Zhang     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1411*d0295fc0SJunchao Zhang     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1412*d0295fc0SJunchao Zhang     use PetscSFBcastAndOpWithMemTypeBegin() instead.
1413*d0295fc0SJunchao Zhang .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(), PetscSFBcastAndOpWithMemTypeBegin()
14143482bfa8SJunchao Zhang @*/
14153482bfa8SJunchao Zhang PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
14163482bfa8SJunchao Zhang {
14173482bfa8SJunchao Zhang   PetscErrorCode ierr;
1418eb02082bSJunchao Zhang   PetscMemType   rootmtype,leafmtype;
14193482bfa8SJunchao Zhang 
14203482bfa8SJunchao Zhang   PetscFunctionBegin;
14213482bfa8SJunchao Zhang   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
14223482bfa8SJunchao Zhang   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
14233482bfa8SJunchao Zhang   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1424eb02082bSJunchao Zhang   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1425eb02082bSJunchao Zhang   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1426eb02082bSJunchao Zhang   ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
14273482bfa8SJunchao Zhang   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
14283482bfa8SJunchao Zhang   PetscFunctionReturn(0);
14293482bfa8SJunchao Zhang }
14303482bfa8SJunchao Zhang 
14313482bfa8SJunchao Zhang /*@C
1432*d0295fc0SJunchao Zhang    PetscSFBcastAndOpWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastAndOpEnd()
1433*d0295fc0SJunchao Zhang 
1434*d0295fc0SJunchao Zhang    Collective on PetscSF
1435*d0295fc0SJunchao Zhang 
1436*d0295fc0SJunchao Zhang    Input Arguments:
1437*d0295fc0SJunchao Zhang +  sf - star forest on which to communicate
1438*d0295fc0SJunchao Zhang .  unit - data type associated with each node
1439*d0295fc0SJunchao Zhang .  rootmtype - memory type of rootdata
1440*d0295fc0SJunchao Zhang .  rootdata - buffer to broadcast
1441*d0295fc0SJunchao Zhang .  leafmtype - memory type of leafdata
1442*d0295fc0SJunchao Zhang -  op - operation to use for reduction
1443*d0295fc0SJunchao Zhang 
1444*d0295fc0SJunchao Zhang    Output Arguments:
1445*d0295fc0SJunchao Zhang .  leafdata - buffer to be reduced with values from each leaf's respective root
1446*d0295fc0SJunchao Zhang 
1447*d0295fc0SJunchao Zhang    Level: intermediate
1448*d0295fc0SJunchao Zhang 
1449*d0295fc0SJunchao Zhang .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(),PetscSFBcastAndOpBegin()
1450*d0295fc0SJunchao Zhang @*/
1451*d0295fc0SJunchao Zhang PetscErrorCode PetscSFBcastAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1452*d0295fc0SJunchao Zhang {
1453*d0295fc0SJunchao Zhang   PetscErrorCode ierr;
1454*d0295fc0SJunchao Zhang 
1455*d0295fc0SJunchao Zhang   PetscFunctionBegin;
1456*d0295fc0SJunchao Zhang   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1457*d0295fc0SJunchao Zhang   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1458*d0295fc0SJunchao Zhang   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1459*d0295fc0SJunchao Zhang   ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr);
1460*d0295fc0SJunchao Zhang   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1461*d0295fc0SJunchao Zhang   PetscFunctionReturn(0);
1462*d0295fc0SJunchao Zhang }
1463*d0295fc0SJunchao Zhang 
1464*d0295fc0SJunchao Zhang /*@C
14653482bfa8SJunchao Zhang    PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin()
14663482bfa8SJunchao Zhang 
14673482bfa8SJunchao Zhang    Collective
14683482bfa8SJunchao Zhang 
14693482bfa8SJunchao Zhang    Input Arguments:
14703482bfa8SJunchao Zhang +  sf - star forest
14713482bfa8SJunchao Zhang .  unit - data type
14723482bfa8SJunchao Zhang .  rootdata - buffer to broadcast
14733482bfa8SJunchao Zhang -  op - operation to use for reduction
14743482bfa8SJunchao Zhang 
14753482bfa8SJunchao Zhang    Output Arguments:
14763482bfa8SJunchao Zhang .  leafdata - buffer to be reduced with values from each leaf's respective root
14773482bfa8SJunchao Zhang 
14783482bfa8SJunchao Zhang    Level: intermediate
14793482bfa8SJunchao Zhang 
14803482bfa8SJunchao Zhang .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
14813482bfa8SJunchao Zhang @*/
14823482bfa8SJunchao Zhang PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
14833482bfa8SJunchao Zhang {
14843482bfa8SJunchao Zhang   PetscErrorCode ierr;
14853482bfa8SJunchao Zhang 
14863482bfa8SJunchao Zhang   PetscFunctionBegin;
14873482bfa8SJunchao Zhang   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
14883482bfa8SJunchao Zhang   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
148900816365SJunchao Zhang   ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr);
14903482bfa8SJunchao Zhang   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
14913482bfa8SJunchao Zhang   PetscFunctionReturn(0);
14923482bfa8SJunchao Zhang }
14933482bfa8SJunchao Zhang 
14943482bfa8SJunchao Zhang /*@C
149595fce210SBarry Smith    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
149695fce210SBarry Smith 
149795fce210SBarry Smith    Collective
149895fce210SBarry Smith 
149995fce210SBarry Smith    Input Arguments:
150095fce210SBarry Smith +  sf - star forest
150195fce210SBarry Smith .  unit - data type
150295fce210SBarry Smith .  leafdata - values to reduce
150395fce210SBarry Smith -  op - reduction operation
150495fce210SBarry Smith 
150595fce210SBarry Smith    Output Arguments:
150695fce210SBarry Smith .  rootdata - result of reduction of values from all leaves of each root
150795fce210SBarry Smith 
150895fce210SBarry Smith    Level: intermediate
150995fce210SBarry Smith 
1510*d0295fc0SJunchao Zhang    Notes:
1511*d0295fc0SJunchao Zhang     When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1512*d0295fc0SJunchao Zhang     are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should
1513*d0295fc0SJunchao Zhang     use PetscSFReduceWithMemTypeBegin() instead.
1514*d0295fc0SJunchao Zhang 
1515*d0295fc0SJunchao Zhang .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
151695fce210SBarry Smith @*/
151795fce210SBarry Smith PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
151895fce210SBarry Smith {
151995fce210SBarry Smith   PetscErrorCode ierr;
1520eb02082bSJunchao Zhang   PetscMemType   rootmtype,leafmtype;
152195fce210SBarry Smith 
152295fce210SBarry Smith   PetscFunctionBegin;
152395fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
152495fce210SBarry Smith   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
152529046d53SLisandro Dalcin   ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1526eb02082bSJunchao Zhang   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1527eb02082bSJunchao Zhang   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1528eb02082bSJunchao Zhang   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1529563ffbabSMatthew G. Knepley   ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
153095fce210SBarry Smith   PetscFunctionReturn(0);
153195fce210SBarry Smith }
153295fce210SBarry Smith 
153395fce210SBarry Smith /*@C
1534*d0295fc0SJunchao Zhang    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1535*d0295fc0SJunchao Zhang 
1536*d0295fc0SJunchao Zhang    Collective
1537*d0295fc0SJunchao Zhang 
1538*d0295fc0SJunchao Zhang    Input Arguments:
1539*d0295fc0SJunchao Zhang +  sf - star forest
1540*d0295fc0SJunchao Zhang .  unit - data type
1541*d0295fc0SJunchao Zhang .  leafmtype - memory type of leafdata
1542*d0295fc0SJunchao Zhang .  leafdata - values to reduce
1543*d0295fc0SJunchao Zhang .  rootmtype - memory type of rootdata
1544*d0295fc0SJunchao Zhang -  op - reduction operation
1545*d0295fc0SJunchao Zhang 
1546*d0295fc0SJunchao Zhang    Output Arguments:
1547*d0295fc0SJunchao Zhang .  rootdata - result of reduction of values from all leaves of each root
1548*d0295fc0SJunchao Zhang 
1549*d0295fc0SJunchao Zhang    Level: intermediate
1550*d0295fc0SJunchao Zhang 
1551*d0295fc0SJunchao Zhang .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1552*d0295fc0SJunchao Zhang @*/
1553*d0295fc0SJunchao Zhang PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1554*d0295fc0SJunchao Zhang {
1555*d0295fc0SJunchao Zhang   PetscErrorCode ierr;
1556*d0295fc0SJunchao Zhang 
1557*d0295fc0SJunchao Zhang   PetscFunctionBegin;
1558*d0295fc0SJunchao Zhang   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1559*d0295fc0SJunchao Zhang   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1560*d0295fc0SJunchao Zhang   ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1561*d0295fc0SJunchao Zhang   ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr);
1562*d0295fc0SJunchao Zhang   ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr);
1563*d0295fc0SJunchao Zhang   PetscFunctionReturn(0);
1564*d0295fc0SJunchao Zhang }
1565*d0295fc0SJunchao Zhang 
1566*d0295fc0SJunchao Zhang /*@C
156795fce210SBarry Smith    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
156895fce210SBarry Smith 
156995fce210SBarry Smith    Collective
157095fce210SBarry Smith 
157195fce210SBarry Smith    Input Arguments:
157295fce210SBarry Smith +  sf - star forest
157395fce210SBarry Smith .  unit - data type
157495fce210SBarry Smith .  leafdata - values to reduce
157595fce210SBarry Smith -  op - reduction operation
157695fce210SBarry Smith 
157795fce210SBarry Smith    Output Arguments:
157895fce210SBarry Smith .  rootdata - result of reduction of values from all leaves of each root
157995fce210SBarry Smith 
158095fce210SBarry Smith    Level: intermediate
158195fce210SBarry Smith 
158295fce210SBarry Smith .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
158395fce210SBarry Smith @*/
158495fce210SBarry Smith PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
158595fce210SBarry Smith {
158695fce210SBarry Smith   PetscErrorCode ierr;
158795fce210SBarry Smith 
158895fce210SBarry Smith   PetscFunctionBegin;
158995fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
159029046d53SLisandro Dalcin   ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
159100816365SJunchao Zhang   ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr);
1592563ffbabSMatthew G. Knepley   ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr);
159395fce210SBarry Smith   PetscFunctionReturn(0);
159495fce210SBarry Smith }
159595fce210SBarry Smith 
159695fce210SBarry Smith /*@C
1597a1729e3fSJunchao Zhang    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1598a1729e3fSJunchao Zhang 
1599a1729e3fSJunchao Zhang    Collective
1600a1729e3fSJunchao Zhang 
1601a1729e3fSJunchao Zhang    Input Arguments:
1602a1729e3fSJunchao Zhang +  sf - star forest
1603a1729e3fSJunchao Zhang .  unit - data type
1604a1729e3fSJunchao Zhang .  leafdata - leaf values to use in reduction
1605a1729e3fSJunchao Zhang -  op - operation to use for reduction
1606a1729e3fSJunchao Zhang 
1607a1729e3fSJunchao Zhang    Output Arguments:
1608a1729e3fSJunchao Zhang +  rootdata - root values to be updated, input state is seen by first process to perform an update
1609a1729e3fSJunchao Zhang -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1610a1729e3fSJunchao Zhang 
1611a1729e3fSJunchao Zhang    Level: advanced
1612a1729e3fSJunchao Zhang 
1613a1729e3fSJunchao Zhang    Note:
1614a1729e3fSJunchao Zhang    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1615a1729e3fSJunchao Zhang    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1616a1729e3fSJunchao Zhang    not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1617a1729e3fSJunchao Zhang    integers.
1618a1729e3fSJunchao Zhang 
1619a1729e3fSJunchao Zhang .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1620a1729e3fSJunchao Zhang @*/
1621a1729e3fSJunchao Zhang PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1622a1729e3fSJunchao Zhang {
1623a1729e3fSJunchao Zhang   PetscErrorCode ierr;
1624eb02082bSJunchao Zhang   PetscMemType   rootmtype,leafmtype,leafupdatemtype;
1625a1729e3fSJunchao Zhang 
1626a1729e3fSJunchao Zhang   PetscFunctionBegin;
1627a1729e3fSJunchao Zhang   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1628a1729e3fSJunchao Zhang   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
1629a1729e3fSJunchao Zhang   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1630eb02082bSJunchao Zhang   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
1631eb02082bSJunchao Zhang   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
1632eb02082bSJunchao Zhang   ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr);
1633eb02082bSJunchao Zhang   if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types");
1634eb02082bSJunchao Zhang   ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr);
1635a1729e3fSJunchao Zhang   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
1636a1729e3fSJunchao Zhang   PetscFunctionReturn(0);
1637a1729e3fSJunchao Zhang }
1638a1729e3fSJunchao Zhang 
1639a1729e3fSJunchao Zhang /*@C
1640a1729e3fSJunchao Zhang    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1641a1729e3fSJunchao Zhang 
1642a1729e3fSJunchao Zhang    Collective
1643a1729e3fSJunchao Zhang 
1644a1729e3fSJunchao Zhang    Input Arguments:
1645a1729e3fSJunchao Zhang +  sf - star forest
1646a1729e3fSJunchao Zhang .  unit - data type
1647a1729e3fSJunchao Zhang .  leafdata - leaf values to use in reduction
1648a1729e3fSJunchao Zhang -  op - operation to use for reduction
1649a1729e3fSJunchao Zhang 
1650a1729e3fSJunchao Zhang    Output Arguments:
1651a1729e3fSJunchao Zhang +  rootdata - root values to be updated, input state is seen by first process to perform an update
1652a1729e3fSJunchao Zhang -  leafupdate - state at each leaf's respective root immediately prior to my atomic update
1653a1729e3fSJunchao Zhang 
1654a1729e3fSJunchao Zhang    Level: advanced
1655a1729e3fSJunchao Zhang 
1656a1729e3fSJunchao Zhang .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1657a1729e3fSJunchao Zhang @*/
1658a1729e3fSJunchao Zhang PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1659a1729e3fSJunchao Zhang {
1660a1729e3fSJunchao Zhang   PetscErrorCode ierr;
1661a1729e3fSJunchao Zhang 
1662a1729e3fSJunchao Zhang   PetscFunctionBegin;
1663a1729e3fSJunchao Zhang   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1664a1729e3fSJunchao Zhang   ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
166500816365SJunchao Zhang   ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr);
1666a1729e3fSJunchao Zhang   ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr);
1667a1729e3fSJunchao Zhang   PetscFunctionReturn(0);
1668a1729e3fSJunchao Zhang }
1669a1729e3fSJunchao Zhang 
1670a1729e3fSJunchao Zhang /*@C
167195fce210SBarry Smith    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
167295fce210SBarry Smith 
167395fce210SBarry Smith    Collective
167495fce210SBarry Smith 
167595fce210SBarry Smith    Input Arguments:
167695fce210SBarry Smith .  sf - star forest
167795fce210SBarry Smith 
167895fce210SBarry Smith    Output Arguments:
167995fce210SBarry Smith .  degree - degree of each root vertex
168095fce210SBarry Smith 
168195fce210SBarry Smith    Level: advanced
168295fce210SBarry Smith 
1683ffe67aa5SVáclav Hapla    Notes:
1684ffe67aa5SVáclav Hapla    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1685ffe67aa5SVáclav Hapla 
168695fce210SBarry Smith .seealso: PetscSFGatherBegin()
168795fce210SBarry Smith @*/
168895fce210SBarry Smith PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
168995fce210SBarry Smith {
169095fce210SBarry Smith   PetscErrorCode ierr;
169195fce210SBarry Smith 
169295fce210SBarry Smith   PetscFunctionBegin;
169395fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
169495fce210SBarry Smith   PetscSFCheckGraphSet(sf,1);
169595fce210SBarry Smith   PetscValidPointer(degree,2);
1696803bd9e8SMatthew G. Knepley   if (!sf->degreeknown) {
16975b0d146aSStefano Zampini     PetscInt i, nroots = sf->nroots, maxlocal;
1698803bd9e8SMatthew G. Knepley     if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested.");
16995b0d146aSStefano Zampini     maxlocal = sf->maxleaf-sf->minleaf+1;
170029046d53SLisandro Dalcin     ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr);
170129046d53SLisandro Dalcin     ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
170229046d53SLisandro Dalcin     for (i=0; i<nroots; i++) sf->degree[i] = 0;
17039837ea96SMatthew G. Knepley     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
17045b0d146aSStefano Zampini     ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
170595fce210SBarry Smith   }
170695fce210SBarry Smith   *degree = NULL;
170795fce210SBarry Smith   PetscFunctionReturn(0);
170895fce210SBarry Smith }
170995fce210SBarry Smith 
171095fce210SBarry Smith /*@C
171195fce210SBarry Smith    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
171295fce210SBarry Smith 
171395fce210SBarry Smith    Collective
171495fce210SBarry Smith 
171595fce210SBarry Smith    Input Arguments:
171695fce210SBarry Smith .  sf - star forest
171795fce210SBarry Smith 
171895fce210SBarry Smith    Output Arguments:
171995fce210SBarry Smith .  degree - degree of each root vertex
172095fce210SBarry Smith 
172195fce210SBarry Smith    Level: developer
172295fce210SBarry Smith 
1723ffe67aa5SVáclav Hapla    Notes:
1724ffe67aa5SVáclav Hapla    The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1725ffe67aa5SVáclav Hapla 
172695fce210SBarry Smith .seealso:
172795fce210SBarry Smith @*/
172895fce210SBarry Smith PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
172995fce210SBarry Smith {
173095fce210SBarry Smith   PetscErrorCode ierr;
173195fce210SBarry Smith 
173295fce210SBarry Smith   PetscFunctionBegin;
173395fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
173495fce210SBarry Smith   PetscSFCheckGraphSet(sf,1);
173529046d53SLisandro Dalcin   PetscValidPointer(degree,2);
173695fce210SBarry Smith   if (!sf->degreeknown) {
173729046d53SLisandro Dalcin     if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()");
17385b0d146aSStefano Zampini     ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr);
173995fce210SBarry Smith     ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr);
174095fce210SBarry Smith     sf->degreeknown = PETSC_TRUE;
174195fce210SBarry Smith   }
174295fce210SBarry Smith   *degree = sf->degree;
174395fce210SBarry Smith   PetscFunctionReturn(0);
174495fce210SBarry Smith }
174595fce210SBarry Smith 
1746673100f5SVaclav Hapla 
1747673100f5SVaclav Hapla /*@C
174866dfcd1aSVaclav Hapla    PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
174966dfcd1aSVaclav Hapla    Each multi-root is assigned index of the corresponding original root.
1750673100f5SVaclav Hapla 
1751673100f5SVaclav Hapla    Collective
1752673100f5SVaclav Hapla 
1753673100f5SVaclav Hapla    Input Arguments:
1754673100f5SVaclav Hapla +  sf - star forest
1755673100f5SVaclav Hapla -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1756673100f5SVaclav Hapla 
1757673100f5SVaclav Hapla    Output Arguments:
175866dfcd1aSVaclav Hapla +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
175966dfcd1aSVaclav Hapla -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1760673100f5SVaclav Hapla 
1761673100f5SVaclav Hapla    Level: developer
1762673100f5SVaclav Hapla 
1763ffe67aa5SVáclav Hapla    Notes:
1764ffe67aa5SVáclav Hapla    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1765ffe67aa5SVáclav Hapla 
1766673100f5SVaclav Hapla .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1767673100f5SVaclav Hapla @*/
176866dfcd1aSVaclav Hapla PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1769673100f5SVaclav Hapla {
1770673100f5SVaclav Hapla   PetscSF             msf;
1771673100f5SVaclav Hapla   PetscInt            i, j, k, nroots, nmroots;
1772673100f5SVaclav Hapla   PetscErrorCode      ierr;
1773673100f5SVaclav Hapla 
1774673100f5SVaclav Hapla   PetscFunctionBegin;
1775673100f5SVaclav Hapla   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
1776673100f5SVaclav Hapla   ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
177729328920SVaclav Hapla   if (nroots) PetscValidIntPointer(degree,2);
177866dfcd1aSVaclav Hapla   if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3);
177966dfcd1aSVaclav Hapla   PetscValidPointer(multiRootsOrigNumbering,4);
1780673100f5SVaclav Hapla   ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr);
1781673100f5SVaclav Hapla   ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr);
178266dfcd1aSVaclav Hapla   ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr);
1783673100f5SVaclav Hapla   for (i=0,j=0,k=0; i<nroots; i++) {
1784673100f5SVaclav Hapla     if (!degree[i]) continue;
1785673100f5SVaclav Hapla     for (j=0; j<degree[i]; j++,k++) {
178666dfcd1aSVaclav Hapla       (*multiRootsOrigNumbering)[k] = i;
1787673100f5SVaclav Hapla     }
1788673100f5SVaclav Hapla   }
1789673100f5SVaclav Hapla   if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail");
179066dfcd1aSVaclav Hapla   if (nMultiRoots) *nMultiRoots = nmroots;
1791673100f5SVaclav Hapla   PetscFunctionReturn(0);
1792673100f5SVaclav Hapla }
1793673100f5SVaclav Hapla 
179495fce210SBarry Smith /*@C
179595fce210SBarry Smith    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
179695fce210SBarry Smith 
179795fce210SBarry Smith    Collective
179895fce210SBarry Smith 
179995fce210SBarry Smith    Input Arguments:
180095fce210SBarry Smith +  sf - star forest
180195fce210SBarry Smith .  unit - data type
180295fce210SBarry Smith -  leafdata - leaf data to gather to roots
180395fce210SBarry Smith 
180495fce210SBarry Smith    Output Argument:
180595fce210SBarry Smith .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
180695fce210SBarry Smith 
180795fce210SBarry Smith    Level: intermediate
180895fce210SBarry Smith 
180995fce210SBarry Smith .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
181095fce210SBarry Smith @*/
181195fce210SBarry Smith PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
181295fce210SBarry Smith {
181395fce210SBarry Smith   PetscErrorCode ierr;
1814a5526d50SJunchao Zhang   PetscSF        multi = NULL;
181595fce210SBarry Smith 
181695fce210SBarry Smith   PetscFunctionBegin;
181795fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
181829046d53SLisandro Dalcin   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
181995fce210SBarry Smith   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
18208bfbc91cSJed Brown   ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
182195fce210SBarry Smith   PetscFunctionReturn(0);
182295fce210SBarry Smith }
182395fce210SBarry Smith 
182495fce210SBarry Smith /*@C
182595fce210SBarry Smith    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
182695fce210SBarry Smith 
182795fce210SBarry Smith    Collective
182895fce210SBarry Smith 
182995fce210SBarry Smith    Input Arguments:
183095fce210SBarry Smith +  sf - star forest
183195fce210SBarry Smith .  unit - data type
183295fce210SBarry Smith -  leafdata - leaf data to gather to roots
183395fce210SBarry Smith 
183495fce210SBarry Smith    Output Argument:
183595fce210SBarry Smith .  multirootdata - root buffer to gather into, amount of space per root is equal to its degree
183695fce210SBarry Smith 
183795fce210SBarry Smith    Level: intermediate
183895fce210SBarry Smith 
183995fce210SBarry Smith .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
184095fce210SBarry Smith @*/
184195fce210SBarry Smith PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
184295fce210SBarry Smith {
184395fce210SBarry Smith   PetscErrorCode ierr;
1844a5526d50SJunchao Zhang   PetscSF        multi = NULL;
184595fce210SBarry Smith 
184695fce210SBarry Smith   PetscFunctionBegin;
184795fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
184895fce210SBarry Smith   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
18498bfbc91cSJed Brown   ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr);
185095fce210SBarry Smith   PetscFunctionReturn(0);
185195fce210SBarry Smith }
185295fce210SBarry Smith 
185395fce210SBarry Smith /*@C
185495fce210SBarry Smith    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
185595fce210SBarry Smith 
185695fce210SBarry Smith    Collective
185795fce210SBarry Smith 
185895fce210SBarry Smith    Input Arguments:
185995fce210SBarry Smith +  sf - star forest
186095fce210SBarry Smith .  unit - data type
186195fce210SBarry Smith -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
186295fce210SBarry Smith 
186395fce210SBarry Smith    Output Argument:
186495fce210SBarry Smith .  leafdata - leaf data to be update with personal data from each respective root
186595fce210SBarry Smith 
186695fce210SBarry Smith    Level: intermediate
186795fce210SBarry Smith 
186895fce210SBarry Smith .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
186995fce210SBarry Smith @*/
187095fce210SBarry Smith PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
187195fce210SBarry Smith {
187295fce210SBarry Smith   PetscErrorCode ierr;
1873a5526d50SJunchao Zhang   PetscSF        multi = NULL;
187495fce210SBarry Smith 
187595fce210SBarry Smith   PetscFunctionBegin;
187695fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
187795fce210SBarry Smith   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
187895fce210SBarry Smith   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
187995fce210SBarry Smith   ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
188095fce210SBarry Smith   PetscFunctionReturn(0);
188195fce210SBarry Smith }
188295fce210SBarry Smith 
188395fce210SBarry Smith /*@C
188495fce210SBarry Smith    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
188595fce210SBarry Smith 
188695fce210SBarry Smith    Collective
188795fce210SBarry Smith 
188895fce210SBarry Smith    Input Arguments:
188995fce210SBarry Smith +  sf - star forest
189095fce210SBarry Smith .  unit - data type
189195fce210SBarry Smith -  multirootdata - root buffer to send to each leaf, one unit of data per leaf
189295fce210SBarry Smith 
189395fce210SBarry Smith    Output Argument:
189495fce210SBarry Smith .  leafdata - leaf data to be update with personal data from each respective root
189595fce210SBarry Smith 
189695fce210SBarry Smith    Level: intermediate
189795fce210SBarry Smith 
189895fce210SBarry Smith .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
189995fce210SBarry Smith @*/
190095fce210SBarry Smith PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
190195fce210SBarry Smith {
190295fce210SBarry Smith   PetscErrorCode ierr;
1903a5526d50SJunchao Zhang   PetscSF        multi = NULL;
190495fce210SBarry Smith 
190595fce210SBarry Smith   PetscFunctionBegin;
190695fce210SBarry Smith   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
190795fce210SBarry Smith   ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr);
190895fce210SBarry Smith   ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr);
190995fce210SBarry Smith   PetscFunctionReturn(0);
191095fce210SBarry Smith }
1911a7b3aa13SAta Mesgarnejad 
1912a072220fSLawrence Mitchell static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1913a072220fSLawrence Mitchell {
1914a072220fSLawrence Mitchell   PetscInt        i, n, nleaves;
1915a072220fSLawrence Mitchell   const PetscInt *ilocal = NULL;
1916a072220fSLawrence Mitchell   PetscHSetI      seen;
1917a072220fSLawrence Mitchell   PetscErrorCode  ierr;
1918a072220fSLawrence Mitchell 
1919a072220fSLawrence Mitchell   PetscFunctionBegin;
192076bd3646SJed Brown   if (!PetscDefined(USE_DEBUG)) PetscFunctionReturn(0);
1921a072220fSLawrence Mitchell   ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr);
1922a072220fSLawrence Mitchell   ierr = PetscHSetICreate(&seen);CHKERRQ(ierr);
1923a072220fSLawrence Mitchell   for (i = 0; i < nleaves; i++) {
1924a072220fSLawrence Mitchell     const PetscInt leaf = ilocal ? ilocal[i] : i;
1925a072220fSLawrence Mitchell     ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr);
1926a072220fSLawrence Mitchell   }
1927a072220fSLawrence Mitchell   ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr);
1928a072220fSLawrence Mitchell   if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique");
1929a072220fSLawrence Mitchell   ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr);
1930a072220fSLawrence Mitchell   PetscFunctionReturn(0);
1931a072220fSLawrence Mitchell }
193254729392SStefano Zampini 
1933a7b3aa13SAta Mesgarnejad /*@
193404c0ada0SJunchao Zhang   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1935a7b3aa13SAta Mesgarnejad 
1936a7b3aa13SAta Mesgarnejad   Input Parameters:
193754729392SStefano Zampini + sfA - The first PetscSF
193854729392SStefano Zampini - sfB - The second PetscSF
1939a7b3aa13SAta Mesgarnejad 
1940a7b3aa13SAta Mesgarnejad   Output Parameters:
194154729392SStefano Zampini . sfBA - The composite SF
1942a7b3aa13SAta Mesgarnejad 
1943a7b3aa13SAta Mesgarnejad   Level: developer
1944a7b3aa13SAta Mesgarnejad 
1945a072220fSLawrence Mitchell   Notes:
194654729392SStefano Zampini   Currently, the two SFs must be defined on congruent communicators and they must be true star
194754729392SStefano Zampini   forests, i.e. the same leaf is not connected with different roots.
194854729392SStefano Zampini 
194954729392SStefano Zampini   sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
195054729392SStefano Zampini   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
195154729392SStefano Zampini   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
195254729392SStefano Zampini   Bcast on sfA, then a Bcast on sfB, on connected nodes.
1953a072220fSLawrence Mitchell 
195404c0ada0SJunchao Zhang .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1955a7b3aa13SAta Mesgarnejad @*/
1956a7b3aa13SAta Mesgarnejad PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1957a7b3aa13SAta Mesgarnejad {
195804c0ada0SJunchao Zhang   PetscErrorCode    ierr;
1959a7b3aa13SAta Mesgarnejad   const PetscSFNode *remotePointsA,*remotePointsB;
1960d41018fbSJunchao Zhang   PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
196154729392SStefano Zampini   const PetscInt    *localPointsA,*localPointsB;
196254729392SStefano Zampini   PetscInt          *localPointsBA;
196354729392SStefano Zampini   PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
196454729392SStefano Zampini   PetscBool         denseB;
1965a7b3aa13SAta Mesgarnejad 
1966a7b3aa13SAta Mesgarnejad   PetscFunctionBegin;
1967a7b3aa13SAta Mesgarnejad   PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
196829046d53SLisandro Dalcin   PetscSFCheckGraphSet(sfA,1);
196929046d53SLisandro Dalcin   PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
197029046d53SLisandro Dalcin   PetscSFCheckGraphSet(sfB,2);
197154729392SStefano Zampini   PetscCheckSameComm(sfA,1,sfB,2);
197229046d53SLisandro Dalcin   PetscValidPointer(sfBA,3);
197354729392SStefano Zampini   ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
197454729392SStefano Zampini   ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
197554729392SStefano Zampini 
1976a7b3aa13SAta Mesgarnejad   ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr);
1977a7b3aa13SAta Mesgarnejad   ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr);
1978d41018fbSJunchao Zhang   if (localPointsA) {
197954729392SStefano Zampini     ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr);
198054729392SStefano Zampini     for (i=0; i<numRootsB; i++) {
198154729392SStefano Zampini       reorderedRemotePointsA[i].rank = -1;
198254729392SStefano Zampini       reorderedRemotePointsA[i].index = -1;
198354729392SStefano Zampini     }
198454729392SStefano Zampini     for (i=0; i<numLeavesA; i++) {
198554729392SStefano Zampini       if (localPointsA[i] >= numRootsB) continue;
198654729392SStefano Zampini       reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
198754729392SStefano Zampini     }
1988d41018fbSJunchao Zhang     remotePointsA = reorderedRemotePointsA;
1989d41018fbSJunchao Zhang   }
1990d41018fbSJunchao Zhang   ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr);
1991d41018fbSJunchao Zhang   ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr);
1992d41018fbSJunchao Zhang   ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
1993d41018fbSJunchao Zhang   ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr);
1994d41018fbSJunchao Zhang   ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
1995d41018fbSJunchao Zhang 
199654729392SStefano Zampini   denseB = (PetscBool)!localPointsB;
199754729392SStefano Zampini   for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
199854729392SStefano Zampini     if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
199954729392SStefano Zampini     else numLeavesBA++;
200054729392SStefano Zampini   }
200154729392SStefano Zampini   if (denseB) {
2002d41018fbSJunchao Zhang     localPointsBA  = NULL;
2003d41018fbSJunchao Zhang     remotePointsBA = leafdataB;
2004d41018fbSJunchao Zhang   } else {
200554729392SStefano Zampini     ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr);
200654729392SStefano Zampini     ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr);
200754729392SStefano Zampini     for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
200854729392SStefano Zampini       const PetscInt l = localPointsB ? localPointsB[i] : i;
200954729392SStefano Zampini 
201054729392SStefano Zampini       if (leafdataB[l-minleaf].rank == -1) continue;
201154729392SStefano Zampini       remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
201254729392SStefano Zampini       localPointsBA[numLeavesBA] = l;
201354729392SStefano Zampini       numLeavesBA++;
201454729392SStefano Zampini     }
2015d41018fbSJunchao Zhang     ierr = PetscFree(leafdataB);CHKERRQ(ierr);
2016d41018fbSJunchao Zhang   }
201754729392SStefano Zampini   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
201820c24465SJunchao Zhang   ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
201954729392SStefano Zampini   ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
2020a7b3aa13SAta Mesgarnejad   PetscFunctionReturn(0);
2021a7b3aa13SAta Mesgarnejad }
20221c6ba672SJunchao Zhang 
202304c0ada0SJunchao Zhang /*@
202454729392SStefano Zampini   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
202504c0ada0SJunchao Zhang 
202604c0ada0SJunchao Zhang   Input Parameters:
202754729392SStefano Zampini + sfA - The first PetscSF
202854729392SStefano Zampini - sfB - The second PetscSF
202904c0ada0SJunchao Zhang 
203004c0ada0SJunchao Zhang   Output Parameters:
203154729392SStefano Zampini . sfBA - The composite SF.
203204c0ada0SJunchao Zhang 
203304c0ada0SJunchao Zhang   Level: developer
203404c0ada0SJunchao Zhang 
203554729392SStefano Zampini   Notes:
203654729392SStefano Zampini   Currently, the two SFs must be defined on congruent communicators and they must be true star
203754729392SStefano Zampini   forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the
203854729392SStefano Zampini   second SF must have a degree of 1, i.e., no roots have more than one leaf connected.
203954729392SStefano Zampini 
204054729392SStefano Zampini   sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
204154729392SStefano Zampini   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
204254729392SStefano Zampini   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
204354729392SStefano Zampini   a Reduce on sfB, on connected roots.
204454729392SStefano Zampini 
204554729392SStefano Zampini .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
204604c0ada0SJunchao Zhang @*/
204704c0ada0SJunchao Zhang PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
204804c0ada0SJunchao Zhang {
204904c0ada0SJunchao Zhang   PetscErrorCode    ierr;
205004c0ada0SJunchao Zhang   const PetscSFNode *remotePointsA,*remotePointsB;
205104c0ada0SJunchao Zhang   PetscSFNode       *remotePointsBA;
205204c0ada0SJunchao Zhang   const PetscInt    *localPointsA,*localPointsB;
205354729392SStefano Zampini   PetscSFNode       *reorderedRemotePointsA = NULL;
205454729392SStefano Zampini   PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
20555b0d146aSStefano Zampini   MPI_Op            op;
20565b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
20575b0d146aSStefano Zampini   PetscBool         iswin;
20585b0d146aSStefano Zampini #endif
205904c0ada0SJunchao Zhang 
206004c0ada0SJunchao Zhang   PetscFunctionBegin;
206104c0ada0SJunchao Zhang   PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1);
206204c0ada0SJunchao Zhang   PetscSFCheckGraphSet(sfA,1);
206304c0ada0SJunchao Zhang   PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2);
206404c0ada0SJunchao Zhang   PetscSFCheckGraphSet(sfB,2);
206554729392SStefano Zampini   PetscCheckSameComm(sfA,1,sfB,2);
206604c0ada0SJunchao Zhang   PetscValidPointer(sfBA,3);
206754729392SStefano Zampini   ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr);
206854729392SStefano Zampini   ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr);
206954729392SStefano Zampini 
207004c0ada0SJunchao Zhang   ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr);
207104c0ada0SJunchao Zhang   ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr);
20725b0d146aSStefano Zampini 
20735b0d146aSStefano Zampini   /* TODO: Check roots of sfB have degree of 1 */
20745b0d146aSStefano Zampini   /* Once we implement it, we can replace the MPI_MAXLOC
20755b0d146aSStefano Zampini      with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect.
20765b0d146aSStefano Zampini      We use MPI_MAXLOC only to have a deterministic output from this routine if
20775b0d146aSStefano Zampini      the root condition is not meet.
20785b0d146aSStefano Zampini    */
20795b0d146aSStefano Zampini   op = MPI_MAXLOC;
20805b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES)
20815b0d146aSStefano Zampini   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
20825b0d146aSStefano Zampini   ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr);
20835b0d146aSStefano Zampini   if (iswin) op = MPIU_REPLACE;
20845b0d146aSStefano Zampini #endif
20855b0d146aSStefano Zampini 
208654729392SStefano Zampini   ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr);
208754729392SStefano Zampini   ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr);
208854729392SStefano Zampini   for (i=0; i<maxleaf - minleaf + 1; i++) {
208954729392SStefano Zampini     reorderedRemotePointsA[i].rank = -1;
209054729392SStefano Zampini     reorderedRemotePointsA[i].index = -1;
209154729392SStefano Zampini   }
209254729392SStefano Zampini   if (localPointsA) {
209354729392SStefano Zampini     for (i=0; i<numLeavesA; i++) {
209454729392SStefano Zampini       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
209554729392SStefano Zampini       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
209654729392SStefano Zampini     }
209754729392SStefano Zampini   } else {
209854729392SStefano Zampini     for (i=0; i<numLeavesA; i++) {
209954729392SStefano Zampini       if (i > maxleaf || i < minleaf) continue;
210054729392SStefano Zampini       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
210154729392SStefano Zampini     }
210254729392SStefano Zampini   }
210354729392SStefano Zampini 
210454729392SStefano Zampini   ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr);
210504c0ada0SJunchao Zhang   ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr);
210654729392SStefano Zampini   for (i=0; i<numRootsB; i++) {
210754729392SStefano Zampini     remotePointsBA[i].rank = -1;
210854729392SStefano Zampini     remotePointsBA[i].index = -1;
210954729392SStefano Zampini   }
211054729392SStefano Zampini 
21115b0d146aSStefano Zampini   ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
21125b0d146aSStefano Zampini   ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr);
211354729392SStefano Zampini   ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr);
211454729392SStefano Zampini   for (i=0,numLeavesBA=0; i<numRootsB; i++) {
211554729392SStefano Zampini     if (remotePointsBA[i].rank == -1) continue;
211654729392SStefano Zampini     remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
211754729392SStefano Zampini     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
211854729392SStefano Zampini     localPointsBA[numLeavesBA] = i;
211954729392SStefano Zampini     numLeavesBA++;
212054729392SStefano Zampini   }
212154729392SStefano Zampini   ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr);
212220c24465SJunchao Zhang   ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr);
212354729392SStefano Zampini   ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr);
212404c0ada0SJunchao Zhang   PetscFunctionReturn(0);
212504c0ada0SJunchao Zhang }
212604c0ada0SJunchao Zhang 
21271c6ba672SJunchao Zhang /*
21281c6ba672SJunchao Zhang   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
21291c6ba672SJunchao Zhang 
21301c6ba672SJunchao Zhang   Input Parameters:
21311c6ba672SJunchao Zhang . sf - The global PetscSF
21321c6ba672SJunchao Zhang 
21331c6ba672SJunchao Zhang   Output Parameters:
21341c6ba672SJunchao Zhang . out - The local PetscSF
21351c6ba672SJunchao Zhang  */
21361c6ba672SJunchao Zhang PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
21371c6ba672SJunchao Zhang {
21381c6ba672SJunchao Zhang   MPI_Comm           comm;
21391c6ba672SJunchao Zhang   PetscMPIInt        myrank;
21401c6ba672SJunchao Zhang   const PetscInt     *ilocal;
21411c6ba672SJunchao Zhang   const PetscSFNode  *iremote;
21421c6ba672SJunchao Zhang   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
21431c6ba672SJunchao Zhang   PetscSFNode        *liremote;
21441c6ba672SJunchao Zhang   PetscSF            lsf;
21451c6ba672SJunchao Zhang   PetscErrorCode     ierr;
21461c6ba672SJunchao Zhang 
21471c6ba672SJunchao Zhang   PetscFunctionBegin;
21481c6ba672SJunchao Zhang   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
21491c6ba672SJunchao Zhang   if (sf->ops->CreateLocalSF) {
21501c6ba672SJunchao Zhang     ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr);
21511c6ba672SJunchao Zhang   } else {
21521c6ba672SJunchao Zhang     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
21531c6ba672SJunchao Zhang     ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
21541c6ba672SJunchao Zhang     ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr);
21551c6ba672SJunchao Zhang 
21561c6ba672SJunchao Zhang     /* Find out local edges and build a local SF */
21571c6ba672SJunchao Zhang     ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr);
21581c6ba672SJunchao Zhang     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
21591c6ba672SJunchao Zhang     ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr);
21601c6ba672SJunchao Zhang     ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr);
21611c6ba672SJunchao Zhang 
21621c6ba672SJunchao Zhang     for (i=j=0; i<nleaves; i++) {
21631c6ba672SJunchao Zhang       if (iremote[i].rank == (PetscInt)myrank) {
21641c6ba672SJunchao Zhang         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
21651c6ba672SJunchao Zhang         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
21661c6ba672SJunchao Zhang         liremote[j].index = iremote[i].index;
21671c6ba672SJunchao Zhang         j++;
21681c6ba672SJunchao Zhang       }
21691c6ba672SJunchao Zhang     }
21701c6ba672SJunchao Zhang     ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
217120c24465SJunchao Zhang     ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr);
21721c6ba672SJunchao Zhang     ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
21731c6ba672SJunchao Zhang     ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
21741c6ba672SJunchao Zhang     *out = lsf;
21751c6ba672SJunchao Zhang   }
21761c6ba672SJunchao Zhang   PetscFunctionReturn(0);
21771c6ba672SJunchao Zhang }
2178dd5b3ca6SJunchao Zhang 
2179dd5b3ca6SJunchao Zhang /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2180dd5b3ca6SJunchao Zhang PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2181dd5b3ca6SJunchao Zhang {
2182dd5b3ca6SJunchao Zhang   PetscErrorCode     ierr;
2183eb02082bSJunchao Zhang   PetscMemType       rootmtype,leafmtype;
2184dd5b3ca6SJunchao Zhang 
2185dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
2186dd5b3ca6SJunchao Zhang   PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1);
2187dd5b3ca6SJunchao Zhang   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
2188dd5b3ca6SJunchao Zhang   ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
2189eb02082bSJunchao Zhang   ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr);
2190eb02082bSJunchao Zhang   ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr);
2191dd5b3ca6SJunchao Zhang   if (sf->ops->BcastToZero) {
2192eb02082bSJunchao Zhang     ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr);
2193dd5b3ca6SJunchao Zhang   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2194dd5b3ca6SJunchao Zhang   ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr);
2195dd5b3ca6SJunchao Zhang   PetscFunctionReturn(0);
2196dd5b3ca6SJunchao Zhang }
2197dd5b3ca6SJunchao Zhang 
2198