1af0996ceSBarry Smith #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 2c4e6a40aSLawrence Mitchell #include <petsc/private/hashseti.h> 395fce210SBarry Smith #include <petscctable.h> 495fce210SBarry Smith 57fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_CUDA) 67fd2d3dbSJunchao Zhang #include <cuda_runtime.h> 77fd2d3dbSJunchao Zhang #endif 87fd2d3dbSJunchao Zhang 97fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_HIP) 107fd2d3dbSJunchao Zhang #include <hip/hip_runtime.h> 117fd2d3dbSJunchao Zhang #endif 127fd2d3dbSJunchao Zhang 1395fce210SBarry Smith #if defined(PETSC_USE_DEBUG) 1495fce210SBarry Smith # define PetscSFCheckGraphSet(sf,arg) do { \ 1595fce210SBarry Smith if (PetscUnlikely(!(sf)->graphset)) \ 16dd5b3ca6SJunchao Zhang SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %D \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \ 1795fce210SBarry Smith } while (0) 1895fce210SBarry Smith #else 1995fce210SBarry Smith # define PetscSFCheckGraphSet(sf,arg) do {} while (0) 2095fce210SBarry Smith #endif 2195fce210SBarry Smith 224c8fdceaSLisandro Dalcin const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL}; 2395fce210SBarry Smith 247fd2d3dbSJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscGetMemType(const void *data,PetscMemType *type) 257fd2d3dbSJunchao Zhang { 267fd2d3dbSJunchao Zhang PetscFunctionBegin; 277fd2d3dbSJunchao Zhang PetscValidPointer(type,2); 287fd2d3dbSJunchao Zhang *type = PETSC_MEMTYPE_HOST; 297fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_CUDA) 307fd2d3dbSJunchao Zhang if (PetscCUDAInitialized && data) { 317fd2d3dbSJunchao Zhang cudaError_t cerr; 327fd2d3dbSJunchao Zhang struct cudaPointerAttributes attr; 337fd2d3dbSJunchao Zhang enum cudaMemoryType mtype; 347fd2d3dbSJunchao Zhang cerr = cudaPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */ 357fd2d3dbSJunchao Zhang cudaGetLastError(); /* Reset the last error */ 367fd2d3dbSJunchao Zhang #if (CUDART_VERSION < 10000) 377fd2d3dbSJunchao Zhang mtype = attr.memoryType; 387fd2d3dbSJunchao Zhang #else 397fd2d3dbSJunchao Zhang mtype = attr.type; 407fd2d3dbSJunchao Zhang #endif 417fd2d3dbSJunchao Zhang if (cerr == cudaSuccess && mtype == cudaMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE; 427fd2d3dbSJunchao Zhang } 437fd2d3dbSJunchao Zhang #endif 447fd2d3dbSJunchao Zhang 457fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_HIP) 467fd2d3dbSJunchao Zhang if (PetscHIPInitialized && data) { 477fd2d3dbSJunchao Zhang hipError_t cerr; 487fd2d3dbSJunchao Zhang struct hipPointerAttribute_t attr; 497fd2d3dbSJunchao Zhang enum hipMemoryType mtype; 507fd2d3dbSJunchao Zhang cerr = hipPointerGetAttributes(&attr,data); /* Do not check error since before CUDA 11.0, passing a host pointer returns cudaErrorInvalidValue */ 517fd2d3dbSJunchao Zhang hipGetLastError(); /* Reset the last error */ 527fd2d3dbSJunchao Zhang mtype = attr.memoryType; 537fd2d3dbSJunchao Zhang if (cerr == hipSuccess && mtype == hipMemoryTypeDevice) *type = PETSC_MEMTYPE_DEVICE; 547fd2d3dbSJunchao Zhang } 557fd2d3dbSJunchao Zhang #endif 567fd2d3dbSJunchao Zhang PetscFunctionReturn(0); 577fd2d3dbSJunchao Zhang } 587fd2d3dbSJunchao Zhang 598af6ec1cSBarry Smith /*@ 6095fce210SBarry Smith PetscSFCreate - create a star forest communication context 6195fce210SBarry Smith 62d083f849SBarry Smith Collective 6395fce210SBarry Smith 6495fce210SBarry Smith Input Arguments: 6595fce210SBarry Smith . comm - communicator on which the star forest will operate 6695fce210SBarry Smith 6795fce210SBarry Smith Output Arguments: 6895fce210SBarry Smith . sf - new star forest context 6995fce210SBarry Smith 70dd5b3ca6SJunchao Zhang Options Database Keys: 71dd5b3ca6SJunchao Zhang + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default) 72dd5b3ca6SJunchao Zhang . -sf_type window -Use MPI-3 one-sided window for communication 73dd5b3ca6SJunchao Zhang - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication 74dd5b3ca6SJunchao Zhang 7595fce210SBarry Smith Level: intermediate 7695fce210SBarry Smith 77dd5b3ca6SJunchao Zhang Notes: 78dd5b3ca6SJunchao Zhang When one knows the communication graph is one of the predefined graph, such as MPI_Alltoall, MPI_Allgatherv, 79dd5b3ca6SJunchao Zhang MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special 80dd5b3ca6SJunchao Zhang SFs are optimized and they have better performance than general SFs. 81dd5b3ca6SJunchao Zhang 82dd5b3ca6SJunchao Zhang .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy() 8395fce210SBarry Smith @*/ 8495fce210SBarry Smith PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf) 8595fce210SBarry Smith { 8695fce210SBarry Smith PetscErrorCode ierr; 8795fce210SBarry Smith PetscSF b; 8895fce210SBarry Smith 8995fce210SBarry Smith PetscFunctionBegin; 9095fce210SBarry Smith PetscValidPointer(sf,2); 91607a6623SBarry Smith ierr = PetscSFInitializePackage();CHKERRQ(ierr); 9295fce210SBarry Smith 9373107ff1SLisandro Dalcin ierr = PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);CHKERRQ(ierr); 9495fce210SBarry Smith 9595fce210SBarry Smith b->nroots = -1; 9695fce210SBarry Smith b->nleaves = -1; 9729046d53SLisandro Dalcin b->minleaf = PETSC_MAX_INT; 9829046d53SLisandro Dalcin b->maxleaf = PETSC_MIN_INT; 9995fce210SBarry Smith b->nranks = -1; 10095fce210SBarry Smith b->rankorder = PETSC_TRUE; 10195fce210SBarry Smith b->ingroup = MPI_GROUP_NULL; 10295fce210SBarry Smith b->outgroup = MPI_GROUP_NULL; 10395fce210SBarry Smith b->graphset = PETSC_FALSE; 10420c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 10520c24465SJunchao Zhang b->use_gpu_aware_mpi = use_gpu_aware_mpi; 10620c24465SJunchao Zhang b->use_stream_aware_mpi = PETSC_FALSE; 10720c24465SJunchao Zhang b->use_default_stream = PETSC_TRUE; /* The assumption is true for PETSc internal use of SF */ 10827f636e8SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/ 10920c24465SJunchao Zhang b->backend = PETSCSF_BACKEND_KOKKOS; 11027f636e8SJunchao Zhang #elif defined(PETSC_HAVE_CUDA) 11127f636e8SJunchao Zhang b->backend = PETSCSF_BACKEND_CUDA; 11220c24465SJunchao Zhang #endif 11320c24465SJunchao Zhang #endif 11495fce210SBarry Smith *sf = b; 11595fce210SBarry Smith PetscFunctionReturn(0); 11695fce210SBarry Smith } 11795fce210SBarry Smith 11829046d53SLisandro Dalcin /*@ 11995fce210SBarry Smith PetscSFReset - Reset a star forest so that different sizes or neighbors can be used 12095fce210SBarry Smith 12195fce210SBarry Smith Collective 12295fce210SBarry Smith 12395fce210SBarry Smith Input Arguments: 12495fce210SBarry Smith . sf - star forest 12595fce210SBarry Smith 12695fce210SBarry Smith Level: advanced 12795fce210SBarry Smith 12895fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy() 12995fce210SBarry Smith @*/ 13095fce210SBarry Smith PetscErrorCode PetscSFReset(PetscSF sf) 13195fce210SBarry Smith { 13295fce210SBarry Smith PetscErrorCode ierr; 13395fce210SBarry Smith 13495fce210SBarry Smith PetscFunctionBegin; 13595fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 13679715d56SJed Brown if (sf->ops->Reset) {ierr = (*sf->ops->Reset)(sf);CHKERRQ(ierr);} 13729046d53SLisandro Dalcin sf->nroots = -1; 13829046d53SLisandro Dalcin sf->nleaves = -1; 13929046d53SLisandro Dalcin sf->minleaf = PETSC_MAX_INT; 14029046d53SLisandro Dalcin sf->maxleaf = PETSC_MIN_INT; 14195fce210SBarry Smith sf->mine = NULL; 14295fce210SBarry Smith sf->remote = NULL; 14329046d53SLisandro Dalcin sf->graphset = PETSC_FALSE; 14429046d53SLisandro Dalcin ierr = PetscFree(sf->mine_alloc);CHKERRQ(ierr); 14595fce210SBarry Smith ierr = PetscFree(sf->remote_alloc);CHKERRQ(ierr); 14621c688dcSJed Brown sf->nranks = -1; 14729046d53SLisandro Dalcin ierr = PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);CHKERRQ(ierr); 1487fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 14920c24465SJunchao Zhang for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);CHKERRQ(ierr);} 150eb02082bSJunchao Zhang #endif 15129046d53SLisandro Dalcin sf->degreeknown = PETSC_FALSE; 15295fce210SBarry Smith ierr = PetscFree(sf->degree);CHKERRQ(ierr); 15395fce210SBarry Smith if (sf->ingroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->ingroup);CHKERRQ(ierr);} 15495fce210SBarry Smith if (sf->outgroup != MPI_GROUP_NULL) {ierr = MPI_Group_free(&sf->outgroup);CHKERRQ(ierr);} 15595fce210SBarry Smith ierr = PetscSFDestroy(&sf->multi);CHKERRQ(ierr); 156dd5b3ca6SJunchao Zhang ierr = PetscLayoutDestroy(&sf->map);CHKERRQ(ierr); 15795fce210SBarry Smith sf->setupcalled = PETSC_FALSE; 15895fce210SBarry Smith PetscFunctionReturn(0); 15995fce210SBarry Smith } 16095fce210SBarry Smith 16195fce210SBarry Smith /*@C 16229046d53SLisandro Dalcin PetscSFSetType - Set the PetscSF communication implementation 16395fce210SBarry Smith 16495fce210SBarry Smith Collective on PetscSF 16595fce210SBarry Smith 16695fce210SBarry Smith Input Parameters: 16795fce210SBarry Smith + sf - the PetscSF context 16895fce210SBarry Smith - type - a known method 16995fce210SBarry Smith 17095fce210SBarry Smith Options Database Key: 17195fce210SBarry Smith . -sf_type <type> - Sets the method; use -help for a list 17270616304SStefano Zampini of available methods (for instance, window, basic, neighbor) 17395fce210SBarry Smith 17495fce210SBarry Smith Notes: 17595fce210SBarry Smith See "include/petscsf.h" for available methods (for instance) 17695fce210SBarry Smith + PETSCSFWINDOW - MPI-2/3 one-sided 17795fce210SBarry Smith - PETSCSFBASIC - basic implementation using MPI-1 two-sided 17895fce210SBarry Smith 17995fce210SBarry Smith Level: intermediate 18095fce210SBarry Smith 18195fce210SBarry Smith .seealso: PetscSFType, PetscSFCreate() 18295fce210SBarry Smith @*/ 18395fce210SBarry Smith PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type) 18495fce210SBarry Smith { 18595fce210SBarry Smith PetscErrorCode ierr,(*r)(PetscSF); 18695fce210SBarry Smith PetscBool match; 18795fce210SBarry Smith 18895fce210SBarry Smith PetscFunctionBegin; 18995fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 19095fce210SBarry Smith PetscValidCharPointer(type,2); 19195fce210SBarry Smith 19295fce210SBarry Smith ierr = PetscObjectTypeCompare((PetscObject)sf,type,&match);CHKERRQ(ierr); 19395fce210SBarry Smith if (match) PetscFunctionReturn(0); 19495fce210SBarry Smith 195adc40e5bSBarry Smith ierr = PetscFunctionListFind(PetscSFList,type,&r);CHKERRQ(ierr); 19695fce210SBarry Smith if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscSF type %s",type); 19729046d53SLisandro Dalcin /* Destroy the previous PetscSF implementation context */ 19829046d53SLisandro Dalcin if (sf->ops->Destroy) {ierr = (*(sf)->ops->Destroy)(sf);CHKERRQ(ierr);} 19995fce210SBarry Smith ierr = PetscMemzero(sf->ops,sizeof(*sf->ops));CHKERRQ(ierr); 20095fce210SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)sf,type);CHKERRQ(ierr); 20195fce210SBarry Smith ierr = (*r)(sf);CHKERRQ(ierr); 20295fce210SBarry Smith PetscFunctionReturn(0); 20395fce210SBarry Smith } 20495fce210SBarry Smith 20529046d53SLisandro Dalcin /*@C 20629046d53SLisandro Dalcin PetscSFGetType - Get the PetscSF communication implementation 20729046d53SLisandro Dalcin 20829046d53SLisandro Dalcin Not Collective 20929046d53SLisandro Dalcin 21029046d53SLisandro Dalcin Input Parameter: 21129046d53SLisandro Dalcin . sf - the PetscSF context 21229046d53SLisandro Dalcin 21329046d53SLisandro Dalcin Output Parameter: 21429046d53SLisandro Dalcin . type - the PetscSF type name 21529046d53SLisandro Dalcin 21629046d53SLisandro Dalcin Level: intermediate 21729046d53SLisandro Dalcin 21829046d53SLisandro Dalcin .seealso: PetscSFSetType(), PetscSFCreate() 21929046d53SLisandro Dalcin @*/ 22029046d53SLisandro Dalcin PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type) 22129046d53SLisandro Dalcin { 22229046d53SLisandro Dalcin PetscFunctionBegin; 22329046d53SLisandro Dalcin PetscValidHeaderSpecific(sf, PETSCSF_CLASSID,1); 22429046d53SLisandro Dalcin PetscValidPointer(type,2); 22529046d53SLisandro Dalcin *type = ((PetscObject)sf)->type_name; 22629046d53SLisandro Dalcin PetscFunctionReturn(0); 22729046d53SLisandro Dalcin } 22829046d53SLisandro Dalcin 229d36d33e4SMatthew G. Knepley /*@ 23095fce210SBarry Smith PetscSFDestroy - destroy star forest 23195fce210SBarry Smith 23295fce210SBarry Smith Collective 23395fce210SBarry Smith 23495fce210SBarry Smith Input Arguments: 23595fce210SBarry Smith . sf - address of star forest 23695fce210SBarry Smith 23795fce210SBarry Smith Level: intermediate 23895fce210SBarry Smith 23995fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFReset() 24095fce210SBarry Smith @*/ 24195fce210SBarry Smith PetscErrorCode PetscSFDestroy(PetscSF *sf) 24295fce210SBarry Smith { 24395fce210SBarry Smith PetscErrorCode ierr; 24495fce210SBarry Smith 24595fce210SBarry Smith PetscFunctionBegin; 24695fce210SBarry Smith if (!*sf) PetscFunctionReturn(0); 24795fce210SBarry Smith PetscValidHeaderSpecific((*sf),PETSCSF_CLASSID,1); 24829046d53SLisandro Dalcin if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; PetscFunctionReturn(0);} 24995fce210SBarry Smith ierr = PetscSFReset(*sf);CHKERRQ(ierr); 25095fce210SBarry Smith if ((*sf)->ops->Destroy) {ierr = (*(*sf)->ops->Destroy)(*sf);CHKERRQ(ierr);} 25195fce210SBarry Smith ierr = PetscHeaderDestroy(sf);CHKERRQ(ierr); 25295fce210SBarry Smith PetscFunctionReturn(0); 25395fce210SBarry Smith } 25495fce210SBarry Smith 255c4e6a40aSLawrence Mitchell static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf) 256c4e6a40aSLawrence Mitchell { 257c4e6a40aSLawrence Mitchell PetscInt i, nleaves; 258c4e6a40aSLawrence Mitchell PetscMPIInt size; 259c4e6a40aSLawrence Mitchell const PetscInt *ilocal; 260c4e6a40aSLawrence Mitchell const PetscSFNode *iremote; 261c4e6a40aSLawrence Mitchell PetscErrorCode ierr; 262c4e6a40aSLawrence Mitchell 263c4e6a40aSLawrence Mitchell PetscFunctionBegin; 26476bd3646SJed Brown if (!sf->graphset || !PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 265c4e6a40aSLawrence Mitchell ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 266c4e6a40aSLawrence Mitchell ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); 267c4e6a40aSLawrence Mitchell for (i = 0; i < nleaves; i++) { 268c4e6a40aSLawrence Mitchell const PetscInt rank = iremote[i].rank; 269c4e6a40aSLawrence Mitchell const PetscInt remote = iremote[i].index; 270c4e6a40aSLawrence Mitchell const PetscInt leaf = ilocal ? ilocal[i] : i; 271c4e6a40aSLawrence Mitchell if (rank < 0 || rank >= size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%D) for remote %D is invalid, should be in [0, %d)",rank,i,size); 272c4e6a40aSLawrence Mitchell if (remote < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%D) for remote %D is invalid, should be >= 0",remote,i); 273c4e6a40aSLawrence Mitchell if (leaf < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%D) for leaf %D is invalid, should be >= 0",leaf,i); 274c4e6a40aSLawrence Mitchell } 275c4e6a40aSLawrence Mitchell PetscFunctionReturn(0); 276c4e6a40aSLawrence Mitchell } 277c4e6a40aSLawrence Mitchell 27820c24465SJunchao Zhang 27920c24465SJunchao Zhang 28020c24465SJunchao Zhang 28195fce210SBarry Smith /*@ 28295fce210SBarry Smith PetscSFSetUp - set up communication structures 28395fce210SBarry Smith 28495fce210SBarry Smith Collective 28595fce210SBarry Smith 28695fce210SBarry Smith Input Arguments: 28795fce210SBarry Smith . sf - star forest communication object 28895fce210SBarry Smith 28995fce210SBarry Smith Level: beginner 29095fce210SBarry Smith 29195fce210SBarry Smith .seealso: PetscSFSetFromOptions(), PetscSFSetType() 29295fce210SBarry Smith @*/ 29395fce210SBarry Smith PetscErrorCode PetscSFSetUp(PetscSF sf) 29495fce210SBarry Smith { 29595fce210SBarry Smith PetscErrorCode ierr; 29695fce210SBarry Smith 29795fce210SBarry Smith PetscFunctionBegin; 29829046d53SLisandro Dalcin PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 29929046d53SLisandro Dalcin PetscSFCheckGraphSet(sf,1); 30095fce210SBarry Smith if (sf->setupcalled) PetscFunctionReturn(0); 30129046d53SLisandro Dalcin ierr = PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 30220c24465SJunchao Zhang ierr = PetscSFCheckGraphValid_Private(sf);CHKERRQ(ierr); 30320c24465SJunchao Zhang if (!((PetscObject)sf)->type_name) {ierr = PetscSFSetType(sf,PETSCSFBASIC);CHKERRQ(ierr);} /* Zero all sf->ops */ 30495fce210SBarry Smith if (sf->ops->SetUp) {ierr = (*sf->ops->SetUp)(sf);CHKERRQ(ierr);} 30520c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 30620c24465SJunchao Zhang if (sf->backend == PETSCSF_BACKEND_CUDA) { 30720c24465SJunchao Zhang sf->ops->Malloc = PetscSFMalloc_Cuda; 30820c24465SJunchao Zhang sf->ops->Free = PetscSFFree_Cuda; 30920c24465SJunchao Zhang } 31020c24465SJunchao Zhang #endif 31120c24465SJunchao Zhang 31220c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS) 31320c24465SJunchao Zhang if (sf->backend == PETSCSF_BACKEND_KOKKOS) { 31420c24465SJunchao Zhang sf->ops->Malloc = PetscSFMalloc_Kokkos; 31520c24465SJunchao Zhang sf->ops->Free = PetscSFFree_Kokkos; 31620c24465SJunchao Zhang } 31720c24465SJunchao Zhang #endif 31829046d53SLisandro Dalcin ierr = PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);CHKERRQ(ierr); 31995fce210SBarry Smith sf->setupcalled = PETSC_TRUE; 32095fce210SBarry Smith PetscFunctionReturn(0); 32195fce210SBarry Smith } 32295fce210SBarry Smith 3238af6ec1cSBarry Smith /*@ 32495fce210SBarry Smith PetscSFSetFromOptions - set PetscSF options using the options database 32595fce210SBarry Smith 32695fce210SBarry Smith Logically Collective 32795fce210SBarry Smith 32895fce210SBarry Smith Input Arguments: 32995fce210SBarry Smith . sf - star forest 33095fce210SBarry Smith 33195fce210SBarry Smith Options Database Keys: 33260263706SJed Brown + -sf_type - implementation type, see PetscSFSetType() 33351ccb202SJunchao Zhang . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise 334b85e67b7SJunchao Zhang . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also 335c2a741eeSJunchao Zhang use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true). 336c06a8e02SRichard Tran Mills If true, this option only works with -use_gpu_aware_mpi 1. 33720c24465SJunchao Zhang . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false). 338c06a8e02SRichard Tran Mills If true, this option only works with -use_gpu_aware_mpi 1. 33995fce210SBarry Smith 34020c24465SJunchao Zhang - -sf_backend cuda | kokkos -Select the device backend SF uses. Currently SF has two backends: cuda and Kokkos. 34120c24465SJunchao Zhang On CUDA devices, one can choose cuda or kokkos with the default being cuda. On other devices, 34220c24465SJunchao Zhang the only available is kokkos. 34320c24465SJunchao Zhang 34495fce210SBarry Smith Level: intermediate 34595fce210SBarry Smith @*/ 34695fce210SBarry Smith PetscErrorCode PetscSFSetFromOptions(PetscSF sf) 34795fce210SBarry Smith { 34895fce210SBarry Smith PetscSFType deft; 34995fce210SBarry Smith char type[256]; 35095fce210SBarry Smith PetscErrorCode ierr; 35195fce210SBarry Smith PetscBool flg; 35295fce210SBarry Smith 35395fce210SBarry Smith PetscFunctionBegin; 35495fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 35595fce210SBarry Smith ierr = PetscObjectOptionsBegin((PetscObject)sf);CHKERRQ(ierr); 35695fce210SBarry Smith deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC; 35729046d53SLisandro Dalcin ierr = PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);CHKERRQ(ierr); 35895fce210SBarry Smith ierr = PetscSFSetType(sf,flg ? type : deft);CHKERRQ(ierr); 35995fce210SBarry Smith ierr = PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);CHKERRQ(ierr); 3607fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 36120c24465SJunchao Zhang { 36220c24465SJunchao Zhang char backendstr[32] = {0}; 36320c24465SJunchao Zhang PetscBool isCuda = PETSC_FALSE,isKokkos = PETSC_FALSE,set; 36420c24465SJunchao Zhang /* Change the defaults set in PetscSFCreate() with command line options */ 365cd620004SJunchao Zhang ierr = PetscOptionsBool("-sf_use_default_stream","Assume SF's input and output root/leafdata is computed on the default stream","PetscSFSetFromOptions",sf->use_default_stream,&sf->use_default_stream,NULL);CHKERRQ(ierr); 366b85e67b7SJunchao Zhang ierr = PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);CHKERRQ(ierr); 36720c24465SJunchao Zhang ierr = PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);CHKERRQ(ierr); 36820c24465SJunchao Zhang ierr = PetscStrcasecmp("cuda",backendstr,&isCuda);CHKERRQ(ierr); 36920c24465SJunchao Zhang ierr = PetscStrcasecmp("kokkos",backendstr,&isKokkos);CHKERRQ(ierr); 37020c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 37120c24465SJunchao Zhang if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA; 37220c24465SJunchao Zhang else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS; 37320c24465SJunchao Zhang else if (set) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported on cuda devices. You may choose cuda or kokkos (if installed)", backendstr); 37420c24465SJunchao Zhang #elif defined(PETSC_HAVE_KOKKOS) 37520c24465SJunchao Zhang if (set && !isKokkos) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-sf_backend %s is not supported. You can only choose kokkos", backendstr); 37620c24465SJunchao Zhang #endif 37720c24465SJunchao Zhang } 378c2a741eeSJunchao Zhang #endif 379e55864a3SBarry Smith if (sf->ops->SetFromOptions) {ierr = (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);CHKERRQ(ierr);} 38095fce210SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 38195fce210SBarry Smith PetscFunctionReturn(0); 38295fce210SBarry Smith } 38395fce210SBarry Smith 38429046d53SLisandro Dalcin /*@ 38595fce210SBarry Smith PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order 38695fce210SBarry Smith 38795fce210SBarry Smith Logically Collective 38895fce210SBarry Smith 38995fce210SBarry Smith Input Arguments: 39095fce210SBarry Smith + sf - star forest 39195fce210SBarry Smith - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic) 39295fce210SBarry Smith 39395fce210SBarry Smith Level: advanced 39495fce210SBarry Smith 39595fce210SBarry Smith .seealso: PetscSFGatherBegin(), PetscSFScatterBegin() 39695fce210SBarry Smith @*/ 39795fce210SBarry Smith PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg) 39895fce210SBarry Smith { 39995fce210SBarry Smith PetscFunctionBegin; 40095fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 40195fce210SBarry Smith PetscValidLogicalCollectiveBool(sf,flg,2); 40295fce210SBarry Smith if (sf->multi) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Rank ordering must be set before first call to PetscSFGatherBegin() or PetscSFScatterBegin()"); 40395fce210SBarry Smith sf->rankorder = flg; 40495fce210SBarry Smith PetscFunctionReturn(0); 40595fce210SBarry Smith } 40695fce210SBarry Smith 4078af6ec1cSBarry Smith /*@ 40895fce210SBarry Smith PetscSFSetGraph - Set a parallel star forest 40995fce210SBarry Smith 41095fce210SBarry Smith Collective 41195fce210SBarry Smith 41295fce210SBarry Smith Input Arguments: 41395fce210SBarry Smith + sf - star forest 41495fce210SBarry Smith . nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 41595fce210SBarry Smith . nleaves - number of leaf vertices on the current process, each of these references a root on any process 416c4e6a40aSLawrence Mitchell . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced 417c4e6a40aSLawrence Mitchell during setup in debug mode) 41895fce210SBarry Smith . localmode - copy mode for ilocal 419c4e6a40aSLawrence Mitchell . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced 420c4e6a40aSLawrence Mitchell during setup in debug mode) 42195fce210SBarry Smith - remotemode - copy mode for iremote 42295fce210SBarry Smith 42395fce210SBarry Smith Level: intermediate 42495fce210SBarry Smith 42595452b02SPatrick Sanan Notes: 42695452b02SPatrick Sanan In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode 42738ab3f8aSBarry Smith 4282ad1e87fSLisandro Dalcin Developers Note: Local indices which are the identity permutation in the range [0,nleaves) are discarded as they 4292ad1e87fSLisandro Dalcin encode contiguous storage. In such case, if localmode is PETSC_OWN_POINTER, the memory is deallocated as it is not 4302ad1e87fSLisandro Dalcin needed 4312ad1e87fSLisandro Dalcin 432c4e6a40aSLawrence Mitchell Developers Note: This object does not necessarily encode a true star forest in the graph theoretic sense, since leaf 433c4e6a40aSLawrence Mitchell indices are not required to be unique. Some functions, however, rely on unique leaf indices (checked in debug mode). 434c4e6a40aSLawrence Mitchell 43595fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph() 43695fce210SBarry Smith @*/ 43795fce210SBarry Smith PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,const PetscInt *ilocal,PetscCopyMode localmode,const PetscSFNode *iremote,PetscCopyMode remotemode) 43895fce210SBarry Smith { 43995fce210SBarry Smith PetscErrorCode ierr; 44095fce210SBarry Smith 44195fce210SBarry Smith PetscFunctionBegin; 44295fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 44329046d53SLisandro Dalcin if (nleaves > 0 && ilocal) PetscValidIntPointer(ilocal,4); 44429046d53SLisandro Dalcin if (nleaves > 0) PetscValidPointer(iremote,6); 44529046d53SLisandro Dalcin if (nroots < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nroots %D, cannot be negative",nroots); 44695fce210SBarry Smith if (nleaves < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nleaves %D, cannot be negative",nleaves); 44729046d53SLisandro Dalcin 4482a67d2daSStefano Zampini if (sf->nroots >= 0) { /* Reset only if graph already set */ 44995fce210SBarry Smith ierr = PetscSFReset(sf);CHKERRQ(ierr); 4502a67d2daSStefano Zampini } 4512a67d2daSStefano Zampini 45229046d53SLisandro Dalcin ierr = PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 45329046d53SLisandro Dalcin 45495fce210SBarry Smith sf->nroots = nroots; 45595fce210SBarry Smith sf->nleaves = nleaves; 45629046d53SLisandro Dalcin 45729046d53SLisandro Dalcin if (nleaves && ilocal) { 45821c688dcSJed Brown PetscInt i; 45929046d53SLisandro Dalcin PetscInt minleaf = PETSC_MAX_INT; 46029046d53SLisandro Dalcin PetscInt maxleaf = PETSC_MIN_INT; 4612ad1e87fSLisandro Dalcin int contiguous = 1; 46229046d53SLisandro Dalcin for (i=0; i<nleaves; i++) { 46329046d53SLisandro Dalcin minleaf = PetscMin(minleaf,ilocal[i]); 46429046d53SLisandro Dalcin maxleaf = PetscMax(maxleaf,ilocal[i]); 4652ad1e87fSLisandro Dalcin contiguous &= (ilocal[i] == i); 46629046d53SLisandro Dalcin } 46729046d53SLisandro Dalcin sf->minleaf = minleaf; 46829046d53SLisandro Dalcin sf->maxleaf = maxleaf; 4692ad1e87fSLisandro Dalcin if (contiguous) { 4702ad1e87fSLisandro Dalcin if (localmode == PETSC_OWN_POINTER) { 4712ad1e87fSLisandro Dalcin ierr = PetscFree(ilocal);CHKERRQ(ierr); 4722ad1e87fSLisandro Dalcin } 4732ad1e87fSLisandro Dalcin ilocal = NULL; 4742ad1e87fSLisandro Dalcin } 47529046d53SLisandro Dalcin } else { 47629046d53SLisandro Dalcin sf->minleaf = 0; 47729046d53SLisandro Dalcin sf->maxleaf = nleaves - 1; 47829046d53SLisandro Dalcin } 47929046d53SLisandro Dalcin 48029046d53SLisandro Dalcin if (ilocal) { 48195fce210SBarry Smith switch (localmode) { 48295fce210SBarry Smith case PETSC_COPY_VALUES: 483785e854fSJed Brown ierr = PetscMalloc1(nleaves,&sf->mine_alloc);CHKERRQ(ierr); 484580bdb30SBarry Smith ierr = PetscArraycpy(sf->mine_alloc,ilocal,nleaves);CHKERRQ(ierr); 48595fce210SBarry Smith sf->mine = sf->mine_alloc; 48695fce210SBarry Smith break; 48795fce210SBarry Smith case PETSC_OWN_POINTER: 48895fce210SBarry Smith sf->mine_alloc = (PetscInt*)ilocal; 48995fce210SBarry Smith sf->mine = sf->mine_alloc; 49095fce210SBarry Smith break; 49195fce210SBarry Smith case PETSC_USE_POINTER: 49229046d53SLisandro Dalcin sf->mine_alloc = NULL; 49395fce210SBarry Smith sf->mine = (PetscInt*)ilocal; 49495fce210SBarry Smith break; 49595fce210SBarry Smith default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown localmode"); 49695fce210SBarry Smith } 49795fce210SBarry Smith } 49829046d53SLisandro Dalcin 49995fce210SBarry Smith switch (remotemode) { 50095fce210SBarry Smith case PETSC_COPY_VALUES: 501785e854fSJed Brown ierr = PetscMalloc1(nleaves,&sf->remote_alloc);CHKERRQ(ierr); 502580bdb30SBarry Smith ierr = PetscArraycpy(sf->remote_alloc,iremote,nleaves);CHKERRQ(ierr); 50395fce210SBarry Smith sf->remote = sf->remote_alloc; 50495fce210SBarry Smith break; 50595fce210SBarry Smith case PETSC_OWN_POINTER: 50695fce210SBarry Smith sf->remote_alloc = (PetscSFNode*)iremote; 50795fce210SBarry Smith sf->remote = sf->remote_alloc; 50895fce210SBarry Smith break; 50995fce210SBarry Smith case PETSC_USE_POINTER: 51029046d53SLisandro Dalcin sf->remote_alloc = NULL; 51195fce210SBarry Smith sf->remote = (PetscSFNode*)iremote; 51295fce210SBarry Smith break; 51395fce210SBarry Smith default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_OUTOFRANGE,"Unknown remotemode"); 51495fce210SBarry Smith } 51595fce210SBarry Smith 516563ffbabSMatthew G. Knepley ierr = PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);CHKERRQ(ierr); 51729046d53SLisandro Dalcin sf->graphset = PETSC_TRUE; 51895fce210SBarry Smith PetscFunctionReturn(0); 51995fce210SBarry Smith } 52095fce210SBarry Smith 52129046d53SLisandro Dalcin /*@ 522dd5b3ca6SJunchao Zhang PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern 523dd5b3ca6SJunchao Zhang 524dd5b3ca6SJunchao Zhang Collective 525dd5b3ca6SJunchao Zhang 526dd5b3ca6SJunchao Zhang Input Parameters: 527dd5b3ca6SJunchao Zhang + sf - The PetscSF 528dd5b3ca6SJunchao Zhang . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL) 529dd5b3ca6SJunchao Zhang - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL 530dd5b3ca6SJunchao Zhang 531dd5b3ca6SJunchao Zhang Notes: 532dd5b3ca6SJunchao Zhang It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map. 533dd5b3ca6SJunchao Zhang n and N are local and global sizes of x respectively. 534dd5b3ca6SJunchao Zhang 535dd5b3ca6SJunchao Zhang With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph that if one does Bcast on it, it will copy x to 536dd5b3ca6SJunchao Zhang sequential vectors y on all ranks. 537dd5b3ca6SJunchao Zhang 538dd5b3ca6SJunchao Zhang With PETSCSF_PATTERN_GATHER, the routine creates a graph that if one does Bcast on it, it will copy x to a 539dd5b3ca6SJunchao Zhang sequential vector y on rank 0. 540dd5b3ca6SJunchao Zhang 541dd5b3ca6SJunchao Zhang In above cases, entries of x are roots and entries of y are leaves. 542dd5b3ca6SJunchao Zhang 543dd5b3ca6SJunchao Zhang With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is size of sf's communicator. The routine 544dd5b3ca6SJunchao Zhang creates a graph that every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i 545dd5b3ca6SJunchao Zhang of rank j. Here 0 <=i,j<NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that it does 546dd5b3ca6SJunchao Zhang not mean one can not send multiple items. One just needs to create a new MPI datatype for the mulptiple data 547dd5b3ca6SJunchao Zhang items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines. 548dd5b3ca6SJunchao Zhang 549dd5b3ca6SJunchao Zhang In this case, roots and leaves are symmetric. 550dd5b3ca6SJunchao Zhang 551dd5b3ca6SJunchao Zhang Level: intermediate 552dd5b3ca6SJunchao Zhang @*/ 553dd5b3ca6SJunchao Zhang PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern) 554dd5b3ca6SJunchao Zhang { 555dd5b3ca6SJunchao Zhang MPI_Comm comm; 556dd5b3ca6SJunchao Zhang PetscInt n,N,res[2]; 557dd5b3ca6SJunchao Zhang PetscMPIInt rank,size; 558dd5b3ca6SJunchao Zhang PetscSFType type; 559dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 560dd5b3ca6SJunchao Zhang 561dd5b3ca6SJunchao Zhang PetscFunctionBegin; 562dd5b3ca6SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf, &comm);CHKERRQ(ierr); 563dd5b3ca6SJunchao Zhang if (pattern < PETSCSF_PATTERN_ALLGATHER || pattern > PETSCSF_PATTERN_ALLTOALL) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Unsupported PetscSFPattern %D\n",pattern); 564dd5b3ca6SJunchao Zhang ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 565dd5b3ca6SJunchao Zhang ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 566dd5b3ca6SJunchao Zhang 567dd5b3ca6SJunchao Zhang if (pattern == PETSCSF_PATTERN_ALLTOALL) { 568dd5b3ca6SJunchao Zhang type = PETSCSFALLTOALL; 569dd5b3ca6SJunchao Zhang ierr = PetscLayoutCreate(comm,&sf->map);CHKERRQ(ierr); 570dd5b3ca6SJunchao Zhang ierr = PetscLayoutSetLocalSize(sf->map,size);CHKERRQ(ierr); 571dd5b3ca6SJunchao Zhang ierr = PetscLayoutSetSize(sf->map,((PetscInt)size)*size);CHKERRQ(ierr); 572dd5b3ca6SJunchao Zhang ierr = PetscLayoutSetUp(sf->map);CHKERRQ(ierr); 573dd5b3ca6SJunchao Zhang } else { 574dd5b3ca6SJunchao Zhang ierr = PetscLayoutGetLocalSize(map,&n);CHKERRQ(ierr); 575dd5b3ca6SJunchao Zhang ierr = PetscLayoutGetSize(map,&N);CHKERRQ(ierr); 576dd5b3ca6SJunchao Zhang res[0] = n; 577dd5b3ca6SJunchao Zhang res[1] = -n; 578dd5b3ca6SJunchao Zhang /* Check if n are same over all ranks so that we can optimize it */ 579dd5b3ca6SJunchao Zhang ierr = MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr); 580dd5b3ca6SJunchao Zhang if (res[0] == -res[1]) { /* same n */ 581dd5b3ca6SJunchao Zhang type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER; 582dd5b3ca6SJunchao Zhang } else { 583dd5b3ca6SJunchao Zhang type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV; 584dd5b3ca6SJunchao Zhang } 585dd5b3ca6SJunchao Zhang ierr = PetscLayoutReference(map,&sf->map);CHKERRQ(ierr); 586dd5b3ca6SJunchao Zhang } 587dd5b3ca6SJunchao Zhang ierr = PetscSFSetType(sf,type);CHKERRQ(ierr); 588dd5b3ca6SJunchao Zhang 589dd5b3ca6SJunchao Zhang sf->pattern = pattern; 590dd5b3ca6SJunchao Zhang sf->mine = NULL; /* Contiguous */ 591dd5b3ca6SJunchao Zhang 592dd5b3ca6SJunchao Zhang /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called. 593dd5b3ca6SJunchao Zhang Also set other easy stuff. 594dd5b3ca6SJunchao Zhang */ 595dd5b3ca6SJunchao Zhang if (pattern == PETSCSF_PATTERN_ALLGATHER) { 596dd5b3ca6SJunchao Zhang sf->nleaves = N; 597dd5b3ca6SJunchao Zhang sf->nroots = n; 598dd5b3ca6SJunchao Zhang sf->nranks = size; 599dd5b3ca6SJunchao Zhang sf->minleaf = 0; 600dd5b3ca6SJunchao Zhang sf->maxleaf = N - 1; 601dd5b3ca6SJunchao Zhang } else if (pattern == PETSCSF_PATTERN_GATHER) { 602dd5b3ca6SJunchao Zhang sf->nleaves = rank ? 0 : N; 603dd5b3ca6SJunchao Zhang sf->nroots = n; 604dd5b3ca6SJunchao Zhang sf->nranks = rank ? 0 : size; 605dd5b3ca6SJunchao Zhang sf->minleaf = 0; 606dd5b3ca6SJunchao Zhang sf->maxleaf = rank ? -1 : N - 1; 607dd5b3ca6SJunchao Zhang } else if (pattern == PETSCSF_PATTERN_ALLTOALL) { 608dd5b3ca6SJunchao Zhang sf->nleaves = size; 609dd5b3ca6SJunchao Zhang sf->nroots = size; 610dd5b3ca6SJunchao Zhang sf->nranks = size; 611dd5b3ca6SJunchao Zhang sf->minleaf = 0; 612dd5b3ca6SJunchao Zhang sf->maxleaf = size - 1; 613dd5b3ca6SJunchao Zhang } 614dd5b3ca6SJunchao Zhang sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */ 615dd5b3ca6SJunchao Zhang sf->graphset = PETSC_TRUE; 616dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 617dd5b3ca6SJunchao Zhang } 618dd5b3ca6SJunchao Zhang 619dd5b3ca6SJunchao Zhang /*@ 62095fce210SBarry Smith PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map 62195fce210SBarry Smith 62295fce210SBarry Smith Collective 62395fce210SBarry Smith 62495fce210SBarry Smith Input Arguments: 625dd5b3ca6SJunchao Zhang 62695fce210SBarry Smith . sf - star forest to invert 62795fce210SBarry Smith 62895fce210SBarry Smith Output Arguments: 62995fce210SBarry Smith . isf - inverse of sf 63095fce210SBarry Smith Level: advanced 63195fce210SBarry Smith 63295fce210SBarry Smith Notes: 63395fce210SBarry Smith All roots must have degree 1. 63495fce210SBarry Smith 63595fce210SBarry Smith The local space may be a permutation, but cannot be sparse. 63695fce210SBarry Smith 63795fce210SBarry Smith .seealso: PetscSFSetGraph() 63895fce210SBarry Smith @*/ 63995fce210SBarry Smith PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf) 64095fce210SBarry Smith { 64195fce210SBarry Smith PetscErrorCode ierr; 64295fce210SBarry Smith PetscMPIInt rank; 64395fce210SBarry Smith PetscInt i,nroots,nleaves,maxlocal,count,*newilocal; 64495fce210SBarry Smith const PetscInt *ilocal; 64595fce210SBarry Smith PetscSFNode *roots,*leaves; 64695fce210SBarry Smith 64795fce210SBarry Smith PetscFunctionBegin; 64829046d53SLisandro Dalcin PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 64929046d53SLisandro Dalcin PetscSFCheckGraphSet(sf,1); 65029046d53SLisandro Dalcin PetscValidPointer(isf,2); 65129046d53SLisandro Dalcin 65295fce210SBarry Smith ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 65329046d53SLisandro Dalcin maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 65429046d53SLisandro Dalcin 65529046d53SLisandro Dalcin ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 656ae9aee6dSMatthew G. Knepley ierr = PetscMalloc2(nroots,&roots,maxlocal,&leaves);CHKERRQ(ierr); 657ae9aee6dSMatthew G. Knepley for (i=0; i<maxlocal; i++) { 65895fce210SBarry Smith leaves[i].rank = rank; 65995fce210SBarry Smith leaves[i].index = i; 66095fce210SBarry Smith } 66195fce210SBarry Smith for (i=0; i <nroots; i++) { 66295fce210SBarry Smith roots[i].rank = -1; 66395fce210SBarry Smith roots[i].index = -1; 66495fce210SBarry Smith } 6658bfbc91cSJed Brown ierr = PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 6668bfbc91cSJed Brown ierr = PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPIU_REPLACE);CHKERRQ(ierr); 66795fce210SBarry Smith 66895fce210SBarry Smith /* Check whether our leaves are sparse */ 66995fce210SBarry Smith for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++; 67095fce210SBarry Smith if (count == nroots) newilocal = NULL; 67195fce210SBarry Smith else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */ 672785e854fSJed Brown ierr = PetscMalloc1(count,&newilocal);CHKERRQ(ierr); 67395fce210SBarry Smith for (i=0,count=0; i<nroots; i++) { 67495fce210SBarry Smith if (roots[i].rank >= 0) { 67595fce210SBarry Smith newilocal[count] = i; 67695fce210SBarry Smith roots[count].rank = roots[i].rank; 67795fce210SBarry Smith roots[count].index = roots[i].index; 67895fce210SBarry Smith count++; 67995fce210SBarry Smith } 68095fce210SBarry Smith } 68195fce210SBarry Smith } 68295fce210SBarry Smith 68395fce210SBarry Smith ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);CHKERRQ(ierr); 68495fce210SBarry Smith ierr = PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);CHKERRQ(ierr); 68595fce210SBarry Smith ierr = PetscFree2(roots,leaves);CHKERRQ(ierr); 68695fce210SBarry Smith PetscFunctionReturn(0); 68795fce210SBarry Smith } 68895fce210SBarry Smith 68995fce210SBarry Smith /*@ 69095fce210SBarry Smith PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph 69195fce210SBarry Smith 69295fce210SBarry Smith Collective 69395fce210SBarry Smith 69495fce210SBarry Smith Input Arguments: 69595fce210SBarry Smith + sf - communication object to duplicate 69695fce210SBarry Smith - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption) 69795fce210SBarry Smith 69895fce210SBarry Smith Output Arguments: 69995fce210SBarry Smith . newsf - new communication object 70095fce210SBarry Smith 70195fce210SBarry Smith Level: beginner 70295fce210SBarry Smith 70395fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph() 70495fce210SBarry Smith @*/ 70595fce210SBarry Smith PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf) 70695fce210SBarry Smith { 70729046d53SLisandro Dalcin PetscSFType type; 70895fce210SBarry Smith PetscErrorCode ierr; 70995fce210SBarry Smith 71095fce210SBarry Smith PetscFunctionBegin; 71129046d53SLisandro Dalcin PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 71229046d53SLisandro Dalcin PetscValidLogicalCollectiveEnum(sf,opt,2); 71329046d53SLisandro Dalcin PetscValidPointer(newsf,3); 71495fce210SBarry Smith ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);CHKERRQ(ierr); 71529046d53SLisandro Dalcin ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 71629046d53SLisandro Dalcin if (type) {ierr = PetscSFSetType(*newsf,type);CHKERRQ(ierr);} 71795fce210SBarry Smith if (opt == PETSCSF_DUPLICATE_GRAPH) { 718dd5b3ca6SJunchao Zhang PetscSFCheckGraphSet(sf,1); 719dd5b3ca6SJunchao Zhang if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 72095fce210SBarry Smith PetscInt nroots,nleaves; 72195fce210SBarry Smith const PetscInt *ilocal; 72295fce210SBarry Smith const PetscSFNode *iremote; 72395fce210SBarry Smith ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 72495fce210SBarry Smith ierr = PetscSFSetGraph(*newsf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);CHKERRQ(ierr); 725dd5b3ca6SJunchao Zhang } else { 726dd5b3ca6SJunchao Zhang ierr = PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);CHKERRQ(ierr); 727dd5b3ca6SJunchao Zhang } 72895fce210SBarry Smith } 72920c24465SJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 73020c24465SJunchao Zhang (*newsf)->backend = sf->backend; 73120c24465SJunchao Zhang (*newsf)->use_default_stream = sf->use_default_stream; 73220c24465SJunchao Zhang (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi; 73320c24465SJunchao Zhang (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi; 73420c24465SJunchao Zhang #endif 73529046d53SLisandro Dalcin if (sf->ops->Duplicate) {ierr = (*sf->ops->Duplicate)(sf,opt,*newsf);CHKERRQ(ierr);} 73620c24465SJunchao Zhang /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */ 73795fce210SBarry Smith PetscFunctionReturn(0); 73895fce210SBarry Smith } 73995fce210SBarry Smith 74095fce210SBarry Smith /*@C 74195fce210SBarry Smith PetscSFGetGraph - Get the graph specifying a parallel star forest 74295fce210SBarry Smith 74395fce210SBarry Smith Not Collective 74495fce210SBarry Smith 74595fce210SBarry Smith Input Arguments: 74695fce210SBarry Smith . sf - star forest 74795fce210SBarry Smith 74895fce210SBarry Smith Output Arguments: 74995fce210SBarry Smith + nroots - number of root vertices on the current process (these are possible targets for other process to attach leaves) 75095fce210SBarry Smith . nleaves - number of leaf vertices on the current process, each of these references a root on any process 75195fce210SBarry Smith . ilocal - locations of leaves in leafdata buffers 75295fce210SBarry Smith - iremote - remote locations of root vertices for each leaf on the current process 75395fce210SBarry Smith 754373e0d91SLisandro Dalcin Notes: 755373e0d91SLisandro Dalcin We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet 756373e0d91SLisandro Dalcin 757245d9833Sprj- When called from Fortran, the returned iremote array is a copy and must be deallocated after use. Consequently, if you 758ca797d7aSLawrence Mitchell want to update the graph, you must call PetscSFSetGraph after modifying the iremote array. 759ca797d7aSLawrence Mitchell 76095fce210SBarry Smith Level: intermediate 76195fce210SBarry Smith 76295fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph() 76395fce210SBarry Smith @*/ 76495fce210SBarry Smith PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 76595fce210SBarry Smith { 766b8dee149SJunchao Zhang PetscErrorCode ierr; 76795fce210SBarry Smith 76895fce210SBarry Smith PetscFunctionBegin; 76995fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 770b8dee149SJunchao Zhang if (sf->ops->GetGraph) { 771b8dee149SJunchao Zhang ierr = (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);CHKERRQ(ierr); 772b8dee149SJunchao Zhang } else { 77395fce210SBarry Smith if (nroots) *nroots = sf->nroots; 77495fce210SBarry Smith if (nleaves) *nleaves = sf->nleaves; 77595fce210SBarry Smith if (ilocal) *ilocal = sf->mine; 77695fce210SBarry Smith if (iremote) *iremote = sf->remote; 777b8dee149SJunchao Zhang } 77895fce210SBarry Smith PetscFunctionReturn(0); 77995fce210SBarry Smith } 78095fce210SBarry Smith 78129046d53SLisandro Dalcin /*@ 78295fce210SBarry Smith PetscSFGetLeafRange - Get the active leaf ranges 78395fce210SBarry Smith 78495fce210SBarry Smith Not Collective 78595fce210SBarry Smith 78695fce210SBarry Smith Input Arguments: 78795fce210SBarry Smith . sf - star forest 78895fce210SBarry Smith 78995fce210SBarry Smith Output Arguments: 790dd5b3ca6SJunchao Zhang + minleaf - minimum active leaf on this process. Return 0 if there are no leaves. 791dd5b3ca6SJunchao Zhang - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves. 79295fce210SBarry Smith 79395fce210SBarry Smith Level: developer 79495fce210SBarry Smith 79595fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph() 79695fce210SBarry Smith @*/ 79795fce210SBarry Smith PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf) 79895fce210SBarry Smith { 79995fce210SBarry Smith PetscFunctionBegin; 80095fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 80129046d53SLisandro Dalcin PetscSFCheckGraphSet(sf,1); 80295fce210SBarry Smith if (minleaf) *minleaf = sf->minleaf; 80395fce210SBarry Smith if (maxleaf) *maxleaf = sf->maxleaf; 80495fce210SBarry Smith PetscFunctionReturn(0); 80595fce210SBarry Smith } 80695fce210SBarry Smith 80795fce210SBarry Smith /*@C 808fe2efc57SMark PetscSFViewFromOptions - View from Options 809fe2efc57SMark 810fe2efc57SMark Collective on PetscSF 811fe2efc57SMark 812fe2efc57SMark Input Parameters: 813fe2efc57SMark + A - the star forest 814736c3998SJose E. Roman . obj - Optional object 815736c3998SJose E. Roman - name - command line option 816fe2efc57SMark 817fe2efc57SMark Level: intermediate 818fe2efc57SMark .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate() 819fe2efc57SMark @*/ 820fe2efc57SMark PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[]) 821fe2efc57SMark { 822fe2efc57SMark PetscErrorCode ierr; 823fe2efc57SMark 824fe2efc57SMark PetscFunctionBegin; 825fe2efc57SMark PetscValidHeaderSpecific(A,PETSCSF_CLASSID,1); 826fe2efc57SMark ierr = PetscObjectViewFromOptions((PetscObject)A,obj,name);CHKERRQ(ierr); 827fe2efc57SMark PetscFunctionReturn(0); 828fe2efc57SMark } 829fe2efc57SMark 830fe2efc57SMark /*@C 83195fce210SBarry Smith PetscSFView - view a star forest 83295fce210SBarry Smith 83395fce210SBarry Smith Collective 83495fce210SBarry Smith 83595fce210SBarry Smith Input Arguments: 83695fce210SBarry Smith + sf - star forest 83795fce210SBarry Smith - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD 83895fce210SBarry Smith 83995fce210SBarry Smith Level: beginner 84095fce210SBarry Smith 84195fce210SBarry Smith .seealso: PetscSFCreate(), PetscSFSetGraph() 84295fce210SBarry Smith @*/ 84395fce210SBarry Smith PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer) 84495fce210SBarry Smith { 84595fce210SBarry Smith PetscErrorCode ierr; 84695fce210SBarry Smith PetscBool iascii; 84795fce210SBarry Smith PetscViewerFormat format; 84895fce210SBarry Smith 84995fce210SBarry Smith PetscFunctionBegin; 85095fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 85195fce210SBarry Smith if (!viewer) {ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);CHKERRQ(ierr);} 85295fce210SBarry Smith PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 85395fce210SBarry Smith PetscCheckSameComm(sf,1,viewer,2); 85480153354SVaclav Hapla if (sf->graphset) {ierr = PetscSFSetUp(sf);CHKERRQ(ierr);} 85595fce210SBarry Smith ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 85695fce210SBarry Smith if (iascii) { 85795fce210SBarry Smith PetscMPIInt rank; 85881bfa7aaSJed Brown PetscInt ii,i,j; 85995fce210SBarry Smith 860dae58748SBarry Smith ierr = PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);CHKERRQ(ierr); 86195fce210SBarry Smith ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 86295fce210SBarry Smith if (sf->ops->View) {ierr = (*sf->ops->View)(sf,viewer);CHKERRQ(ierr);} 863dd5b3ca6SJunchao Zhang if (sf->pattern == PETSCSF_PATTERN_GENERAL) { 86480153354SVaclav Hapla if (!sf->graphset) { 86580153354SVaclav Hapla ierr = PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");CHKERRQ(ierr); 86680153354SVaclav Hapla ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 86780153354SVaclav Hapla PetscFunctionReturn(0); 86880153354SVaclav Hapla } 86995fce210SBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 8701575c14dSBarry Smith ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 87195fce210SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,sf->nroots,sf->nleaves,sf->nranks);CHKERRQ(ierr); 87295fce210SBarry Smith for (i=0; i<sf->nleaves; i++) { 87395fce210SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);CHKERRQ(ierr); 87495fce210SBarry Smith } 87595fce210SBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 87695fce210SBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 87795fce210SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 87881bfa7aaSJed Brown PetscMPIInt *tmpranks,*perm; 87981bfa7aaSJed Brown ierr = PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);CHKERRQ(ierr); 880580bdb30SBarry Smith ierr = PetscArraycpy(tmpranks,sf->ranks,sf->nranks);CHKERRQ(ierr); 88181bfa7aaSJed Brown for (i=0; i<sf->nranks; i++) perm[i] = i; 88281bfa7aaSJed Brown ierr = PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);CHKERRQ(ierr); 88395fce210SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);CHKERRQ(ierr); 88481bfa7aaSJed Brown for (ii=0; ii<sf->nranks; ii++) { 88581bfa7aaSJed Brown i = perm[ii]; 8867904a332SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %D edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);CHKERRQ(ierr); 88795fce210SBarry Smith for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) { 88895fce210SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- %D\n",rank,sf->rmine[j],sf->rremote[j]);CHKERRQ(ierr); 88995fce210SBarry Smith } 89095fce210SBarry Smith } 89181bfa7aaSJed Brown ierr = PetscFree2(tmpranks,perm);CHKERRQ(ierr); 89295fce210SBarry Smith } 89395fce210SBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 8941575c14dSBarry Smith ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 895dd5b3ca6SJunchao Zhang } 89695fce210SBarry Smith ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 89795fce210SBarry Smith } 89895fce210SBarry Smith PetscFunctionReturn(0); 89995fce210SBarry Smith } 90095fce210SBarry Smith 90195fce210SBarry Smith /*@C 902dec1416fSJunchao Zhang PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process 90395fce210SBarry Smith 90495fce210SBarry Smith Not Collective 90595fce210SBarry Smith 90695fce210SBarry Smith Input Arguments: 90795fce210SBarry Smith . sf - star forest 90895fce210SBarry Smith 90995fce210SBarry Smith Output Arguments: 91095fce210SBarry Smith + nranks - number of ranks referenced by local part 91195fce210SBarry Smith . ranks - array of ranks 91295fce210SBarry Smith . roffset - offset in rmine/rremote for each rank (length nranks+1) 91395fce210SBarry Smith . rmine - concatenated array holding local indices referencing each remote rank 91495fce210SBarry Smith - rremote - concatenated array holding remote indices referenced for each remote rank 91595fce210SBarry Smith 91695fce210SBarry Smith Level: developer 91795fce210SBarry Smith 918dec1416fSJunchao Zhang .seealso: PetscSFGetLeafRanks() 91995fce210SBarry Smith @*/ 920dec1416fSJunchao Zhang PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote) 92195fce210SBarry Smith { 922dec1416fSJunchao Zhang PetscErrorCode ierr; 92395fce210SBarry Smith 92495fce210SBarry Smith PetscFunctionBegin; 92595fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 92629046d53SLisandro Dalcin if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 927dec1416fSJunchao Zhang if (sf->ops->GetRootRanks) { 928dec1416fSJunchao Zhang ierr = (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);CHKERRQ(ierr); 929dec1416fSJunchao Zhang } else { 930dec1416fSJunchao Zhang /* The generic implementation */ 93195fce210SBarry Smith if (nranks) *nranks = sf->nranks; 93295fce210SBarry Smith if (ranks) *ranks = sf->ranks; 93395fce210SBarry Smith if (roffset) *roffset = sf->roffset; 93495fce210SBarry Smith if (rmine) *rmine = sf->rmine; 93595fce210SBarry Smith if (rremote) *rremote = sf->rremote; 936dec1416fSJunchao Zhang } 93795fce210SBarry Smith PetscFunctionReturn(0); 93895fce210SBarry Smith } 93995fce210SBarry Smith 9408750ddebSJunchao Zhang /*@C 9418750ddebSJunchao Zhang PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process 9428750ddebSJunchao Zhang 9438750ddebSJunchao Zhang Not Collective 9448750ddebSJunchao Zhang 9458750ddebSJunchao Zhang Input Arguments: 9468750ddebSJunchao Zhang . sf - star forest 9478750ddebSJunchao Zhang 9488750ddebSJunchao Zhang Output Arguments: 9498750ddebSJunchao Zhang + niranks - number of leaf ranks referencing roots on this process 9508750ddebSJunchao Zhang . iranks - array of ranks 9518750ddebSJunchao Zhang . ioffset - offset in irootloc for each rank (length niranks+1) 9528750ddebSJunchao Zhang - irootloc - concatenated array holding local indices of roots referenced by each leaf rank 9538750ddebSJunchao Zhang 9548750ddebSJunchao Zhang Level: developer 9558750ddebSJunchao Zhang 956dec1416fSJunchao Zhang .seealso: PetscSFGetRootRanks() 9578750ddebSJunchao Zhang @*/ 9588750ddebSJunchao Zhang PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 9598750ddebSJunchao Zhang { 9608750ddebSJunchao Zhang PetscErrorCode ierr; 9618750ddebSJunchao Zhang 9628750ddebSJunchao Zhang PetscFunctionBegin; 9638750ddebSJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 9648750ddebSJunchao Zhang if (!sf->setupcalled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUp() before obtaining ranks"); 9658750ddebSJunchao Zhang if (sf->ops->GetLeafRanks) { 9668750ddebSJunchao Zhang ierr = (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);CHKERRQ(ierr); 9678750ddebSJunchao Zhang } else { 9688750ddebSJunchao Zhang PetscSFType type; 9698750ddebSJunchao Zhang ierr = PetscSFGetType(sf,&type);CHKERRQ(ierr); 9708750ddebSJunchao Zhang SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type); 9718750ddebSJunchao Zhang } 9728750ddebSJunchao Zhang PetscFunctionReturn(0); 9738750ddebSJunchao Zhang } 9748750ddebSJunchao Zhang 975b5a8e515SJed Brown static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) { 976b5a8e515SJed Brown PetscInt i; 977b5a8e515SJed Brown for (i=0; i<n; i++) { 978b5a8e515SJed Brown if (needle == list[i]) return PETSC_TRUE; 979b5a8e515SJed Brown } 980b5a8e515SJed Brown return PETSC_FALSE; 981b5a8e515SJed Brown } 982b5a8e515SJed Brown 98395fce210SBarry Smith /*@C 98421c688dcSJed Brown PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations. 98521c688dcSJed Brown 98621c688dcSJed Brown Collective 98721c688dcSJed Brown 98821c688dcSJed Brown Input Arguments: 989b5a8e515SJed Brown + sf - PetscSF to set up; PetscSFSetGraph() must have been called 990b5a8e515SJed Brown - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange) 99121c688dcSJed Brown 99221c688dcSJed Brown Level: developer 99321c688dcSJed Brown 994dec1416fSJunchao Zhang .seealso: PetscSFGetRootRanks() 99521c688dcSJed Brown @*/ 996b5a8e515SJed Brown PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup) 99721c688dcSJed Brown { 99821c688dcSJed Brown PetscErrorCode ierr; 99921c688dcSJed Brown PetscTable table; 100021c688dcSJed Brown PetscTablePosition pos; 1001b5a8e515SJed Brown PetscMPIInt size,groupsize,*groupranks; 1002247e8311SStefano Zampini PetscInt *rcount,*ranks; 1003247e8311SStefano Zampini PetscInt i, irank = -1,orank = -1; 100421c688dcSJed Brown 100521c688dcSJed Brown PetscFunctionBegin; 100621c688dcSJed Brown PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 100729046d53SLisandro Dalcin PetscSFCheckGraphSet(sf,1); 100821c688dcSJed Brown ierr = MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);CHKERRQ(ierr); 100921c688dcSJed Brown ierr = PetscTableCreate(10,size,&table);CHKERRQ(ierr); 101021c688dcSJed Brown for (i=0; i<sf->nleaves; i++) { 101121c688dcSJed Brown /* Log 1-based rank */ 101221c688dcSJed Brown ierr = PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);CHKERRQ(ierr); 101321c688dcSJed Brown } 101421c688dcSJed Brown ierr = PetscTableGetCount(table,&sf->nranks);CHKERRQ(ierr); 101521c688dcSJed Brown ierr = PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);CHKERRQ(ierr); 101621c688dcSJed Brown ierr = PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);CHKERRQ(ierr); 101721c688dcSJed Brown ierr = PetscTableGetHeadPosition(table,&pos);CHKERRQ(ierr); 101821c688dcSJed Brown for (i=0; i<sf->nranks; i++) { 101921c688dcSJed Brown ierr = PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);CHKERRQ(ierr); 102021c688dcSJed Brown ranks[i]--; /* Convert back to 0-based */ 102121c688dcSJed Brown } 102221c688dcSJed Brown ierr = PetscTableDestroy(&table);CHKERRQ(ierr); 1023b5a8e515SJed Brown 1024b5a8e515SJed Brown /* We expect that dgroup is reliably "small" while nranks could be large */ 1025b5a8e515SJed Brown { 10267fb8a5e4SKarl Rupp MPI_Group group = MPI_GROUP_NULL; 1027b5a8e515SJed Brown PetscMPIInt *dgroupranks; 1028b5a8e515SJed Brown ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 1029b5a8e515SJed Brown ierr = MPI_Group_size(dgroup,&groupsize);CHKERRQ(ierr); 1030b5a8e515SJed Brown ierr = PetscMalloc1(groupsize,&dgroupranks);CHKERRQ(ierr); 1031b5a8e515SJed Brown ierr = PetscMalloc1(groupsize,&groupranks);CHKERRQ(ierr); 1032b5a8e515SJed Brown for (i=0; i<groupsize; i++) dgroupranks[i] = i; 1033f3fc9a17SJed Brown if (groupsize) {ierr = MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);CHKERRQ(ierr);} 1034b5a8e515SJed Brown ierr = MPI_Group_free(&group);CHKERRQ(ierr); 1035b5a8e515SJed Brown ierr = PetscFree(dgroupranks);CHKERRQ(ierr); 1036b5a8e515SJed Brown } 1037b5a8e515SJed Brown 1038b5a8e515SJed Brown /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */ 1039b5a8e515SJed Brown for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) { 1040b5a8e515SJed Brown for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */ 1041b5a8e515SJed Brown if (InList(ranks[i],groupsize,groupranks)) break; 1042b5a8e515SJed Brown } 1043b5a8e515SJed Brown for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */ 1044b5a8e515SJed Brown if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break; 1045b5a8e515SJed Brown } 1046b5a8e515SJed Brown if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */ 1047b5a8e515SJed Brown PetscInt tmprank,tmpcount; 1048247e8311SStefano Zampini 1049b5a8e515SJed Brown tmprank = ranks[i]; 1050b5a8e515SJed Brown tmpcount = rcount[i]; 1051b5a8e515SJed Brown ranks[i] = ranks[sf->ndranks]; 1052b5a8e515SJed Brown rcount[i] = rcount[sf->ndranks]; 1053b5a8e515SJed Brown ranks[sf->ndranks] = tmprank; 1054b5a8e515SJed Brown rcount[sf->ndranks] = tmpcount; 1055b5a8e515SJed Brown sf->ndranks++; 1056b5a8e515SJed Brown } 1057b5a8e515SJed Brown } 1058b5a8e515SJed Brown ierr = PetscFree(groupranks);CHKERRQ(ierr); 1059b5a8e515SJed Brown ierr = PetscSortIntWithArray(sf->ndranks,ranks,rcount);CHKERRQ(ierr); 1060b5a8e515SJed Brown ierr = PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);CHKERRQ(ierr); 106121c688dcSJed Brown sf->roffset[0] = 0; 106221c688dcSJed Brown for (i=0; i<sf->nranks; i++) { 106321c688dcSJed Brown ierr = PetscMPIIntCast(ranks[i],sf->ranks+i);CHKERRQ(ierr); 106421c688dcSJed Brown sf->roffset[i+1] = sf->roffset[i] + rcount[i]; 106521c688dcSJed Brown rcount[i] = 0; 106621c688dcSJed Brown } 1067247e8311SStefano Zampini for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) { 1068247e8311SStefano Zampini /* short circuit */ 1069247e8311SStefano Zampini if (orank != sf->remote[i].rank) { 107021c688dcSJed Brown /* Search for index of iremote[i].rank in sf->ranks */ 1071b5a8e515SJed Brown ierr = PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);CHKERRQ(ierr); 1072b5a8e515SJed Brown if (irank < 0) { 1073b5a8e515SJed Brown ierr = PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);CHKERRQ(ierr); 1074b5a8e515SJed Brown if (irank >= 0) irank += sf->ndranks; 107521c688dcSJed Brown } 1076247e8311SStefano Zampini orank = sf->remote[i].rank; 1077247e8311SStefano Zampini } 1078b5a8e515SJed Brown if (irank < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Could not find rank %D in array",sf->remote[i].rank); 107921c688dcSJed Brown sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i; 108021c688dcSJed Brown sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index; 108121c688dcSJed Brown rcount[irank]++; 108221c688dcSJed Brown } 108321c688dcSJed Brown ierr = PetscFree2(rcount,ranks);CHKERRQ(ierr); 108421c688dcSJed Brown PetscFunctionReturn(0); 108521c688dcSJed Brown } 108621c688dcSJed Brown 108721c688dcSJed Brown /*@C 108895fce210SBarry Smith PetscSFGetGroups - gets incoming and outgoing process groups 108995fce210SBarry Smith 109095fce210SBarry Smith Collective 109195fce210SBarry Smith 109295fce210SBarry Smith Input Argument: 109395fce210SBarry Smith . sf - star forest 109495fce210SBarry Smith 109595fce210SBarry Smith Output Arguments: 109695fce210SBarry Smith + incoming - group of origin processes for incoming edges (leaves that reference my roots) 109795fce210SBarry Smith - outgoing - group of destination processes for outgoing edges (roots that I reference) 109895fce210SBarry Smith 109995fce210SBarry Smith Level: developer 110095fce210SBarry Smith 110195fce210SBarry Smith .seealso: PetscSFGetWindow(), PetscSFRestoreWindow() 110295fce210SBarry Smith @*/ 110395fce210SBarry Smith PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing) 110495fce210SBarry Smith { 110595fce210SBarry Smith PetscErrorCode ierr; 11067fb8a5e4SKarl Rupp MPI_Group group = MPI_GROUP_NULL; 110795fce210SBarry Smith 110895fce210SBarry Smith PetscFunctionBegin; 110944ee17edSStefano Zampini if (sf->nranks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetUpRanks() before obtaining groups"); 111095fce210SBarry Smith if (sf->ingroup == MPI_GROUP_NULL) { 111195fce210SBarry Smith PetscInt i; 111295fce210SBarry Smith const PetscInt *indegree; 111395fce210SBarry Smith PetscMPIInt rank,*outranks,*inranks; 111495fce210SBarry Smith PetscSFNode *remote; 111595fce210SBarry Smith PetscSF bgcount; 111695fce210SBarry Smith 111795fce210SBarry Smith /* Compute the number of incoming ranks */ 1118785e854fSJed Brown ierr = PetscMalloc1(sf->nranks,&remote);CHKERRQ(ierr); 111995fce210SBarry Smith for (i=0; i<sf->nranks; i++) { 112095fce210SBarry Smith remote[i].rank = sf->ranks[i]; 112195fce210SBarry Smith remote[i].index = 0; 112295fce210SBarry Smith } 112395fce210SBarry Smith ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);CHKERRQ(ierr); 112495fce210SBarry Smith ierr = PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 112595fce210SBarry Smith ierr = PetscSFComputeDegreeBegin(bgcount,&indegree);CHKERRQ(ierr); 112695fce210SBarry Smith ierr = PetscSFComputeDegreeEnd(bgcount,&indegree);CHKERRQ(ierr); 112795fce210SBarry Smith /* Enumerate the incoming ranks */ 1128dcca6d9dSJed Brown ierr = PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);CHKERRQ(ierr); 112995fce210SBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 113095fce210SBarry Smith for (i=0; i<sf->nranks; i++) outranks[i] = rank; 113195fce210SBarry Smith ierr = PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 113295fce210SBarry Smith ierr = PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);CHKERRQ(ierr); 113395fce210SBarry Smith ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 113495fce210SBarry Smith ierr = MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);CHKERRQ(ierr); 113595fce210SBarry Smith ierr = MPI_Group_free(&group);CHKERRQ(ierr); 113695fce210SBarry Smith ierr = PetscFree2(inranks,outranks);CHKERRQ(ierr); 113795fce210SBarry Smith ierr = PetscSFDestroy(&bgcount);CHKERRQ(ierr); 113895fce210SBarry Smith } 113995fce210SBarry Smith *incoming = sf->ingroup; 114095fce210SBarry Smith 114195fce210SBarry Smith if (sf->outgroup == MPI_GROUP_NULL) { 114295fce210SBarry Smith ierr = MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);CHKERRQ(ierr); 114395fce210SBarry Smith ierr = MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);CHKERRQ(ierr); 114495fce210SBarry Smith ierr = MPI_Group_free(&group);CHKERRQ(ierr); 114595fce210SBarry Smith } 114695fce210SBarry Smith *outgoing = sf->outgroup; 114795fce210SBarry Smith PetscFunctionReturn(0); 114895fce210SBarry Smith } 114995fce210SBarry Smith 115029046d53SLisandro Dalcin /*@ 115195fce210SBarry Smith PetscSFGetMultiSF - gets the inner SF implemeting gathers and scatters 115295fce210SBarry Smith 115395fce210SBarry Smith Collective 115495fce210SBarry Smith 115595fce210SBarry Smith Input Argument: 115695fce210SBarry Smith . sf - star forest that may contain roots with 0 or with more than 1 vertex 115795fce210SBarry Smith 115895fce210SBarry Smith Output Arguments: 115995fce210SBarry Smith . multi - star forest with split roots, such that each root has degree exactly 1 116095fce210SBarry Smith 116195fce210SBarry Smith Level: developer 116295fce210SBarry Smith 116395fce210SBarry Smith Notes: 116495fce210SBarry Smith 116595fce210SBarry Smith In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi 116695fce210SBarry Smith directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming 116795fce210SBarry Smith edge, it is a candidate for future optimization that might involve its removal. 116895fce210SBarry Smith 1169673100f5SVaclav Hapla .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering() 117095fce210SBarry Smith @*/ 117195fce210SBarry Smith PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi) 117295fce210SBarry Smith { 117395fce210SBarry Smith PetscErrorCode ierr; 117495fce210SBarry Smith 117595fce210SBarry Smith PetscFunctionBegin; 117695fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 117795fce210SBarry Smith PetscValidPointer(multi,2); 117895fce210SBarry Smith if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */ 117995fce210SBarry Smith ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 118095fce210SBarry Smith *multi = sf->multi; 118195fce210SBarry Smith PetscFunctionReturn(0); 118295fce210SBarry Smith } 118395fce210SBarry Smith if (!sf->multi) { 118495fce210SBarry Smith const PetscInt *indegree; 11859837ea96SMatthew G. Knepley PetscInt i,*inoffset,*outones,*outoffset,maxlocal; 118695fce210SBarry Smith PetscSFNode *remote; 118729046d53SLisandro Dalcin maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */ 118895fce210SBarry Smith ierr = PetscSFComputeDegreeBegin(sf,&indegree);CHKERRQ(ierr); 118995fce210SBarry Smith ierr = PetscSFComputeDegreeEnd(sf,&indegree);CHKERRQ(ierr); 11909837ea96SMatthew G. Knepley ierr = PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);CHKERRQ(ierr); 119195fce210SBarry Smith inoffset[0] = 0; 119295fce210SBarry Smith for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i]; 11939837ea96SMatthew G. Knepley for (i=0; i<maxlocal; i++) outones[i] = 1; 1194dbd2ff41SBarry Smith ierr = PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 1195dbd2ff41SBarry Smith ierr = PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);CHKERRQ(ierr); 119695fce210SBarry Smith for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */ 119776bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */ 119895fce210SBarry Smith for (i=0; i<sf->nroots; i++) { 119995fce210SBarry Smith if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp"); 120095fce210SBarry Smith } 120176bd3646SJed Brown } 1202785e854fSJed Brown ierr = PetscMalloc1(sf->nleaves,&remote);CHKERRQ(ierr); 120395fce210SBarry Smith for (i=0; i<sf->nleaves; i++) { 120495fce210SBarry Smith remote[i].rank = sf->remote[i].rank; 120538e7336fSToby Isaac remote[i].index = outoffset[sf->mine ? sf->mine[i] : i]; 120695fce210SBarry Smith } 120795fce210SBarry Smith ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);CHKERRQ(ierr); 120801365b40SToby Isaac ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);CHKERRQ(ierr); 120995fce210SBarry Smith if (sf->rankorder) { /* Sort the ranks */ 121095fce210SBarry Smith PetscMPIInt rank; 121195fce210SBarry Smith PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree; 121295fce210SBarry Smith PetscSFNode *newremote; 121395fce210SBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 121495fce210SBarry Smith for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]); 12159837ea96SMatthew G. Knepley ierr = PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);CHKERRQ(ierr); 12169837ea96SMatthew G. Knepley for (i=0; i<maxlocal; i++) outranks[i] = rank; 12178bfbc91cSJed Brown ierr = PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 12188bfbc91cSJed Brown ierr = PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPIU_REPLACE);CHKERRQ(ierr); 121995fce210SBarry Smith /* Sort the incoming ranks at each vertex, build the inverse map */ 122095fce210SBarry Smith for (i=0; i<sf->nroots; i++) { 122195fce210SBarry Smith PetscInt j; 122295fce210SBarry Smith for (j=0; j<indegree[i]; j++) tmpoffset[j] = j; 122395fce210SBarry Smith ierr = PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);CHKERRQ(ierr); 122495fce210SBarry Smith for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j; 122595fce210SBarry Smith } 122695fce210SBarry Smith ierr = PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 122795fce210SBarry Smith ierr = PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset);CHKERRQ(ierr); 1228785e854fSJed Brown ierr = PetscMalloc1(sf->nleaves,&newremote);CHKERRQ(ierr); 122995fce210SBarry Smith for (i=0; i<sf->nleaves; i++) { 123095fce210SBarry Smith newremote[i].rank = sf->remote[i].rank; 123101365b40SToby Isaac newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i]; 123295fce210SBarry Smith } 123301365b40SToby Isaac ierr = PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 123495fce210SBarry Smith ierr = PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);CHKERRQ(ierr); 123595fce210SBarry Smith } 123695fce210SBarry Smith ierr = PetscFree3(inoffset,outones,outoffset);CHKERRQ(ierr); 123795fce210SBarry Smith } 123895fce210SBarry Smith *multi = sf->multi; 123995fce210SBarry Smith PetscFunctionReturn(0); 124095fce210SBarry Smith } 124195fce210SBarry Smith 124295fce210SBarry Smith /*@C 124395fce210SBarry Smith PetscSFCreateEmbeddedSF - removes edges from all but the selected roots, does not remap indices 124495fce210SBarry Smith 124595fce210SBarry Smith Collective 124695fce210SBarry Smith 124795fce210SBarry Smith Input Arguments: 124895fce210SBarry Smith + sf - original star forest 1249ba2a7774SJunchao Zhang . nselected - number of selected roots on this process 1250ba2a7774SJunchao Zhang - selected - indices of the selected roots on this process 125195fce210SBarry Smith 125295fce210SBarry Smith Output Arguments: 1253cd620004SJunchao Zhang . esf - new star forest 125495fce210SBarry Smith 125595fce210SBarry Smith Level: advanced 125695fce210SBarry Smith 125795fce210SBarry Smith Note: 125895fce210SBarry Smith To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can 125995fce210SBarry Smith be done by calling PetscSFGetGraph(). 126095fce210SBarry Smith 126195fce210SBarry Smith .seealso: PetscSFSetGraph(), PetscSFGetGraph() 126295fce210SBarry Smith @*/ 1263cd620004SJunchao Zhang PetscErrorCode PetscSFCreateEmbeddedSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf) 126495fce210SBarry Smith { 1265cd620004SJunchao Zhang PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal; 1266cd620004SJunchao Zhang const PetscInt *ilocal; 1267cd620004SJunchao Zhang signed char *rootdata,*leafdata,*leafmem; 1268ba2a7774SJunchao Zhang const PetscSFNode *iremote; 1269f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 1270f659e5c7SJunchao Zhang MPI_Comm comm; 12710511a646SMatthew G. Knepley PetscErrorCode ierr; 127295fce210SBarry Smith 127395fce210SBarry Smith PetscFunctionBegin; 127495fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 127529046d53SLisandro Dalcin PetscSFCheckGraphSet(sf,1); 1276ba2a7774SJunchao Zhang if (nselected) PetscValidPointer(selected,3); 1277cd620004SJunchao Zhang PetscValidPointer(esf,4); 12780511a646SMatthew G. Knepley 1279f659e5c7SJunchao Zhang ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1280140a1472SStefano Zampini ierr = PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 1281f659e5c7SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1282cd620004SJunchao Zhang ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 1283cd620004SJunchao Zhang 128476bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */ 1285cd620004SJunchao Zhang PetscBool dups; 1286cd620004SJunchao Zhang ierr = PetscCheckDupsInt(nselected,selected,&dups);CHKERRQ(ierr); 1287cd620004SJunchao Zhang if (dups) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"selected[] has dups"); 1288cd620004SJunchao Zhang for (i=0; i<nselected; i++) 1289cd620004SJunchao Zhang if (selected[i] < 0 || selected[i] >= nroots) SETERRQ2(comm,PETSC_ERR_ARG_OUTOFRANGE,"selected root indice %D is out of [0,%D)",selected[i],nroots); 1290cd620004SJunchao Zhang } 1291f659e5c7SJunchao Zhang 1292f659e5c7SJunchao Zhang if (sf->ops->CreateEmbeddedSF) { 1293cd620004SJunchao Zhang ierr = (*sf->ops->CreateEmbeddedSF)(sf,nselected,selected,esf);CHKERRQ(ierr); 1294f659e5c7SJunchao Zhang } else { 1295cd620004SJunchao Zhang /* A generic version of creating embedded sf */ 1296cd620004SJunchao Zhang ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 1297cd620004SJunchao Zhang maxlocal = maxleaf - minleaf + 1; 1298cd620004SJunchao Zhang ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 1299cd620004SJunchao Zhang leafdata = leafmem - minleaf; 1300cd620004SJunchao Zhang /* Tag selected roots and bcast to leaves */ 1301cd620004SJunchao Zhang for (i=0; i<nselected; i++) rootdata[selected[i]] = 1; 1302cd620004SJunchao Zhang ierr = PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1303cd620004SJunchao Zhang ierr = PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata);CHKERRQ(ierr); 1304ba2a7774SJunchao Zhang 1305cd620004SJunchao Zhang /* Build esf with leaves that are still connected */ 1306cd620004SJunchao Zhang esf_nleaves = 0; 1307cd620004SJunchao Zhang for (i=0; i<nleaves; i++) { 1308cd620004SJunchao Zhang j = ilocal ? ilocal[i] : i; 1309cd620004SJunchao Zhang /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs 1310cd620004SJunchao Zhang with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555 1311cd620004SJunchao Zhang */ 1312cd620004SJunchao Zhang esf_nleaves += (leafdata[j] ? 1 : 0); 1313cd620004SJunchao Zhang } 1314cd620004SJunchao Zhang ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 1315cd620004SJunchao Zhang ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 1316cd620004SJunchao Zhang for (i=n=0; i<nleaves; i++) { 1317cd620004SJunchao Zhang j = ilocal ? ilocal[i] : i; 1318cd620004SJunchao Zhang if (leafdata[j]) { 1319cd620004SJunchao Zhang new_ilocal[n] = j; 1320cd620004SJunchao Zhang new_iremote[n].rank = iremote[i].rank; 1321cd620004SJunchao Zhang new_iremote[n].index = iremote[i].index; 1322fc1ede2bSMatthew G. Knepley ++n; 132395fce210SBarry Smith } 132495fce210SBarry Smith } 1325cd620004SJunchao Zhang ierr = PetscSFCreate(comm,esf);CHKERRQ(ierr); 1326cd620004SJunchao Zhang ierr = PetscSFSetFromOptions(*esf);CHKERRQ(ierr); 1327cd620004SJunchao Zhang ierr = PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1328cd620004SJunchao Zhang ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 1329f659e5c7SJunchao Zhang } 1330140a1472SStefano Zampini ierr = PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);CHKERRQ(ierr); 133195fce210SBarry Smith PetscFunctionReturn(0); 133295fce210SBarry Smith } 133395fce210SBarry Smith 13342f5fb4c2SMatthew G. Knepley /*@C 13352f5fb4c2SMatthew G. Knepley PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices 13362f5fb4c2SMatthew G. Knepley 13372f5fb4c2SMatthew G. Knepley Collective 13382f5fb4c2SMatthew G. Knepley 13392f5fb4c2SMatthew G. Knepley Input Arguments: 13402f5fb4c2SMatthew G. Knepley + sf - original star forest 1341f659e5c7SJunchao Zhang . nselected - number of selected leaves on this process 1342f659e5c7SJunchao Zhang - selected - indices of the selected leaves on this process 13432f5fb4c2SMatthew G. Knepley 13442f5fb4c2SMatthew G. Knepley Output Arguments: 13452f5fb4c2SMatthew G. Knepley . newsf - new star forest 13462f5fb4c2SMatthew G. Knepley 13472f5fb4c2SMatthew G. Knepley Level: advanced 13482f5fb4c2SMatthew G. Knepley 13492f5fb4c2SMatthew G. Knepley .seealso: PetscSFCreateEmbeddedSF(), PetscSFSetGraph(), PetscSFGetGraph() 13502f5fb4c2SMatthew G. Knepley @*/ 1351f659e5c7SJunchao Zhang PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 13522f5fb4c2SMatthew G. Knepley { 1353f659e5c7SJunchao Zhang const PetscSFNode *iremote; 1354f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 1355f659e5c7SJunchao Zhang const PetscInt *ilocal; 1356f659e5c7SJunchao Zhang PetscInt i,nroots,*leaves,*new_ilocal; 1357f659e5c7SJunchao Zhang MPI_Comm comm; 13582f5fb4c2SMatthew G. Knepley PetscErrorCode ierr; 13592f5fb4c2SMatthew G. Knepley 13602f5fb4c2SMatthew G. Knepley PetscFunctionBegin; 13612f5fb4c2SMatthew G. Knepley PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 136229046d53SLisandro Dalcin PetscSFCheckGraphSet(sf,1); 1363f659e5c7SJunchao Zhang if (nselected) PetscValidPointer(selected,3); 13642f5fb4c2SMatthew G. Knepley PetscValidPointer(newsf,4); 13652f5fb4c2SMatthew G. Knepley 1366f659e5c7SJunchao Zhang /* Uniq selected[] and put results in leaves[] */ 1367f659e5c7SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 1368f659e5c7SJunchao Zhang ierr = PetscMalloc1(nselected,&leaves);CHKERRQ(ierr); 1369dd5b3ca6SJunchao Zhang ierr = PetscArraycpy(leaves,selected,nselected);CHKERRQ(ierr); 1370f659e5c7SJunchao Zhang ierr = PetscSortedRemoveDupsInt(&nselected,leaves);CHKERRQ(ierr); 1371f659e5c7SJunchao Zhang if (nselected && (leaves[0] < 0 || leaves[nselected-1] >= sf->nleaves)) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max leaf indices %D/%D are not in [0,%D)",leaves[0],leaves[nselected-1],sf->nleaves); 1372f659e5c7SJunchao Zhang 1373f659e5c7SJunchao Zhang /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */ 1374f659e5c7SJunchao Zhang if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) { 1375f659e5c7SJunchao Zhang ierr = (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);CHKERRQ(ierr); 1376f659e5c7SJunchao Zhang } else { 1377f659e5c7SJunchao Zhang ierr = PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);CHKERRQ(ierr); 1378f659e5c7SJunchao Zhang ierr = PetscMalloc1(nselected,&new_ilocal);CHKERRQ(ierr); 1379f659e5c7SJunchao Zhang ierr = PetscMalloc1(nselected,&new_iremote);CHKERRQ(ierr); 1380f659e5c7SJunchao Zhang for (i=0; i<nselected; ++i) { 1381f659e5c7SJunchao Zhang const PetscInt l = leaves[i]; 1382f659e5c7SJunchao Zhang new_ilocal[i] = ilocal ? ilocal[l] : l; 1383f659e5c7SJunchao Zhang new_iremote[i].rank = iremote[l].rank; 1384f659e5c7SJunchao Zhang new_iremote[i].index = iremote[l].index; 13852f5fb4c2SMatthew G. Knepley } 13864cc19a0cSStefano Zampini ierr = PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);CHKERRQ(ierr); 1387f659e5c7SJunchao Zhang ierr = PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 1388f659e5c7SJunchao Zhang } 1389f659e5c7SJunchao Zhang ierr = PetscFree(leaves);CHKERRQ(ierr); 13902f5fb4c2SMatthew G. Knepley PetscFunctionReturn(0); 13912f5fb4c2SMatthew G. Knepley } 13922f5fb4c2SMatthew G. Knepley 139395fce210SBarry Smith /*@C 13943482bfa8SJunchao Zhang PetscSFBcastAndOpBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastAndOpEnd() 13953482bfa8SJunchao Zhang 13963482bfa8SJunchao Zhang Collective on PetscSF 13973482bfa8SJunchao Zhang 13983482bfa8SJunchao Zhang Input Arguments: 13993482bfa8SJunchao Zhang + sf - star forest on which to communicate 14003482bfa8SJunchao Zhang . unit - data type associated with each node 14013482bfa8SJunchao Zhang . rootdata - buffer to broadcast 14023482bfa8SJunchao Zhang - op - operation to use for reduction 14033482bfa8SJunchao Zhang 14043482bfa8SJunchao Zhang Output Arguments: 14053482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root 14063482bfa8SJunchao Zhang 14073482bfa8SJunchao Zhang Level: intermediate 14083482bfa8SJunchao Zhang 1409*d0295fc0SJunchao Zhang Notes: 1410*d0295fc0SJunchao Zhang When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1411*d0295fc0SJunchao Zhang are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1412*d0295fc0SJunchao Zhang use PetscSFBcastAndOpWithMemTypeBegin() instead. 1413*d0295fc0SJunchao Zhang .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(), PetscSFBcastAndOpWithMemTypeBegin() 14143482bfa8SJunchao Zhang @*/ 14153482bfa8SJunchao Zhang PetscErrorCode PetscSFBcastAndOpBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 14163482bfa8SJunchao Zhang { 14173482bfa8SJunchao Zhang PetscErrorCode ierr; 1418eb02082bSJunchao Zhang PetscMemType rootmtype,leafmtype; 14193482bfa8SJunchao Zhang 14203482bfa8SJunchao Zhang PetscFunctionBegin; 14213482bfa8SJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 14223482bfa8SJunchao Zhang ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 14233482bfa8SJunchao Zhang ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1424eb02082bSJunchao Zhang ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1425eb02082bSJunchao Zhang ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1426eb02082bSJunchao Zhang ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 14273482bfa8SJunchao Zhang ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 14283482bfa8SJunchao Zhang PetscFunctionReturn(0); 14293482bfa8SJunchao Zhang } 14303482bfa8SJunchao Zhang 14313482bfa8SJunchao Zhang /*@C 1432*d0295fc0SJunchao Zhang PetscSFBcastAndOpWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastAndOpEnd() 1433*d0295fc0SJunchao Zhang 1434*d0295fc0SJunchao Zhang Collective on PetscSF 1435*d0295fc0SJunchao Zhang 1436*d0295fc0SJunchao Zhang Input Arguments: 1437*d0295fc0SJunchao Zhang + sf - star forest on which to communicate 1438*d0295fc0SJunchao Zhang . unit - data type associated with each node 1439*d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata 1440*d0295fc0SJunchao Zhang . rootdata - buffer to broadcast 1441*d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata 1442*d0295fc0SJunchao Zhang - op - operation to use for reduction 1443*d0295fc0SJunchao Zhang 1444*d0295fc0SJunchao Zhang Output Arguments: 1445*d0295fc0SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root 1446*d0295fc0SJunchao Zhang 1447*d0295fc0SJunchao Zhang Level: intermediate 1448*d0295fc0SJunchao Zhang 1449*d0295fc0SJunchao Zhang .seealso: PetscSFBcastAndOpEnd(), PetscSFBcastBegin(), PetscSFBcastEnd(),PetscSFBcastAndOpBegin() 1450*d0295fc0SJunchao Zhang @*/ 1451*d0295fc0SJunchao Zhang PetscErrorCode PetscSFBcastAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 1452*d0295fc0SJunchao Zhang { 1453*d0295fc0SJunchao Zhang PetscErrorCode ierr; 1454*d0295fc0SJunchao Zhang 1455*d0295fc0SJunchao Zhang PetscFunctionBegin; 1456*d0295fc0SJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1457*d0295fc0SJunchao Zhang ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1458*d0295fc0SJunchao Zhang ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1459*d0295fc0SJunchao Zhang ierr = (*sf->ops->BcastAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);CHKERRQ(ierr); 1460*d0295fc0SJunchao Zhang ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1461*d0295fc0SJunchao Zhang PetscFunctionReturn(0); 1462*d0295fc0SJunchao Zhang } 1463*d0295fc0SJunchao Zhang 1464*d0295fc0SJunchao Zhang /*@C 14653482bfa8SJunchao Zhang PetscSFBcastAndOpEnd - end a broadcast & reduce operation started with PetscSFBcastAndOpBegin() 14663482bfa8SJunchao Zhang 14673482bfa8SJunchao Zhang Collective 14683482bfa8SJunchao Zhang 14693482bfa8SJunchao Zhang Input Arguments: 14703482bfa8SJunchao Zhang + sf - star forest 14713482bfa8SJunchao Zhang . unit - data type 14723482bfa8SJunchao Zhang . rootdata - buffer to broadcast 14733482bfa8SJunchao Zhang - op - operation to use for reduction 14743482bfa8SJunchao Zhang 14753482bfa8SJunchao Zhang Output Arguments: 14763482bfa8SJunchao Zhang . leafdata - buffer to be reduced with values from each leaf's respective root 14773482bfa8SJunchao Zhang 14783482bfa8SJunchao Zhang Level: intermediate 14793482bfa8SJunchao Zhang 14803482bfa8SJunchao Zhang .seealso: PetscSFSetGraph(), PetscSFReduceEnd() 14813482bfa8SJunchao Zhang @*/ 14823482bfa8SJunchao Zhang PetscErrorCode PetscSFBcastAndOpEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 14833482bfa8SJunchao Zhang { 14843482bfa8SJunchao Zhang PetscErrorCode ierr; 14853482bfa8SJunchao Zhang 14863482bfa8SJunchao Zhang PetscFunctionBegin; 14873482bfa8SJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 14883482bfa8SJunchao Zhang ierr = PetscLogEventBegin(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 148900816365SJunchao Zhang ierr = (*sf->ops->BcastAndOpEnd)(sf,unit,rootdata,leafdata,op);CHKERRQ(ierr); 14903482bfa8SJunchao Zhang ierr = PetscLogEventEnd(PETSCSF_BcastAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 14913482bfa8SJunchao Zhang PetscFunctionReturn(0); 14923482bfa8SJunchao Zhang } 14933482bfa8SJunchao Zhang 14943482bfa8SJunchao Zhang /*@C 149595fce210SBarry Smith PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd() 149695fce210SBarry Smith 149795fce210SBarry Smith Collective 149895fce210SBarry Smith 149995fce210SBarry Smith Input Arguments: 150095fce210SBarry Smith + sf - star forest 150195fce210SBarry Smith . unit - data type 150295fce210SBarry Smith . leafdata - values to reduce 150395fce210SBarry Smith - op - reduction operation 150495fce210SBarry Smith 150595fce210SBarry Smith Output Arguments: 150695fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root 150795fce210SBarry Smith 150895fce210SBarry Smith Level: intermediate 150995fce210SBarry Smith 1510*d0295fc0SJunchao Zhang Notes: 1511*d0295fc0SJunchao Zhang When petsc is configured with device support, it will use its own mechanism to figure out whether the given data pointers 1512*d0295fc0SJunchao Zhang are host pointers or device pointers, which may incur a noticable cost. If you already knew the info, you should 1513*d0295fc0SJunchao Zhang use PetscSFReduceWithMemTypeBegin() instead. 1514*d0295fc0SJunchao Zhang 1515*d0295fc0SJunchao Zhang .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin() 151695fce210SBarry Smith @*/ 151795fce210SBarry Smith PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 151895fce210SBarry Smith { 151995fce210SBarry Smith PetscErrorCode ierr; 1520eb02082bSJunchao Zhang PetscMemType rootmtype,leafmtype; 152195fce210SBarry Smith 152295fce210SBarry Smith PetscFunctionBegin; 152395fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 152495fce210SBarry Smith ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 152529046d53SLisandro Dalcin ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1526eb02082bSJunchao Zhang ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1527eb02082bSJunchao Zhang ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1528eb02082bSJunchao Zhang ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1529563ffbabSMatthew G. Knepley ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 153095fce210SBarry Smith PetscFunctionReturn(0); 153195fce210SBarry Smith } 153295fce210SBarry Smith 153395fce210SBarry Smith /*@C 1534*d0295fc0SJunchao Zhang PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd() 1535*d0295fc0SJunchao Zhang 1536*d0295fc0SJunchao Zhang Collective 1537*d0295fc0SJunchao Zhang 1538*d0295fc0SJunchao Zhang Input Arguments: 1539*d0295fc0SJunchao Zhang + sf - star forest 1540*d0295fc0SJunchao Zhang . unit - data type 1541*d0295fc0SJunchao Zhang . leafmtype - memory type of leafdata 1542*d0295fc0SJunchao Zhang . leafdata - values to reduce 1543*d0295fc0SJunchao Zhang . rootmtype - memory type of rootdata 1544*d0295fc0SJunchao Zhang - op - reduction operation 1545*d0295fc0SJunchao Zhang 1546*d0295fc0SJunchao Zhang Output Arguments: 1547*d0295fc0SJunchao Zhang . rootdata - result of reduction of values from all leaves of each root 1548*d0295fc0SJunchao Zhang 1549*d0295fc0SJunchao Zhang Level: intermediate 1550*d0295fc0SJunchao Zhang 1551*d0295fc0SJunchao Zhang .seealso: PetscSFBcastBegin(), PetscSFReduceBegin() 1552*d0295fc0SJunchao Zhang @*/ 1553*d0295fc0SJunchao Zhang PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 1554*d0295fc0SJunchao Zhang { 1555*d0295fc0SJunchao Zhang PetscErrorCode ierr; 1556*d0295fc0SJunchao Zhang 1557*d0295fc0SJunchao Zhang PetscFunctionBegin; 1558*d0295fc0SJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1559*d0295fc0SJunchao Zhang ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1560*d0295fc0SJunchao Zhang ierr = PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1561*d0295fc0SJunchao Zhang ierr = (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);CHKERRQ(ierr); 1562*d0295fc0SJunchao Zhang ierr = PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);CHKERRQ(ierr); 1563*d0295fc0SJunchao Zhang PetscFunctionReturn(0); 1564*d0295fc0SJunchao Zhang } 1565*d0295fc0SJunchao Zhang 1566*d0295fc0SJunchao Zhang /*@C 156795fce210SBarry Smith PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin() 156895fce210SBarry Smith 156995fce210SBarry Smith Collective 157095fce210SBarry Smith 157195fce210SBarry Smith Input Arguments: 157295fce210SBarry Smith + sf - star forest 157395fce210SBarry Smith . unit - data type 157495fce210SBarry Smith . leafdata - values to reduce 157595fce210SBarry Smith - op - reduction operation 157695fce210SBarry Smith 157795fce210SBarry Smith Output Arguments: 157895fce210SBarry Smith . rootdata - result of reduction of values from all leaves of each root 157995fce210SBarry Smith 158095fce210SBarry Smith Level: intermediate 158195fce210SBarry Smith 158295fce210SBarry Smith .seealso: PetscSFSetGraph(), PetscSFBcastEnd() 158395fce210SBarry Smith @*/ 158495fce210SBarry Smith PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 158595fce210SBarry Smith { 158695fce210SBarry Smith PetscErrorCode ierr; 158795fce210SBarry Smith 158895fce210SBarry Smith PetscFunctionBegin; 158995fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 159029046d53SLisandro Dalcin ierr = PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 159100816365SJunchao Zhang ierr = (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);CHKERRQ(ierr); 1592563ffbabSMatthew G. Knepley ierr = PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);CHKERRQ(ierr); 159395fce210SBarry Smith PetscFunctionReturn(0); 159495fce210SBarry Smith } 159595fce210SBarry Smith 159695fce210SBarry Smith /*@C 1597a1729e3fSJunchao Zhang PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd() 1598a1729e3fSJunchao Zhang 1599a1729e3fSJunchao Zhang Collective 1600a1729e3fSJunchao Zhang 1601a1729e3fSJunchao Zhang Input Arguments: 1602a1729e3fSJunchao Zhang + sf - star forest 1603a1729e3fSJunchao Zhang . unit - data type 1604a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction 1605a1729e3fSJunchao Zhang - op - operation to use for reduction 1606a1729e3fSJunchao Zhang 1607a1729e3fSJunchao Zhang Output Arguments: 1608a1729e3fSJunchao Zhang + rootdata - root values to be updated, input state is seen by first process to perform an update 1609a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1610a1729e3fSJunchao Zhang 1611a1729e3fSJunchao Zhang Level: advanced 1612a1729e3fSJunchao Zhang 1613a1729e3fSJunchao Zhang Note: 1614a1729e3fSJunchao Zhang The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process 1615a1729e3fSJunchao Zhang might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is 1616a1729e3fSJunchao Zhang not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as 1617a1729e3fSJunchao Zhang integers. 1618a1729e3fSJunchao Zhang 1619a1729e3fSJunchao Zhang .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph() 1620a1729e3fSJunchao Zhang @*/ 1621a1729e3fSJunchao Zhang PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1622a1729e3fSJunchao Zhang { 1623a1729e3fSJunchao Zhang PetscErrorCode ierr; 1624eb02082bSJunchao Zhang PetscMemType rootmtype,leafmtype,leafupdatemtype; 1625a1729e3fSJunchao Zhang 1626a1729e3fSJunchao Zhang PetscFunctionBegin; 1627a1729e3fSJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1628a1729e3fSJunchao Zhang ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 1629a1729e3fSJunchao Zhang ierr = PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1630eb02082bSJunchao Zhang ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 1631eb02082bSJunchao Zhang ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 1632eb02082bSJunchao Zhang ierr = PetscGetMemType(leafupdate,&leafupdatemtype);CHKERRQ(ierr); 1633eb02082bSJunchao Zhang if (leafmtype != leafupdatemtype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for leafdata and leafupdate in different memory types"); 1634eb02082bSJunchao Zhang ierr = (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);CHKERRQ(ierr); 1635a1729e3fSJunchao Zhang ierr = PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 1636a1729e3fSJunchao Zhang PetscFunctionReturn(0); 1637a1729e3fSJunchao Zhang } 1638a1729e3fSJunchao Zhang 1639a1729e3fSJunchao Zhang /*@C 1640a1729e3fSJunchao Zhang PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value 1641a1729e3fSJunchao Zhang 1642a1729e3fSJunchao Zhang Collective 1643a1729e3fSJunchao Zhang 1644a1729e3fSJunchao Zhang Input Arguments: 1645a1729e3fSJunchao Zhang + sf - star forest 1646a1729e3fSJunchao Zhang . unit - data type 1647a1729e3fSJunchao Zhang . leafdata - leaf values to use in reduction 1648a1729e3fSJunchao Zhang - op - operation to use for reduction 1649a1729e3fSJunchao Zhang 1650a1729e3fSJunchao Zhang Output Arguments: 1651a1729e3fSJunchao Zhang + rootdata - root values to be updated, input state is seen by first process to perform an update 1652a1729e3fSJunchao Zhang - leafupdate - state at each leaf's respective root immediately prior to my atomic update 1653a1729e3fSJunchao Zhang 1654a1729e3fSJunchao Zhang Level: advanced 1655a1729e3fSJunchao Zhang 1656a1729e3fSJunchao Zhang .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph() 1657a1729e3fSJunchao Zhang @*/ 1658a1729e3fSJunchao Zhang PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 1659a1729e3fSJunchao Zhang { 1660a1729e3fSJunchao Zhang PetscErrorCode ierr; 1661a1729e3fSJunchao Zhang 1662a1729e3fSJunchao Zhang PetscFunctionBegin; 1663a1729e3fSJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1664a1729e3fSJunchao Zhang ierr = PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 166500816365SJunchao Zhang ierr = (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 1666a1729e3fSJunchao Zhang ierr = PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);CHKERRQ(ierr); 1667a1729e3fSJunchao Zhang PetscFunctionReturn(0); 1668a1729e3fSJunchao Zhang } 1669a1729e3fSJunchao Zhang 1670a1729e3fSJunchao Zhang /*@C 167195fce210SBarry Smith PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd() 167295fce210SBarry Smith 167395fce210SBarry Smith Collective 167495fce210SBarry Smith 167595fce210SBarry Smith Input Arguments: 167695fce210SBarry Smith . sf - star forest 167795fce210SBarry Smith 167895fce210SBarry Smith Output Arguments: 167995fce210SBarry Smith . degree - degree of each root vertex 168095fce210SBarry Smith 168195fce210SBarry Smith Level: advanced 168295fce210SBarry Smith 1683ffe67aa5SVáclav Hapla Notes: 1684ffe67aa5SVáclav Hapla The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 1685ffe67aa5SVáclav Hapla 168695fce210SBarry Smith .seealso: PetscSFGatherBegin() 168795fce210SBarry Smith @*/ 168895fce210SBarry Smith PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree) 168995fce210SBarry Smith { 169095fce210SBarry Smith PetscErrorCode ierr; 169195fce210SBarry Smith 169295fce210SBarry Smith PetscFunctionBegin; 169395fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 169495fce210SBarry Smith PetscSFCheckGraphSet(sf,1); 169595fce210SBarry Smith PetscValidPointer(degree,2); 1696803bd9e8SMatthew G. Knepley if (!sf->degreeknown) { 16975b0d146aSStefano Zampini PetscInt i, nroots = sf->nroots, maxlocal; 1698803bd9e8SMatthew G. Knepley if (sf->degree) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Calls to PetscSFComputeDegreeBegin() cannot be nested."); 16995b0d146aSStefano Zampini maxlocal = sf->maxleaf-sf->minleaf+1; 170029046d53SLisandro Dalcin ierr = PetscMalloc1(nroots,&sf->degree);CHKERRQ(ierr); 170129046d53SLisandro Dalcin ierr = PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp);CHKERRQ(ierr); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */ 170229046d53SLisandro Dalcin for (i=0; i<nroots; i++) sf->degree[i] = 0; 17039837ea96SMatthew G. Knepley for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1; 17045b0d146aSStefano Zampini ierr = PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 170595fce210SBarry Smith } 170695fce210SBarry Smith *degree = NULL; 170795fce210SBarry Smith PetscFunctionReturn(0); 170895fce210SBarry Smith } 170995fce210SBarry Smith 171095fce210SBarry Smith /*@C 171195fce210SBarry Smith PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin() 171295fce210SBarry Smith 171395fce210SBarry Smith Collective 171495fce210SBarry Smith 171595fce210SBarry Smith Input Arguments: 171695fce210SBarry Smith . sf - star forest 171795fce210SBarry Smith 171895fce210SBarry Smith Output Arguments: 171995fce210SBarry Smith . degree - degree of each root vertex 172095fce210SBarry Smith 172195fce210SBarry Smith Level: developer 172295fce210SBarry Smith 1723ffe67aa5SVáclav Hapla Notes: 1724ffe67aa5SVáclav Hapla The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it. 1725ffe67aa5SVáclav Hapla 172695fce210SBarry Smith .seealso: 172795fce210SBarry Smith @*/ 172895fce210SBarry Smith PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree) 172995fce210SBarry Smith { 173095fce210SBarry Smith PetscErrorCode ierr; 173195fce210SBarry Smith 173295fce210SBarry Smith PetscFunctionBegin; 173395fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 173495fce210SBarry Smith PetscSFCheckGraphSet(sf,1); 173529046d53SLisandro Dalcin PetscValidPointer(degree,2); 173695fce210SBarry Smith if (!sf->degreeknown) { 173729046d53SLisandro Dalcin if (!sf->degreetmp) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Must call PetscSFComputeDegreeBegin() before PetscSFComputeDegreeEnd()"); 17385b0d146aSStefano Zampini ierr = PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);CHKERRQ(ierr); 173995fce210SBarry Smith ierr = PetscFree(sf->degreetmp);CHKERRQ(ierr); 174095fce210SBarry Smith sf->degreeknown = PETSC_TRUE; 174195fce210SBarry Smith } 174295fce210SBarry Smith *degree = sf->degree; 174395fce210SBarry Smith PetscFunctionReturn(0); 174495fce210SBarry Smith } 174595fce210SBarry Smith 1746673100f5SVaclav Hapla 1747673100f5SVaclav Hapla /*@C 174866dfcd1aSVaclav Hapla PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()). 174966dfcd1aSVaclav Hapla Each multi-root is assigned index of the corresponding original root. 1750673100f5SVaclav Hapla 1751673100f5SVaclav Hapla Collective 1752673100f5SVaclav Hapla 1753673100f5SVaclav Hapla Input Arguments: 1754673100f5SVaclav Hapla + sf - star forest 1755673100f5SVaclav Hapla - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd() 1756673100f5SVaclav Hapla 1757673100f5SVaclav Hapla Output Arguments: 175866dfcd1aSVaclav Hapla + nMultiRoots - (optional) number of multi-roots (roots of multi-SF) 175966dfcd1aSVaclav Hapla - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots 1760673100f5SVaclav Hapla 1761673100f5SVaclav Hapla Level: developer 1762673100f5SVaclav Hapla 1763ffe67aa5SVáclav Hapla Notes: 1764ffe67aa5SVáclav Hapla The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed. 1765ffe67aa5SVáclav Hapla 1766673100f5SVaclav Hapla .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF() 1767673100f5SVaclav Hapla @*/ 176866dfcd1aSVaclav Hapla PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[]) 1769673100f5SVaclav Hapla { 1770673100f5SVaclav Hapla PetscSF msf; 1771673100f5SVaclav Hapla PetscInt i, j, k, nroots, nmroots; 1772673100f5SVaclav Hapla PetscErrorCode ierr; 1773673100f5SVaclav Hapla 1774673100f5SVaclav Hapla PetscFunctionBegin; 1775673100f5SVaclav Hapla PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 1776673100f5SVaclav Hapla ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 177729328920SVaclav Hapla if (nroots) PetscValidIntPointer(degree,2); 177866dfcd1aSVaclav Hapla if (nMultiRoots) PetscValidIntPointer(nMultiRoots,3); 177966dfcd1aSVaclav Hapla PetscValidPointer(multiRootsOrigNumbering,4); 1780673100f5SVaclav Hapla ierr = PetscSFGetMultiSF(sf,&msf);CHKERRQ(ierr); 1781673100f5SVaclav Hapla ierr = PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);CHKERRQ(ierr); 178266dfcd1aSVaclav Hapla ierr = PetscMalloc1(nmroots, multiRootsOrigNumbering);CHKERRQ(ierr); 1783673100f5SVaclav Hapla for (i=0,j=0,k=0; i<nroots; i++) { 1784673100f5SVaclav Hapla if (!degree[i]) continue; 1785673100f5SVaclav Hapla for (j=0; j<degree[i]; j++,k++) { 178666dfcd1aSVaclav Hapla (*multiRootsOrigNumbering)[k] = i; 1787673100f5SVaclav Hapla } 1788673100f5SVaclav Hapla } 1789673100f5SVaclav Hapla if (PetscUnlikely(k != nmroots)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sanity check fail"); 179066dfcd1aSVaclav Hapla if (nMultiRoots) *nMultiRoots = nmroots; 1791673100f5SVaclav Hapla PetscFunctionReturn(0); 1792673100f5SVaclav Hapla } 1793673100f5SVaclav Hapla 179495fce210SBarry Smith /*@C 179595fce210SBarry Smith PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd() 179695fce210SBarry Smith 179795fce210SBarry Smith Collective 179895fce210SBarry Smith 179995fce210SBarry Smith Input Arguments: 180095fce210SBarry Smith + sf - star forest 180195fce210SBarry Smith . unit - data type 180295fce210SBarry Smith - leafdata - leaf data to gather to roots 180395fce210SBarry Smith 180495fce210SBarry Smith Output Argument: 180595fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 180695fce210SBarry Smith 180795fce210SBarry Smith Level: intermediate 180895fce210SBarry Smith 180995fce210SBarry Smith .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 181095fce210SBarry Smith @*/ 181195fce210SBarry Smith PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 181295fce210SBarry Smith { 181395fce210SBarry Smith PetscErrorCode ierr; 1814a5526d50SJunchao Zhang PetscSF multi = NULL; 181595fce210SBarry Smith 181695fce210SBarry Smith PetscFunctionBegin; 181795fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 181829046d53SLisandro Dalcin ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 181995fce210SBarry Smith ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 18208bfbc91cSJed Brown ierr = PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 182195fce210SBarry Smith PetscFunctionReturn(0); 182295fce210SBarry Smith } 182395fce210SBarry Smith 182495fce210SBarry Smith /*@C 182595fce210SBarry Smith PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin() 182695fce210SBarry Smith 182795fce210SBarry Smith Collective 182895fce210SBarry Smith 182995fce210SBarry Smith Input Arguments: 183095fce210SBarry Smith + sf - star forest 183195fce210SBarry Smith . unit - data type 183295fce210SBarry Smith - leafdata - leaf data to gather to roots 183395fce210SBarry Smith 183495fce210SBarry Smith Output Argument: 183595fce210SBarry Smith . multirootdata - root buffer to gather into, amount of space per root is equal to its degree 183695fce210SBarry Smith 183795fce210SBarry Smith Level: intermediate 183895fce210SBarry Smith 183995fce210SBarry Smith .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 184095fce210SBarry Smith @*/ 184195fce210SBarry Smith PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata) 184295fce210SBarry Smith { 184395fce210SBarry Smith PetscErrorCode ierr; 1844a5526d50SJunchao Zhang PetscSF multi = NULL; 184595fce210SBarry Smith 184695fce210SBarry Smith PetscFunctionBegin; 184795fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 184895fce210SBarry Smith ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 18498bfbc91cSJed Brown ierr = PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPIU_REPLACE);CHKERRQ(ierr); 185095fce210SBarry Smith PetscFunctionReturn(0); 185195fce210SBarry Smith } 185295fce210SBarry Smith 185395fce210SBarry Smith /*@C 185495fce210SBarry Smith PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd() 185595fce210SBarry Smith 185695fce210SBarry Smith Collective 185795fce210SBarry Smith 185895fce210SBarry Smith Input Arguments: 185995fce210SBarry Smith + sf - star forest 186095fce210SBarry Smith . unit - data type 186195fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf 186295fce210SBarry Smith 186395fce210SBarry Smith Output Argument: 186495fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root 186595fce210SBarry Smith 186695fce210SBarry Smith Level: intermediate 186795fce210SBarry Smith 186895fce210SBarry Smith .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin() 186995fce210SBarry Smith @*/ 187095fce210SBarry Smith PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 187195fce210SBarry Smith { 187295fce210SBarry Smith PetscErrorCode ierr; 1873a5526d50SJunchao Zhang PetscSF multi = NULL; 187495fce210SBarry Smith 187595fce210SBarry Smith PetscFunctionBegin; 187695fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 187795fce210SBarry Smith ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 187895fce210SBarry Smith ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 187995fce210SBarry Smith ierr = PetscSFBcastBegin(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 188095fce210SBarry Smith PetscFunctionReturn(0); 188195fce210SBarry Smith } 188295fce210SBarry Smith 188395fce210SBarry Smith /*@C 188495fce210SBarry Smith PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin() 188595fce210SBarry Smith 188695fce210SBarry Smith Collective 188795fce210SBarry Smith 188895fce210SBarry Smith Input Arguments: 188995fce210SBarry Smith + sf - star forest 189095fce210SBarry Smith . unit - data type 189195fce210SBarry Smith - multirootdata - root buffer to send to each leaf, one unit of data per leaf 189295fce210SBarry Smith 189395fce210SBarry Smith Output Argument: 189495fce210SBarry Smith . leafdata - leaf data to be update with personal data from each respective root 189595fce210SBarry Smith 189695fce210SBarry Smith Level: intermediate 189795fce210SBarry Smith 189895fce210SBarry Smith .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd() 189995fce210SBarry Smith @*/ 190095fce210SBarry Smith PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata) 190195fce210SBarry Smith { 190295fce210SBarry Smith PetscErrorCode ierr; 1903a5526d50SJunchao Zhang PetscSF multi = NULL; 190495fce210SBarry Smith 190595fce210SBarry Smith PetscFunctionBegin; 190695fce210SBarry Smith PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 190795fce210SBarry Smith ierr = PetscSFGetMultiSF(sf,&multi);CHKERRQ(ierr); 190895fce210SBarry Smith ierr = PetscSFBcastEnd(multi,unit,multirootdata,leafdata);CHKERRQ(ierr); 190995fce210SBarry Smith PetscFunctionReturn(0); 191095fce210SBarry Smith } 1911a7b3aa13SAta Mesgarnejad 1912a072220fSLawrence Mitchell static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf) 1913a072220fSLawrence Mitchell { 1914a072220fSLawrence Mitchell PetscInt i, n, nleaves; 1915a072220fSLawrence Mitchell const PetscInt *ilocal = NULL; 1916a072220fSLawrence Mitchell PetscHSetI seen; 1917a072220fSLawrence Mitchell PetscErrorCode ierr; 1918a072220fSLawrence Mitchell 1919a072220fSLawrence Mitchell PetscFunctionBegin; 192076bd3646SJed Brown if (!PetscDefined(USE_DEBUG)) PetscFunctionReturn(0); 1921a072220fSLawrence Mitchell ierr = PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);CHKERRQ(ierr); 1922a072220fSLawrence Mitchell ierr = PetscHSetICreate(&seen);CHKERRQ(ierr); 1923a072220fSLawrence Mitchell for (i = 0; i < nleaves; i++) { 1924a072220fSLawrence Mitchell const PetscInt leaf = ilocal ? ilocal[i] : i; 1925a072220fSLawrence Mitchell ierr = PetscHSetIAdd(seen,leaf);CHKERRQ(ierr); 1926a072220fSLawrence Mitchell } 1927a072220fSLawrence Mitchell ierr = PetscHSetIGetSize(seen,&n);CHKERRQ(ierr); 1928a072220fSLawrence Mitchell if (n != nleaves) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided leaves have repeated values: all leaves must be unique"); 1929a072220fSLawrence Mitchell ierr = PetscHSetIDestroy(&seen);CHKERRQ(ierr); 1930a072220fSLawrence Mitchell PetscFunctionReturn(0); 1931a072220fSLawrence Mitchell } 193254729392SStefano Zampini 1933a7b3aa13SAta Mesgarnejad /*@ 193404c0ada0SJunchao Zhang PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view 1935a7b3aa13SAta Mesgarnejad 1936a7b3aa13SAta Mesgarnejad Input Parameters: 193754729392SStefano Zampini + sfA - The first PetscSF 193854729392SStefano Zampini - sfB - The second PetscSF 1939a7b3aa13SAta Mesgarnejad 1940a7b3aa13SAta Mesgarnejad Output Parameters: 194154729392SStefano Zampini . sfBA - The composite SF 1942a7b3aa13SAta Mesgarnejad 1943a7b3aa13SAta Mesgarnejad Level: developer 1944a7b3aa13SAta Mesgarnejad 1945a072220fSLawrence Mitchell Notes: 194654729392SStefano Zampini Currently, the two SFs must be defined on congruent communicators and they must be true star 194754729392SStefano Zampini forests, i.e. the same leaf is not connected with different roots. 194854729392SStefano Zampini 194954729392SStefano Zampini sfA's leaf space and sfB's root space might be partially overlapped. The composition builds 195054729392SStefano Zampini a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected 195154729392SStefano Zampini nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a 195254729392SStefano Zampini Bcast on sfA, then a Bcast on sfB, on connected nodes. 1953a072220fSLawrence Mitchell 195404c0ada0SJunchao Zhang .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph() 1955a7b3aa13SAta Mesgarnejad @*/ 1956a7b3aa13SAta Mesgarnejad PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 1957a7b3aa13SAta Mesgarnejad { 195804c0ada0SJunchao Zhang PetscErrorCode ierr; 1959a7b3aa13SAta Mesgarnejad const PetscSFNode *remotePointsA,*remotePointsB; 1960d41018fbSJunchao Zhang PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB; 196154729392SStefano Zampini const PetscInt *localPointsA,*localPointsB; 196254729392SStefano Zampini PetscInt *localPointsBA; 196354729392SStefano Zampini PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA; 196454729392SStefano Zampini PetscBool denseB; 1965a7b3aa13SAta Mesgarnejad 1966a7b3aa13SAta Mesgarnejad PetscFunctionBegin; 1967a7b3aa13SAta Mesgarnejad PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 196829046d53SLisandro Dalcin PetscSFCheckGraphSet(sfA,1); 196929046d53SLisandro Dalcin PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 197029046d53SLisandro Dalcin PetscSFCheckGraphSet(sfB,2); 197154729392SStefano Zampini PetscCheckSameComm(sfA,1,sfB,2); 197229046d53SLisandro Dalcin PetscValidPointer(sfBA,3); 197354729392SStefano Zampini ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 197454729392SStefano Zampini ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 197554729392SStefano Zampini 1976a7b3aa13SAta Mesgarnejad ierr = PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);CHKERRQ(ierr); 1977a7b3aa13SAta Mesgarnejad ierr = PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);CHKERRQ(ierr); 1978d41018fbSJunchao Zhang if (localPointsA) { 197954729392SStefano Zampini ierr = PetscMalloc1(numRootsB,&reorderedRemotePointsA);CHKERRQ(ierr); 198054729392SStefano Zampini for (i=0; i<numRootsB; i++) { 198154729392SStefano Zampini reorderedRemotePointsA[i].rank = -1; 198254729392SStefano Zampini reorderedRemotePointsA[i].index = -1; 198354729392SStefano Zampini } 198454729392SStefano Zampini for (i=0; i<numLeavesA; i++) { 198554729392SStefano Zampini if (localPointsA[i] >= numRootsB) continue; 198654729392SStefano Zampini reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i]; 198754729392SStefano Zampini } 1988d41018fbSJunchao Zhang remotePointsA = reorderedRemotePointsA; 1989d41018fbSJunchao Zhang } 1990d41018fbSJunchao Zhang ierr = PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);CHKERRQ(ierr); 1991d41018fbSJunchao Zhang ierr = PetscMalloc1(maxleaf-minleaf+1,&leafdataB);CHKERRQ(ierr); 1992d41018fbSJunchao Zhang ierr = PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr); 1993d41018fbSJunchao Zhang ierr = PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf);CHKERRQ(ierr); 1994d41018fbSJunchao Zhang ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 1995d41018fbSJunchao Zhang 199654729392SStefano Zampini denseB = (PetscBool)!localPointsB; 199754729392SStefano Zampini for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 199854729392SStefano Zampini if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE; 199954729392SStefano Zampini else numLeavesBA++; 200054729392SStefano Zampini } 200154729392SStefano Zampini if (denseB) { 2002d41018fbSJunchao Zhang localPointsBA = NULL; 2003d41018fbSJunchao Zhang remotePointsBA = leafdataB; 2004d41018fbSJunchao Zhang } else { 200554729392SStefano Zampini ierr = PetscMalloc1(numLeavesBA,&localPointsBA);CHKERRQ(ierr); 200654729392SStefano Zampini ierr = PetscMalloc1(numLeavesBA,&remotePointsBA);CHKERRQ(ierr); 200754729392SStefano Zampini for (i=0,numLeavesBA=0; i<numLeavesB; i++) { 200854729392SStefano Zampini const PetscInt l = localPointsB ? localPointsB[i] : i; 200954729392SStefano Zampini 201054729392SStefano Zampini if (leafdataB[l-minleaf].rank == -1) continue; 201154729392SStefano Zampini remotePointsBA[numLeavesBA] = leafdataB[l-minleaf]; 201254729392SStefano Zampini localPointsBA[numLeavesBA] = l; 201354729392SStefano Zampini numLeavesBA++; 201454729392SStefano Zampini } 2015d41018fbSJunchao Zhang ierr = PetscFree(leafdataB);CHKERRQ(ierr); 2016d41018fbSJunchao Zhang } 201754729392SStefano Zampini ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 201820c24465SJunchao Zhang ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr); 201954729392SStefano Zampini ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 2020a7b3aa13SAta Mesgarnejad PetscFunctionReturn(0); 2021a7b3aa13SAta Mesgarnejad } 20221c6ba672SJunchao Zhang 202304c0ada0SJunchao Zhang /*@ 202454729392SStefano Zampini PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one 202504c0ada0SJunchao Zhang 202604c0ada0SJunchao Zhang Input Parameters: 202754729392SStefano Zampini + sfA - The first PetscSF 202854729392SStefano Zampini - sfB - The second PetscSF 202904c0ada0SJunchao Zhang 203004c0ada0SJunchao Zhang Output Parameters: 203154729392SStefano Zampini . sfBA - The composite SF. 203204c0ada0SJunchao Zhang 203304c0ada0SJunchao Zhang Level: developer 203404c0ada0SJunchao Zhang 203554729392SStefano Zampini Notes: 203654729392SStefano Zampini Currently, the two SFs must be defined on congruent communicators and they must be true star 203754729392SStefano Zampini forests, i.e. the same leaf is not connected with different roots. Even more, all roots of the 203854729392SStefano Zampini second SF must have a degree of 1, i.e., no roots have more than one leaf connected. 203954729392SStefano Zampini 204054729392SStefano Zampini sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds 204154729392SStefano Zampini a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected 204254729392SStefano Zampini roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then 204354729392SStefano Zampini a Reduce on sfB, on connected roots. 204454729392SStefano Zampini 204554729392SStefano Zampini .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF() 204604c0ada0SJunchao Zhang @*/ 204704c0ada0SJunchao Zhang PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA) 204804c0ada0SJunchao Zhang { 204904c0ada0SJunchao Zhang PetscErrorCode ierr; 205004c0ada0SJunchao Zhang const PetscSFNode *remotePointsA,*remotePointsB; 205104c0ada0SJunchao Zhang PetscSFNode *remotePointsBA; 205204c0ada0SJunchao Zhang const PetscInt *localPointsA,*localPointsB; 205354729392SStefano Zampini PetscSFNode *reorderedRemotePointsA = NULL; 205454729392SStefano Zampini PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA; 20555b0d146aSStefano Zampini MPI_Op op; 20565b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES) 20575b0d146aSStefano Zampini PetscBool iswin; 20585b0d146aSStefano Zampini #endif 205904c0ada0SJunchao Zhang 206004c0ada0SJunchao Zhang PetscFunctionBegin; 206104c0ada0SJunchao Zhang PetscValidHeaderSpecific(sfA,PETSCSF_CLASSID,1); 206204c0ada0SJunchao Zhang PetscSFCheckGraphSet(sfA,1); 206304c0ada0SJunchao Zhang PetscValidHeaderSpecific(sfB,PETSCSF_CLASSID,2); 206404c0ada0SJunchao Zhang PetscSFCheckGraphSet(sfB,2); 206554729392SStefano Zampini PetscCheckSameComm(sfA,1,sfB,2); 206604c0ada0SJunchao Zhang PetscValidPointer(sfBA,3); 206754729392SStefano Zampini ierr = PetscSFCheckLeavesUnique_Private(sfA);CHKERRQ(ierr); 206854729392SStefano Zampini ierr = PetscSFCheckLeavesUnique_Private(sfB);CHKERRQ(ierr); 206954729392SStefano Zampini 207004c0ada0SJunchao Zhang ierr = PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);CHKERRQ(ierr); 207104c0ada0SJunchao Zhang ierr = PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);CHKERRQ(ierr); 20725b0d146aSStefano Zampini 20735b0d146aSStefano Zampini /* TODO: Check roots of sfB have degree of 1 */ 20745b0d146aSStefano Zampini /* Once we implement it, we can replace the MPI_MAXLOC 20755b0d146aSStefano Zampini with MPIU_REPLACE. In that case, MPI_MAXLOC and MPIU_REPLACE have the same effect. 20765b0d146aSStefano Zampini We use MPI_MAXLOC only to have a deterministic output from this routine if 20775b0d146aSStefano Zampini the root condition is not meet. 20785b0d146aSStefano Zampini */ 20795b0d146aSStefano Zampini op = MPI_MAXLOC; 20805b0d146aSStefano Zampini #if defined(PETSC_USE_64BIT_INDICES) 20815b0d146aSStefano Zampini /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */ 20825b0d146aSStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);CHKERRQ(ierr); 20835b0d146aSStefano Zampini if (iswin) op = MPIU_REPLACE; 20845b0d146aSStefano Zampini #endif 20855b0d146aSStefano Zampini 208654729392SStefano Zampini ierr = PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);CHKERRQ(ierr); 208754729392SStefano Zampini ierr = PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);CHKERRQ(ierr); 208854729392SStefano Zampini for (i=0; i<maxleaf - minleaf + 1; i++) { 208954729392SStefano Zampini reorderedRemotePointsA[i].rank = -1; 209054729392SStefano Zampini reorderedRemotePointsA[i].index = -1; 209154729392SStefano Zampini } 209254729392SStefano Zampini if (localPointsA) { 209354729392SStefano Zampini for (i=0; i<numLeavesA; i++) { 209454729392SStefano Zampini if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue; 209554729392SStefano Zampini reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i]; 209654729392SStefano Zampini } 209754729392SStefano Zampini } else { 209854729392SStefano Zampini for (i=0; i<numLeavesA; i++) { 209954729392SStefano Zampini if (i > maxleaf || i < minleaf) continue; 210054729392SStefano Zampini reorderedRemotePointsA[i - minleaf] = remotePointsA[i]; 210154729392SStefano Zampini } 210254729392SStefano Zampini } 210354729392SStefano Zampini 210454729392SStefano Zampini ierr = PetscMalloc1(numRootsB,&localPointsBA);CHKERRQ(ierr); 210504c0ada0SJunchao Zhang ierr = PetscMalloc1(numRootsB,&remotePointsBA);CHKERRQ(ierr); 210654729392SStefano Zampini for (i=0; i<numRootsB; i++) { 210754729392SStefano Zampini remotePointsBA[i].rank = -1; 210854729392SStefano Zampini remotePointsBA[i].index = -1; 210954729392SStefano Zampini } 211054729392SStefano Zampini 21115b0d146aSStefano Zampini ierr = PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 21125b0d146aSStefano Zampini ierr = PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);CHKERRQ(ierr); 211354729392SStefano Zampini ierr = PetscFree(reorderedRemotePointsA);CHKERRQ(ierr); 211454729392SStefano Zampini for (i=0,numLeavesBA=0; i<numRootsB; i++) { 211554729392SStefano Zampini if (remotePointsBA[i].rank == -1) continue; 211654729392SStefano Zampini remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank; 211754729392SStefano Zampini remotePointsBA[numLeavesBA].index = remotePointsBA[i].index; 211854729392SStefano Zampini localPointsBA[numLeavesBA] = i; 211954729392SStefano Zampini numLeavesBA++; 212054729392SStefano Zampini } 212154729392SStefano Zampini ierr = PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);CHKERRQ(ierr); 212220c24465SJunchao Zhang ierr = PetscSFSetFromOptions(*sfBA);CHKERRQ(ierr); 212354729392SStefano Zampini ierr = PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);CHKERRQ(ierr); 212404c0ada0SJunchao Zhang PetscFunctionReturn(0); 212504c0ada0SJunchao Zhang } 212604c0ada0SJunchao Zhang 21271c6ba672SJunchao Zhang /* 21281c6ba672SJunchao Zhang PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF 21291c6ba672SJunchao Zhang 21301c6ba672SJunchao Zhang Input Parameters: 21311c6ba672SJunchao Zhang . sf - The global PetscSF 21321c6ba672SJunchao Zhang 21331c6ba672SJunchao Zhang Output Parameters: 21341c6ba672SJunchao Zhang . out - The local PetscSF 21351c6ba672SJunchao Zhang */ 21361c6ba672SJunchao Zhang PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out) 21371c6ba672SJunchao Zhang { 21381c6ba672SJunchao Zhang MPI_Comm comm; 21391c6ba672SJunchao Zhang PetscMPIInt myrank; 21401c6ba672SJunchao Zhang const PetscInt *ilocal; 21411c6ba672SJunchao Zhang const PetscSFNode *iremote; 21421c6ba672SJunchao Zhang PetscInt i,j,nroots,nleaves,lnleaves,*lilocal; 21431c6ba672SJunchao Zhang PetscSFNode *liremote; 21441c6ba672SJunchao Zhang PetscSF lsf; 21451c6ba672SJunchao Zhang PetscErrorCode ierr; 21461c6ba672SJunchao Zhang 21471c6ba672SJunchao Zhang PetscFunctionBegin; 21481c6ba672SJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 21491c6ba672SJunchao Zhang if (sf->ops->CreateLocalSF) { 21501c6ba672SJunchao Zhang ierr = (*sf->ops->CreateLocalSF)(sf,out);CHKERRQ(ierr); 21511c6ba672SJunchao Zhang } else { 21521c6ba672SJunchao Zhang /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */ 21531c6ba672SJunchao Zhang ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 21541c6ba672SJunchao Zhang ierr = MPI_Comm_rank(comm,&myrank);CHKERRQ(ierr); 21551c6ba672SJunchao Zhang 21561c6ba672SJunchao Zhang /* Find out local edges and build a local SF */ 21571c6ba672SJunchao Zhang ierr = PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);CHKERRQ(ierr); 21581c6ba672SJunchao Zhang for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;} 21591c6ba672SJunchao Zhang ierr = PetscMalloc1(lnleaves,&lilocal);CHKERRQ(ierr); 21601c6ba672SJunchao Zhang ierr = PetscMalloc1(lnleaves,&liremote);CHKERRQ(ierr); 21611c6ba672SJunchao Zhang 21621c6ba672SJunchao Zhang for (i=j=0; i<nleaves; i++) { 21631c6ba672SJunchao Zhang if (iremote[i].rank == (PetscInt)myrank) { 21641c6ba672SJunchao Zhang lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */ 21651c6ba672SJunchao Zhang liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */ 21661c6ba672SJunchao Zhang liremote[j].index = iremote[i].index; 21671c6ba672SJunchao Zhang j++; 21681c6ba672SJunchao Zhang } 21691c6ba672SJunchao Zhang } 21701c6ba672SJunchao Zhang ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 217120c24465SJunchao Zhang ierr = PetscSFSetFromOptions(lsf);CHKERRQ(ierr); 21721c6ba672SJunchao Zhang ierr = PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 21731c6ba672SJunchao Zhang ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 21741c6ba672SJunchao Zhang *out = lsf; 21751c6ba672SJunchao Zhang } 21761c6ba672SJunchao Zhang PetscFunctionReturn(0); 21771c6ba672SJunchao Zhang } 2178dd5b3ca6SJunchao Zhang 2179dd5b3ca6SJunchao Zhang /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */ 2180dd5b3ca6SJunchao Zhang PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata) 2181dd5b3ca6SJunchao Zhang { 2182dd5b3ca6SJunchao Zhang PetscErrorCode ierr; 2183eb02082bSJunchao Zhang PetscMemType rootmtype,leafmtype; 2184dd5b3ca6SJunchao Zhang 2185dd5b3ca6SJunchao Zhang PetscFunctionBegin; 2186dd5b3ca6SJunchao Zhang PetscValidHeaderSpecific(sf,PETSCSF_CLASSID,1); 2187dd5b3ca6SJunchao Zhang ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 2188dd5b3ca6SJunchao Zhang ierr = PetscLogEventBegin(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 2189eb02082bSJunchao Zhang ierr = PetscGetMemType(rootdata,&rootmtype);CHKERRQ(ierr); 2190eb02082bSJunchao Zhang ierr = PetscGetMemType(leafdata,&leafmtype);CHKERRQ(ierr); 2191dd5b3ca6SJunchao Zhang if (sf->ops->BcastToZero) { 2192eb02082bSJunchao Zhang ierr = (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);CHKERRQ(ierr); 2193dd5b3ca6SJunchao Zhang } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type"); 2194dd5b3ca6SJunchao Zhang ierr = PetscLogEventEnd(PETSCSF_BcastAndOpBegin,sf,0,0,0);CHKERRQ(ierr); 2195dd5b3ca6SJunchao Zhang PetscFunctionReturn(0); 2196dd5b3ca6SJunchao Zhang } 2197dd5b3ca6SJunchao Zhang 2198