18cd53115SBarry Smith 240e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h> 3cd620004SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfpack.h> 4b23bfdefSJunchao Zhang 540e23c03SJunchao Zhang /*===================================================================================*/ 640e23c03SJunchao Zhang /* SF public interface implementations */ 740e23c03SJunchao Zhang /*===================================================================================*/ 840e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf) 995fce210SBarry Smith { 1095fce210SBarry Smith PetscErrorCode ierr; 11b23bfdefSJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 1295fce210SBarry Smith PetscInt *rlengths,*ilengths,i; 1340e23c03SJunchao Zhang PetscMPIInt rank,niranks,*iranks,tag; 1495fce210SBarry Smith MPI_Comm comm; 15b5a8e515SJed Brown MPI_Group group; 1640e23c03SJunchao Zhang MPI_Request *rootreqs,*leafreqs; 1795fce210SBarry Smith 1895fce210SBarry Smith PetscFunctionBegin; 19b5a8e515SJed Brown ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRQ(ierr); 20b5a8e515SJed Brown ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr); 21b5a8e515SJed Brown ierr = MPI_Group_free(&group);CHKERRQ(ierr); 2295fce210SBarry Smith ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 2340e23c03SJunchao Zhang ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr); 24c943f53fSJed Brown ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2595fce210SBarry Smith /* 2695fce210SBarry Smith * Inform roots about how many leaves and from which ranks 2795fce210SBarry Smith */ 28785e854fSJed Brown ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr); 29cd620004SJunchao Zhang /* Determine number, sending ranks and length of incoming */ 3095fce210SBarry Smith for (i=0; i<sf->nranks; i++) { 3195fce210SBarry Smith rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ 3295fce210SBarry Smith } 3340e23c03SJunchao Zhang ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr); 34c943f53fSJed Brown 350b899082SJunchao Zhang /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why. 360b899082SJunchao Zhang We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is 370b899082SJunchao Zhang small and the sorting is cheap. 380b899082SJunchao Zhang */ 390b899082SJunchao Zhang ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr); 400b899082SJunchao Zhang 41c943f53fSJed Brown /* Partition into distinguished and non-distinguished incoming ranks */ 42c943f53fSJed Brown bas->ndiranks = sf->ndranks; 43c943f53fSJed Brown bas->niranks = bas->ndiranks + niranks; 44c943f53fSJed Brown ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr); 45c943f53fSJed Brown bas->ioffset[0] = 0; 46c943f53fSJed Brown for (i=0; i<bas->ndiranks; i++) { 47c943f53fSJed Brown bas->iranks[i] = sf->ranks[i]; 48c943f53fSJed Brown bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i]; 49c943f53fSJed Brown } 5040e23c03SJunchao Zhang if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks"); 5140e23c03SJunchao Zhang for (; i<bas->niranks; i++) { 52c943f53fSJed Brown bas->iranks[i] = iranks[i-bas->ndiranks]; 53c943f53fSJed Brown bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks]; 54c943f53fSJed Brown } 55c943f53fSJed Brown bas->itotal = bas->ioffset[i]; 5695fce210SBarry Smith ierr = PetscFree(rlengths);CHKERRQ(ierr); 57c943f53fSJed Brown ierr = PetscFree(iranks);CHKERRQ(ierr); 58c943f53fSJed Brown ierr = PetscFree(ilengths);CHKERRQ(ierr); 5995fce210SBarry Smith 6095fce210SBarry Smith /* Send leaf identities to roots */ 61c943f53fSJed Brown ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr); 6240e23c03SJunchao Zhang ierr = PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);CHKERRQ(ierr); 6340e23c03SJunchao Zhang for (i=bas->ndiranks; i<bas->niranks; i++) { 6440e23c03SJunchao Zhang ierr = MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRQ(ierr); 6540e23c03SJunchao Zhang } 6640e23c03SJunchao Zhang for (i=0; i<sf->nranks; i++) { 6795fce210SBarry Smith PetscMPIInt npoints; 6895fce210SBarry Smith ierr = PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);CHKERRQ(ierr); 6940e23c03SJunchao Zhang if (i < sf->ndranks) { 7040e23c03SJunchao Zhang if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank"); 7140e23c03SJunchao Zhang if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank"); 7240e23c03SJunchao Zhang if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths"); 7340e23c03SJunchao Zhang ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr); 74c943f53fSJed Brown continue; 75c943f53fSJed Brown } 7640e23c03SJunchao Zhang ierr = MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRQ(ierr); 77bf39f1bfSJed Brown } 7840e23c03SJunchao Zhang ierr = MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7940e23c03SJunchao Zhang ierr = MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 8040e23c03SJunchao Zhang ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr); 8195fce210SBarry Smith 82cd620004SJunchao Zhang sf->nleafreqs = sf->nranks - sf->ndranks; 83cd620004SJunchao Zhang bas->nrootreqs = bas->niranks - bas->ndiranks; 84cd620004SJunchao Zhang sf->persistent = PETSC_TRUE; 85eb02082bSJunchao Zhang 86cd620004SJunchao Zhang /* Setup fields related to packing */ 87cd620004SJunchao Zhang ierr = PetscSFSetUpPackFields(sf);CHKERRQ(ierr); 8895fce210SBarry Smith PetscFunctionReturn(0); 8995fce210SBarry Smith } 9095fce210SBarry Smith 9140e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf) 9295fce210SBarry Smith { 9395fce210SBarry Smith PetscErrorCode ierr; 94cd620004SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 9595fce210SBarry Smith 9695fce210SBarry Smith PetscFunctionBegin; 9729046d53SLisandro Dalcin if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 98c943f53fSJed Brown ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr); 99c943f53fSJed Brown ierr = PetscFree(bas->irootloc);CHKERRQ(ierr); 100*7fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 101*7fd2d3dbSJunchao Zhang for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]);CHKERRQ(ierr);} 102eb02082bSJunchao Zhang #endif 103cd620004SJunchao Zhang ierr = PetscSFLinkDestroy(sf,&bas->avail);CHKERRQ(ierr); 104cd620004SJunchao Zhang ierr = PetscSFResetPackFields(sf);CHKERRQ(ierr); 10595fce210SBarry Smith PetscFunctionReturn(0); 10695fce210SBarry Smith } 10795fce210SBarry Smith 10840e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf) 10995fce210SBarry Smith { 11095fce210SBarry Smith PetscErrorCode ierr; 11195fce210SBarry Smith 11295fce210SBarry Smith PetscFunctionBegin; 113f6d956f6SStefano Zampini ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr); 11495fce210SBarry Smith ierr = PetscFree(sf->data);CHKERRQ(ierr); 11595fce210SBarry Smith PetscFunctionReturn(0); 11695fce210SBarry Smith } 11795fce210SBarry Smith 11840e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer) 11995fce210SBarry Smith { 12095fce210SBarry Smith PetscErrorCode ierr; 12195fce210SBarry Smith PetscBool iascii; 12295fce210SBarry Smith 12395fce210SBarry Smith PetscFunctionBegin; 12495fce210SBarry Smith ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 125b23bfdefSJunchao Zhang if (iascii) {ierr = PetscViewerASCIIPrintf(viewer," sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);} 12695fce210SBarry Smith PetscFunctionReturn(0); 12795fce210SBarry Smith } 12895fce210SBarry Smith 129eb02082bSJunchao Zhang static PetscErrorCode PetscSFBcastAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 13095fce210SBarry Smith { 13195fce210SBarry Smith PetscErrorCode ierr; 132cd620004SJunchao Zhang PetscSFLink link = NULL; 133851d6770SJunchao Zhang MPI_Request *rootreqs = NULL,*leafreqs = NULL; 134cd620004SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 13595fce210SBarry Smith 13695fce210SBarry Smith PetscFunctionBegin; 137cd620004SJunchao Zhang /* Create a communication link, which provides buffers & MPI requests etc */ 138cd620004SJunchao Zhang ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr); 139cd620004SJunchao Zhang /* Get MPI requests from the link. We do not need buffers explicitly since we use persistent MPI */ 140cd620004SJunchao Zhang ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,NULL,NULL,&rootreqs,&leafreqs);CHKERRQ(ierr); 141cd620004SJunchao Zhang /* Post Irecv for remote */ 142cd620004SJunchao Zhang ierr = MPI_Startall_irecv(sf->leafbuflen[PETSCSF_REMOTE],unit,sf->nleafreqs,leafreqs);CHKERRQ(ierr); 143cd620004SJunchao Zhang /* Pack rootdata and do Isend for remote */ 144cd620004SJunchao Zhang ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr); 145cd620004SJunchao Zhang ierr = MPI_Startall_isend(bas->rootbuflen[PETSCSF_REMOTE],unit,bas->nrootreqs,rootreqs);CHKERRQ(ierr); 146cd620004SJunchao Zhang /* Do local BcastAndOp, which overlaps with the irecv/isend above */ 147cd620004SJunchao Zhang ierr = PetscSFLinkBcastAndOpLocal(sf,link,rootdata,leafdata,op);CHKERRQ(ierr); 14895fce210SBarry Smith PetscFunctionReturn(0); 14995fce210SBarry Smith } 15095fce210SBarry Smith 15100816365SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 15295fce210SBarry Smith { 15395fce210SBarry Smith PetscErrorCode ierr; 154cd620004SJunchao Zhang PetscSFLink link = NULL; 15595fce210SBarry Smith 15695fce210SBarry Smith PetscFunctionBegin; 157cd620004SJunchao Zhang /* Retrieve the link used in XxxBegin() with root/leafdata as key */ 158cd620004SJunchao Zhang ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 159cd620004SJunchao Zhang /* Wait for the completion of mpi */ 160cd620004SJunchao Zhang ierr = PetscSFLinkMPIWaitall(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 161cd620004SJunchao Zhang /* Unpack leafdata and reclaim the link */ 162cd620004SJunchao Zhang ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafdata,op);CHKERRQ(ierr); 163cd620004SJunchao Zhang ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr); 164cd620004SJunchao Zhang PetscFunctionReturn(0); 165cd620004SJunchao Zhang } 166cd620004SJunchao Zhang 167cd620004SJunchao Zhang /* Shared by ReduceBegin and FetchAndOpBegin */ 168cd620004SJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out) 169cd620004SJunchao Zhang { 170cd620004SJunchao Zhang PetscErrorCode ierr; 171cd620004SJunchao Zhang PetscSFLink link; 172cd620004SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 173cd620004SJunchao Zhang MPI_Request *rootreqs = NULL,*leafreqs = NULL; 174cd620004SJunchao Zhang 175cd620004SJunchao Zhang PetscFunctionBegin; 176cd620004SJunchao Zhang ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);CHKERRQ(ierr); 177cd620004SJunchao Zhang ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,NULL,NULL,&rootreqs,&leafreqs);CHKERRQ(ierr); 178cd620004SJunchao Zhang ierr = MPI_Startall_irecv(bas->rootbuflen[PETSCSF_REMOTE],unit,bas->nrootreqs,rootreqs);CHKERRQ(ierr); 179cd620004SJunchao Zhang ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr); 180cd620004SJunchao Zhang ierr = MPI_Startall_isend(sf->leafbuflen[PETSCSF_REMOTE],unit,sf->nleafreqs,leafreqs);CHKERRQ(ierr); 181cd620004SJunchao Zhang *out = link; 18295fce210SBarry Smith PetscFunctionReturn(0); 18395fce210SBarry Smith } 18495fce210SBarry Smith 18595fce210SBarry Smith /* leaf -> root with reduction */ 186eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 18795fce210SBarry Smith { 18895fce210SBarry Smith PetscErrorCode ierr; 189cd620004SJunchao Zhang PetscSFLink link = NULL; 19095fce210SBarry Smith 19195fce210SBarry Smith PetscFunctionBegin; 192cd620004SJunchao Zhang ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr); 193cd620004SJunchao Zhang ierr = PetscSFLinkReduceLocal(sf,link,leafdata,rootdata,op);CHKERRQ(ierr); 19495fce210SBarry Smith PetscFunctionReturn(0); 19595fce210SBarry Smith } 19695fce210SBarry Smith 19700816365SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 19895fce210SBarry Smith { 19995fce210SBarry Smith PetscErrorCode ierr; 200cd620004SJunchao Zhang PetscSFLink link = NULL; 20195fce210SBarry Smith 20295fce210SBarry Smith PetscFunctionBegin; 203cd620004SJunchao Zhang ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 204cd620004SJunchao Zhang ierr = PetscSFLinkMPIWaitall(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr); 205cd620004SJunchao Zhang ierr = PetscSFLinkUnpackRootData(sf,link,PETSCSF_REMOTE,rootdata,op);CHKERRQ(ierr); 206cd620004SJunchao Zhang ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr); 20795fce210SBarry Smith PetscFunctionReturn(0); 20895fce210SBarry Smith } 20995fce210SBarry Smith 210eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op) 21195fce210SBarry Smith { 21295fce210SBarry Smith PetscErrorCode ierr; 213cd620004SJunchao Zhang PetscSFLink link = NULL; 21495fce210SBarry Smith 21595fce210SBarry Smith PetscFunctionBegin; 216cd620004SJunchao Zhang ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);CHKERRQ(ierr); 217cd620004SJunchao Zhang ierr = PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 21895fce210SBarry Smith PetscFunctionReturn(0); 21995fce210SBarry Smith } 22095fce210SBarry Smith 22100816365SJunchao Zhang static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 22295fce210SBarry Smith { 22395fce210SBarry Smith PetscErrorCode ierr; 224cd620004SJunchao Zhang PetscSFLink link = NULL; 225851d6770SJunchao Zhang MPI_Request *rootreqs = NULL,*leafreqs = NULL; 226cd620004SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 22795fce210SBarry Smith 22895fce210SBarry Smith PetscFunctionBegin; 229cd620004SJunchao Zhang ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 23095fce210SBarry Smith /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 231cd620004SJunchao Zhang ierr = PetscSFLinkMPIWaitall(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr); 232cd620004SJunchao Zhang /* Do fetch-and-op, the (remote) update results are in rootbuf */ 233cd620004SJunchao Zhang ierr = PetscSFLinkFetchRootData(sf,link,PETSCSF_REMOTE,rootdata,op);CHKERRQ(ierr); 23440e23c03SJunchao Zhang 235cd620004SJunchao Zhang /* Bcast rootbuf to leafupdate */ 236cd620004SJunchao Zhang ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,NULL,NULL,&rootreqs,&leafreqs);CHKERRQ(ierr); 237cd620004SJunchao Zhang /* Post leaf receives and root sends */ 238cd620004SJunchao Zhang ierr = MPI_Startall_irecv(sf->leafbuflen[PETSCSF_REMOTE],unit,sf->nleafreqs,leafreqs);CHKERRQ(ierr); 239cd620004SJunchao Zhang ierr = MPI_Startall_isend(bas->rootbuflen[PETSCSF_REMOTE],unit,bas->nrootreqs,rootreqs);CHKERRQ(ierr); 240b23bfdefSJunchao Zhang /* Unpack and insert fetched data into leaves */ 241cd620004SJunchao Zhang ierr = PetscSFLinkMPIWaitall(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 242cd620004SJunchao Zhang ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPIU_REPLACE);CHKERRQ(ierr); 243cd620004SJunchao Zhang ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr); 24495fce210SBarry Smith PetscFunctionReturn(0); 24595fce210SBarry Smith } 24695fce210SBarry Smith 24740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 2488750ddebSJunchao Zhang { 2498750ddebSJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 2508750ddebSJunchao Zhang 2518750ddebSJunchao Zhang PetscFunctionBegin; 2528750ddebSJunchao Zhang if (niranks) *niranks = bas->niranks; 2538750ddebSJunchao Zhang if (iranks) *iranks = bas->iranks; 2548750ddebSJunchao Zhang if (ioffset) *ioffset = bas->ioffset; 2558750ddebSJunchao Zhang if (irootloc) *irootloc = bas->irootloc; 2568750ddebSJunchao Zhang PetscFunctionReturn(0); 2578750ddebSJunchao Zhang } 2588750ddebSJunchao Zhang 259f659e5c7SJunchao Zhang /* An optimized PetscSFCreateEmbeddedSF. We aggresively make use of the established communication on sf. 260f659e5c7SJunchao Zhang We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[] 261f659e5c7SJunchao Zhang was sorted before calling the routine. 262f659e5c7SJunchao Zhang */ 263f659e5c7SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 264f659e5c7SJunchao Zhang { 265f659e5c7SJunchao Zhang PetscSF esf; 266cd620004SJunchao Zhang PetscInt esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote; 267cd620004SJunchao Zhang PetscInt i,j,p,q,nroots,esf_nleaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal; 268cd620004SJunchao Zhang char *rootdata,*leafdata,*leafmem; /* Only stores 0 or 1, so we can save memory with char */ 269f659e5c7SJunchao Zhang PetscMPIInt *esf_ranks; 270f659e5c7SJunchao Zhang const PetscMPIInt *ranks,*iranks; 271cd620004SJunchao Zhang const PetscInt *roffset,*rmine,*rremote,*ioffset,*irootloc; 272f659e5c7SJunchao Zhang PetscBool connected; 273f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 274f659e5c7SJunchao Zhang PetscSF_Basic *bas; 275f659e5c7SJunchao Zhang PetscErrorCode ierr; 276f659e5c7SJunchao Zhang 277f659e5c7SJunchao Zhang PetscFunctionBegin; 278f659e5c7SJunchao Zhang ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr); 279f659e5c7SJunchao Zhang ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */ 280f659e5c7SJunchao Zhang 281cd620004SJunchao Zhang /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */ 282f659e5c7SJunchao Zhang ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr); 283f659e5c7SJunchao Zhang ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 284cd620004SJunchao Zhang maxlocal = maxleaf - minleaf + 1; 285cd620004SJunchao Zhang ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 286cd620004SJunchao Zhang leafdata = leafmem - minleaf; 287f659e5c7SJunchao Zhang /* Tag selected roots */ 288f659e5c7SJunchao Zhang for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1; 289f659e5c7SJunchao Zhang 290cd620004SJunchao Zhang ierr = PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata);CHKERRQ(ierr); 291cd620004SJunchao Zhang ierr = PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata);CHKERRQ(ierr); 292f659e5c7SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */ 293cd620004SJunchao Zhang esf_nranks = esf_ndranks = esf_nleaves = 0; 294b23bfdefSJunchao Zhang for (i=0; i<nranks; i++) { 295cd620004SJunchao Zhang connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */ 296cd620004SJunchao Zhang for (j=roffset[i]; j<roffset[i+1]; j++) {if (leafdata[rmine[j]]) {esf_nleaves++; connected = PETSC_TRUE;}} 297f659e5c7SJunchao Zhang if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;} 298f659e5c7SJunchao Zhang } 299f659e5c7SJunchao Zhang 300f659e5c7SJunchao Zhang /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */ 301cd620004SJunchao Zhang ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 302cd620004SJunchao Zhang ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 303cd620004SJunchao Zhang ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,esf_nleaves,&esf_rmine,esf_nleaves,&esf_rremote);CHKERRQ(ierr); 304f659e5c7SJunchao Zhang p = 0; /* Counter for connected root ranks */ 305f659e5c7SJunchao Zhang q = 0; /* Counter for connected leaves */ 306f659e5c7SJunchao Zhang esf_roffset[0] = 0; 307f659e5c7SJunchao Zhang for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */ 308f659e5c7SJunchao Zhang connected = PETSC_FALSE; 309cd620004SJunchao Zhang for (j=roffset[i]; j<roffset[i+1]; j++) { 310cd620004SJunchao Zhang if (leafdata[rmine[j]]) { 311f659e5c7SJunchao Zhang esf_rmine[q] = new_ilocal[q] = rmine[j]; 312f659e5c7SJunchao Zhang esf_rremote[q] = rremote[j]; 313f659e5c7SJunchao Zhang new_iremote[q].index = rremote[j]; 314f659e5c7SJunchao Zhang new_iremote[q].rank = ranks[i]; 315f659e5c7SJunchao Zhang connected = PETSC_TRUE; 316f659e5c7SJunchao Zhang q++; 317f659e5c7SJunchao Zhang } 318f659e5c7SJunchao Zhang } 319f659e5c7SJunchao Zhang if (connected) { 320f659e5c7SJunchao Zhang esf_ranks[p] = ranks[i]; 321f659e5c7SJunchao Zhang esf_roffset[p+1] = q; 322f659e5c7SJunchao Zhang p++; 323f659e5c7SJunchao Zhang } 324f659e5c7SJunchao Zhang } 325f659e5c7SJunchao Zhang 326f659e5c7SJunchao Zhang /* SetGraph internally resets the SF, so we only set its fields after the call */ 327cd620004SJunchao Zhang ierr = PetscSFSetGraph(esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 328f659e5c7SJunchao Zhang esf->nranks = esf_nranks; 329f659e5c7SJunchao Zhang esf->ndranks = esf_ndranks; 330f659e5c7SJunchao Zhang esf->ranks = esf_ranks; 331f659e5c7SJunchao Zhang esf->roffset = esf_roffset; 332f659e5c7SJunchao Zhang esf->rmine = esf_rmine; 333f659e5c7SJunchao Zhang esf->rremote = esf_rremote; 334cd620004SJunchao Zhang esf->nleafreqs = esf_nranks - esf_ndranks; 335f659e5c7SJunchao Zhang 336f659e5c7SJunchao Zhang /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */ 337f659e5c7SJunchao Zhang bas = (PetscSF_Basic*)esf->data; 338f659e5c7SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */ 339f659e5c7SJunchao Zhang /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we 340cd620004SJunchao Zhang we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once. 341f659e5c7SJunchao Zhang */ 342f659e5c7SJunchao Zhang ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr); 343f659e5c7SJunchao Zhang ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr); 344f659e5c7SJunchao Zhang bas->niranks = bas->ndiranks = bas->ioffset[0] = 0; 345f659e5c7SJunchao Zhang p = 0; /* Counter for connected leaf ranks */ 346f659e5c7SJunchao Zhang q = 0; /* Counter for connected roots */ 347f659e5c7SJunchao Zhang for (i=0; i<niranks; i++) { 348f659e5c7SJunchao Zhang connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */ 349f659e5c7SJunchao Zhang for (j=ioffset[i]; j<ioffset[i+1]; j++) { 350cd620004SJunchao Zhang if (rootdata[irootloc[j]]) { 351f659e5c7SJunchao Zhang bas->irootloc[q++] = irootloc[j]; 352f659e5c7SJunchao Zhang connected = PETSC_TRUE; 353f659e5c7SJunchao Zhang } 354f659e5c7SJunchao Zhang } 355f659e5c7SJunchao Zhang if (connected) { 356f659e5c7SJunchao Zhang bas->niranks++; 357f659e5c7SJunchao Zhang if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */ 358f659e5c7SJunchao Zhang bas->iranks[p] = iranks[i]; 359f659e5c7SJunchao Zhang bas->ioffset[p+1] = q; 360f659e5c7SJunchao Zhang p++; 361f659e5c7SJunchao Zhang } 362f659e5c7SJunchao Zhang } 363f659e5c7SJunchao Zhang bas->itotal = q; 364cd620004SJunchao Zhang bas->nrootreqs = bas->niranks - bas->ndiranks; 365cd620004SJunchao Zhang esf->persistent = PETSC_TRUE; 366cd620004SJunchao Zhang /* Setup packing related fields */ 367cd620004SJunchao Zhang ierr = PetscSFSetUpPackFields(esf);CHKERRQ(ierr); 368f659e5c7SJunchao Zhang 369f659e5c7SJunchao Zhang esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 370cd620004SJunchao Zhang ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 371f659e5c7SJunchao Zhang *newsf = esf; 372f659e5c7SJunchao Zhang PetscFunctionReturn(0); 373f659e5c7SJunchao Zhang } 374f659e5c7SJunchao Zhang 3758cc058d9SJed Brown PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf) 37695fce210SBarry Smith { 37740e23c03SJunchao Zhang PetscSF_Basic *dat; 37895fce210SBarry Smith PetscErrorCode ierr; 37995fce210SBarry Smith 38095fce210SBarry Smith PetscFunctionBegin; 38195fce210SBarry Smith sf->ops->SetUp = PetscSFSetUp_Basic; 38295fce210SBarry Smith sf->ops->Reset = PetscSFReset_Basic; 38395fce210SBarry Smith sf->ops->Destroy = PetscSFDestroy_Basic; 38495fce210SBarry Smith sf->ops->View = PetscSFView_Basic; 3853482bfa8SJunchao Zhang sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Basic; 3863482bfa8SJunchao Zhang sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Basic; 38795fce210SBarry Smith sf->ops->ReduceBegin = PetscSFReduceBegin_Basic; 38895fce210SBarry Smith sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 38995fce210SBarry Smith sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic; 39095fce210SBarry Smith sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Basic; 3918750ddebSJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 392f659e5c7SJunchao Zhang sf->ops->CreateEmbeddedSF = PetscSFCreateEmbeddedSF_Basic; 39395fce210SBarry Smith 39440e23c03SJunchao Zhang ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 39540e23c03SJunchao Zhang sf->data = (void*)dat; 39695fce210SBarry Smith PetscFunctionReturn(0); 39795fce210SBarry Smith } 398