140e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h> 2cd620004SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfpack.h> 353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h> 4b23bfdefSJunchao Zhang 540e23c03SJunchao Zhang /*===================================================================================*/ 640e23c03SJunchao Zhang /* SF public interface implementations */ 740e23c03SJunchao Zhang /*===================================================================================*/ 840e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf) 995fce210SBarry Smith { 10b23bfdefSJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 1171438e86SJunchao Zhang PetscInt *rlengths,*ilengths,i,nRemoteRootRanks,nRemoteLeafRanks; 1240e23c03SJunchao Zhang PetscMPIInt rank,niranks,*iranks,tag; 1395fce210SBarry Smith MPI_Comm comm; 14b5a8e515SJed Brown MPI_Group group; 1540e23c03SJunchao Zhang MPI_Request *rootreqs,*leafreqs; 1695fce210SBarry Smith 1795fce210SBarry Smith PetscFunctionBegin; 18*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_group(PETSC_COMM_SELF,&group)); 19*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFSetUpRanks(sf,group)); 20*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Group_free(&group)); 21*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectGetComm((PetscObject)sf,&comm)); 22*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectGetNewTag((PetscObject)sf,&tag)); 23*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_rank(comm,&rank)); 2495fce210SBarry Smith /* 2595fce210SBarry Smith * Inform roots about how many leaves and from which ranks 2695fce210SBarry Smith */ 27*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(sf->nranks,&rlengths)); 28cd620004SJunchao Zhang /* Determine number, sending ranks and length of incoming */ 2995fce210SBarry Smith for (i=0; i<sf->nranks; i++) { 3095fce210SBarry Smith rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ 3195fce210SBarry Smith } 3271438e86SJunchao Zhang nRemoteRootRanks = sf->nranks-sf->ndranks; 33*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCommBuildTwoSided(comm,1,MPIU_INT,nRemoteRootRanks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths)); 34c943f53fSJed Brown 350b899082SJunchao Zhang /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why. 360b899082SJunchao Zhang We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is 370b899082SJunchao Zhang small and the sorting is cheap. 380b899082SJunchao Zhang */ 39*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSortMPIIntWithIntArray(niranks,iranks,ilengths)); 400b899082SJunchao Zhang 41c943f53fSJed Brown /* Partition into distinguished and non-distinguished incoming ranks */ 42c943f53fSJed Brown bas->ndiranks = sf->ndranks; 43c943f53fSJed Brown bas->niranks = bas->ndiranks + niranks; 44*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset)); 45c943f53fSJed Brown bas->ioffset[0] = 0; 46c943f53fSJed Brown for (i=0; i<bas->ndiranks; i++) { 47c943f53fSJed Brown bas->iranks[i] = sf->ranks[i]; 48c943f53fSJed Brown bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i]; 49c943f53fSJed Brown } 502c71b3e2SJacob Faibussowitsch PetscCheckFalse(bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank),PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks"); 5140e23c03SJunchao Zhang for (; i<bas->niranks; i++) { 52c943f53fSJed Brown bas->iranks[i] = iranks[i-bas->ndiranks]; 53c943f53fSJed Brown bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks]; 54c943f53fSJed Brown } 55c943f53fSJed Brown bas->itotal = bas->ioffset[i]; 56*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(rlengths)); 57*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(iranks)); 58*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(ilengths)); 5995fce210SBarry Smith 6095fce210SBarry Smith /* Send leaf identities to roots */ 6171438e86SJunchao Zhang nRemoteLeafRanks = bas->niranks-bas->ndiranks; 62*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(bas->itotal,&bas->irootloc)); 63*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(nRemoteLeafRanks,&rootreqs,nRemoteRootRanks,&leafreqs)); 6440e23c03SJunchao Zhang for (i=bas->ndiranks; i<bas->niranks; i++) { 65*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPIU_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks])); 6640e23c03SJunchao Zhang } 6740e23c03SJunchao Zhang for (i=0; i<sf->nranks; i++) { 68c87b50c4SJunchao Zhang PetscInt npoints = sf->roffset[i+1] - sf->roffset[i]; 6940e23c03SJunchao Zhang if (i < sf->ndranks) { 702c71b3e2SJacob Faibussowitsch PetscCheckFalse(sf->ranks[i] != rank,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank"); 712c71b3e2SJacob Faibussowitsch PetscCheckFalse(bas->iranks[0] != rank,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank"); 722c71b3e2SJacob Faibussowitsch PetscCheckFalse(npoints != bas->ioffset[1]-bas->ioffset[0],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths"); 73*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints)); 74c943f53fSJed Brown continue; 75c943f53fSJed Brown } 76*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPIU_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks])); 77bf39f1bfSJed Brown } 78*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Waitall(nRemoteLeafRanks,rootreqs,MPI_STATUSES_IGNORE)); 79*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Waitall(nRemoteRootRanks,leafreqs,MPI_STATUSES_IGNORE)); 8095fce210SBarry Smith 8171438e86SJunchao Zhang sf->nleafreqs = nRemoteRootRanks; 8271438e86SJunchao Zhang bas->nrootreqs = nRemoteLeafRanks; 83cd620004SJunchao Zhang sf->persistent = PETSC_TRUE; 84eb02082bSJunchao Zhang 8571438e86SJunchao Zhang /* Setup fields related to packing, such as rootbuflen[] */ 86*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFSetUpPackFields(sf)); 87*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree2(rootreqs,leafreqs)); 8895fce210SBarry Smith PetscFunctionReturn(0); 8995fce210SBarry Smith } 9095fce210SBarry Smith 9140e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf) 9295fce210SBarry Smith { 93cd620004SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 9471438e86SJunchao Zhang PetscSFLink link = bas->avail,next; 9595fce210SBarry Smith 9695fce210SBarry Smith PetscFunctionBegin; 972c71b3e2SJacob Faibussowitsch PetscCheckFalse(bas->inuse,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 98*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree2(bas->iranks,bas->ioffset)); 99*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(bas->irootloc)); 10071438e86SJunchao Zhang 1017fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 102*5f80ce2aSJacob Faibussowitsch for (PetscInt i=0; i<2; i++) CHKERRQ(PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i])); 103eb02082bSJunchao Zhang #endif 10471438e86SJunchao Zhang 10571438e86SJunchao Zhang #if defined(PETSC_HAVE_NVSHMEM) 106*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFReset_Basic_NVSHMEM(sf)); 10771438e86SJunchao Zhang #endif 10871438e86SJunchao Zhang 109*5f80ce2aSJacob Faibussowitsch for (; link; link=next) {next = link->next; CHKERRQ(PetscSFLinkDestroy(sf,link));} 11071438e86SJunchao Zhang bas->avail = NULL; 111*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFResetPackFields(sf)); 11295fce210SBarry Smith PetscFunctionReturn(0); 11395fce210SBarry Smith } 11495fce210SBarry Smith 11540e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf) 11695fce210SBarry Smith { 11795fce210SBarry Smith PetscFunctionBegin; 118*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFReset_Basic(sf)); 119*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree(sf->data)); 12095fce210SBarry Smith PetscFunctionReturn(0); 12195fce210SBarry Smith } 12295fce210SBarry Smith 12362152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY) 12462152dedSBarry Smith #include <petscmat.h> 12562152dedSBarry Smith 12662152dedSBarry Smith PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf,PetscViewer viewer) 12762152dedSBarry Smith { 12862152dedSBarry Smith PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 12953dd6d7dSJunchao Zhang PetscInt i,nrootranks,ndrootranks; 13062152dedSBarry Smith const PetscInt *rootoffset; 13162152dedSBarry Smith PetscMPIInt rank,size; 13253dd6d7dSJunchao Zhang const PetscMPIInt *rootranks; 13362152dedSBarry Smith MPI_Comm comm = PetscObjectComm((PetscObject)sf); 13453dd6d7dSJunchao Zhang PetscScalar unitbytes; 13562152dedSBarry Smith Mat A; 13662152dedSBarry Smith 13762152dedSBarry Smith PetscFunctionBegin; 138*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_size(comm,&size)); 139*5f80ce2aSJacob Faibussowitsch CHKERRMPI(MPI_Comm_rank(comm,&rank)); 14053dd6d7dSJunchao Zhang /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do 14153dd6d7dSJunchao Zhang PetscSFBcast, i.e., roots send data to leaves. We dump the communication pattern into a matrix 14253dd6d7dSJunchao Zhang in senders' view point: how many bytes I will send to my neighbors. 14353dd6d7dSJunchao Zhang 14453dd6d7dSJunchao Zhang Looking at a column of the matrix, one can also know how many bytes the rank will receive from others. 14553dd6d7dSJunchao Zhang 14653dd6d7dSJunchao Zhang If PetscSFLink bas->inuse is available, we can use that to get tree vertex size. But that would give 14753dd6d7dSJunchao Zhang different interpretations for the same SF for different data types. Since we most care about VecScatter, 14853dd6d7dSJunchao Zhang we uniformly treat each vertex as a PetscScalar. 14953dd6d7dSJunchao Zhang */ 15053dd6d7dSJunchao Zhang unitbytes = (PetscScalar)sizeof(PetscScalar); 15153dd6d7dSJunchao Zhang 152*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,NULL)); 153*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatCreateAIJ(comm,1,1,size,size,1,NULL,nrootranks-ndrootranks,NULL,&A)); 154*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetOptionsPrefix(A,"__petsc_internal__")); /* To prevent the internal A from taking any command line options */ 15562152dedSBarry Smith for (i=0; i<nrootranks; i++) { 156*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatSetValue(A,(PetscInt)rank,bas->iranks[i],(rootoffset[i+1]-rootoffset[i])*unitbytes,INSERT_VALUES)); 15762152dedSBarry Smith } 158*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 159*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 160*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatView(A,viewer)); 161*5f80ce2aSJacob Faibussowitsch CHKERRQ(MatDestroy(&A)); 16262152dedSBarry Smith PetscFunctionReturn(0); 16362152dedSBarry Smith } 16462152dedSBarry Smith #endif 16562152dedSBarry Smith 16640e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer) 16795fce210SBarry Smith { 16853dd6d7dSJunchao Zhang PetscBool isascii; 16995fce210SBarry Smith 17095fce210SBarry Smith PetscFunctionBegin; 171*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii)); 172*5f80ce2aSJacob Faibussowitsch if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) CHKERRQ(PetscViewerASCIIPrintf(viewer," MultiSF sort=%s\n",sf->rankorder ? "rank-order" : "unordered")); 17362152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY) 17453dd6d7dSJunchao Zhang else { 17553dd6d7dSJunchao Zhang PetscBool isdraw,isbinary; 176*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 177*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 17853dd6d7dSJunchao Zhang if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) { 179*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFView_Basic_PatternAndSizes(sf,viewer)); 18053dd6d7dSJunchao Zhang } 18162152dedSBarry Smith } 18262152dedSBarry Smith #endif 18395fce210SBarry Smith PetscFunctionReturn(0); 18495fce210SBarry Smith } 18595fce210SBarry Smith 186ad227feaSJunchao Zhang static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 18795fce210SBarry Smith { 188cd620004SJunchao Zhang PetscSFLink link = NULL; 18995fce210SBarry Smith 19095fce210SBarry Smith PetscFunctionBegin; 19171438e86SJunchao Zhang /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */ 192*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link)); 19371438e86SJunchao Zhang /* Pack rootdata to rootbuf for remote communication */ 194*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata)); 19571438e86SJunchao Zhang /* Start communcation, e.g., post MPI_Isend */ 196*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF)); 19771438e86SJunchao Zhang /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */ 198*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op)); 19995fce210SBarry Smith PetscFunctionReturn(0); 20095fce210SBarry Smith } 20195fce210SBarry Smith 202ad227feaSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 20395fce210SBarry Smith { 204cd620004SJunchao Zhang PetscSFLink link = NULL; 20595fce210SBarry Smith 20695fce210SBarry Smith PetscFunctionBegin; 207cd620004SJunchao Zhang /* Retrieve the link used in XxxBegin() with root/leafdata as key */ 208*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link)); 20971438e86SJunchao Zhang /* Finish remote communication, e.g., post MPI_Waitall */ 210*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF)); 21171438e86SJunchao Zhang /* Unpack data in leafbuf to leafdata for remote communication */ 212*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafdata,op)); 21371438e86SJunchao Zhang /* Recycle the link */ 214*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkReclaim(sf,&link)); 215cd620004SJunchao Zhang PetscFunctionReturn(0); 216cd620004SJunchao Zhang } 217cd620004SJunchao Zhang 218cd620004SJunchao Zhang /* Shared by ReduceBegin and FetchAndOpBegin */ 2199fbee547SJacob Faibussowitsch static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out) 220cd620004SJunchao Zhang { 22171438e86SJunchao Zhang PetscSFLink link = NULL; 222cd620004SJunchao Zhang 223cd620004SJunchao Zhang PetscFunctionBegin; 224*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link)); 225*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata)); 226*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkStartCommunication(sf,link,PETSCSF_LEAF2ROOT)); 227cd620004SJunchao Zhang *out = link; 22895fce210SBarry Smith PetscFunctionReturn(0); 22995fce210SBarry Smith } 23095fce210SBarry Smith 23195fce210SBarry Smith /* leaf -> root with reduction */ 232eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 23395fce210SBarry Smith { 234cd620004SJunchao Zhang PetscSFLink link = NULL; 23595fce210SBarry Smith 23695fce210SBarry Smith PetscFunctionBegin; 237*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link)); 238*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op)); 23995fce210SBarry Smith PetscFunctionReturn(0); 24095fce210SBarry Smith } 24195fce210SBarry Smith 24200816365SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 24395fce210SBarry Smith { 244cd620004SJunchao Zhang PetscSFLink link = NULL; 24595fce210SBarry Smith 24695fce210SBarry Smith PetscFunctionBegin; 247*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link)); 248*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT)); 249*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkUnpackRootData(sf,link,PETSCSF_REMOTE,rootdata,op)); 250*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkReclaim(sf,&link)); 25195fce210SBarry Smith PetscFunctionReturn(0); 25295fce210SBarry Smith } 25395fce210SBarry Smith 254eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op) 25595fce210SBarry Smith { 256cd620004SJunchao Zhang PetscSFLink link = NULL; 25795fce210SBarry Smith 25895fce210SBarry Smith PetscFunctionBegin; 259*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link)); 260*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op)); 26195fce210SBarry Smith PetscFunctionReturn(0); 26295fce210SBarry Smith } 26395fce210SBarry Smith 26400816365SJunchao Zhang static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 26595fce210SBarry Smith { 266cd620004SJunchao Zhang PetscSFLink link = NULL; 26795fce210SBarry Smith 26895fce210SBarry Smith PetscFunctionBegin; 269*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link)); 27095fce210SBarry Smith /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 271*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT)); 272cd620004SJunchao Zhang /* Do fetch-and-op, the (remote) update results are in rootbuf */ 273*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op)); 274cd620004SJunchao Zhang /* Bcast rootbuf to leafupdate */ 275*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF)); 276*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF)); 277b23bfdefSJunchao Zhang /* Unpack and insert fetched data into leaves */ 278*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE)); 279*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFLinkReclaim(sf,&link)); 28095fce210SBarry Smith PetscFunctionReturn(0); 28195fce210SBarry Smith } 28295fce210SBarry Smith 28340e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 2848750ddebSJunchao Zhang { 2858750ddebSJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 2868750ddebSJunchao Zhang 2878750ddebSJunchao Zhang PetscFunctionBegin; 2888750ddebSJunchao Zhang if (niranks) *niranks = bas->niranks; 2898750ddebSJunchao Zhang if (iranks) *iranks = bas->iranks; 2908750ddebSJunchao Zhang if (ioffset) *ioffset = bas->ioffset; 2918750ddebSJunchao Zhang if (irootloc) *irootloc = bas->irootloc; 2928750ddebSJunchao Zhang PetscFunctionReturn(0); 2938750ddebSJunchao Zhang } 2948750ddebSJunchao Zhang 29572502a1fSJunchao Zhang /* An optimized PetscSFCreateEmbeddedRootSF. We aggresively make use of the established communication on sf. 296f659e5c7SJunchao Zhang We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[] 297f659e5c7SJunchao Zhang was sorted before calling the routine. 298f659e5c7SJunchao Zhang */ 29972502a1fSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 300f659e5c7SJunchao Zhang { 301f659e5c7SJunchao Zhang PetscSF esf; 302cd620004SJunchao Zhang PetscInt esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote; 303cd620004SJunchao Zhang PetscInt i,j,p,q,nroots,esf_nleaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal; 304cd620004SJunchao Zhang char *rootdata,*leafdata,*leafmem; /* Only stores 0 or 1, so we can save memory with char */ 305f659e5c7SJunchao Zhang PetscMPIInt *esf_ranks; 306f659e5c7SJunchao Zhang const PetscMPIInt *ranks,*iranks; 307cd620004SJunchao Zhang const PetscInt *roffset,*rmine,*rremote,*ioffset,*irootloc; 308f659e5c7SJunchao Zhang PetscBool connected; 309f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 310f659e5c7SJunchao Zhang PetscSF_Basic *bas; 311f659e5c7SJunchao Zhang 312f659e5c7SJunchao Zhang PetscFunctionBegin; 313*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf)); 314*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFSetFromOptions(esf)); 315*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFSetType(esf,PETSCSFBASIC)); /* This optimized routine can only create a basic sf */ 316f659e5c7SJunchao Zhang 317cd620004SJunchao Zhang /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */ 318*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL)); 319*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFGetLeafRange(sf,&minleaf,&maxleaf)); 320cd620004SJunchao Zhang maxlocal = maxleaf - minleaf + 1; 321*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem)); 322cd620004SJunchao Zhang leafdata = leafmem - minleaf; 323f659e5c7SJunchao Zhang /* Tag selected roots */ 324f659e5c7SJunchao Zhang for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1; 325f659e5c7SJunchao Zhang 326*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE)); 327*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE)); 328*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote)); /* Get send info */ 329cd620004SJunchao Zhang esf_nranks = esf_ndranks = esf_nleaves = 0; 330b23bfdefSJunchao Zhang for (i=0; i<nranks; i++) { 331cd620004SJunchao Zhang connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */ 332cd620004SJunchao Zhang for (j=roffset[i]; j<roffset[i+1]; j++) {if (leafdata[rmine[j]]) {esf_nleaves++; connected = PETSC_TRUE;}} 333f659e5c7SJunchao Zhang if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;} 334f659e5c7SJunchao Zhang } 335f659e5c7SJunchao Zhang 336f659e5c7SJunchao Zhang /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */ 337*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(esf_nleaves,&new_ilocal)); 338*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(esf_nleaves,&new_iremote)); 339*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,esf_nleaves,&esf_rmine,esf_nleaves,&esf_rremote)); 340f659e5c7SJunchao Zhang p = 0; /* Counter for connected root ranks */ 341f659e5c7SJunchao Zhang q = 0; /* Counter for connected leaves */ 342f659e5c7SJunchao Zhang esf_roffset[0] = 0; 343f659e5c7SJunchao Zhang for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */ 344f659e5c7SJunchao Zhang connected = PETSC_FALSE; 345cd620004SJunchao Zhang for (j=roffset[i]; j<roffset[i+1]; j++) { 346cd620004SJunchao Zhang if (leafdata[rmine[j]]) { 347f659e5c7SJunchao Zhang esf_rmine[q] = new_ilocal[q] = rmine[j]; 348f659e5c7SJunchao Zhang esf_rremote[q] = rremote[j]; 349f659e5c7SJunchao Zhang new_iremote[q].index = rremote[j]; 350f659e5c7SJunchao Zhang new_iremote[q].rank = ranks[i]; 351f659e5c7SJunchao Zhang connected = PETSC_TRUE; 352f659e5c7SJunchao Zhang q++; 353f659e5c7SJunchao Zhang } 354f659e5c7SJunchao Zhang } 355f659e5c7SJunchao Zhang if (connected) { 356f659e5c7SJunchao Zhang esf_ranks[p] = ranks[i]; 357f659e5c7SJunchao Zhang esf_roffset[p+1] = q; 358f659e5c7SJunchao Zhang p++; 359f659e5c7SJunchao Zhang } 360f659e5c7SJunchao Zhang } 361f659e5c7SJunchao Zhang 362f659e5c7SJunchao Zhang /* SetGraph internally resets the SF, so we only set its fields after the call */ 363*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFSetGraph(esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER)); 364f659e5c7SJunchao Zhang esf->nranks = esf_nranks; 365f659e5c7SJunchao Zhang esf->ndranks = esf_ndranks; 366f659e5c7SJunchao Zhang esf->ranks = esf_ranks; 367f659e5c7SJunchao Zhang esf->roffset = esf_roffset; 368f659e5c7SJunchao Zhang esf->rmine = esf_rmine; 369f659e5c7SJunchao Zhang esf->rremote = esf_rremote; 370cd620004SJunchao Zhang esf->nleafreqs = esf_nranks - esf_ndranks; 371f659e5c7SJunchao Zhang 372f659e5c7SJunchao Zhang /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */ 373f659e5c7SJunchao Zhang bas = (PetscSF_Basic*)esf->data; 374*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc)); /* Get recv info */ 375f659e5c7SJunchao Zhang /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we 376cd620004SJunchao Zhang we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once. 377f659e5c7SJunchao Zhang */ 378*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset)); 379*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscMalloc1(ioffset[niranks],&bas->irootloc)); 380f659e5c7SJunchao Zhang bas->niranks = bas->ndiranks = bas->ioffset[0] = 0; 381f659e5c7SJunchao Zhang p = 0; /* Counter for connected leaf ranks */ 382f659e5c7SJunchao Zhang q = 0; /* Counter for connected roots */ 383f659e5c7SJunchao Zhang for (i=0; i<niranks; i++) { 384f659e5c7SJunchao Zhang connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */ 385f659e5c7SJunchao Zhang for (j=ioffset[i]; j<ioffset[i+1]; j++) { 386cd620004SJunchao Zhang if (rootdata[irootloc[j]]) { 387f659e5c7SJunchao Zhang bas->irootloc[q++] = irootloc[j]; 388f659e5c7SJunchao Zhang connected = PETSC_TRUE; 389f659e5c7SJunchao Zhang } 390f659e5c7SJunchao Zhang } 391f659e5c7SJunchao Zhang if (connected) { 392f659e5c7SJunchao Zhang bas->niranks++; 393f659e5c7SJunchao Zhang if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */ 394f659e5c7SJunchao Zhang bas->iranks[p] = iranks[i]; 395f659e5c7SJunchao Zhang bas->ioffset[p+1] = q; 396f659e5c7SJunchao Zhang p++; 397f659e5c7SJunchao Zhang } 398f659e5c7SJunchao Zhang } 399f659e5c7SJunchao Zhang bas->itotal = q; 400cd620004SJunchao Zhang bas->nrootreqs = bas->niranks - bas->ndiranks; 401cd620004SJunchao Zhang esf->persistent = PETSC_TRUE; 402cd620004SJunchao Zhang /* Setup packing related fields */ 403*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscSFSetUpPackFields(esf)); 404f659e5c7SJunchao Zhang 40520c24465SJunchao Zhang /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */ 40620c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 40720c24465SJunchao Zhang if (esf->backend == PETSCSF_BACKEND_CUDA) { 40871438e86SJunchao Zhang esf->ops->Malloc = PetscSFMalloc_CUDA; 40971438e86SJunchao Zhang esf->ops->Free = PetscSFFree_CUDA; 41020c24465SJunchao Zhang } 41120c24465SJunchao Zhang #endif 41220c24465SJunchao Zhang 41359af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP) 41459af0bd3SScott Kruger /* TODO: Needs debugging */ 41559af0bd3SScott Kruger if (esf->backend == PETSCSF_BACKEND_HIP) { 41659af0bd3SScott Kruger esf->ops->Malloc = PetscSFMalloc_HIP; 41759af0bd3SScott Kruger esf->ops->Free = PetscSFFree_HIP; 41859af0bd3SScott Kruger } 41959af0bd3SScott Kruger #endif 42059af0bd3SScott Kruger 42120c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS) 42220c24465SJunchao Zhang if (esf->backend == PETSCSF_BACKEND_KOKKOS) { 42320c24465SJunchao Zhang esf->ops->Malloc = PetscSFMalloc_Kokkos; 42420c24465SJunchao Zhang esf->ops->Free = PetscSFFree_Kokkos; 42520c24465SJunchao Zhang } 42620c24465SJunchao Zhang #endif 427f659e5c7SJunchao Zhang esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 428*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscFree2(rootdata,leafmem)); 429f659e5c7SJunchao Zhang *newsf = esf; 430f659e5c7SJunchao Zhang PetscFunctionReturn(0); 431f659e5c7SJunchao Zhang } 432f659e5c7SJunchao Zhang 4338cc058d9SJed Brown PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf) 43495fce210SBarry Smith { 43540e23c03SJunchao Zhang PetscSF_Basic *dat; 43695fce210SBarry Smith 43795fce210SBarry Smith PetscFunctionBegin; 43895fce210SBarry Smith sf->ops->SetUp = PetscSFSetUp_Basic; 43995fce210SBarry Smith sf->ops->Reset = PetscSFReset_Basic; 44095fce210SBarry Smith sf->ops->Destroy = PetscSFDestroy_Basic; 44195fce210SBarry Smith sf->ops->View = PetscSFView_Basic; 442ad227feaSJunchao Zhang sf->ops->BcastBegin = PetscSFBcastBegin_Basic; 443ad227feaSJunchao Zhang sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 44495fce210SBarry Smith sf->ops->ReduceBegin = PetscSFReduceBegin_Basic; 44595fce210SBarry Smith sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 44695fce210SBarry Smith sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic; 44795fce210SBarry Smith sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Basic; 4488750ddebSJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 44972502a1fSJunchao Zhang sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic; 45095fce210SBarry Smith 451*5f80ce2aSJacob Faibussowitsch CHKERRQ(PetscNewLog(sf,&dat)); 45240e23c03SJunchao Zhang sf->data = (void*)dat; 45395fce210SBarry Smith PetscFunctionReturn(0); 45495fce210SBarry Smith } 455