140e23c03SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfbasic.h> 2cd620004SJunchao Zhang #include <../src/vec/is/sf/impls/basic/sfpack.h> 353dd6d7dSJunchao Zhang #include <petsc/private/viewerimpl.h> 4b23bfdefSJunchao Zhang 540e23c03SJunchao Zhang /*===================================================================================*/ 640e23c03SJunchao Zhang /* SF public interface implementations */ 740e23c03SJunchao Zhang /*===================================================================================*/ 840e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF sf) 995fce210SBarry Smith { 1095fce210SBarry Smith PetscErrorCode ierr; 11b23bfdefSJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 1271438e86SJunchao Zhang PetscInt *rlengths,*ilengths,i,nRemoteRootRanks,nRemoteLeafRanks; 1340e23c03SJunchao Zhang PetscMPIInt rank,niranks,*iranks,tag; 1495fce210SBarry Smith MPI_Comm comm; 15b5a8e515SJed Brown MPI_Group group; 1640e23c03SJunchao Zhang MPI_Request *rootreqs,*leafreqs; 1795fce210SBarry Smith 1895fce210SBarry Smith PetscFunctionBegin; 19ffc4695bSBarry Smith ierr = MPI_Comm_group(PETSC_COMM_SELF,&group);CHKERRMPI(ierr); 20b5a8e515SJed Brown ierr = PetscSFSetUpRanks(sf,group);CHKERRQ(ierr); 21ffc4695bSBarry Smith ierr = MPI_Group_free(&group);CHKERRMPI(ierr); 2295fce210SBarry Smith ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 2340e23c03SJunchao Zhang ierr = PetscObjectGetNewTag((PetscObject)sf,&tag);CHKERRQ(ierr); 24ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 2595fce210SBarry Smith /* 2695fce210SBarry Smith * Inform roots about how many leaves and from which ranks 2795fce210SBarry Smith */ 28785e854fSJed Brown ierr = PetscMalloc1(sf->nranks,&rlengths);CHKERRQ(ierr); 29cd620004SJunchao Zhang /* Determine number, sending ranks and length of incoming */ 3095fce210SBarry Smith for (i=0; i<sf->nranks; i++) { 3195fce210SBarry Smith rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */ 3295fce210SBarry Smith } 3371438e86SJunchao Zhang nRemoteRootRanks = sf->nranks-sf->ndranks; 3471438e86SJunchao Zhang ierr = PetscCommBuildTwoSided(comm,1,MPIU_INT,nRemoteRootRanks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);CHKERRQ(ierr); 35c943f53fSJed Brown 360b899082SJunchao Zhang /* Sort iranks. See use of VecScatterGetRemoteOrdered_Private() in MatGetBrowsOfAoCols_MPIAIJ() on why. 370b899082SJunchao Zhang We could sort ranks there at the price of allocating extra working arrays. Presumably, niranks is 380b899082SJunchao Zhang small and the sorting is cheap. 390b899082SJunchao Zhang */ 400b899082SJunchao Zhang ierr = PetscSortMPIIntWithIntArray(niranks,iranks,ilengths);CHKERRQ(ierr); 410b899082SJunchao Zhang 42c943f53fSJed Brown /* Partition into distinguished and non-distinguished incoming ranks */ 43c943f53fSJed Brown bas->ndiranks = sf->ndranks; 44c943f53fSJed Brown bas->niranks = bas->ndiranks + niranks; 45c943f53fSJed Brown ierr = PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);CHKERRQ(ierr); 46c943f53fSJed Brown bas->ioffset[0] = 0; 47c943f53fSJed Brown for (i=0; i<bas->ndiranks; i++) { 48c943f53fSJed Brown bas->iranks[i] = sf->ranks[i]; 49c943f53fSJed Brown bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i]; 50c943f53fSJed Brown } 51*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank),PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks"); 5240e23c03SJunchao Zhang for (; i<bas->niranks; i++) { 53c943f53fSJed Brown bas->iranks[i] = iranks[i-bas->ndiranks]; 54c943f53fSJed Brown bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks]; 55c943f53fSJed Brown } 56c943f53fSJed Brown bas->itotal = bas->ioffset[i]; 5795fce210SBarry Smith ierr = PetscFree(rlengths);CHKERRQ(ierr); 58c943f53fSJed Brown ierr = PetscFree(iranks);CHKERRQ(ierr); 59c943f53fSJed Brown ierr = PetscFree(ilengths);CHKERRQ(ierr); 6095fce210SBarry Smith 6195fce210SBarry Smith /* Send leaf identities to roots */ 6271438e86SJunchao Zhang nRemoteLeafRanks = bas->niranks-bas->ndiranks; 63c943f53fSJed Brown ierr = PetscMalloc1(bas->itotal,&bas->irootloc);CHKERRQ(ierr); 6471438e86SJunchao Zhang ierr = PetscMalloc2(nRemoteLeafRanks,&rootreqs,nRemoteRootRanks,&leafreqs);CHKERRQ(ierr); 6540e23c03SJunchao Zhang for (i=bas->ndiranks; i<bas->niranks; i++) { 66c87b50c4SJunchao Zhang ierr = MPIU_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],tag,comm,&rootreqs[i-bas->ndiranks]);CHKERRMPI(ierr); 6740e23c03SJunchao Zhang } 6840e23c03SJunchao Zhang for (i=0; i<sf->nranks; i++) { 69c87b50c4SJunchao Zhang PetscInt npoints = sf->roffset[i+1] - sf->roffset[i]; 7040e23c03SJunchao Zhang if (i < sf->ndranks) { 71*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(sf->ranks[i] != rank,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank"); 72*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(bas->iranks[0] != rank,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank"); 73*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(npoints != bas->ioffset[1]-bas->ioffset[0],PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths"); 7440e23c03SJunchao Zhang ierr = PetscArraycpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints);CHKERRQ(ierr); 75c943f53fSJed Brown continue; 76c943f53fSJed Brown } 77c87b50c4SJunchao Zhang ierr = MPIU_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],tag,comm,&leafreqs[i-sf->ndranks]);CHKERRMPI(ierr); 78bf39f1bfSJed Brown } 7971438e86SJunchao Zhang ierr = MPI_Waitall(nRemoteLeafRanks,rootreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8071438e86SJunchao Zhang ierr = MPI_Waitall(nRemoteRootRanks,leafreqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr); 8195fce210SBarry Smith 8271438e86SJunchao Zhang sf->nleafreqs = nRemoteRootRanks; 8371438e86SJunchao Zhang bas->nrootreqs = nRemoteLeafRanks; 84cd620004SJunchao Zhang sf->persistent = PETSC_TRUE; 85eb02082bSJunchao Zhang 8671438e86SJunchao Zhang /* Setup fields related to packing, such as rootbuflen[] */ 87cd620004SJunchao Zhang ierr = PetscSFSetUpPackFields(sf);CHKERRQ(ierr); 8871438e86SJunchao Zhang ierr = PetscFree2(rootreqs,leafreqs);CHKERRQ(ierr); 8995fce210SBarry Smith PetscFunctionReturn(0); 9095fce210SBarry Smith } 9195fce210SBarry Smith 9240e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF sf) 9395fce210SBarry Smith { 9495fce210SBarry Smith PetscErrorCode ierr; 95cd620004SJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 9671438e86SJunchao Zhang PetscSFLink link = bas->avail,next; 9795fce210SBarry Smith 9895fce210SBarry Smith PetscFunctionBegin; 99*2c71b3e2SJacob Faibussowitsch PetscCheckFalse(bas->inuse,PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed"); 100c943f53fSJed Brown ierr = PetscFree2(bas->iranks,bas->ioffset);CHKERRQ(ierr); 101c943f53fSJed Brown ierr = PetscFree(bas->irootloc);CHKERRQ(ierr); 10271438e86SJunchao Zhang 1037fd2d3dbSJunchao Zhang #if defined(PETSC_HAVE_DEVICE) 10420c24465SJunchao Zhang for (PetscInt i=0; i<2; i++) {ierr = PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,bas->irootloc_d[i]);CHKERRQ(ierr);} 105eb02082bSJunchao Zhang #endif 10671438e86SJunchao Zhang 10771438e86SJunchao Zhang #if defined(PETSC_HAVE_NVSHMEM) 10871438e86SJunchao Zhang ierr = PetscSFReset_Basic_NVSHMEM(sf);CHKERRQ(ierr); 10971438e86SJunchao Zhang #endif 11071438e86SJunchao Zhang 11171438e86SJunchao Zhang for (; link; link=next) {next = link->next; ierr = PetscSFLinkDestroy(sf,link);CHKERRQ(ierr);} 11271438e86SJunchao Zhang bas->avail = NULL; 113cd620004SJunchao Zhang ierr = PetscSFResetPackFields(sf);CHKERRQ(ierr); 11495fce210SBarry Smith PetscFunctionReturn(0); 11595fce210SBarry Smith } 11695fce210SBarry Smith 11740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF sf) 11895fce210SBarry Smith { 11995fce210SBarry Smith PetscErrorCode ierr; 12095fce210SBarry Smith 12195fce210SBarry Smith PetscFunctionBegin; 122f6d956f6SStefano Zampini ierr = PetscSFReset_Basic(sf);CHKERRQ(ierr); 12395fce210SBarry Smith ierr = PetscFree(sf->data);CHKERRQ(ierr); 12495fce210SBarry Smith PetscFunctionReturn(0); 12595fce210SBarry Smith } 12695fce210SBarry Smith 12762152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY) 12862152dedSBarry Smith #include <petscmat.h> 12962152dedSBarry Smith 13062152dedSBarry Smith PETSC_INTERN PetscErrorCode PetscSFView_Basic_PatternAndSizes(PetscSF sf,PetscViewer viewer) 13162152dedSBarry Smith { 13262152dedSBarry Smith PetscErrorCode ierr; 13362152dedSBarry Smith PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 13453dd6d7dSJunchao Zhang PetscInt i,nrootranks,ndrootranks; 13562152dedSBarry Smith const PetscInt *rootoffset; 13662152dedSBarry Smith PetscMPIInt rank,size; 13753dd6d7dSJunchao Zhang const PetscMPIInt *rootranks; 13862152dedSBarry Smith MPI_Comm comm = PetscObjectComm((PetscObject)sf); 13953dd6d7dSJunchao Zhang PetscScalar unitbytes; 14062152dedSBarry Smith Mat A; 14162152dedSBarry Smith 14262152dedSBarry Smith PetscFunctionBegin; 14362152dedSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 14462152dedSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 14553dd6d7dSJunchao Zhang /* PetscSFView is most useful for the SF used in VecScatterBegin/End in MatMult etc, where we do 14653dd6d7dSJunchao Zhang PetscSFBcast, i.e., roots send data to leaves. We dump the communication pattern into a matrix 14753dd6d7dSJunchao Zhang in senders' view point: how many bytes I will send to my neighbors. 14853dd6d7dSJunchao Zhang 14953dd6d7dSJunchao Zhang Looking at a column of the matrix, one can also know how many bytes the rank will receive from others. 15053dd6d7dSJunchao Zhang 15153dd6d7dSJunchao Zhang If PetscSFLink bas->inuse is available, we can use that to get tree vertex size. But that would give 15253dd6d7dSJunchao Zhang different interpretations for the same SF for different data types. Since we most care about VecScatter, 15353dd6d7dSJunchao Zhang we uniformly treat each vertex as a PetscScalar. 15453dd6d7dSJunchao Zhang */ 15553dd6d7dSJunchao Zhang unitbytes = (PetscScalar)sizeof(PetscScalar); 15653dd6d7dSJunchao Zhang 15753dd6d7dSJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,NULL);CHKERRQ(ierr); 15853dd6d7dSJunchao Zhang ierr = MatCreateAIJ(comm,1,1,size,size,1,NULL,nrootranks-ndrootranks,NULL,&A);CHKERRQ(ierr); 15953dd6d7dSJunchao Zhang ierr = MatSetOptionsPrefix(A,"__petsc_internal__");CHKERRQ(ierr); /* To prevent the internal A from taking any command line options */ 16062152dedSBarry Smith for (i=0; i<nrootranks; i++) { 16153dd6d7dSJunchao Zhang ierr = MatSetValue(A,(PetscInt)rank,bas->iranks[i],(rootoffset[i+1]-rootoffset[i])*unitbytes,INSERT_VALUES);CHKERRQ(ierr); 16262152dedSBarry Smith } 16362152dedSBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16462152dedSBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16562152dedSBarry Smith ierr = MatView(A,viewer);CHKERRQ(ierr); 16662152dedSBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 16762152dedSBarry Smith PetscFunctionReturn(0); 16862152dedSBarry Smith } 16962152dedSBarry Smith #endif 17062152dedSBarry Smith 17140e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer) 17295fce210SBarry Smith { 17395fce210SBarry Smith PetscErrorCode ierr; 17453dd6d7dSJunchao Zhang PetscBool isascii; 17595fce210SBarry Smith 17695fce210SBarry Smith PetscFunctionBegin; 17753dd6d7dSJunchao Zhang ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr); 17853dd6d7dSJunchao Zhang if (isascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {ierr = PetscViewerASCIIPrintf(viewer," MultiSF sort=%s\n",sf->rankorder ? "rank-order" : "unordered");CHKERRQ(ierr);} 17962152dedSBarry Smith #if defined(PETSC_USE_SINGLE_LIBRARY) 18053dd6d7dSJunchao Zhang else { 18153dd6d7dSJunchao Zhang PetscBool isdraw,isbinary; 18253dd6d7dSJunchao Zhang ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 18353dd6d7dSJunchao Zhang ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 18453dd6d7dSJunchao Zhang if ((isascii && viewer->format == PETSC_VIEWER_ASCII_MATLAB) || isdraw || isbinary) { 18553dd6d7dSJunchao Zhang ierr = PetscSFView_Basic_PatternAndSizes(sf,viewer);CHKERRQ(ierr); 18653dd6d7dSJunchao Zhang } 18762152dedSBarry Smith } 18862152dedSBarry Smith #endif 18995fce210SBarry Smith PetscFunctionReturn(0); 19095fce210SBarry Smith } 19195fce210SBarry Smith 192ad227feaSJunchao Zhang static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 19395fce210SBarry Smith { 19495fce210SBarry Smith PetscErrorCode ierr; 195cd620004SJunchao Zhang PetscSFLink link = NULL; 19695fce210SBarry Smith 19795fce210SBarry Smith PetscFunctionBegin; 19871438e86SJunchao Zhang /* Create a communication link, which provides buffers, MPI requests etc (if MPI is used) */ 199cd620004SJunchao Zhang ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr); 20071438e86SJunchao Zhang /* Pack rootdata to rootbuf for remote communication */ 201cd620004SJunchao Zhang ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr); 20271438e86SJunchao Zhang /* Start communcation, e.g., post MPI_Isend */ 20371438e86SJunchao Zhang ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 20471438e86SJunchao Zhang /* Do local scatter (i.e., self to self communication), which overlaps with the remote communication above */ 20571438e86SJunchao Zhang ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_ROOT2LEAF,(void*)rootdata,leafdata,op);CHKERRQ(ierr); 20695fce210SBarry Smith PetscFunctionReturn(0); 20795fce210SBarry Smith } 20895fce210SBarry Smith 209ad227feaSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 21095fce210SBarry Smith { 21195fce210SBarry Smith PetscErrorCode ierr; 212cd620004SJunchao Zhang PetscSFLink link = NULL; 21395fce210SBarry Smith 21495fce210SBarry Smith PetscFunctionBegin; 215cd620004SJunchao Zhang /* Retrieve the link used in XxxBegin() with root/leafdata as key */ 216cd620004SJunchao Zhang ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 21771438e86SJunchao Zhang /* Finish remote communication, e.g., post MPI_Waitall */ 21871438e86SJunchao Zhang ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 21971438e86SJunchao Zhang /* Unpack data in leafbuf to leafdata for remote communication */ 220cd620004SJunchao Zhang ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafdata,op);CHKERRQ(ierr); 22171438e86SJunchao Zhang /* Recycle the link */ 222cd620004SJunchao Zhang ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr); 223cd620004SJunchao Zhang PetscFunctionReturn(0); 224cd620004SJunchao Zhang } 225cd620004SJunchao Zhang 226cd620004SJunchao Zhang /* Shared by ReduceBegin and FetchAndOpBegin */ 2279fbee547SJacob Faibussowitsch static inline PetscErrorCode PetscSFLeafToRootBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op,PetscSFOperation sfop,PetscSFLink *out) 228cd620004SJunchao Zhang { 229cd620004SJunchao Zhang PetscErrorCode ierr; 23071438e86SJunchao Zhang PetscSFLink link = NULL; 231cd620004SJunchao Zhang 232cd620004SJunchao Zhang PetscFunctionBegin; 233cd620004SJunchao Zhang ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,sfop,&link);CHKERRQ(ierr); 234cd620004SJunchao Zhang ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr); 23571438e86SJunchao Zhang ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr); 236cd620004SJunchao Zhang *out = link; 23795fce210SBarry Smith PetscFunctionReturn(0); 23895fce210SBarry Smith } 23995fce210SBarry Smith 24095fce210SBarry Smith /* leaf -> root with reduction */ 241eb02082bSJunchao Zhang static PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 24295fce210SBarry Smith { 24395fce210SBarry Smith PetscErrorCode ierr; 244cd620004SJunchao Zhang PetscSFLink link = NULL; 24595fce210SBarry Smith 24695fce210SBarry Smith PetscFunctionBegin; 247cd620004SJunchao Zhang ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr); 24871438e86SJunchao Zhang ierr = PetscSFLinkScatterLocal(sf,link,PETSCSF_LEAF2ROOT,rootdata,(void*)leafdata,op);CHKERRQ(ierr); 24995fce210SBarry Smith PetscFunctionReturn(0); 25095fce210SBarry Smith } 25195fce210SBarry Smith 25200816365SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 25395fce210SBarry Smith { 25495fce210SBarry Smith PetscErrorCode ierr; 255cd620004SJunchao Zhang PetscSFLink link = NULL; 25695fce210SBarry Smith 25795fce210SBarry Smith PetscFunctionBegin; 258cd620004SJunchao Zhang ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 25971438e86SJunchao Zhang ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr); 260cd620004SJunchao Zhang ierr = PetscSFLinkUnpackRootData(sf,link,PETSCSF_REMOTE,rootdata,op);CHKERRQ(ierr); 261cd620004SJunchao Zhang ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr); 26295fce210SBarry Smith PetscFunctionReturn(0); 26395fce210SBarry Smith } 26495fce210SBarry Smith 265eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op) 26695fce210SBarry Smith { 26795fce210SBarry Smith PetscErrorCode ierr; 268cd620004SJunchao Zhang PetscSFLink link = NULL; 26995fce210SBarry Smith 27095fce210SBarry Smith PetscFunctionBegin; 271cd620004SJunchao Zhang ierr = PetscSFLeafToRootBegin_Basic(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op,PETSCSF_FETCH,&link);CHKERRQ(ierr); 272cd620004SJunchao Zhang ierr = PetscSFLinkFetchAndOpLocal(sf,link,rootdata,leafdata,leafupdate,op);CHKERRQ(ierr); 27395fce210SBarry Smith PetscFunctionReturn(0); 27495fce210SBarry Smith } 27595fce210SBarry Smith 27600816365SJunchao Zhang static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op) 27795fce210SBarry Smith { 27895fce210SBarry Smith PetscErrorCode ierr; 279cd620004SJunchao Zhang PetscSFLink link = NULL; 28095fce210SBarry Smith 28195fce210SBarry Smith PetscFunctionBegin; 282cd620004SJunchao Zhang ierr = PetscSFLinkGetInUse(sf,unit,rootdata,leafdata,PETSC_OWN_POINTER,&link);CHKERRQ(ierr); 28395fce210SBarry Smith /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */ 28471438e86SJunchao Zhang ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr); 285cd620004SJunchao Zhang /* Do fetch-and-op, the (remote) update results are in rootbuf */ 28671438e86SJunchao Zhang ierr = PetscSFLinkFetchAndOpRemote(sf,link,rootdata,op);CHKERRQ(ierr); 287cd620004SJunchao Zhang /* Bcast rootbuf to leafupdate */ 28871438e86SJunchao Zhang ierr = PetscSFLinkStartCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 28971438e86SJunchao Zhang ierr = PetscSFLinkFinishCommunication(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 290b23bfdefSJunchao Zhang /* Unpack and insert fetched data into leaves */ 29183df288dSJunchao Zhang ierr = PetscSFLinkUnpackLeafData(sf,link,PETSCSF_REMOTE,leafupdate,MPI_REPLACE);CHKERRQ(ierr); 292cd620004SJunchao Zhang ierr = PetscSFLinkReclaim(sf,&link);CHKERRQ(ierr); 29395fce210SBarry Smith PetscFunctionReturn(0); 29495fce210SBarry Smith } 29595fce210SBarry Smith 29640e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc) 2978750ddebSJunchao Zhang { 2988750ddebSJunchao Zhang PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 2998750ddebSJunchao Zhang 3008750ddebSJunchao Zhang PetscFunctionBegin; 3018750ddebSJunchao Zhang if (niranks) *niranks = bas->niranks; 3028750ddebSJunchao Zhang if (iranks) *iranks = bas->iranks; 3038750ddebSJunchao Zhang if (ioffset) *ioffset = bas->ioffset; 3048750ddebSJunchao Zhang if (irootloc) *irootloc = bas->irootloc; 3058750ddebSJunchao Zhang PetscFunctionReturn(0); 3068750ddebSJunchao Zhang } 3078750ddebSJunchao Zhang 30872502a1fSJunchao Zhang /* An optimized PetscSFCreateEmbeddedRootSF. We aggresively make use of the established communication on sf. 309f659e5c7SJunchao Zhang We need one bcast on sf, and no communication anymore to build the embedded sf. Note that selected[] 310f659e5c7SJunchao Zhang was sorted before calling the routine. 311f659e5c7SJunchao Zhang */ 31272502a1fSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedRootSF_Basic(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 313f659e5c7SJunchao Zhang { 314f659e5c7SJunchao Zhang PetscSF esf; 315cd620004SJunchao Zhang PetscInt esf_nranks,esf_ndranks,*esf_roffset,*esf_rmine,*esf_rremote; 316cd620004SJunchao Zhang PetscInt i,j,p,q,nroots,esf_nleaves,*new_ilocal,nranks,ndranks,niranks,ndiranks,minleaf,maxleaf,maxlocal; 317cd620004SJunchao Zhang char *rootdata,*leafdata,*leafmem; /* Only stores 0 or 1, so we can save memory with char */ 318f659e5c7SJunchao Zhang PetscMPIInt *esf_ranks; 319f659e5c7SJunchao Zhang const PetscMPIInt *ranks,*iranks; 320cd620004SJunchao Zhang const PetscInt *roffset,*rmine,*rremote,*ioffset,*irootloc; 321f659e5c7SJunchao Zhang PetscBool connected; 322f659e5c7SJunchao Zhang PetscSFNode *new_iremote; 323f659e5c7SJunchao Zhang PetscSF_Basic *bas; 324f659e5c7SJunchao Zhang PetscErrorCode ierr; 325f659e5c7SJunchao Zhang 326f659e5c7SJunchao Zhang PetscFunctionBegin; 327f659e5c7SJunchao Zhang ierr = PetscSFCreate(PetscObjectComm((PetscObject)sf),&esf);CHKERRQ(ierr); 32820c24465SJunchao Zhang ierr = PetscSFSetFromOptions(esf);CHKERRQ(ierr); 329f659e5c7SJunchao Zhang ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */ 330f659e5c7SJunchao Zhang 331cd620004SJunchao Zhang /* Find out which leaves are still connected to roots in the embedded sf by doing a Bcast */ 332f659e5c7SJunchao Zhang ierr = PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);CHKERRQ(ierr); 333f659e5c7SJunchao Zhang ierr = PetscSFGetLeafRange(sf,&minleaf,&maxleaf);CHKERRQ(ierr); 334cd620004SJunchao Zhang maxlocal = maxleaf - minleaf + 1; 335cd620004SJunchao Zhang ierr = PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);CHKERRQ(ierr); 336cd620004SJunchao Zhang leafdata = leafmem - minleaf; 337f659e5c7SJunchao Zhang /* Tag selected roots */ 338f659e5c7SJunchao Zhang for (i=0; i<nselected; ++i) rootdata[selected[i]] = 1; 339f659e5c7SJunchao Zhang 340ad227feaSJunchao Zhang ierr = PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 341ad227feaSJunchao Zhang ierr = PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr); 342f659e5c7SJunchao Zhang ierr = PetscSFGetLeafInfo_Basic(sf,&nranks,&ndranks,&ranks,&roffset,&rmine,&rremote);CHKERRQ(ierr); /* Get send info */ 343cd620004SJunchao Zhang esf_nranks = esf_ndranks = esf_nleaves = 0; 344b23bfdefSJunchao Zhang for (i=0; i<nranks; i++) { 345cd620004SJunchao Zhang connected = PETSC_FALSE; /* Is this process still connected to this remote root rank? */ 346cd620004SJunchao Zhang for (j=roffset[i]; j<roffset[i+1]; j++) {if (leafdata[rmine[j]]) {esf_nleaves++; connected = PETSC_TRUE;}} 347f659e5c7SJunchao Zhang if (connected) {esf_nranks++; if (i < ndranks) esf_ndranks++;} 348f659e5c7SJunchao Zhang } 349f659e5c7SJunchao Zhang 350f659e5c7SJunchao Zhang /* Set graph of esf and also set up its outgoing communication (i.e., send info), which is usually done by PetscSFSetUpRanks */ 351cd620004SJunchao Zhang ierr = PetscMalloc1(esf_nleaves,&new_ilocal);CHKERRQ(ierr); 352cd620004SJunchao Zhang ierr = PetscMalloc1(esf_nleaves,&new_iremote);CHKERRQ(ierr); 353cd620004SJunchao Zhang ierr = PetscMalloc4(esf_nranks,&esf_ranks,esf_nranks+1,&esf_roffset,esf_nleaves,&esf_rmine,esf_nleaves,&esf_rremote);CHKERRQ(ierr); 354f659e5c7SJunchao Zhang p = 0; /* Counter for connected root ranks */ 355f659e5c7SJunchao Zhang q = 0; /* Counter for connected leaves */ 356f659e5c7SJunchao Zhang esf_roffset[0] = 0; 357f659e5c7SJunchao Zhang for (i=0; i<nranks; i++) { /* Scan leaf data again to fill esf arrays */ 358f659e5c7SJunchao Zhang connected = PETSC_FALSE; 359cd620004SJunchao Zhang for (j=roffset[i]; j<roffset[i+1]; j++) { 360cd620004SJunchao Zhang if (leafdata[rmine[j]]) { 361f659e5c7SJunchao Zhang esf_rmine[q] = new_ilocal[q] = rmine[j]; 362f659e5c7SJunchao Zhang esf_rremote[q] = rremote[j]; 363f659e5c7SJunchao Zhang new_iremote[q].index = rremote[j]; 364f659e5c7SJunchao Zhang new_iremote[q].rank = ranks[i]; 365f659e5c7SJunchao Zhang connected = PETSC_TRUE; 366f659e5c7SJunchao Zhang q++; 367f659e5c7SJunchao Zhang } 368f659e5c7SJunchao Zhang } 369f659e5c7SJunchao Zhang if (connected) { 370f659e5c7SJunchao Zhang esf_ranks[p] = ranks[i]; 371f659e5c7SJunchao Zhang esf_roffset[p+1] = q; 372f659e5c7SJunchao Zhang p++; 373f659e5c7SJunchao Zhang } 374f659e5c7SJunchao Zhang } 375f659e5c7SJunchao Zhang 376f659e5c7SJunchao Zhang /* SetGraph internally resets the SF, so we only set its fields after the call */ 377cd620004SJunchao Zhang ierr = PetscSFSetGraph(esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 378f659e5c7SJunchao Zhang esf->nranks = esf_nranks; 379f659e5c7SJunchao Zhang esf->ndranks = esf_ndranks; 380f659e5c7SJunchao Zhang esf->ranks = esf_ranks; 381f659e5c7SJunchao Zhang esf->roffset = esf_roffset; 382f659e5c7SJunchao Zhang esf->rmine = esf_rmine; 383f659e5c7SJunchao Zhang esf->rremote = esf_rremote; 384cd620004SJunchao Zhang esf->nleafreqs = esf_nranks - esf_ndranks; 385f659e5c7SJunchao Zhang 386f659e5c7SJunchao Zhang /* Set up the incoming communication (i.e., recv info) stored in esf->data, which is usually done by PetscSFSetUp_Basic */ 387f659e5c7SJunchao Zhang bas = (PetscSF_Basic*)esf->data; 388f659e5c7SJunchao Zhang ierr = PetscSFGetRootInfo_Basic(sf,&niranks,&ndiranks,&iranks,&ioffset,&irootloc);CHKERRQ(ierr); /* Get recv info */ 389f659e5c7SJunchao Zhang /* Embedded sf always has simpler communication than the original one. We might allocate longer arrays than needed here. But we 390cd620004SJunchao Zhang we do not care since these arrays are usually short. The benefit is we can fill these arrays by just parsing irootloc once. 391f659e5c7SJunchao Zhang */ 392f659e5c7SJunchao Zhang ierr = PetscMalloc2(niranks,&bas->iranks,niranks+1,&bas->ioffset);CHKERRQ(ierr); 393f659e5c7SJunchao Zhang ierr = PetscMalloc1(ioffset[niranks],&bas->irootloc);CHKERRQ(ierr); 394f659e5c7SJunchao Zhang bas->niranks = bas->ndiranks = bas->ioffset[0] = 0; 395f659e5c7SJunchao Zhang p = 0; /* Counter for connected leaf ranks */ 396f659e5c7SJunchao Zhang q = 0; /* Counter for connected roots */ 397f659e5c7SJunchao Zhang for (i=0; i<niranks; i++) { 398f659e5c7SJunchao Zhang connected = PETSC_FALSE; /* Is the current process still connected to this remote leaf rank? */ 399f659e5c7SJunchao Zhang for (j=ioffset[i]; j<ioffset[i+1]; j++) { 400cd620004SJunchao Zhang if (rootdata[irootloc[j]]) { 401f659e5c7SJunchao Zhang bas->irootloc[q++] = irootloc[j]; 402f659e5c7SJunchao Zhang connected = PETSC_TRUE; 403f659e5c7SJunchao Zhang } 404f659e5c7SJunchao Zhang } 405f659e5c7SJunchao Zhang if (connected) { 406f659e5c7SJunchao Zhang bas->niranks++; 407f659e5c7SJunchao Zhang if (i<ndiranks) bas->ndiranks++; /* Note that order of ranks (including distinguished ranks) is kept */ 408f659e5c7SJunchao Zhang bas->iranks[p] = iranks[i]; 409f659e5c7SJunchao Zhang bas->ioffset[p+1] = q; 410f659e5c7SJunchao Zhang p++; 411f659e5c7SJunchao Zhang } 412f659e5c7SJunchao Zhang } 413f659e5c7SJunchao Zhang bas->itotal = q; 414cd620004SJunchao Zhang bas->nrootreqs = bas->niranks - bas->ndiranks; 415cd620004SJunchao Zhang esf->persistent = PETSC_TRUE; 416cd620004SJunchao Zhang /* Setup packing related fields */ 417cd620004SJunchao Zhang ierr = PetscSFSetUpPackFields(esf);CHKERRQ(ierr); 418f659e5c7SJunchao Zhang 41920c24465SJunchao Zhang /* Copy from PetscSFSetUp(), since this method wants to skip PetscSFSetUp(). */ 42020c24465SJunchao Zhang #if defined(PETSC_HAVE_CUDA) 42120c24465SJunchao Zhang if (esf->backend == PETSCSF_BACKEND_CUDA) { 42271438e86SJunchao Zhang esf->ops->Malloc = PetscSFMalloc_CUDA; 42371438e86SJunchao Zhang esf->ops->Free = PetscSFFree_CUDA; 42420c24465SJunchao Zhang } 42520c24465SJunchao Zhang #endif 42620c24465SJunchao Zhang 42759af0bd3SScott Kruger #if defined(PETSC_HAVE_HIP) 42859af0bd3SScott Kruger /* TODO: Needs debugging */ 42959af0bd3SScott Kruger if (esf->backend == PETSCSF_BACKEND_HIP) { 43059af0bd3SScott Kruger esf->ops->Malloc = PetscSFMalloc_HIP; 43159af0bd3SScott Kruger esf->ops->Free = PetscSFFree_HIP; 43259af0bd3SScott Kruger } 43359af0bd3SScott Kruger #endif 43459af0bd3SScott Kruger 43520c24465SJunchao Zhang #if defined(PETSC_HAVE_KOKKOS) 43620c24465SJunchao Zhang if (esf->backend == PETSCSF_BACKEND_KOKKOS) { 43720c24465SJunchao Zhang esf->ops->Malloc = PetscSFMalloc_Kokkos; 43820c24465SJunchao Zhang esf->ops->Free = PetscSFFree_Kokkos; 43920c24465SJunchao Zhang } 44020c24465SJunchao Zhang #endif 441f659e5c7SJunchao Zhang esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 442cd620004SJunchao Zhang ierr = PetscFree2(rootdata,leafmem);CHKERRQ(ierr); 443f659e5c7SJunchao Zhang *newsf = esf; 444f659e5c7SJunchao Zhang PetscFunctionReturn(0); 445f659e5c7SJunchao Zhang } 446f659e5c7SJunchao Zhang 4478cc058d9SJed Brown PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf) 44895fce210SBarry Smith { 44940e23c03SJunchao Zhang PetscSF_Basic *dat; 45095fce210SBarry Smith PetscErrorCode ierr; 45195fce210SBarry Smith 45295fce210SBarry Smith PetscFunctionBegin; 45395fce210SBarry Smith sf->ops->SetUp = PetscSFSetUp_Basic; 45495fce210SBarry Smith sf->ops->Reset = PetscSFReset_Basic; 45595fce210SBarry Smith sf->ops->Destroy = PetscSFDestroy_Basic; 45695fce210SBarry Smith sf->ops->View = PetscSFView_Basic; 457ad227feaSJunchao Zhang sf->ops->BcastBegin = PetscSFBcastBegin_Basic; 458ad227feaSJunchao Zhang sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 45995fce210SBarry Smith sf->ops->ReduceBegin = PetscSFReduceBegin_Basic; 46095fce210SBarry Smith sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 46195fce210SBarry Smith sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic; 46295fce210SBarry Smith sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Basic; 4638750ddebSJunchao Zhang sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Basic; 46472502a1fSJunchao Zhang sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Basic; 46595fce210SBarry Smith 46640e23c03SJunchao Zhang ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 46740e23c03SJunchao Zhang sf->data = (void*)dat; 46895fce210SBarry Smith PetscFunctionReturn(0); 46995fce210SBarry Smith } 470