1 #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h> 2 #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h> 3 #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 4 5 /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */ 6 typedef PetscSF_Allgatherv PetscSF_Alltoall; 7 8 /*===================================================================================*/ 9 /* Implementations of SF public APIs */ 10 /*===================================================================================*/ 11 static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 12 { 13 PetscErrorCode ierr; 14 PetscInt i; 15 16 PetscFunctionBegin; 17 if (nroots) *nroots = sf->nroots; 18 if (nleaves) *nleaves = sf->nleaves; 19 if (ilocal) *ilocal = NULL; /* Contiguous local indices */ 20 if (iremote) { 21 if (!sf->remote) { 22 ierr = PetscMalloc1(sf->nleaves,&sf->remote);CHKERRQ(ierr); 23 sf->remote_alloc = sf->remote; 24 for (i=0; i<sf->nleaves; i++) { 25 sf->remote[i].rank = i; 26 sf->remote[i].index = i; 27 } 28 } 29 *iremote = sf->remote; 30 } 31 PetscFunctionReturn(0); 32 } 33 34 static PetscErrorCode PetscSFBcastBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 35 { 36 PetscErrorCode ierr; 37 PetscSFLink link; 38 MPI_Comm comm; 39 void *rootbuf = NULL,*leafbuf = NULL; /* buffer used by MPI */ 40 MPI_Request *req; 41 42 PetscFunctionBegin; 43 ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr); 44 ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr); 45 ierr = PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);CHKERRQ(ierr); 46 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 47 ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,&req,NULL);CHKERRQ(ierr); 48 ierr = PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF);CHKERRQ(ierr); 49 ierr = MPIU_Ialltoall(rootbuf,1,unit,leafbuf,1,unit,comm,req);CHKERRMPI(ierr); 50 PetscFunctionReturn(0); 51 } 52 53 static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 54 { 55 PetscErrorCode ierr; 56 PetscSFLink link; 57 MPI_Comm comm; 58 void *rootbuf = NULL,*leafbuf = NULL; /* buffer used by MPI */ 59 MPI_Request *req; 60 61 PetscFunctionBegin; 62 ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr); 63 ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr); 64 ierr = PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);CHKERRQ(ierr); 65 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 66 ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,&rootbuf,&leafbuf,&req,NULL);CHKERRQ(ierr); 67 ierr = PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_LEAF2ROOT);CHKERRQ(ierr); 68 ierr = MPIU_Ialltoall(leafbuf,1,unit,rootbuf,1,unit,comm,req);CHKERRMPI(ierr); 69 PetscFunctionReturn(0); 70 } 71 72 static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf,PetscSF *out) 73 { 74 PetscErrorCode ierr; 75 PetscInt nroots = 1,nleaves = 1,*ilocal; 76 PetscSFNode *iremote = NULL; 77 PetscSF lsf; 78 PetscMPIInt rank; 79 80 PetscFunctionBegin; 81 nroots = 1; 82 nleaves = 1; 83 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr); 84 ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr); 85 ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr); 86 ilocal[0] = rank; 87 iremote[0].rank = 0; /* rank in PETSC_COMM_SELF */ 88 iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */ 89 90 ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 91 ierr = PetscSFSetGraph(lsf,nroots,nleaves,NULL/*contiguous leaves*/,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 92 ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 93 *out = lsf; 94 PetscFunctionReturn(0); 95 } 96 97 static PetscErrorCode PetscSFCreateEmbeddedRootSF_Alltoall(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 98 { 99 PetscErrorCode ierr; 100 PetscInt i,*tmproots,*ilocal,ndranks,ndiranks; 101 PetscSFNode *iremote; 102 PetscMPIInt nroots,*roots,nleaves,*leaves,rank; 103 MPI_Comm comm; 104 PetscSF_Basic *bas; 105 PetscSF esf; 106 107 PetscFunctionBegin; 108 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 109 ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 110 111 /* Uniq selected[] and store the result in roots[] */ 112 ierr = PetscMalloc1(nselected,&tmproots);CHKERRQ(ierr); 113 ierr = PetscArraycpy(tmproots,selected,nselected);CHKERRQ(ierr); 114 ierr = PetscSortRemoveDupsInt(&nselected,tmproots);CHKERRQ(ierr); /* nselected might be changed */ 115 if (tmproots[0] < 0 || tmproots[nselected-1] >= sf->nroots) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",tmproots[0],tmproots[nselected-1],sf->nroots); 116 nroots = nselected; /* For Alltoall, we know root indices will not overflow MPI_INT */ 117 ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr); 118 for (i=0; i<nselected; i++) roots[i] = tmproots[i]; 119 ierr = PetscFree(tmproots);CHKERRQ(ierr); 120 121 /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */ 122 ierr = PetscCommBuildTwoSided(comm,0/*empty msg*/,MPI_INT/*fake*/,nroots,roots,NULL/*todata*/,&nleaves,&leaves,NULL/*fromdata*/);CHKERRQ(ierr); 123 124 /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */ 125 ndranks = 0; 126 for (i=0; i<nleaves; i++) { 127 if (leaves[i] == rank) {leaves[i] = -rank; ndranks = 1; break;} 128 } 129 ierr = PetscSortMPIInt(nleaves,leaves);CHKERRQ(ierr); 130 if (nleaves && leaves[0] < 0) leaves[0] = rank; 131 132 /* Build esf and fill its fields manually (without calling PetscSFSetUp) */ 133 ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr); 134 ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr); 135 for (i=0; i<nleaves; i++) { /* 1:1 map from roots to leaves */ 136 ilocal[i] = leaves[i]; 137 iremote[i].rank = leaves[i]; 138 iremote[i].index = leaves[i]; 139 } 140 ierr = PetscSFCreate(comm,&esf);CHKERRQ(ierr); 141 ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */ 142 ierr = PetscSFSetGraph(esf,sf->nleaves,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 143 144 /* As if we are calling PetscSFSetUpRanks(esf,self's group) */ 145 ierr = PetscMalloc4(nleaves,&esf->ranks,nleaves+1,&esf->roffset,nleaves,&esf->rmine,nleaves,&esf->rremote);CHKERRQ(ierr); 146 esf->nranks = nleaves; 147 esf->ndranks = ndranks; 148 esf->roffset[0] = 0; 149 for (i=0; i<nleaves; i++) { 150 esf->ranks[i] = leaves[i]; 151 esf->roffset[i+1] = i+1; 152 esf->rmine[i] = leaves[i]; 153 esf->rremote[i] = leaves[i]; 154 } 155 156 /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */ 157 bas = (PetscSF_Basic*)esf->data; 158 ierr = PetscMalloc2(nroots,&bas->iranks,nroots+1,&bas->ioffset);CHKERRQ(ierr); 159 ierr = PetscMalloc1(nroots,&bas->irootloc);CHKERRQ(ierr); 160 /* Move myself ahead if rank is in roots[], since I am a distinguished irank */ 161 ndiranks = 0; 162 for (i=0; i<nroots; i++) { 163 if (roots[i] == rank) {roots[i] = -rank; ndiranks = 1; break;} 164 } 165 ierr = PetscSortMPIInt(nroots,roots);CHKERRQ(ierr); 166 if (nroots && roots[0] < 0) roots[0] = rank; 167 168 bas->niranks = nroots; 169 bas->ndiranks = ndiranks; 170 bas->ioffset[0] = 0; 171 bas->itotal = nroots; 172 for (i=0; i<nroots; i++) { 173 bas->iranks[i] = roots[i]; 174 bas->ioffset[i+1] = i+1; 175 bas->irootloc[i] = roots[i]; 176 } 177 178 /* See PetscSFCreateEmbeddedRootSF_Basic */ 179 esf->nleafreqs = esf->nranks - esf->ndranks; 180 bas->nrootreqs = bas->niranks - bas->ndiranks; 181 esf->persistent = PETSC_TRUE; 182 /* Setup packing related fields */ 183 ierr = PetscSFSetUpPackFields(esf);CHKERRQ(ierr); 184 185 esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 186 *newsf = esf; 187 PetscFunctionReturn(0); 188 } 189 190 PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf) 191 { 192 PetscErrorCode ierr; 193 PetscSF_Alltoall *dat = (PetscSF_Alltoall*)sf->data; 194 195 PetscFunctionBegin; 196 sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 197 sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 198 199 /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */ 200 sf->ops->Destroy = PetscSFDestroy_Allgatherv; 201 sf->ops->Reset = PetscSFReset_Allgatherv; 202 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 203 sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 204 205 /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */ 206 sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 207 sf->ops->SetUp = PetscSFSetUp_Allgather; 208 209 /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */ 210 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 211 212 /* Alltoall stuff */ 213 sf->ops->GetGraph = PetscSFGetGraph_Alltoall; 214 sf->ops->BcastBegin = PetscSFBcastBegin_Alltoall; 215 sf->ops->ReduceBegin = PetscSFReduceBegin_Alltoall; 216 sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Alltoall; 217 sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Alltoall; 218 219 ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 220 sf->data = (void*)dat; 221 PetscFunctionReturn(0); 222 } 223 224