1 #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h> 2 #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h> 3 #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 4 5 /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */ 6 typedef PetscSF_Allgatherv PetscSF_Alltoall; 7 8 /*===================================================================================*/ 9 /* Implementations of SF public APIs */ 10 /*===================================================================================*/ 11 static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 12 { 13 PetscInt i; 14 15 PetscFunctionBegin; 16 if (nroots) *nroots = sf->nroots; 17 if (nleaves) *nleaves = sf->nleaves; 18 if (ilocal) *ilocal = NULL; /* Contiguous local indices */ 19 if (iremote) { 20 if (!sf->remote) { 21 PetscCall(PetscMalloc1(sf->nleaves,&sf->remote)); 22 sf->remote_alloc = sf->remote; 23 for (i=0; i<sf->nleaves; i++) { 24 sf->remote[i].rank = i; 25 sf->remote[i].index = i; 26 } 27 } 28 *iremote = sf->remote; 29 } 30 PetscFunctionReturn(0); 31 } 32 33 static PetscErrorCode PetscSFBcastBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 34 { 35 PetscSFLink link; 36 MPI_Comm comm; 37 void *rootbuf = NULL,*leafbuf = NULL; /* buffer used by MPI */ 38 MPI_Request *req; 39 40 PetscFunctionBegin; 41 PetscCall(PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link)); 42 PetscCall(PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata)); 43 PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */)); 44 PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 45 PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,&req,NULL)); 46 PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF)); 47 PetscCallMPI(MPIU_Ialltoall(rootbuf,1,unit,leafbuf,1,unit,comm,req)); 48 PetscFunctionReturn(0); 49 } 50 51 static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 52 { 53 PetscSFLink link; 54 MPI_Comm comm; 55 void *rootbuf = NULL,*leafbuf = NULL; /* buffer used by MPI */ 56 MPI_Request *req; 57 58 PetscFunctionBegin; 59 PetscCall(PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_REDUCE,&link)); 60 PetscCall(PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata)); 61 PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */)); 62 PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 63 PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,&rootbuf,&leafbuf,&req,NULL)); 64 PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_LEAF2ROOT)); 65 PetscCallMPI(MPIU_Ialltoall(leafbuf,1,unit,rootbuf,1,unit,comm,req)); 66 PetscFunctionReturn(0); 67 } 68 69 static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf,PetscSF *out) 70 { 71 PetscInt nroots = 1,nleaves = 1,*ilocal; 72 PetscSFNode *iremote = NULL; 73 PetscSF lsf; 74 PetscMPIInt rank; 75 76 PetscFunctionBegin; 77 nroots = 1; 78 nleaves = 1; 79 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank)); 80 PetscCall(PetscMalloc1(nleaves,&ilocal)); 81 PetscCall(PetscMalloc1(nleaves,&iremote)); 82 ilocal[0] = rank; 83 iremote[0].rank = 0; /* rank in PETSC_COMM_SELF */ 84 iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */ 85 86 PetscCall(PetscSFCreate(PETSC_COMM_SELF,&lsf)); 87 PetscCall(PetscSFSetGraph(lsf,nroots,nleaves,NULL/*contiguous leaves*/,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 88 PetscCall(PetscSFSetUp(lsf)); 89 *out = lsf; 90 PetscFunctionReturn(0); 91 } 92 93 static PetscErrorCode PetscSFCreateEmbeddedRootSF_Alltoall(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 94 { 95 PetscInt i,*tmproots,*ilocal,ndranks,ndiranks; 96 PetscSFNode *iremote; 97 PetscMPIInt nroots,*roots,nleaves,*leaves,rank; 98 MPI_Comm comm; 99 PetscSF_Basic *bas; 100 PetscSF esf; 101 102 PetscFunctionBegin; 103 PetscCall(PetscObjectGetComm((PetscObject)sf,&comm)); 104 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 105 106 /* Uniq selected[] and store the result in roots[] */ 107 PetscCall(PetscMalloc1(nselected,&tmproots)); 108 PetscCall(PetscArraycpy(tmproots,selected,nselected)); 109 PetscCall(PetscSortRemoveDupsInt(&nselected,tmproots)); /* nselected might be changed */ 110 PetscCheck(tmproots[0] >= 0 && tmproots[nselected-1] < sf->nroots,comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")",tmproots[0],tmproots[nselected-1],sf->nroots); 111 nroots = nselected; /* For Alltoall, we know root indices will not overflow MPI_INT */ 112 PetscCall(PetscMalloc1(nselected,&roots)); 113 for (i=0; i<nselected; i++) roots[i] = tmproots[i]; 114 PetscCall(PetscFree(tmproots)); 115 116 /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */ 117 PetscCall(PetscCommBuildTwoSided(comm,0/*empty msg*/,MPI_INT/*fake*/,nroots,roots,NULL/*todata*/,&nleaves,&leaves,NULL/*fromdata*/)); 118 119 /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */ 120 ndranks = 0; 121 for (i=0; i<nleaves; i++) { 122 if (leaves[i] == rank) {leaves[i] = -rank; ndranks = 1; break;} 123 } 124 PetscCall(PetscSortMPIInt(nleaves,leaves)); 125 if (nleaves && leaves[0] < 0) leaves[0] = rank; 126 127 /* Build esf and fill its fields manually (without calling PetscSFSetUp) */ 128 PetscCall(PetscMalloc1(nleaves,&ilocal)); 129 PetscCall(PetscMalloc1(nleaves,&iremote)); 130 for (i=0; i<nleaves; i++) { /* 1:1 map from roots to leaves */ 131 ilocal[i] = leaves[i]; 132 iremote[i].rank = leaves[i]; 133 iremote[i].index = leaves[i]; 134 } 135 PetscCall(PetscSFCreate(comm,&esf)); 136 PetscCall(PetscSFSetType(esf,PETSCSFBASIC)); /* This optimized routine can only create a basic sf */ 137 PetscCall(PetscSFSetGraph(esf,sf->nleaves,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER)); 138 139 /* As if we are calling PetscSFSetUpRanks(esf,self's group) */ 140 PetscCall(PetscMalloc4(nleaves,&esf->ranks,nleaves+1,&esf->roffset,nleaves,&esf->rmine,nleaves,&esf->rremote)); 141 esf->nranks = nleaves; 142 esf->ndranks = ndranks; 143 esf->roffset[0] = 0; 144 for (i=0; i<nleaves; i++) { 145 esf->ranks[i] = leaves[i]; 146 esf->roffset[i+1] = i+1; 147 esf->rmine[i] = leaves[i]; 148 esf->rremote[i] = leaves[i]; 149 } 150 151 /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */ 152 bas = (PetscSF_Basic*)esf->data; 153 PetscCall(PetscMalloc2(nroots,&bas->iranks,nroots+1,&bas->ioffset)); 154 PetscCall(PetscMalloc1(nroots,&bas->irootloc)); 155 /* Move myself ahead if rank is in roots[], since I am a distinguished irank */ 156 ndiranks = 0; 157 for (i=0; i<nroots; i++) { 158 if (roots[i] == rank) {roots[i] = -rank; ndiranks = 1; break;} 159 } 160 PetscCall(PetscSortMPIInt(nroots,roots)); 161 if (nroots && roots[0] < 0) roots[0] = rank; 162 163 bas->niranks = nroots; 164 bas->ndiranks = ndiranks; 165 bas->ioffset[0] = 0; 166 bas->itotal = nroots; 167 for (i=0; i<nroots; i++) { 168 bas->iranks[i] = roots[i]; 169 bas->ioffset[i+1] = i+1; 170 bas->irootloc[i] = roots[i]; 171 } 172 173 /* See PetscSFCreateEmbeddedRootSF_Basic */ 174 esf->nleafreqs = esf->nranks - esf->ndranks; 175 bas->nrootreqs = bas->niranks - bas->ndiranks; 176 esf->persistent = PETSC_TRUE; 177 /* Setup packing related fields */ 178 PetscCall(PetscSFSetUpPackFields(esf)); 179 180 esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 181 *newsf = esf; 182 PetscFunctionReturn(0); 183 } 184 185 PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf) 186 { 187 PetscSF_Alltoall *dat = (PetscSF_Alltoall*)sf->data; 188 189 PetscFunctionBegin; 190 sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 191 sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 192 193 /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */ 194 sf->ops->Destroy = PetscSFDestroy_Allgatherv; 195 sf->ops->Reset = PetscSFReset_Allgatherv; 196 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 197 sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 198 199 /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */ 200 sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 201 sf->ops->SetUp = PetscSFSetUp_Allgather; 202 203 /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */ 204 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 205 206 /* Alltoall stuff */ 207 sf->ops->GetGraph = PetscSFGetGraph_Alltoall; 208 sf->ops->BcastBegin = PetscSFBcastBegin_Alltoall; 209 sf->ops->ReduceBegin = PetscSFReduceBegin_Alltoall; 210 sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Alltoall; 211 sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Alltoall; 212 213 PetscCall(PetscNewLog(sf,&dat)); 214 sf->data = (void*)dat; 215 PetscFunctionReturn(0); 216 } 217