1 #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h> 2 #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h> 3 #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 4 5 #define PetscSFPackGet_Alltoall PetscSFPackGet_Allgatherv 6 7 /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */ 8 typedef PetscSF_Allgatherv PetscSF_Alltoall; 9 10 /*===================================================================================*/ 11 /* Implementations of SF public APIs */ 12 /*===================================================================================*/ 13 static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 14 { 15 PetscErrorCode ierr; 16 PetscInt i; 17 18 PetscFunctionBegin; 19 if (nroots) *nroots = sf->nroots; 20 if (nleaves) *nleaves = sf->nleaves; 21 if (ilocal) *ilocal = NULL; /* Contiguous local indices */ 22 if (iremote) { 23 if (!sf->remote) { 24 ierr = PetscMalloc1(sf->nleaves,&sf->remote);CHKERRQ(ierr); 25 sf->remote_alloc = sf->remote; 26 for (i=0; i<sf->nleaves; i++) { 27 sf->remote[i].rank = i; 28 sf->remote[i].index = i; 29 } 30 } 31 *iremote = sf->remote; 32 } 33 PetscFunctionReturn(0); 34 } 35 36 static PetscErrorCode PetscSFBcastAndOpBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op) 37 { 38 PetscErrorCode ierr; 39 PetscSFPack link; 40 MPI_Comm comm; 41 char *recvbuf; 42 43 PetscFunctionBegin; 44 ierr = PetscSFPackGet_Alltoall(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr); 45 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 46 47 if (op != MPIU_REPLACE) { 48 if (!link->leafbuf[leafmtype]) {ierr = PetscMallocWithMemType(leafmtype,sf->nleaves*link->unitbytes,(void**)&link->leafbuf[leafmtype]);CHKERRQ(ierr);} 49 recvbuf = link->leafbuf[leafmtype]; 50 } else { 51 recvbuf = (char*)leafdata; 52 } 53 ierr = MPIU_Ialltoall(rootdata,1,unit,recvbuf,1,unit,comm,link->rootreqs[PETSCSF_ROOT2LEAF_BCAST][rootmtype]);CHKERRQ(ierr); 54 PetscFunctionReturn(0); 55 } 56 57 static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op) 58 { 59 PetscErrorCode ierr; 60 PetscSFPack link; 61 MPI_Comm comm; 62 char *recvbuf; 63 64 PetscFunctionBegin; 65 ierr = PetscSFPackGet_Alltoall(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr); 66 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 67 68 if (op != MPIU_REPLACE) { 69 if (!link->rootbuf[rootmtype]) {ierr = PetscMallocWithMemType(rootmtype,sf->nroots*link->unitbytes,(void**)&link->rootbuf[rootmtype]);CHKERRQ(ierr);} 70 recvbuf = link->rootbuf[rootmtype]; 71 } else { 72 recvbuf = (char*)rootdata; 73 } 74 ierr = MPIU_Ialltoall(leafdata,1,unit,recvbuf,1,unit,comm,link->rootreqs[PETSCSF_LEAF2ROOT_REDUCE][rootmtype]);CHKERRQ(ierr); 75 PetscFunctionReturn(0); 76 } 77 78 static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf,PetscSF *out) 79 { 80 PetscErrorCode ierr; 81 PetscInt nroots=1,nleaves=1,*ilocal; 82 PetscSFNode *iremote = NULL; 83 PetscSF lsf; 84 PetscMPIInt rank; 85 86 PetscFunctionBegin; 87 nroots = 1; 88 nleaves = 1; 89 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 90 ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr); 91 ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr); 92 ilocal[0] = rank; 93 iremote[0].rank = 0; /* rank in PETSC_COMM_SELF */ 94 iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */ 95 96 ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 97 ierr = PetscSFSetGraph(lsf,nroots,nleaves,NULL/*contiguous leaves*/,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 98 ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 99 *out = lsf; 100 PetscFunctionReturn(0); 101 } 102 103 static PetscErrorCode PetscSFCreateEmbeddedSF_Alltoall(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 104 { 105 PetscErrorCode ierr; 106 PetscInt i,*tmproots,*ilocal,ndranks,ndiranks; 107 PetscSFNode *iremote; 108 PetscMPIInt nroots,*roots,nleaves,*leaves,rank; 109 MPI_Comm comm; 110 PetscSF_Basic *bas; 111 PetscSF esf; 112 113 PetscFunctionBegin; 114 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 115 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 116 117 /* Uniq selected[] and store the result in roots[] */ 118 ierr = PetscMalloc1(nselected,&tmproots);CHKERRQ(ierr); 119 ierr = PetscArraycpy(tmproots,selected,nselected);CHKERRQ(ierr); 120 ierr = PetscSortRemoveDupsInt(&nselected,tmproots);CHKERRQ(ierr); /* nselected might be changed */ 121 if (tmproots[0] < 0 || tmproots[nselected-1] >= sf->nroots) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",tmproots[0],tmproots[nselected-1],sf->nroots); 122 nroots = nselected; /* For Alltoall, we know root indices will not overflow MPI_INT */ 123 ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr); 124 for (i=0; i<nselected; i++) roots[i] = tmproots[i]; 125 ierr = PetscFree(tmproots);CHKERRQ(ierr); 126 127 /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */ 128 ierr = PetscCommBuildTwoSided(comm,0/*empty msg*/,MPI_INT/*fake*/,nroots,roots,NULL/*todata*/,&nleaves,&leaves,NULL/*fromdata*/);CHKERRQ(ierr); 129 130 /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */ 131 ndranks = 0; 132 for (i=0; i<nleaves; i++) { 133 if (leaves[i] == rank) {leaves[i] = -rank; ndranks = 1; break;} 134 } 135 ierr = PetscSortMPIInt(nleaves,leaves);CHKERRQ(ierr); 136 if (nleaves && leaves[0] < 0) leaves[0] = rank; 137 138 /* Build esf and fill its fields manually (without calling PetscSFSetUp) */ 139 ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr); 140 ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr); 141 for (i=0; i<nleaves; i++) { /* 1:1 map from roots to leaves */ 142 ilocal[i] = leaves[i]; 143 iremote[i].rank = leaves[i]; 144 iremote[i].index = leaves[i]; 145 } 146 ierr = PetscSFCreate(comm,&esf);CHKERRQ(ierr); 147 ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */ 148 ierr = PetscSFSetGraph(esf,sf->nleaves,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 149 150 /* As if we are calling PetscSFSetUpRanks(esf,self's group) */ 151 ierr = PetscMalloc4(nleaves,&esf->ranks,nleaves+1,&esf->roffset,nleaves,&esf->rmine,nleaves,&esf->rremote);CHKERRQ(ierr); 152 esf->nranks = nleaves; 153 esf->ndranks = ndranks; 154 esf->roffset[0] = 0; 155 for (i=0; i<nleaves; i++) { 156 esf->ranks[i] = leaves[i]; 157 esf->roffset[i+1] = i+1; 158 esf->rmine[i] = leaves[i]; 159 esf->rremote[i] = leaves[i]; 160 } 161 162 /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */ 163 bas = (PetscSF_Basic*)esf->data; 164 ierr = PetscMalloc2(nroots,&bas->iranks,nroots+1,&bas->ioffset);CHKERRQ(ierr); 165 ierr = PetscMalloc1(nroots,&bas->irootloc);CHKERRQ(ierr); 166 /* Move myself ahead if rank is in roots[], since I am a distinguished irank */ 167 ndiranks = 0; 168 for (i=0; i<nroots; i++) { 169 if (roots[i] == rank) {roots[i] = -rank; ndiranks = 1; break;} 170 } 171 ierr = PetscSortMPIInt(nroots,roots);CHKERRQ(ierr); 172 if (nroots && roots[0] < 0) roots[0] = rank; 173 174 bas->niranks = nroots; 175 bas->ndiranks = ndiranks; 176 bas->ioffset[0] = 0; 177 bas->itotal = nroots; 178 for (i=0; i<nroots; i++) { 179 bas->iranks[i] = roots[i]; 180 bas->ioffset[i+1] = i+1; 181 bas->irootloc[i] = roots[i]; 182 } 183 184 *newsf = esf; 185 PetscFunctionReturn(0); 186 } 187 188 PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf) 189 { 190 PetscErrorCode ierr; 191 PetscSF_Alltoall *dat = (PetscSF_Alltoall*)sf->data; 192 193 PetscFunctionBegin; 194 /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */ 195 sf->ops->Destroy = PetscSFDestroy_Allgatherv; 196 sf->ops->Reset = PetscSFReset_Allgatherv; 197 sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Allgatherv; 198 sf->ops->ReduceEnd = PetscSFReduceEnd_Allgatherv; 199 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 200 sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 201 202 /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */ 203 sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 204 205 /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */ 206 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 207 208 /* Alltoall stuff */ 209 sf->ops->GetGraph = PetscSFGetGraph_Alltoall; 210 sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Alltoall; 211 sf->ops->ReduceBegin = PetscSFReduceBegin_Alltoall; 212 sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Alltoall; 213 sf->ops->CreateEmbeddedSF = PetscSFCreateEmbeddedSF_Alltoall; 214 215 ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 216 sf->data = (void*)dat; 217 PetscFunctionReturn(0); 218 } 219 220 221