1 #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h> 2 #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h> 3 #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 4 5 typedef PetscSFPack_Allgatherv PetscSFPack_Alltoall; 6 #define PetscSFPackGet_Alltoall PetscSFPackGet_Allgatherv 7 8 /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */ 9 typedef PetscSF_Allgatherv PetscSF_Alltoall; 10 11 /*===================================================================================*/ 12 /* Implementations of SF public APIs */ 13 /*===================================================================================*/ 14 static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote) 15 { 16 PetscErrorCode ierr; 17 PetscInt i; 18 19 PetscFunctionBegin; 20 if (nroots) *nroots = sf->nroots; 21 if (nleaves) *nleaves = sf->nleaves; 22 if (ilocal) *ilocal = NULL; /* Contiguous local indices */ 23 if (iremote) { 24 if (!sf->remote) { 25 ierr = PetscMalloc1(sf->nleaves,&sf->remote);CHKERRQ(ierr); 26 sf->remote_alloc = sf->remote; 27 for (i=0; i<sf->nleaves; i++) { 28 sf->remote[i].rank = i; 29 sf->remote[i].index = i; 30 } 31 } 32 *iremote = sf->remote; 33 } 34 PetscFunctionReturn(0); 35 } 36 37 static PetscErrorCode PetscSFBcastAndOpBegin_Alltoall(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op) 38 { 39 PetscErrorCode ierr; 40 PetscSFPack_Alltoall link; 41 MPI_Comm comm; 42 void *recvbuf; 43 44 PetscFunctionBegin; 45 ierr = PetscSFPackGet_Alltoall(sf,unit,rootdata,leafdata,&link);CHKERRQ(ierr); 46 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 47 48 if (op != MPIU_REPLACE) { 49 if (!link->leaf) {ierr = PetscMalloc(sf->nleaves*link->unitbytes,&link->leaf);CHKERRQ(ierr);} 50 recvbuf = link->leaf; 51 } else { 52 recvbuf = leafdata; 53 } 54 ierr = MPIU_Ialltoall(rootdata,1,unit,recvbuf,1,unit,comm,&link->request);CHKERRQ(ierr); 55 PetscFunctionReturn(0); 56 } 57 58 static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op) 59 { 60 PetscErrorCode ierr; 61 PetscSFPack_Alltoall link; 62 MPI_Comm comm; 63 void *recvbuf; 64 65 PetscFunctionBegin; 66 ierr = PetscSFPackGet_Alltoall(sf,unit,rootdata,leafdata,&link);CHKERRQ(ierr); 67 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 68 69 if (op != MPIU_REPLACE) { 70 if (!link->root) {ierr = PetscMalloc(sf->nroots*link->unitbytes,&link->root);CHKERRQ(ierr);} 71 recvbuf = link->root; 72 } else { 73 recvbuf = rootdata; 74 } 75 ierr = MPIU_Ialltoall(leafdata,1,unit,recvbuf,1,unit,comm,&link->request);CHKERRQ(ierr); 76 PetscFunctionReturn(0); 77 } 78 79 static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf,PetscSF *out) 80 { 81 PetscErrorCode ierr; 82 PetscInt nroots=1,nleaves=1,*ilocal; 83 PetscSFNode *iremote = NULL; 84 PetscSF lsf; 85 PetscMPIInt rank; 86 87 PetscFunctionBegin; 88 nroots = 1; 89 nleaves = 1; 90 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr); 91 ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr); 92 ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr); 93 ilocal[0] = rank; 94 iremote[0].rank = 0; /* rank in PETSC_COMM_SELF */ 95 iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */ 96 97 ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr); 98 ierr = PetscSFSetGraph(lsf,nroots,nleaves,NULL/*contiguous leaves*/,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 99 ierr = PetscSFSetUp(lsf);CHKERRQ(ierr); 100 *out = lsf; 101 PetscFunctionReturn(0); 102 } 103 104 static PetscErrorCode PetscSFCreateEmbeddedSF_Alltoall(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf) 105 { 106 PetscErrorCode ierr; 107 PetscInt i,*tmproots,*ilocal,ndranks,ndiranks; 108 PetscSFNode *iremote; 109 PetscMPIInt nroots,*roots,nleaves,*leaves,rank; 110 MPI_Comm comm; 111 PetscSF_Basic *bas; 112 PetscSF esf; 113 114 PetscFunctionBegin; 115 ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr); 116 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 117 118 /* Uniq selected[] and store the result in roots[] */ 119 ierr = PetscMalloc1(nselected,&tmproots);CHKERRQ(ierr); 120 ierr = PetscArraycpy(tmproots,selected,nselected);CHKERRQ(ierr); 121 ierr = PetscSortRemoveDupsInt(&nselected,tmproots);CHKERRQ(ierr); /* nselected might be changed */ 122 if (tmproots[0] < 0 || tmproots[nselected-1] >= sf->nroots) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",tmproots[0],tmproots[nselected-1],sf->nroots); 123 nroots = nselected; /* For Alltoall, we know root indices will not overflow MPI_INT */ 124 ierr = PetscMalloc1(nselected,&roots);CHKERRQ(ierr); 125 for (i=0; i<nselected; i++) roots[i] = tmproots[i]; 126 ierr = PetscFree(tmproots);CHKERRQ(ierr); 127 128 /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */ 129 ierr = PetscCommBuildTwoSided(comm,0/*empty msg*/,MPI_INT/*fake*/,nroots,roots,NULL/*todata*/,&nleaves,&leaves,NULL/*fromdata*/);CHKERRQ(ierr); 130 131 /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */ 132 ndranks = 0; 133 for (i=0; i<nleaves; i++) { 134 if (leaves[i] == rank) {leaves[i] = -rank; ndranks = 1; break;} 135 } 136 ierr = PetscSortMPIInt(nleaves,leaves);CHKERRQ(ierr); 137 if (nleaves && leaves[0] < 0) leaves[0] = rank; 138 139 /* Build esf and fill its fields manually (without calling PetscSFSetUp) */ 140 ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr); 141 ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr); 142 for (i=0; i<nleaves; i++) { /* 1:1 map from roots to leaves */ 143 ilocal[i] = leaves[i]; 144 iremote[i].rank = leaves[i]; 145 iremote[i].index = leaves[i]; 146 } 147 ierr = PetscSFCreate(comm,&esf);CHKERRQ(ierr); 148 ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */ 149 ierr = PetscSFSetGraph(esf,sf->nleaves,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 150 151 /* As if we are calling PetscSFSetUpRanks(esf,self's group) */ 152 ierr = PetscMalloc4(nleaves,&esf->ranks,nleaves+1,&esf->roffset,nleaves,&esf->rmine,nleaves,&esf->rremote);CHKERRQ(ierr); 153 esf->nranks = nleaves; 154 esf->ndranks = ndranks; 155 esf->roffset[0] = 0; 156 for (i=0; i<nleaves; i++) { 157 esf->ranks[i] = leaves[i]; 158 esf->roffset[i+1] = i+1; 159 esf->rmine[i] = leaves[i]; 160 esf->rremote[i] = leaves[i]; 161 } 162 163 /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */ 164 bas = (PetscSF_Basic*)esf->data; 165 ierr = PetscMalloc2(nroots,&bas->iranks,nroots+1,&bas->ioffset);CHKERRQ(ierr); 166 ierr = PetscMalloc1(nroots,&bas->irootloc);CHKERRQ(ierr); 167 /* Move myself ahead if rank is in roots[], since I am a distinguished irank */ 168 ndiranks = 0; 169 for (i=0; i<nroots; i++) { 170 if (roots[i] == rank) {roots[i] = -rank; ndiranks = 1; break;} 171 } 172 ierr = PetscSortMPIInt(nroots,roots);CHKERRQ(ierr); 173 if (nroots && roots[0] < 0) roots[0] = rank; 174 175 bas->niranks = nroots; 176 bas->ndiranks = ndiranks; 177 bas->ioffset[0] = 0; 178 bas->itotal = nroots; 179 for (i=0; i<nroots; i++) { 180 bas->iranks[i] = roots[i]; 181 bas->ioffset[i+1] = i+1; 182 bas->irootloc[i] = roots[i]; 183 } 184 185 *newsf = esf; 186 PetscFunctionReturn(0); 187 } 188 189 PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf) 190 { 191 PetscErrorCode ierr; 192 PetscSF_Alltoall *dat = (PetscSF_Alltoall*)sf->data; 193 194 PetscFunctionBegin; 195 /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */ 196 sf->ops->Destroy = PetscSFDestroy_Allgatherv; 197 sf->ops->Reset = PetscSFReset_Allgatherv; 198 sf->ops->BcastAndOpEnd = PetscSFBcastAndOpEnd_Allgatherv; 199 sf->ops->ReduceEnd = PetscSFReduceEnd_Allgatherv; 200 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 201 sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 202 203 /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */ 204 sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 205 206 /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */ 207 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 208 209 /* Alltoall stuff */ 210 sf->ops->GetGraph = PetscSFGetGraph_Alltoall; 211 sf->ops->BcastAndOpBegin = PetscSFBcastAndOpBegin_Alltoall; 212 sf->ops->ReduceBegin = PetscSFReduceBegin_Alltoall; 213 sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Alltoall; 214 sf->ops->CreateEmbeddedSF = PetscSFCreateEmbeddedSF_Alltoall; 215 216 ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr); 217 sf->data = (void*)dat; 218 PetscFunctionReturn(0); 219 } 220 221 222