#include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h>
#include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h>
#include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h>

/* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */
typedef PetscSF_Allgatherv PetscSF_Alltoall;

/*===================================================================================*/
/* Implementations of SF public APIs */
/*===================================================================================*/
static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
{
  PetscInt i;

  PetscFunctionBegin;
  if (nroots) *nroots = sf->nroots;
  if (nleaves) *nleaves = sf->nleaves;
  if (ilocal) *ilocal = NULL; /* Contiguous local indices */
  if (iremote) {
    if (!sf->remote) {
      PetscCall(PetscMalloc1(sf->nleaves, &sf->remote));
      sf->remote_alloc = sf->remote;
      for (i = 0; i < sf->nleaves; i++) {
        sf->remote[i].rank  = i;
        sf->remote[i].index = i;
      }
    }
    *iremote = sf->remote;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFBcastBegin_Alltoall(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op)
{
  PetscSFLink  link;
  MPI_Comm     comm;
  void        *rootbuf = NULL, *leafbuf = NULL; /* buffers used by MPI */
  MPI_Request *req;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link));
  PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata));
  PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_ROOT2LEAF, &rootbuf, &leafbuf, &req, NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_ROOT2LEAF));
  PetscCallMPI(MPIU_Ialltoall(rootbuf, 1, unit, leafbuf, 1, unit, comm, req));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op)
{
  PetscSFLink  link;
  MPI_Comm     comm;
  void        *rootbuf = NULL, *leafbuf = NULL; /* buffers used by MPI */
  MPI_Request *req;

  PetscFunctionBegin;
  PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_REDUCE, &link));
  PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata));
  PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_LEAF2ROOT, &rootbuf, &leafbuf, &req, NULL));
  PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_LEAF2ROOT));
  PetscCallMPI(MPIU_Ialltoall(leafbuf, 1, unit, rootbuf, 1, unit, comm, req));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf, PetscSF *out)
{
  PetscInt     nroots = 1, nleaves = 1, *ilocal;
  PetscSFNode *iremote = NULL;
  PetscSF      lsf;
  PetscMPIInt  rank;

  PetscFunctionBegin;
  nroots  = sf->nroots;
  nleaves = 1;
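  /* The only edge of the alltoall graph that stays on this process connects leaf <rank> to root <rank>,
     so the local SF built below has exactly one edge and lives on PETSC_COMM_SELF. */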
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
  PetscCall(PetscMalloc1(nleaves, &ilocal));
  PetscCall(PetscMalloc1(nleaves, &iremote));
  ilocal[0]        = rank;
  iremote[0].rank  = 0;    /* rank in PETSC_COMM_SELF */
  iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */

  PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
  PetscCall(PetscSFSetGraph(lsf, nroots, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetUp(lsf));
  *out = lsf;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PetscSFCreateEmbeddedRootSF_Alltoall(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
{
  PetscInt       i, *tmproots, *ilocal, ndranks, ndiranks;
  PetscSFNode   *iremote;
  PetscMPIInt    nroots, *roots, nleaves, *leaves, rank;
  MPI_Comm       comm;
  PetscSF_Basic *bas;
  PetscSF        esf;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* Uniquify selected[] and store the result in roots[] */
  PetscCall(PetscMalloc1(nselected, &tmproots));
  PetscCall(PetscArraycpy(tmproots, selected, nselected));
  PetscCall(PetscSortRemoveDupsInt(&nselected, tmproots)); /* nselected might be changed */
  PetscCheck(tmproots[0] >= 0 && tmproots[nselected - 1] < sf->nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max root indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", tmproots[0], tmproots[nselected - 1], sf->nroots);
  nroots = nselected; /* For Alltoall, we know root indices will not overflow MPI_INT */
  PetscCall(PetscMalloc1(nselected, &roots));
  for (i = 0; i < nselected; i++) roots[i] = tmproots[i];
  PetscCall(PetscFree(tmproots));
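
  /* In the alltoall graph, each root i is connected to exactly one leaf, and that leaf lives on rank i,
     so the surviving root indices in roots[] also serve as the destination ranks notified below. */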
  /* Find out which leaves are still connected to roots in the embedded sf.
     PetscCommBuildTwoSided is expected to be more scalable than MPI_Alltoall */
  PetscCall(PetscCommBuildTwoSided(comm, 0 /*empty msg*/, MPI_INT /*fake*/, nroots, roots, NULL /*todata*/, &nleaves, &leaves, NULL /*fromdata*/));

  /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */
  ndranks = 0;
  for (i = 0; i < nleaves; i++) {
    if (leaves[i] == rank) {
      leaves[i] = -rank;
      ndranks   = 1;
      break;
    }
  }
  PetscCall(PetscSortMPIInt(nleaves, leaves));
  if (nleaves && leaves[0] < 0) leaves[0] = rank;

  /* Build esf and fill its fields manually (without calling PetscSFSetUp) */
  PetscCall(PetscMalloc1(nleaves, &ilocal));
  PetscCall(PetscMalloc1(nleaves, &iremote));
  for (i = 0; i < nleaves; i++) { /* 1:1 map from roots to leaves */
    ilocal[i]        = leaves[i];
    iremote[i].rank  = leaves[i];
    iremote[i].index = leaves[i];
  }
  PetscCall(PetscSFCreate(comm, &esf));
  PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */
  PetscCall(PetscSFSetGraph(esf, sf->nleaves, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));

  /* As if we are calling PetscSFSetUpRanks(esf, self's group) */
  PetscCall(PetscMalloc4(nleaves, &esf->ranks, nleaves + 1, &esf->roffset, nleaves, &esf->rmine, nleaves, &esf->rremote));
  esf->nranks     = nleaves;
  esf->ndranks    = ndranks;
  esf->roffset[0] = 0;
  for (i = 0; i < nleaves; i++) {
    esf->ranks[i]       = leaves[i];
    esf->roffset[i + 1] = i + 1;
    esf->rmine[i]       = leaves[i];
    esf->rremote[i]     = leaves[i];
  }

  /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */
  bas = (PetscSF_Basic *)esf->data;
  PetscCall(PetscMalloc2(nroots, &bas->iranks, nroots + 1, &bas->ioffset));
  PetscCall(PetscMalloc1(nroots, &bas->irootloc));
  /* Move myself ahead if rank is in roots[], since I am a distinguished irank */
  ndiranks = 0;
  for (i = 0; i < nroots; i++) {
    if (roots[i] == rank) {
      roots[i] = -rank;
      ndiranks = 1;
      break;
    }
  }
  PetscCall(PetscSortMPIInt(nroots, roots));
  if (nroots && roots[0] < 0) roots[0] = rank;

  bas->niranks    = nroots;
  bas->ndiranks   = ndiranks;
  bas->ioffset[0] = 0;
  bas->itotal     = nroots;
  for (i = 0; i < nroots; i++) {
    bas->iranks[i]      = roots[i];
    bas->ioffset[i + 1] = i + 1;
    bas->irootloc[i]    = roots[i];
  }
  PetscCall(PetscFree(leaves));
  PetscCall(PetscFree(roots));

  /* See PetscSFCreateEmbeddedRootSF_Basic */
  esf->nleafreqs  = esf->nranks - esf->ndranks;
  bas->nrootreqs  = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Set up packing-related fields */
  PetscCall(PetscSFSetUpPackFields(esf));

  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
  *newsf           = esf;
  PetscFunctionReturn(PETSC_SUCCESS);
}
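
/* Note: the nonblocking MPIU_Ialltoall started in the Begin routines above is completed by the generic
   PetscSFBcastEnd_Basic/PetscSFReduceEnd_Basic installed below, which wait on the link's MPI request and
   then unpack (or reduce) the received buffer. */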
PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf)
{
  PetscSF_Alltoall *dat = (PetscSF_Alltoall *)sf->data;

  PetscFunctionBegin;
  sf->ops->BcastEnd  = PetscSFBcastEnd_Basic;
  sf->ops->ReduceEnd = PetscSFReduceEnd_Basic;

  /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */
  sf->ops->Destroy       = PetscSFDestroy_Allgatherv;
  sf->ops->Reset         = PetscSFReset_Allgatherv;
  sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv;
  sf->ops->GetRootRanks  = PetscSFGetRootRanks_Allgatherv;
  sf->ops->GetLeafRanks  = PetscSFGetLeafRanks_Allgatherv;

  /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance */
  sf->ops->SetUp = PetscSFSetUp_Allgather;

  /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */
  sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv;

  /* Alltoall stuff */
  sf->ops->GetGraph             = PetscSFGetGraph_Alltoall;
  sf->ops->BcastBegin           = PetscSFBcastBegin_Alltoall;
  sf->ops->ReduceBegin          = PetscSFReduceBegin_Alltoall;
  sf->ops->CreateLocalSF        = PetscSFCreateLocalSF_Alltoall;
  sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Alltoall;

  PetscCall(PetscNew(&dat));
  sf->data = (void *)dat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
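
/* Usage sketch (illustrative only, not part of this implementation): an SF of this type is typically
   obtained through PetscSFSetGraphWithPattern() with PETSCSF_PATTERN_ALLTOALL and then driven with the
   usual SF interface; the layout argument is assumed to be ignored for this pattern. For a communicator
   of size n, rootdata and leafdata each hold n units per process.

     PetscSF sf;
     PetscCall(PetscSFCreate(PETSC_COMM_WORLD, &sf));
     PetscCall(PetscSFSetGraphWithPattern(sf, NULL, PETSCSF_PATTERN_ALLTOALL));
     PetscCall(PetscSFBcastBegin(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE)); // maps to MPIU_Ialltoall above
     PetscCall(PetscSFBcastEnd(sf, MPIU_INT, rootdata, leafdata, MPI_REPLACE));
     PetscCall(PetscSFDestroy(&sf));
*/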