1 #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h> 2 #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h> 3 #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h> 4 5 /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */ 6 typedef PetscSF_Allgatherv PetscSF_Alltoall; 7 8 /*===================================================================================*/ 9 /* Implementations of SF public APIs */ 10 /*===================================================================================*/ 11 static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote) { 12 PetscInt i; 13 14 PetscFunctionBegin; 15 if (nroots) *nroots = sf->nroots; 16 if (nleaves) *nleaves = sf->nleaves; 17 if (ilocal) *ilocal = NULL; /* Contiguous local indices */ 18 if (iremote) { 19 if (!sf->remote) { 20 PetscCall(PetscMalloc1(sf->nleaves, &sf->remote)); 21 sf->remote_alloc = sf->remote; 22 for (i = 0; i < sf->nleaves; i++) { 23 sf->remote[i].rank = i; 24 sf->remote[i].index = i; 25 } 26 } 27 *iremote = sf->remote; 28 } 29 PetscFunctionReturn(0); 30 } 31 32 static PetscErrorCode PetscSFBcastBegin_Alltoall(PetscSF sf, MPI_Datatype unit, PetscMemType rootmtype, const void *rootdata, PetscMemType leafmtype, void *leafdata, MPI_Op op) { 33 PetscSFLink link; 34 MPI_Comm comm; 35 void *rootbuf = NULL, *leafbuf = NULL; /* buffer used by MPI */ 36 MPI_Request *req; 37 38 PetscFunctionBegin; 39 PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_BCAST, &link)); 40 PetscCall(PetscSFLinkPackRootData(sf, link, PETSCSF_REMOTE, rootdata)); 41 PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */)); 42 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 43 PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_ROOT2LEAF, &rootbuf, &leafbuf, &req, NULL)); 44 PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_ROOT2LEAF)); 45 PetscCallMPI(MPIU_Ialltoall(rootbuf, 1, unit, leafbuf, 1, unit, comm, req)); 46 PetscFunctionReturn(0); 47 } 48 49 static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf, MPI_Datatype unit, PetscMemType leafmtype, const void *leafdata, PetscMemType rootmtype, void *rootdata, MPI_Op op) { 50 PetscSFLink link; 51 MPI_Comm comm; 52 void *rootbuf = NULL, *leafbuf = NULL; /* buffer used by MPI */ 53 MPI_Request *req; 54 55 PetscFunctionBegin; 56 PetscCall(PetscSFLinkCreate(sf, unit, rootmtype, rootdata, leafmtype, leafdata, op, PETSCSF_REDUCE, &link)); 57 PetscCall(PetscSFLinkPackLeafData(sf, link, PETSCSF_REMOTE, leafdata)); 58 PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */)); 59 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 60 PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, PETSCSF_LEAF2ROOT, &rootbuf, &leafbuf, &req, NULL)); 61 PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link, PETSCSF_LEAF2ROOT)); 62 PetscCallMPI(MPIU_Ialltoall(leafbuf, 1, unit, rootbuf, 1, unit, comm, req)); 63 PetscFunctionReturn(0); 64 } 65 66 static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf, PetscSF *out) { 67 PetscInt nroots = 1, nleaves = 1, *ilocal; 68 PetscSFNode *iremote = NULL; 69 PetscSF lsf; 70 PetscMPIInt rank; 71 72 PetscFunctionBegin; 73 nroots = 1; 74 nleaves = 1; 75 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank)); 76 PetscCall(PetscMalloc1(nleaves, &ilocal)); 77 PetscCall(PetscMalloc1(nleaves, &iremote)); 78 ilocal[0] = rank; 79 iremote[0].rank = 0; /* rank in PETSC_COMM_SELF */ 80 iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */ 81 82 PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf)); 83 PetscCall(PetscSFSetGraph(lsf, nroots, nleaves, NULL /*contiguous leaves*/, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 84 PetscCall(PetscSFSetUp(lsf)); 85 *out = lsf; 86 PetscFunctionReturn(0); 87 } 88 89 static PetscErrorCode PetscSFCreateEmbeddedRootSF_Alltoall(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf) { 90 PetscInt i, *tmproots, *ilocal, ndranks, ndiranks; 91 PetscSFNode *iremote; 92 PetscMPIInt nroots, *roots, nleaves, *leaves, rank; 93 MPI_Comm comm; 94 PetscSF_Basic *bas; 95 PetscSF esf; 96 97 PetscFunctionBegin; 98 PetscCall(PetscObjectGetComm((PetscObject)sf, &comm)); 99 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 100 101 /* Uniq selected[] and store the result in roots[] */ 102 PetscCall(PetscMalloc1(nselected, &tmproots)); 103 PetscCall(PetscArraycpy(tmproots, selected, nselected)); 104 PetscCall(PetscSortRemoveDupsInt(&nselected, tmproots)); /* nselected might be changed */ 105 PetscCheck(tmproots[0] >= 0 && tmproots[nselected - 1] < sf->nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max root indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", tmproots[0], tmproots[nselected - 1], sf->nroots); 106 nroots = nselected; /* For Alltoall, we know root indices will not overflow MPI_INT */ 107 PetscCall(PetscMalloc1(nselected, &roots)); 108 for (i = 0; i < nselected; i++) roots[i] = tmproots[i]; 109 PetscCall(PetscFree(tmproots)); 110 111 /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */ 112 PetscCall(PetscCommBuildTwoSided(comm, 0 /*empty msg*/, MPI_INT /*fake*/, nroots, roots, NULL /*todata*/, &nleaves, &leaves, NULL /*fromdata*/)); 113 114 /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */ 115 ndranks = 0; 116 for (i = 0; i < nleaves; i++) { 117 if (leaves[i] == rank) { 118 leaves[i] = -rank; 119 ndranks = 1; 120 break; 121 } 122 } 123 PetscCall(PetscSortMPIInt(nleaves, leaves)); 124 if (nleaves && leaves[0] < 0) leaves[0] = rank; 125 126 /* Build esf and fill its fields manually (without calling PetscSFSetUp) */ 127 PetscCall(PetscMalloc1(nleaves, &ilocal)); 128 PetscCall(PetscMalloc1(nleaves, &iremote)); 129 for (i = 0; i < nleaves; i++) { /* 1:1 map from roots to leaves */ 130 ilocal[i] = leaves[i]; 131 iremote[i].rank = leaves[i]; 132 iremote[i].index = leaves[i]; 133 } 134 PetscCall(PetscSFCreate(comm, &esf)); 135 PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */ 136 PetscCall(PetscSFSetGraph(esf, sf->nleaves, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 137 138 /* As if we are calling PetscSFSetUpRanks(esf,self's group) */ 139 PetscCall(PetscMalloc4(nleaves, &esf->ranks, nleaves + 1, &esf->roffset, nleaves, &esf->rmine, nleaves, &esf->rremote)); 140 esf->nranks = nleaves; 141 esf->ndranks = ndranks; 142 esf->roffset[0] = 0; 143 for (i = 0; i < nleaves; i++) { 144 esf->ranks[i] = leaves[i]; 145 esf->roffset[i + 1] = i + 1; 146 esf->rmine[i] = leaves[i]; 147 esf->rremote[i] = leaves[i]; 148 } 149 150 /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */ 151 bas = (PetscSF_Basic *)esf->data; 152 PetscCall(PetscMalloc2(nroots, &bas->iranks, nroots + 1, &bas->ioffset)); 153 PetscCall(PetscMalloc1(nroots, &bas->irootloc)); 154 /* Move myself ahead if rank is in roots[], since I am a distinguished irank */ 155 ndiranks = 0; 156 for (i = 0; i < nroots; i++) { 157 if (roots[i] == rank) { 158 roots[i] = -rank; 159 ndiranks = 1; 160 break; 161 } 162 } 163 PetscCall(PetscSortMPIInt(nroots, roots)); 164 if (nroots && roots[0] < 0) roots[0] = rank; 165 166 bas->niranks = nroots; 167 bas->ndiranks = ndiranks; 168 bas->ioffset[0] = 0; 169 bas->itotal = nroots; 170 for (i = 0; i < nroots; i++) { 171 bas->iranks[i] = roots[i]; 172 bas->ioffset[i + 1] = i + 1; 173 bas->irootloc[i] = roots[i]; 174 } 175 176 /* See PetscSFCreateEmbeddedRootSF_Basic */ 177 esf->nleafreqs = esf->nranks - esf->ndranks; 178 bas->nrootreqs = bas->niranks - bas->ndiranks; 179 esf->persistent = PETSC_TRUE; 180 /* Setup packing related fields */ 181 PetscCall(PetscSFSetUpPackFields(esf)); 182 183 esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */ 184 *newsf = esf; 185 PetscFunctionReturn(0); 186 } 187 188 PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf) { 189 PetscSF_Alltoall *dat = (PetscSF_Alltoall *)sf->data; 190 191 PetscFunctionBegin; 192 sf->ops->BcastEnd = PetscSFBcastEnd_Basic; 193 sf->ops->ReduceEnd = PetscSFReduceEnd_Basic; 194 195 /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */ 196 sf->ops->Destroy = PetscSFDestroy_Allgatherv; 197 sf->ops->Reset = PetscSFReset_Allgatherv; 198 sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv; 199 sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv; 200 201 /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */ 202 sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv; 203 sf->ops->SetUp = PetscSFSetUp_Allgather; 204 205 /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */ 206 sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv; 207 208 /* Alltoall stuff */ 209 sf->ops->GetGraph = PetscSFGetGraph_Alltoall; 210 sf->ops->BcastBegin = PetscSFBcastBegin_Alltoall; 211 sf->ops->ReduceBegin = PetscSFReduceBegin_Alltoall; 212 sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Alltoall; 213 sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Alltoall; 214 215 PetscCall(PetscNewLog(sf, &dat)); 216 sf->data = (void *)dat; 217 PetscFunctionReturn(0); 218 } 219