xref: /petsc/src/vec/is/sf/impls/basic/alltoall/sfalltoall.c (revision be37439ebbbdb2f81c3420c175a94aa72e59929c)
1dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h>
2dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h>
3dd5b3ca6SJunchao Zhang #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h>
4dd5b3ca6SJunchao Zhang 
5dd5b3ca6SJunchao Zhang /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */
6dd5b3ca6SJunchao Zhang typedef PetscSF_Allgatherv PetscSF_Alltoall;
7dd5b3ca6SJunchao Zhang 
PetscSFLinkStartCommunication_Alltoall(PetscSF sf,PetscSFLink link,PetscSFDirection direction)8f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFLinkStartCommunication_Alltoall(PetscSF sf, PetscSFLink link, PetscSFDirection direction)
9f5d27ee7SJunchao Zhang {
10f5d27ee7SJunchao Zhang   MPI_Comm     comm    = MPI_COMM_NULL;
11f5d27ee7SJunchao Zhang   void        *rootbuf = NULL, *leafbuf = NULL;
12f5d27ee7SJunchao Zhang   MPI_Request *req  = NULL;
13f5d27ee7SJunchao Zhang   MPI_Datatype unit = link->unit;
14f5d27ee7SJunchao Zhang 
15f5d27ee7SJunchao Zhang   PetscFunctionBegin;
16f5d27ee7SJunchao Zhang   if (direction == PETSCSF_ROOT2LEAF) {
17f5d27ee7SJunchao Zhang     PetscCall(PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host before sending */));
18f5d27ee7SJunchao Zhang   } else {
19f5d27ee7SJunchao Zhang     PetscCall(PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf, link, PETSC_TRUE /* device2host */));
20f5d27ee7SJunchao Zhang   }
21f5d27ee7SJunchao Zhang   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
22f5d27ee7SJunchao Zhang   PetscCall(PetscSFLinkGetMPIBuffersAndRequests(sf, link, direction, &rootbuf, &leafbuf, &req, NULL));
23646b835dSJunchao Zhang   PetscCall(PetscSFLinkSyncStreamBeforeCallMPI(sf, link));
24f5d27ee7SJunchao Zhang 
25f5d27ee7SJunchao Zhang   if (direction == PETSCSF_ROOT2LEAF) {
26f5d27ee7SJunchao Zhang     PetscCallMPI(MPIU_Ialltoall(rootbuf, 1, unit, leafbuf, 1, unit, comm, req));
27f5d27ee7SJunchao Zhang   } else {
28f5d27ee7SJunchao Zhang     PetscCallMPI(MPIU_Ialltoall(leafbuf, 1, unit, rootbuf, 1, unit, comm, req));
29f5d27ee7SJunchao Zhang   }
30f5d27ee7SJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
31f5d27ee7SJunchao Zhang }
32f5d27ee7SJunchao Zhang 
PetscSFSetCommunicationOps_Alltoall(PetscSF sf,PetscSFLink link)33f5d27ee7SJunchao Zhang static PetscErrorCode PetscSFSetCommunicationOps_Alltoall(PetscSF sf, PetscSFLink link)
34f5d27ee7SJunchao Zhang {
35f5d27ee7SJunchao Zhang   PetscFunctionBegin;
36f5d27ee7SJunchao Zhang   link->StartCommunication = PetscSFLinkStartCommunication_Alltoall;
37f5d27ee7SJunchao Zhang   PetscFunctionReturn(PETSC_SUCCESS);
38f5d27ee7SJunchao Zhang }
39f5d27ee7SJunchao Zhang 
40dd5b3ca6SJunchao Zhang /*===================================================================================*/
41dd5b3ca6SJunchao Zhang /*              Implementations of SF public APIs                                    */
42dd5b3ca6SJunchao Zhang /*===================================================================================*/
PetscSFGetGraph_Alltoall(PetscSF sf,PetscInt * nroots,PetscInt * nleaves,const PetscInt ** ilocal,const PetscSFNode ** iremote)43d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf, PetscInt *nroots, PetscInt *nleaves, const PetscInt **ilocal, const PetscSFNode **iremote)
44d71ae5a4SJacob Faibussowitsch {
45dd5b3ca6SJunchao Zhang   PetscInt i;
46dd5b3ca6SJunchao Zhang 
47dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
48dd5b3ca6SJunchao Zhang   if (nroots) *nroots = sf->nroots;
49dd5b3ca6SJunchao Zhang   if (nleaves) *nleaves = sf->nleaves;
50dd5b3ca6SJunchao Zhang   if (ilocal) *ilocal = NULL; /* Contiguous local indices */
51dd5b3ca6SJunchao Zhang   if (iremote) {
52dd5b3ca6SJunchao Zhang     if (!sf->remote) {
539566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(sf->nleaves, &sf->remote));
54dd5b3ca6SJunchao Zhang       sf->remote_alloc = sf->remote;
55dd5b3ca6SJunchao Zhang       for (i = 0; i < sf->nleaves; i++) {
56*835f2295SStefano Zampini         sf->remote[i].rank  = i; /* this is nonsense, cannot be larger than size */
57dd5b3ca6SJunchao Zhang         sf->remote[i].index = i;
58dd5b3ca6SJunchao Zhang       }
59dd5b3ca6SJunchao Zhang     }
60dd5b3ca6SJunchao Zhang     *iremote = sf->remote;
61dd5b3ca6SJunchao Zhang   }
623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
63dd5b3ca6SJunchao Zhang }
64dd5b3ca6SJunchao Zhang 
PetscSFCreateLocalSF_Alltoall(PetscSF sf,PetscSF * out)65d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf, PetscSF *out)
66d71ae5a4SJacob Faibussowitsch {
67dd5b3ca6SJunchao Zhang   PetscInt     nroots = 1, nleaves = 1, *ilocal;
68dd5b3ca6SJunchao Zhang   PetscSFNode *iremote = NULL;
69dd5b3ca6SJunchao Zhang   PetscSF      lsf;
70dd5b3ca6SJunchao Zhang   PetscMPIInt  rank;
71dd5b3ca6SJunchao Zhang 
72dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
73dd5b3ca6SJunchao Zhang   nroots  = 1;
74dd5b3ca6SJunchao Zhang   nleaves = 1;
759566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sf), &rank));
769566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nleaves, &ilocal));
779566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nleaves, &iremote));
78dd5b3ca6SJunchao Zhang   ilocal[0]        = rank;
79dd5b3ca6SJunchao Zhang   iremote[0].rank  = 0;    /* rank in PETSC_COMM_SELF */
80dd5b3ca6SJunchao Zhang   iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */
81dd5b3ca6SJunchao Zhang 
829566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PETSC_COMM_SELF, &lsf));
839566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(lsf, nroots, nleaves, NULL /*contiguous leaves*/, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
849566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUp(lsf));
85dd5b3ca6SJunchao Zhang   *out = lsf;
863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
87dd5b3ca6SJunchao Zhang }
88dd5b3ca6SJunchao Zhang 
PetscSFCreateEmbeddedRootSF_Alltoall(PetscSF sf,PetscInt nselected,const PetscInt * selected,PetscSF * newsf)89d71ae5a4SJacob Faibussowitsch static PetscErrorCode PetscSFCreateEmbeddedRootSF_Alltoall(PetscSF sf, PetscInt nselected, const PetscInt *selected, PetscSF *newsf)
90d71ae5a4SJacob Faibussowitsch {
916497c311SBarry Smith   PetscInt       i, *tmproots, *ilocal;
92dd5b3ca6SJunchao Zhang   PetscSFNode   *iremote;
936497c311SBarry Smith   PetscMPIInt    nroots, *roots, nleaves, *leaves, rank, ndiranks, ndranks;
94dd5b3ca6SJunchao Zhang   MPI_Comm       comm;
95dd5b3ca6SJunchao Zhang   PetscSF_Basic *bas;
96dd5b3ca6SJunchao Zhang   PetscSF        esf;
97dd5b3ca6SJunchao Zhang 
98dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
999566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)sf, &comm));
1009566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
101dd5b3ca6SJunchao Zhang 
102dd5b3ca6SJunchao Zhang   /* Uniq selected[] and store the result in roots[] */
1039566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nselected, &tmproots));
1049566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(tmproots, selected, nselected));
1059566063dSJacob Faibussowitsch   PetscCall(PetscSortRemoveDupsInt(&nselected, tmproots)); /* nselected might be changed */
106c9cc58a2SBarry Smith   PetscCheck(tmproots[0] >= 0 && tmproots[nselected - 1] < sf->nroots, comm, PETSC_ERR_ARG_OUTOFRANGE, "Min/Max root indices %" PetscInt_FMT "/%" PetscInt_FMT " are not in [0,%" PetscInt_FMT ")", tmproots[0], tmproots[nselected - 1], sf->nroots);
1076497c311SBarry Smith   PetscCall(PetscMPIIntCast(nselected, &nroots));
1089566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nselected, &roots));
1096497c311SBarry Smith   for (PetscMPIInt i = 0; i < nroots; i++) PetscCall(PetscMPIIntCast(tmproots[i], &roots[i]));
1109566063dSJacob Faibussowitsch   PetscCall(PetscFree(tmproots));
111dd5b3ca6SJunchao Zhang 
112dd5b3ca6SJunchao Zhang   /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */
1139566063dSJacob Faibussowitsch   PetscCall(PetscCommBuildTwoSided(comm, 0 /*empty msg*/, MPI_INT /*fake*/, nroots, roots, NULL /*todata*/, &nleaves, &leaves, NULL /*fromdata*/));
114dd5b3ca6SJunchao Zhang 
115dd5b3ca6SJunchao Zhang   /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */
116dd5b3ca6SJunchao Zhang   ndranks = 0;
117dd5b3ca6SJunchao Zhang   for (i = 0; i < nleaves; i++) {
1189371c9d4SSatish Balay     if (leaves[i] == rank) {
1199371c9d4SSatish Balay       leaves[i] = -rank;
1209371c9d4SSatish Balay       ndranks   = 1;
1219371c9d4SSatish Balay       break;
1229371c9d4SSatish Balay     }
123dd5b3ca6SJunchao Zhang   }
1249566063dSJacob Faibussowitsch   PetscCall(PetscSortMPIInt(nleaves, leaves));
125dd5b3ca6SJunchao Zhang   if (nleaves && leaves[0] < 0) leaves[0] = rank;
126dd5b3ca6SJunchao Zhang 
127dd5b3ca6SJunchao Zhang   /* Build esf and fill its fields manually (without calling PetscSFSetUp) */
1289566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nleaves, &ilocal));
1299566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nleaves, &iremote));
130dd5b3ca6SJunchao Zhang   for (i = 0; i < nleaves; i++) { /* 1:1 map from roots to leaves */
131dd5b3ca6SJunchao Zhang     ilocal[i]        = leaves[i];
132dd5b3ca6SJunchao Zhang     iremote[i].rank  = leaves[i];
133dd5b3ca6SJunchao Zhang     iremote[i].index = leaves[i];
134dd5b3ca6SJunchao Zhang   }
1359566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(comm, &esf));
1369566063dSJacob Faibussowitsch   PetscCall(PetscSFSetType(esf, PETSCSFBASIC)); /* This optimized routine can only create a basic sf */
1379566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(esf, sf->nleaves, nleaves, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
138dd5b3ca6SJunchao Zhang 
139dd5b3ca6SJunchao Zhang   /* As if we are calling PetscSFSetUpRanks(esf,self's group) */
1409566063dSJacob Faibussowitsch   PetscCall(PetscMalloc4(nleaves, &esf->ranks, nleaves + 1, &esf->roffset, nleaves, &esf->rmine, nleaves, &esf->rremote));
141dd5b3ca6SJunchao Zhang   esf->nranks     = nleaves;
142dd5b3ca6SJunchao Zhang   esf->ndranks    = ndranks;
143dd5b3ca6SJunchao Zhang   esf->roffset[0] = 0;
144dd5b3ca6SJunchao Zhang   for (i = 0; i < nleaves; i++) {
145dd5b3ca6SJunchao Zhang     esf->ranks[i]       = leaves[i];
146dd5b3ca6SJunchao Zhang     esf->roffset[i + 1] = i + 1;
147dd5b3ca6SJunchao Zhang     esf->rmine[i]       = leaves[i];
148dd5b3ca6SJunchao Zhang     esf->rremote[i]     = leaves[i];
149dd5b3ca6SJunchao Zhang   }
150dd5b3ca6SJunchao Zhang 
151dd5b3ca6SJunchao Zhang   /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */
152dd5b3ca6SJunchao Zhang   bas = (PetscSF_Basic *)esf->data;
1539566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(nroots, &bas->iranks, nroots + 1, &bas->ioffset));
1549566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nroots, &bas->irootloc));
155dd5b3ca6SJunchao Zhang   /* Move myself ahead if rank is in roots[], since I am a distinguished irank */
156dd5b3ca6SJunchao Zhang   ndiranks = 0;
157dd5b3ca6SJunchao Zhang   for (i = 0; i < nroots; i++) {
1589371c9d4SSatish Balay     if (roots[i] == rank) {
1599371c9d4SSatish Balay       roots[i] = -rank;
1609371c9d4SSatish Balay       ndiranks = 1;
1619371c9d4SSatish Balay       break;
1629371c9d4SSatish Balay     }
163dd5b3ca6SJunchao Zhang   }
1649566063dSJacob Faibussowitsch   PetscCall(PetscSortMPIInt(nroots, roots));
165dd5b3ca6SJunchao Zhang   if (nroots && roots[0] < 0) roots[0] = rank;
166dd5b3ca6SJunchao Zhang 
167dd5b3ca6SJunchao Zhang   bas->niranks    = nroots;
168dd5b3ca6SJunchao Zhang   bas->ndiranks   = ndiranks;
169dd5b3ca6SJunchao Zhang   bas->ioffset[0] = 0;
170dd5b3ca6SJunchao Zhang   bas->itotal     = nroots;
171dd5b3ca6SJunchao Zhang   for (i = 0; i < nroots; i++) {
172dd5b3ca6SJunchao Zhang     bas->iranks[i]      = roots[i];
173dd5b3ca6SJunchao Zhang     bas->ioffset[i + 1] = i + 1;
174dd5b3ca6SJunchao Zhang     bas->irootloc[i]    = roots[i];
175dd5b3ca6SJunchao Zhang   }
176dd5b3ca6SJunchao Zhang 
17772502a1fSJunchao Zhang   /* See PetscSFCreateEmbeddedRootSF_Basic */
178cd620004SJunchao Zhang   esf->nleafreqs  = esf->nranks - esf->ndranks;
179cd620004SJunchao Zhang   bas->nrootreqs  = bas->niranks - bas->ndiranks;
180cd620004SJunchao Zhang   esf->persistent = PETSC_TRUE;
181cd620004SJunchao Zhang   /* Setup packing related fields */
1829566063dSJacob Faibussowitsch   PetscCall(PetscSFSetUpPackFields(esf));
183cd620004SJunchao Zhang 
184cd620004SJunchao Zhang   esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
185dd5b3ca6SJunchao Zhang   *newsf           = esf;
1863ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
187dd5b3ca6SJunchao Zhang }
188dd5b3ca6SJunchao Zhang 
PetscSFCreate_Alltoall(PetscSF sf)189d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf)
190d71ae5a4SJacob Faibussowitsch {
191dd5b3ca6SJunchao Zhang   PetscSF_Alltoall *dat = (PetscSF_Alltoall *)sf->data;
192dd5b3ca6SJunchao Zhang 
193dd5b3ca6SJunchao Zhang   PetscFunctionBegin;
194f5d27ee7SJunchao Zhang   sf->ops->BcastBegin  = PetscSFBcastBegin_Basic;
195ad227feaSJunchao Zhang   sf->ops->BcastEnd    = PetscSFBcastEnd_Basic;
196f5d27ee7SJunchao Zhang   sf->ops->ReduceBegin = PetscSFReduceBegin_Basic;
197cd620004SJunchao Zhang   sf->ops->ReduceEnd   = PetscSFReduceEnd_Basic;
198cd620004SJunchao Zhang 
199dd5b3ca6SJunchao Zhang   /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */
200dd5b3ca6SJunchao Zhang   sf->ops->Destroy       = PetscSFDestroy_Allgatherv;
201dd5b3ca6SJunchao Zhang   sf->ops->Reset         = PetscSFReset_Allgatherv;
202dd5b3ca6SJunchao Zhang   sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv;
203dd5b3ca6SJunchao Zhang   sf->ops->GetRootRanks  = PetscSFGetRootRanks_Allgatherv;
204dd5b3ca6SJunchao Zhang 
205dd5b3ca6SJunchao Zhang   /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */
206dd5b3ca6SJunchao Zhang   sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv;
207cd620004SJunchao Zhang   sf->ops->SetUp        = PetscSFSetUp_Allgather;
208dd5b3ca6SJunchao Zhang 
209dd5b3ca6SJunchao Zhang   /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */
210dd5b3ca6SJunchao Zhang   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv;
211dd5b3ca6SJunchao Zhang 
212dd5b3ca6SJunchao Zhang   /* Alltoall stuff */
213dd5b3ca6SJunchao Zhang   sf->ops->GetGraph             = PetscSFGetGraph_Alltoall;
214dd5b3ca6SJunchao Zhang   sf->ops->CreateLocalSF        = PetscSFCreateLocalSF_Alltoall;
21572502a1fSJunchao Zhang   sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Alltoall;
216dd5b3ca6SJunchao Zhang 
217f5d27ee7SJunchao Zhang   sf->ops->SetCommunicationOps = PetscSFSetCommunicationOps_Alltoall;
218f5d27ee7SJunchao Zhang 
2196677b1c1SJunchao Zhang   sf->collective = PETSC_TRUE;
2206677b1c1SJunchao Zhang 
2214dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&dat));
222dd5b3ca6SJunchao Zhang   sf->data = (void *)dat;
2233ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
224dd5b3ca6SJunchao Zhang }
225