xref: /petsc/src/vec/is/sf/impls/basic/alltoall/sfalltoall.c (revision efa12513287cff49a2b9648ae83199dcbfaad71a)
1 #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h>
2 #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h>
3 #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h>
4 
5 /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */
6 typedef PetscSF_Allgatherv PetscSF_Alltoall;
7 
8 /*===================================================================================*/
9 /*              Implementations of SF public APIs                                    */
10 /*===================================================================================*/
11 static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
12 {
13   PetscErrorCode ierr;
14   PetscInt       i;
15 
16   PetscFunctionBegin;
17   if (nroots)  *nroots  = sf->nroots;
18   if (nleaves) *nleaves = sf->nleaves;
19   if (ilocal)  *ilocal  = NULL; /* Contiguous local indices */
20   if (iremote) {
21     if (!sf->remote) {
22       ierr = PetscMalloc1(sf->nleaves,&sf->remote);CHKERRQ(ierr);
23       sf->remote_alloc = sf->remote;
24       for (i=0; i<sf->nleaves; i++) {
25         sf->remote[i].rank  = i;
26         sf->remote[i].index = i;
27       }
28     }
29     *iremote = sf->remote;
30   }
31   PetscFunctionReturn(0);
32 }
33 
34 static PetscErrorCode PetscSFBcastAndOpBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
35 {
36   PetscErrorCode       ierr;
37   PetscSFLink          link;
38   MPI_Comm             comm;
39   void                 *rootbuf = NULL,*leafbuf = NULL; /* buffer used by MPI */
40   MPI_Request          *req;
41 
42   PetscFunctionBegin;
43   ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);CHKERRQ(ierr);
44   ierr = PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);CHKERRQ(ierr);
45   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
46   ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,&req,NULL);CHKERRQ(ierr);
47   ierr = MPIU_Ialltoall(rootbuf,1,unit,leafbuf,1,unit,comm,req);CHKERRQ(ierr);
48   PetscFunctionReturn(0);
49 }
50 
51 static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
52 {
53   PetscErrorCode       ierr;
54   PetscSFLink          link;
55   MPI_Comm             comm;
56   void                 *rootbuf = NULL,*leafbuf = NULL; /* buffer used by MPI */
57   MPI_Request          *req;
58 
59   PetscFunctionBegin;
60   ierr = PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_REDUCE,&link);CHKERRQ(ierr);
61   ierr = PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);CHKERRQ(ierr);
62   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
63   ierr = PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,&rootbuf,&leafbuf,&req,NULL);CHKERRQ(ierr);
64   ierr = MPIU_Ialltoall(leafbuf,1,unit,rootbuf,1,unit,comm,req);CHKERRQ(ierr);
65   PetscFunctionReturn(0);
66 }
67 
68 static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf,PetscSF *out)
69 {
70   PetscErrorCode ierr;
71   PetscInt       nroots = 1,nleaves = 1,*ilocal;
72   PetscSFNode    *iremote = NULL;
73   PetscSF        lsf;
74   PetscMPIInt    rank;
75 
76   PetscFunctionBegin;
77   nroots  = 1;
78   nleaves = 1;
79   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRMPI(ierr);
80   ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr);
81   ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
82   ilocal[0]        = rank;
83   iremote[0].rank  = 0;    /* rank in PETSC_COMM_SELF */
84   iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */
85 
86   ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
87   ierr = PetscSFSetGraph(lsf,nroots,nleaves,NULL/*contiguous leaves*/,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
88   ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
89   *out = lsf;
90   PetscFunctionReturn(0);
91 }
92 
/* Create an embedded (sub-)SF that keeps only the roots listed in selected[].
   Because an alltoall SF has a 1:1 correspondence between peer rank, root index and
   leaf index, the embedded SF can be assembled directly as a PETSCSFBASIC, filling
   in by hand the fields that PetscSFSetUp/PetscSFSetUpRanks would normally compute.

   Input:  sf        - the alltoall SF
           nselected - number of selected roots on this process
           selected  - indices of selected roots (may contain duplicates; uniqued below)
   Output: newsf     - the embedded SF (type PETSCSFBASIC, already set up) */
static PetscErrorCode PetscSFCreateEmbeddedSF_Alltoall(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscErrorCode ierr;
  PetscInt       i,*tmproots,*ilocal,ndranks,ndiranks;
  PetscSFNode    *iremote;
  PetscMPIInt    nroots,*roots,nleaves,*leaves,rank;
  MPI_Comm       comm;
  PetscSF_Basic  *bas;
  PetscSF        esf;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);

  /* Uniq selected[] and store the result in roots[] */
  ierr = PetscMalloc1(nselected,&tmproots);CHKERRQ(ierr);
  ierr = PetscArraycpy(tmproots,selected,nselected);CHKERRQ(ierr);
  ierr = PetscSortRemoveDupsInt(&nselected,tmproots);CHKERRQ(ierr); /* nselected might be changed */
  if (tmproots[0] < 0 || tmproots[nselected-1] >= sf->nroots) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",tmproots[0],tmproots[nselected-1],sf->nroots);
  nroots = nselected;   /* For Alltoall, we know root indices will not overflow MPI_INT */
  /* Narrow PetscInt roots to PetscMPIInt, since root index i == peer rank i for alltoall */
  ierr   = PetscMalloc1(nselected,&roots);CHKERRQ(ierr);
  for (i=0; i<nselected; i++) roots[i] = tmproots[i];
  ierr   = PetscFree(tmproots);CHKERRQ(ierr);

  /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */
  ierr   = PetscCommBuildTwoSided(comm,0/*empty msg*/,MPI_INT/*fake*/,nroots,roots,NULL/*todata*/,&nleaves,&leaves,NULL/*fromdata*/);CHKERRQ(ierr);

  /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */
  /* Trick: negate my own rank so it sorts first, then restore it after the sort */
  ndranks = 0;
  for (i=0; i<nleaves; i++) {
    if (leaves[i] == rank) {leaves[i] = -rank; ndranks = 1; break;}
  }
  ierr = PetscSortMPIInt(nleaves,leaves);CHKERRQ(ierr);
  if (nleaves && leaves[0] < 0) leaves[0] = rank;

  /* Build esf and fill its fields manually (without calling PetscSFSetUp) */
  ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
  for (i=0; i<nleaves; i++) { /* 1:1 map from roots to leaves */
    ilocal[i]        = leaves[i];
    iremote[i].rank  = leaves[i];
    iremote[i].index = leaves[i];
  }
  ierr = PetscSFCreate(comm,&esf);CHKERRQ(ierr);
  ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */
  ierr = PetscSFSetGraph(esf,sf->nleaves,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);

  /* As if we are calling PetscSFSetUpRanks(esf,self's group) */
  /* Each remaining peer rank contributes exactly one leaf, so roffset is simply 0,1,2,... */
  ierr = PetscMalloc4(nleaves,&esf->ranks,nleaves+1,&esf->roffset,nleaves,&esf->rmine,nleaves,&esf->rremote);CHKERRQ(ierr);
  esf->nranks     = nleaves;
  esf->ndranks    = ndranks;
  esf->roffset[0] = 0;
  for (i=0; i<nleaves; i++) {
    esf->ranks[i]     = leaves[i];
    esf->roffset[i+1] = i+1;
    esf->rmine[i]     = leaves[i];
    esf->rremote[i]   = leaves[i];
  }

  /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */
  bas  = (PetscSF_Basic*)esf->data;
  ierr = PetscMalloc2(nroots,&bas->iranks,nroots+1,&bas->ioffset);CHKERRQ(ierr);
  ierr = PetscMalloc1(nroots,&bas->irootloc);CHKERRQ(ierr);
  /* Move myself ahead if rank is in roots[], since I am a distinguished irank */
  /* Same negate-sort-restore trick as for leaves[] above */
  ndiranks = 0;
  for (i=0; i<nroots; i++) {
    if (roots[i] == rank) {roots[i] = -rank; ndiranks = 1; break;}
  }
  ierr = PetscSortMPIInt(nroots,roots);CHKERRQ(ierr);
  if (nroots && roots[0] < 0) roots[0] = rank;

  bas->niranks    = nroots;
  bas->ndiranks   = ndiranks;
  bas->ioffset[0] = 0;
  bas->itotal     = nroots; /* one incoming unit per surviving root */
  for (i=0; i<nroots; i++) {
    bas->iranks[i]    = roots[i];
    bas->ioffset[i+1] = i+1;
    bas->irootloc[i]  = roots[i];
  }

  /* See PetscSFCreateEmbeddedSF_Basic */
  /* Distinguished (self) ranks communicate without MPI requests, hence the subtraction */
  esf->nleafreqs  = esf->nranks - esf->ndranks;
  bas->nrootreqs  = bas->niranks - bas->ndiranks;
  esf->persistent = PETSC_TRUE;
  /* Setup packing related fields */
  ierr = PetscSFSetUpPackFields(esf);CHKERRQ(ierr);

  esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
  /* NOTE(review): leaves[] and roots[] from PetscCommBuildTwoSided/PetscMalloc1 appear
     to be kept alive inside esf only by copy; presumably they are freed elsewhere or
     intentionally retained -- confirm ownership against PetscCommBuildTwoSided docs */
  *newsf = esf;
  PetscFunctionReturn(0);
}
185 
186 PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf)
187 {
188   PetscErrorCode   ierr;
189   PetscSF_Alltoall *dat = (PetscSF_Alltoall*)sf->data;
190 
191   PetscFunctionBegin;
192   sf->ops->BcastAndOpEnd   = PetscSFBcastAndOpEnd_Basic;
193   sf->ops->ReduceEnd       = PetscSFReduceEnd_Basic;
194 
195   /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */
196   sf->ops->Destroy         = PetscSFDestroy_Allgatherv;
197   sf->ops->Reset           = PetscSFReset_Allgatherv;
198   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Allgatherv;
199   sf->ops->GetRootRanks    = PetscSFGetRootRanks_Allgatherv;
200 
201   /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */
202   sf->ops->GetLeafRanks    = PetscSFGetLeafRanks_Allgatherv;
203   sf->ops->SetUp           = PetscSFSetUp_Allgather;
204 
205   /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */
206   sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv;
207 
208   /* Alltoall stuff */
209   sf->ops->GetGraph         = PetscSFGetGraph_Alltoall;
210   sf->ops->BcastAndOpBegin  = PetscSFBcastAndOpBegin_Alltoall;
211   sf->ops->ReduceBegin      = PetscSFReduceBegin_Alltoall;
212   sf->ops->CreateLocalSF    = PetscSFCreateLocalSF_Alltoall;
213   sf->ops->CreateEmbeddedSF = PetscSFCreateEmbeddedSF_Alltoall;
214 
215   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
216   sf->data = (void*)dat;
217   PetscFunctionReturn(0);
218 }
219 
220 
221