xref: /petsc/src/vec/is/sf/impls/basic/alltoall/sfalltoall.c (revision 1873fc9fedfc3bd8fc2f82ccd29ab8b44a80f1db)
1 #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h>
2 #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h>
3 #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h>
4 
5 #define PetscSFPackGet_Alltoall PetscSFPackGet_Allgatherv
6 
7 /* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */
8 typedef PetscSF_Allgatherv PetscSF_Alltoall;
9 
10 /*===================================================================================*/
11 /*              Implementations of SF public APIs                                    */
12 /*===================================================================================*/
13 static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
14 {
15   PetscErrorCode ierr;
16   PetscInt       i;
17 
18   PetscFunctionBegin;
19   if (nroots)  *nroots  = sf->nroots;
20   if (nleaves) *nleaves = sf->nleaves;
21   if (ilocal)  *ilocal  = NULL; /* Contiguous local indices */
22   if (iremote) {
23     if (!sf->remote) {
24       ierr = PetscMalloc1(sf->nleaves,&sf->remote);CHKERRQ(ierr);
25       sf->remote_alloc = sf->remote;
26       for (i=0; i<sf->nleaves; i++) {
27         sf->remote[i].rank  = i;
28         sf->remote[i].index = i;
29       }
30     }
31     *iremote = sf->remote;
32   }
33   PetscFunctionReturn(0);
34 }
35 
36 static PetscErrorCode PetscSFBcastAndOpBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
37 {
38   PetscErrorCode       ierr;
39   PetscSFPack          link;
40   MPI_Comm             comm;
41   char                 *recvbuf;
42 
43   PetscFunctionBegin;
44   ierr = PetscSFPackGet_Alltoall(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr);
45   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
46 
47   if (op != MPIU_REPLACE) {
48     if (!link->leafbuf[leafmtype]) {ierr = PetscMallocWithMemType(leafmtype,sf->nleaves*link->unitbytes,(void**)&link->leafbuf[leafmtype]);CHKERRQ(ierr);}
49     recvbuf = link->leafbuf[leafmtype];
50   } else {
51     recvbuf = (char*)leafdata;
52   }
53   ierr = MPIU_Ialltoall(rootdata,1,unit,recvbuf,1,unit,comm,link->rootreqs[PETSCSF_ROOT2LEAF_BCAST][rootmtype]);CHKERRQ(ierr);
54   PetscFunctionReturn(0);
55 }
56 
57 static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
58 {
59   PetscErrorCode       ierr;
60   PetscSFPack          link;
61   MPI_Comm             comm;
62   char                 *recvbuf;
63 
64   PetscFunctionBegin;
65   ierr = PetscSFPackGet_Alltoall(sf,unit,rootmtype,rootdata,leafmtype,leafdata,&link);CHKERRQ(ierr);
66   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
67 
68   if (op != MPIU_REPLACE) {
69     if (!link->rootbuf[rootmtype]) {ierr = PetscMallocWithMemType(rootmtype,sf->nroots*link->unitbytes,(void**)&link->rootbuf[rootmtype]);CHKERRQ(ierr);}
70     recvbuf = link->rootbuf[rootmtype];
71   } else {
72     recvbuf = (char*)rootdata;
73   }
74   ierr = MPIU_Ialltoall(leafdata,1,unit,recvbuf,1,unit,comm,link->rootreqs[PETSCSF_LEAF2ROOT_REDUCE][rootmtype]);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf,PetscSF *out)
79 {
80   PetscErrorCode ierr;
81   PetscInt       nroots=1,nleaves=1,*ilocal;
82   PetscSFNode    *iremote = NULL;
83   PetscSF        lsf;
84   PetscMPIInt    rank;
85 
86   PetscFunctionBegin;
87   nroots  = 1;
88   nleaves = 1;
89   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
90   ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr);
91   ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
92   ilocal[0]        = rank;
93   iremote[0].rank  = 0;    /* rank in PETSC_COMM_SELF */
94   iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */
95 
96   ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
97   ierr = PetscSFSetGraph(lsf,nroots,nleaves,NULL/*contiguous leaves*/,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
98   ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
99   *out = lsf;
100   PetscFunctionReturn(0);
101 }
102 
/* Create the embedded SF induced by the selected roots of an alltoall SF.

   For alltoall, leaf i on every rank maps 1:1 to root i on rank i, so selecting a set of
   roots selects exactly one leaf per selected (root, owning-rank) pair. This routine
   exploits that to build a PETSCSFBASIC result directly, filling its internal fields by
   hand instead of going through PetscSFSetUp.

   Input:  sf        - the alltoall SF
           nselected - number of entries in selected[] (may contain duplicates)
           selected  - local root indices to keep
   Output: newsf     - a set-up PETSCSFBASIC SF restricted to the selected roots */
static PetscErrorCode PetscSFCreateEmbeddedSF_Alltoall(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscErrorCode ierr;
  PetscInt       i,*tmproots,*ilocal,ndranks,ndiranks;
  PetscSFNode    *iremote;
  PetscMPIInt    nroots,*roots,nleaves,*leaves,rank;
  MPI_Comm       comm;
  PetscSF_Basic  *bas;
  PetscSF        esf;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* Uniq selected[] and store the result in roots[] */
  ierr = PetscMalloc1(nselected,&tmproots);CHKERRQ(ierr);
  ierr = PetscArraycpy(tmproots,selected,nselected);CHKERRQ(ierr);
  ierr = PetscSortRemoveDupsInt(&nselected,tmproots);CHKERRQ(ierr); /* nselected might be changed */
  if (tmproots[0] < 0 || tmproots[nselected-1] >= sf->nroots) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",tmproots[0],tmproots[nselected-1],sf->nroots);
  nroots = nselected;   /* For Alltoall, we know root indices will not overflow MPI_INT */
  /* narrow PetscInt roots to PetscMPIInt, since root index i doubles as rank i below */
  ierr   = PetscMalloc1(nselected,&roots);CHKERRQ(ierr);
  for (i=0; i<nselected; i++) roots[i] = tmproots[i];
  ierr   = PetscFree(tmproots);CHKERRQ(ierr);

  /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */
  /* Root index i lives on rank i, so roots[] is also the list of destination ranks; the
     ranks that messaged us back (leaves[]) are exactly the owners of our surviving leaves */
  ierr   = PetscCommBuildTwoSided(comm,0/*empty msg*/,MPI_INT/*fake*/,nroots,roots,NULL/*todata*/,&nleaves,&leaves,NULL/*fromdata*/);CHKERRQ(ierr);

  /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */
  /* Trick: negate my own rank so it sorts first, then restore it after the sort.
     NOTE(review): for rank 0, -rank is still 0, which already sorts first, so the
     leaves[0] < 0 restore below is correctly skipped in that case */
  ndranks = 0;
  for (i=0; i<nleaves; i++) {
    if (leaves[i] == rank) {leaves[i] = -rank; ndranks = 1; break;}
  }
  ierr = PetscSortMPIInt(nleaves,leaves);CHKERRQ(ierr);
  if (nleaves && leaves[0] < 0) leaves[0] = rank;

  /* Build esf and fill its fields manually (without calling PetscSFSetUp) */
  ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
  for (i=0; i<nleaves; i++) { /* 1:1 map from roots to leaves */
    ilocal[i]        = leaves[i];
    iremote[i].rank  = leaves[i];
    iremote[i].index = leaves[i];
  }
  ierr = PetscSFCreate(comm,&esf);CHKERRQ(ierr);
  ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */
  ierr = PetscSFSetGraph(esf,sf->nleaves,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);

  /* As if we are calling PetscSFSetUpRanks(esf,self's group) */
  /* Each remote rank contributes exactly one leaf, so every roffset bucket has size 1 */
  ierr = PetscMalloc4(nleaves,&esf->ranks,nleaves+1,&esf->roffset,nleaves,&esf->rmine,nleaves,&esf->rremote);CHKERRQ(ierr);
  esf->nranks     = nleaves;
  esf->ndranks    = ndranks;
  esf->roffset[0] = 0;
  for (i=0; i<nleaves; i++) {
    esf->ranks[i]     = leaves[i];
    esf->roffset[i+1] = i+1;
    esf->rmine[i]     = leaves[i];
    esf->rremote[i]   = leaves[i];
  }

  /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */
  bas  = (PetscSF_Basic*)esf->data; /* allocated by PetscSFSetType(esf,PETSCSFBASIC) above */
  ierr = PetscMalloc2(nroots,&bas->iranks,nroots+1,&bas->ioffset);CHKERRQ(ierr);
  ierr = PetscMalloc1(nroots,&bas->irootloc);CHKERRQ(ierr);
  /* Move myself ahead if rank is in roots[], since I am a distinguished irank */
  /* Same negate-sort-restore trick as for leaves[] above */
  ndiranks = 0;
  for (i=0; i<nroots; i++) {
    if (roots[i] == rank) {roots[i] = -rank; ndiranks = 1; break;}
  }
  ierr = PetscSortMPIInt(nroots,roots);CHKERRQ(ierr);
  if (nroots && roots[0] < 0) roots[0] = rank;

  /* Each sending rank contributes one root datum, so ioffset buckets also have size 1 */
  bas->niranks    = nroots;
  bas->ndiranks   = ndiranks;
  bas->ioffset[0] = 0;
  bas->itotal     = nroots;
  for (i=0; i<nroots; i++) {
    bas->iranks[i]    = roots[i];
    bas->ioffset[i+1] = i+1;
    bas->irootloc[i]  = roots[i];
  }

  /* NOTE(review): roots[] and leaves[] (from PetscCommBuildTwoSided) are not freed here;
     presumably intentional if ownership moved into esf, but worth confirming — only
     esf->ranks/iranks copies of their contents are stored above */
  *newsf = esf;
  PetscFunctionReturn(0);
}
187 
188 PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf)
189 {
190   PetscErrorCode   ierr;
191   PetscSF_Alltoall *dat = (PetscSF_Alltoall*)sf->data;
192 
193   PetscFunctionBegin;
194   /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */
195   sf->ops->Destroy         = PetscSFDestroy_Allgatherv;
196   sf->ops->Reset           = PetscSFReset_Allgatherv;
197   sf->ops->BcastAndOpEnd   = PetscSFBcastAndOpEnd_Allgatherv;
198   sf->ops->ReduceEnd       = PetscSFReduceEnd_Allgatherv;
199   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Allgatherv;
200   sf->ops->GetRootRanks    = PetscSFGetRootRanks_Allgatherv;
201 
202   /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */
203   sf->ops->GetLeafRanks    = PetscSFGetLeafRanks_Allgatherv;
204 
205   /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */
206   sf->ops->FetchAndOpBegin  = PetscSFFetchAndOpBegin_Gatherv;
207 
208   /* Alltoall stuff */
209   sf->ops->GetGraph         = PetscSFGetGraph_Alltoall;
210   sf->ops->BcastAndOpBegin  = PetscSFBcastAndOpBegin_Alltoall;
211   sf->ops->ReduceBegin      = PetscSFReduceBegin_Alltoall;
212   sf->ops->CreateLocalSF    = PetscSFCreateLocalSF_Alltoall;
213   sf->ops->CreateEmbeddedSF = PetscSFCreateEmbeddedSF_Alltoall;
214 
215   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
216   sf->data = (void*)dat;
217   PetscFunctionReturn(0);
218 }
219 
220 
221