xref: /petsc/src/vec/is/sf/impls/basic/alltoall/sfalltoall.c (revision 58c0e5077dcf40d6a880c19c87f1075ae1d22c8e)
1 #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h>
2 #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h>
3 #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h>
4 
/* Alltoall shares Allgatherv's pack context and its pack-lookup routine verbatim */
typedef PetscSFPack_Allgatherv PetscSFPack_Alltoall;
#define PetscSFPackGet_Alltoall PetscSFPackGet_Allgatherv

/* Reuse the type. The difference is some fields (i.e., displs, recvcounts) are not used, which is not a big deal */
typedef PetscSF_Allgatherv PetscSF_Alltoall;
11 /*===================================================================================*/
12 /*              Implementations of SF public APIs                                    */
13 /*===================================================================================*/
14 static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
15 {
16   PetscErrorCode ierr;
17   PetscInt       i;
18 
19   PetscFunctionBegin;
20   if (nroots)  *nroots  = sf->nroots;
21   if (nleaves) *nleaves = sf->nleaves;
22   if (ilocal)  *ilocal  = NULL; /* Contiguous local indices */
23   if (iremote) {
24     if (!sf->remote) {
25       ierr = PetscMalloc1(sf->nleaves,&sf->remote);CHKERRQ(ierr);
26       sf->remote_alloc = sf->remote;
27       for (i=0; i<sf->nleaves; i++) {
28         sf->remote[i].rank  = i;
29         sf->remote[i].index = i;
30       }
31     }
32     *iremote = sf->remote;
33   }
34   PetscFunctionReturn(0);
35 }
36 
37 static PetscErrorCode PetscSFBcastAndOpBegin_Alltoall(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
38 {
39   PetscErrorCode       ierr;
40   PetscSFPack_Alltoall link;
41   MPI_Comm             comm;
42   void                 *recvbuf;
43 
44   PetscFunctionBegin;
45   ierr = PetscSFPackGet_Alltoall(sf,unit,rootdata,leafdata,&link);CHKERRQ(ierr);
46   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
47 
48   if (op != MPIU_REPLACE) {
49     if (!link->leaf) {ierr = PetscMalloc(sf->nleaves*link->unitbytes,&link->leaf);CHKERRQ(ierr);}
50     recvbuf = link->leaf;
51   } else {
52     recvbuf = leafdata;
53   }
54   ierr = MPIU_Ialltoall(rootdata,1,unit,recvbuf,1,unit,comm,&link->request);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
59 {
60   PetscErrorCode       ierr;
61   PetscSFPack_Alltoall link;
62   MPI_Comm             comm;
63   void                 *recvbuf;
64 
65   PetscFunctionBegin;
66   ierr = PetscSFPackGet_Alltoall(sf,unit,rootdata,leafdata,&link);CHKERRQ(ierr);
67   ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
68 
69   if (op != MPIU_REPLACE) {
70     if (!link->root) {ierr = PetscMalloc(sf->nroots*link->unitbytes,&link->root);CHKERRQ(ierr);}
71     recvbuf = link->root;
72   } else {
73     recvbuf = rootdata;
74   }
75   ierr = MPIU_Ialltoall(leafdata,1,unit,recvbuf,1,unit,comm,&link->request);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 
79 static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf,PetscSF *out)
80 {
81   PetscErrorCode ierr;
82   PetscInt       nroots=1,nleaves=1,*ilocal;
83   PetscSFNode    *iremote = NULL;
84   PetscSF        lsf;
85   PetscMPIInt    rank;
86 
87   PetscFunctionBegin;
88   nroots  = 1;
89   nleaves = 1;
90   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);CHKERRQ(ierr);
91   ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr);
92   ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
93   ilocal[0]        = rank;
94   iremote[0].rank  = 0;    /* rank in PETSC_COMM_SELF */
95   iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */
96 
97   ierr = PetscSFCreate(PETSC_COMM_SELF,&lsf);CHKERRQ(ierr);
98   ierr = PetscSFSetGraph(lsf,nroots,nleaves,NULL/*contiguous leaves*/,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
99   ierr = PetscSFSetUp(lsf);CHKERRQ(ierr);
100   *out = lsf;
101   PetscFunctionReturn(0);
102 }
103 
/* Create an embedded (sub-)SF of an Alltoall SF containing only the edges whose
   root indices appear in selected[]. Because an Alltoall SF connects leaf i to
   root i on rank i (a 1:1 map), the surviving edges can be found with a single
   two-sided exchange, and the result is assembled directly as a PETSCSFBASIC SF
   with its internal communication tables filled in by hand, skipping
   PetscSFSetUp entirely. */
static PetscErrorCode PetscSFCreateEmbeddedSF_Alltoall(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
{
  PetscErrorCode ierr;
  PetscInt       i,*tmproots,*ilocal,ndranks,ndiranks;
  PetscSFNode    *iremote;
  PetscMPIInt    nroots,*roots,nleaves,*leaves,rank;
  MPI_Comm       comm;
  PetscSF_Basic  *bas;
  PetscSF        esf;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)sf,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* Uniq selected[] and store the result in roots[] */
  ierr = PetscMalloc1(nselected,&tmproots);CHKERRQ(ierr);
  ierr = PetscArraycpy(tmproots,selected,nselected);CHKERRQ(ierr);
  ierr = PetscSortRemoveDupsInt(&nselected,tmproots);CHKERRQ(ierr); /* nselected might be changed */
  if (tmproots[0] < 0 || tmproots[nselected-1] >= sf->nroots) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",tmproots[0],tmproots[nselected-1],sf->nroots);
  nroots = nselected;   /* For Alltoall, we know root indices will not overflow MPI_INT */
  ierr   = PetscMalloc1(nselected,&roots);CHKERRQ(ierr);
  /* Downcast PetscInt root indices to PetscMPIInt for PetscCommBuildTwoSided */
  for (i=0; i<nselected; i++) roots[i] = tmproots[i];
  ierr   = PetscFree(tmproots);CHKERRQ(ierr);

  /* Find out which leaves are still connected to roots in the embedded sf. Expect PetscCommBuildTwoSided is more scalable than MPI_Alltoall */
  /* Since root i is connected to leaf i on rank i, notifying rank r that root r
     survives tells r its leaf survives; leaves[] receives the ranks that notified us */
  ierr   = PetscCommBuildTwoSided(comm,0/*empty msg*/,MPI_INT/*fake*/,nroots,roots,NULL/*todata*/,&nleaves,&leaves,NULL/*fromdata*/);CHKERRQ(ierr);

  /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */
  /* Trick: negate my own rank so it sorts first, then restore it after sorting */
  ndranks = 0;
  for (i=0; i<nleaves; i++) {
    if (leaves[i] == rank) {leaves[i] = -rank; ndranks = 1; break;}
  }
  ierr = PetscSortMPIInt(nleaves,leaves);CHKERRQ(ierr);
  if (nleaves && leaves[0] < 0) leaves[0] = rank;

  /* Build esf and fill its fields manually (without calling PetscSFSetUp) */
  ierr = PetscMalloc1(nleaves,&ilocal);CHKERRQ(ierr);
  ierr = PetscMalloc1(nleaves,&iremote);CHKERRQ(ierr);
  for (i=0; i<nleaves; i++) { /* 1:1 map from roots to leaves */
    ilocal[i]        = leaves[i];
    iremote[i].rank  = leaves[i];
    iremote[i].index = leaves[i];
  }
  ierr = PetscSFCreate(comm,&esf);CHKERRQ(ierr);
  ierr = PetscSFSetType(esf,PETSCSFBASIC);CHKERRQ(ierr); /* This optimized routine can only create a basic sf */
  ierr = PetscSFSetGraph(esf,sf->nleaves,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);

  /* As if we are calling PetscSFSetUpRanks(esf,self's group) */
  /* Every remote rank contributes exactly one leaf, so each roffset bucket has size 1 */
  ierr = PetscMalloc4(nleaves,&esf->ranks,nleaves+1,&esf->roffset,nleaves,&esf->rmine,nleaves,&esf->rremote);CHKERRQ(ierr);
  esf->nranks     = nleaves;
  esf->ndranks    = ndranks;
  esf->roffset[0] = 0;
  for (i=0; i<nleaves; i++) {
    esf->ranks[i]     = leaves[i];
    esf->roffset[i+1] = i+1;
    esf->rmine[i]     = leaves[i];
    esf->rremote[i]   = leaves[i];
  }

  /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */
  bas  = (PetscSF_Basic*)esf->data;
  ierr = PetscMalloc2(nroots,&bas->iranks,nroots+1,&bas->ioffset);CHKERRQ(ierr);
  ierr = PetscMalloc1(nroots,&bas->irootloc);CHKERRQ(ierr);
  /* Move myself ahead if rank is in roots[], since I am a distinguished irank */
  /* Same negate-sort-restore trick as for leaves[] above */
  ndiranks = 0;
  for (i=0; i<nroots; i++) {
    if (roots[i] == rank) {roots[i] = -rank; ndiranks = 1; break;}
  }
  ierr = PetscSortMPIInt(nroots,roots);CHKERRQ(ierr);
  if (nroots && roots[0] < 0) roots[0] = rank;

  /* Mirror of the leaf-side tables: one root per incoming rank */
  bas->niranks    = nroots;
  bas->ndiranks   = ndiranks;
  bas->ioffset[0] = 0;
  bas->itotal     = nroots;
  for (i=0; i<nroots; i++) {
    bas->iranks[i]    = roots[i];
    bas->ioffset[i+1] = i+1;
    bas->irootloc[i]  = roots[i];
  }

  /* NOTE(review): roots[] and leaves[] come from PetscMalloc1/PetscCommBuildTwoSided and
     do not appear to be freed here — presumably intentional if ownership transfers, but
     verify against PetscCommBuildTwoSided's documented ownership semantics */
  *newsf = esf;
  PetscFunctionReturn(0);
}
188 
189 PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf)
190 {
191   PetscErrorCode   ierr;
192   PetscSF_Alltoall *dat = (PetscSF_Alltoall*)sf->data;
193 
194   PetscFunctionBegin;
195   /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */
196   sf->ops->Destroy         = PetscSFDestroy_Allgatherv;
197   sf->ops->Reset           = PetscSFReset_Allgatherv;
198   sf->ops->BcastAndOpEnd   = PetscSFBcastAndOpEnd_Allgatherv;
199   sf->ops->ReduceEnd       = PetscSFReduceEnd_Allgatherv;
200   sf->ops->FetchAndOpEnd   = PetscSFFetchAndOpEnd_Allgatherv;
201   sf->ops->GetRootRanks    = PetscSFGetRootRanks_Allgatherv;
202 
203   /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */
204   sf->ops->GetLeafRanks    = PetscSFGetLeafRanks_Allgatherv;
205 
206   /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */
207   sf->ops->FetchAndOpBegin  = PetscSFFetchAndOpBegin_Gatherv;
208 
209   /* Alltoall stuff */
210   sf->ops->GetGraph         = PetscSFGetGraph_Alltoall;
211   sf->ops->BcastAndOpBegin  = PetscSFBcastAndOpBegin_Alltoall;
212   sf->ops->ReduceBegin      = PetscSFReduceBegin_Alltoall;
213   sf->ops->CreateLocalSF    = PetscSFCreateLocalSF_Alltoall;
214   sf->ops->CreateEmbeddedSF = PetscSFCreateEmbeddedSF_Alltoall;
215 
216   ierr = PetscNewLog(sf,&dat);CHKERRQ(ierr);
217   sf->data = (void*)dat;
218   PetscFunctionReturn(0);
219 }
220 
221 
222