xref: /petsc/src/vec/is/sf/impls/basic/sfbasic.h (revision b23bfdefca792e3d9f47034521b8d4c437693123)
1 #if !defined(__SFBASIC_H)
2 #define __SFBASIC_H
3 
4 #include <../src/vec/is/sf/impls/basic/sfpack.h>
5 
6 typedef enum {PETSCSF_LEAF2ROOT_REDUCE=0, PETSCSF_ROOT2LEAF_BCAST=1} PetscSFDirection;
7 
8 typedef struct _n_PetscSFPack_Basic *PetscSFPack_Basic;
9 
10 
11 /* Why do we want to double MPI requests?
12    Note each PetscSFPack link supports either leaf2root or root2leaf communication, but not simultaneously both.
13    We use persistent MPI requests in SFBasic. By doubling the requests, the communications in both direction can
14    shared rootbuf and leafbuf. SFNeighbor etc do not need this since MPI does not support persistent requests for
15    collectives yet. But once MPI adds this feature, SFNeighbor etc can also benefit from this design.
16  */
17 #define SPPACKBASICHEADER \
18   SFPACKHEADER;                                                                                                                    \
19   PetscMPIInt   half;           /* Number of MPI_Requests used for either leaf2root or root2leaf communication */                  \
20   MPI_Request   *requests       /* [2*half] requests arranged in this order: leaf2root root/leaf reqs, root2leaf root/leaf reqs */
21 
22 struct _n_PetscSFPack_Basic {
23   SPPACKBASICHEADER;
24   PetscBool     initialized[2]; /* Is the communcation pattern in each direction initialized? [0] for leaf2root, [1] for root2leaf */
25 };
26 
27 #define SFBASICHEADER \
28   PetscMPIInt      niranks;         /* Number of incoming ranks (ranks accessing my roots) */                                      \
29   PetscMPIInt      ndiranks;        /* Number of incoming ranks (ranks accessing my roots) in distinguished set */                 \
30   PetscMPIInt      *iranks;         /* Array of ranks that reference my roots */                                                   \
31   PetscInt         itotal;          /* Total number of graph edges referencing my roots */                                         \
32   PetscInt         *ioffset;        /* Array of length niranks+1 holding offset in irootloc[] for each rank */                     \
33   PetscInt         *irootloc;       /* Incoming roots referenced by ranks starting at ioffset[rank] */                             \
34   PetscSFPackOpt   rootpackopt;     /* Optimization plans to (un)pack roots based on patterns in irootloc[]. NULL for no plans */  \
35   PetscSFPackOpt   selfrootpackopt; /* Optimization plans to (un)pack roots connected to local leaves */                           \
36   PetscSFPack      avail;           /* One or more entries per MPI Datatype, lazily constructed */                                 \
37   PetscSFPack      inuse            /* Buffers being used for transactions that have not yet completed */
38 
39 typedef struct {
40   SFBASICHEADER;
41 } PetscSF_Basic;
42 
43 PETSC_STATIC_INLINE PetscErrorCode PetscSFGetRootInfo_Basic(PetscSF sf,PetscInt *nrootranks,PetscInt *ndrootranks,const PetscMPIInt **rootranks,const PetscInt **rootoffset,const PetscInt **rootloc)
44 {
45   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
46 
47   PetscFunctionBegin;
48   if (nrootranks)  *nrootranks  = bas->niranks;
49   if (ndrootranks) *ndrootranks = bas->ndiranks;
50   if (rootranks)   *rootranks   = bas->iranks;
51   if (rootoffset)  *rootoffset  = bas->ioffset;
52   if (rootloc)     *rootloc     = bas->irootloc;
53   PetscFunctionReturn(0);
54 }
55 
56 PETSC_STATIC_INLINE PetscErrorCode PetscSFGetLeafInfo_Basic(PetscSF sf,PetscInt *nleafranks,PetscInt *ndleafranks,const PetscMPIInt **leafranks,const PetscInt **leafoffset,const PetscInt **leafloc,const PetscInt **leafrremote)
57 {
58   PetscFunctionBegin;
59   if (nleafranks)  *nleafranks  = sf->nranks;
60   if (ndleafranks) *ndleafranks = sf->ndranks;
61   if (leafranks)   *leafranks   = sf->ranks;
62   if (leafoffset)  *leafoffset  = sf->roffset;
63   if (leafloc)     *leafloc     = sf->rmine;
64   if (leafrremote) *leafrremote = sf->rremote;
65   PetscFunctionReturn(0);
66 }
67 
68 /* Get root locations either on Host (CPU) or Device (GPU) */
69 PETSC_STATIC_INLINE PetscErrorCode PetscSFGetRootIndicesAtPlace_Basic(PetscSF sf,PetscBool isdevice, const PetscInt **rootloc)
70 {
71   PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
72   PetscFunctionBegin;
73   if (rootloc)     *rootloc     = bas->irootloc;
74   PetscFunctionReturn(0);
75 }
76 
77 /* Get leaf locations either on Host (CPU) or Device (GPU) */
78 PETSC_STATIC_INLINE PetscErrorCode PetscSFGetLeafIndicesAtPlace_Basic(PetscSF sf,PetscBool isdevice, const PetscInt **leafloc)
79 {
80   PetscFunctionBegin;
81   if (leafloc)     *leafloc     = sf->rmine;
82   PetscFunctionReturn(0);
83 }
84 
85 typedef struct {
86   PetscInt       count;  /* Number of entries to pack, unpack etc. */
87   PetscInt       offset; /* Offset of the first entry */
88   PetscSFPackOpt opt;    /* Pack optimizations */
89   char           *buf;   /* The contiguous buffer where we pack to or unpack from */
90 } PackInfo;
91 
92 /* Utility routine to pack selected entries of rootdata into root buffer */
93 PETSC_STATIC_INLINE PetscErrorCode PetscSFPackRootData(PetscSF sf,PetscSFPack link,PetscInt nrootranks,PetscInt ndrootranks,const PetscInt *rootoffset,const PetscInt *rootloc,const void *rootdata)
94 {
95   PetscErrorCode ierr;
96   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
97   PetscInt       i;
98   PackInfo       pinfo[2] = {{rootoffset[ndrootranks], 0, bas->selfrootpackopt, link->selfbuf}, {rootoffset[nrootranks]-rootoffset[ndrootranks], rootoffset[ndrootranks], bas->rootpackopt, link->rootbuf}};
99 
100   PetscFunctionBegin;
101   /* Only do packing when count != 0 so that we can avoid invoking CUDA kernels on GPU. */
102   for (i=0; i<2; i++) {if (pinfo[i].count) {ierr = (*link->Pack)(pinfo[i].count,rootloc+pinfo[i].offset,link->bs,pinfo[i].opt,rootdata,pinfo[i].buf);CHKERRQ(ierr);}}
103   PetscFunctionReturn(0);
104 }
105 
106 /* Utility routine to pack selected entries of leafdata into leaf buffer */
107 PETSC_STATIC_INLINE PetscErrorCode PetscSFPackLeafData(PetscSF sf,PetscSFPack link,PetscInt nleafranks,PetscInt ndleafranks,const PetscInt *leafoffset,const PetscInt *leafloc,const void *leafdata)
108 {
109   PetscErrorCode ierr;
110   PetscInt       i;
111   PackInfo       pinfo[2] = {{leafoffset[ndleafranks], 0, sf->selfleafpackopt, link->selfbuf}, {leafoffset[nleafranks]-leafoffset[ndleafranks], leafoffset[ndleafranks], sf->leafpackopt, link->leafbuf}};
112 
113   PetscFunctionBegin;
114   for (i=0; i<2; i++) {if (pinfo[i].count) {ierr = (*link->Pack)(pinfo[i].count,leafloc+pinfo[i].offset,link->bs,pinfo[i].opt,leafdata,pinfo[i].buf);CHKERRQ(ierr);}}
115   PetscFunctionReturn(0);
116 }
117 
118 /* Utility routine to unpack data from root buffer and Op it into selected entries of rootdata */
119 PETSC_STATIC_INLINE PetscErrorCode PetscSFUnpackAndOpRootData(PetscSF sf,PetscSFPack link,PetscInt nrootranks,PetscInt ndrootranks,const PetscInt *rootoffset,const PetscInt *rootloc,void *rootdata,MPI_Op op)
120 {
121   PetscErrorCode ierr;
122   PetscInt       i,j;
123   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
124   PetscErrorCode (*UnpackAndOp)(PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
125   PackInfo       pinfo[2] = {{rootoffset[ndrootranks], 0, bas->selfrootpackopt, link->selfbuf}, {rootoffset[nrootranks]-rootoffset[ndrootranks], rootoffset[ndrootranks], bas->rootpackopt, link->rootbuf}};
126 
127   PetscFunctionBegin;
128   ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr);
129   for (i=0; i<2; i++) {
130     if (UnpackAndOp && pinfo[i].count) {ierr = (*UnpackAndOp)(pinfo[i].count,rootloc+pinfo[i].offset,link->bs,pinfo[i].opt,rootdata,pinfo[i].buf);CHKERRQ(ierr);}
131     else {for (j=0; j<pinfo[i].count; j++) {ierr = MPI_Reduce_local(pinfo[i].buf+j*link->unitbytes,(char *)rootdata+(rootloc[pinfo[i].offset+j])*link->unitbytes,1,link->unit,op);CHKERRQ(ierr);}}
132   }
133   PetscFunctionReturn(0);
134 }
135 
136 /* Utility routine to unpack data from leaf buffer and Op it into selected entries of leafdata */
137 PETSC_STATIC_INLINE PetscErrorCode PetscSFUnpackAndOpLeafData(PetscSF sf,PetscSFPack link,PetscInt nleafranks,PetscInt ndleafranks,const PetscInt *leafoffset,const PetscInt *leafloc, void *leafdata,MPI_Op op)
138 {
139   PetscErrorCode ierr;
140   PetscInt       i,j;
141   PetscErrorCode (*UnpackAndOp)(PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);
142   PackInfo       pinfo[2] = {{leafoffset[ndleafranks], 0, sf->selfleafpackopt, link->selfbuf}, {leafoffset[nleafranks]-leafoffset[ndleafranks], leafoffset[ndleafranks], sf->leafpackopt, link->leafbuf}};
143 
144   PetscFunctionBegin;
145   ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr);
146   for (i=0; i<2; i++) {
147     if (UnpackAndOp && pinfo[i].count) {ierr = (*UnpackAndOp)(pinfo[i].count,leafloc+pinfo[i].offset,link->bs,pinfo[i].opt,leafdata,pinfo[i].buf);CHKERRQ(ierr);}
148     else {for (j=0; j<pinfo[i].count; j++) {ierr = MPI_Reduce_local(pinfo[i].buf+j*link->unitbytes,(char *)leafdata+(leafloc[pinfo[i].offset+j])*link->unitbytes,1,link->unit,op);CHKERRQ(ierr);}}
149   }
150   PetscFunctionReturn(0);
151 }
152 
153 /* Utility routine to fetch and Op selected entries of rootdata */
154 PETSC_STATIC_INLINE PetscErrorCode PetscSFFetchAndOpRootData(PetscSF sf,PetscSFPack link,PetscInt nrootranks,PetscInt ndrootranks,const PetscInt *rootoffset,const PetscInt *rootloc,void *rootdata,MPI_Op op)
155 {
156   PetscErrorCode ierr;
157   PetscInt       i;
158   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
159   PetscErrorCode (*FetchAndOp)(PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);
160   PackInfo       pinfo[2] = {{rootoffset[ndrootranks], 0, bas->selfrootpackopt, link->selfbuf}, {rootoffset[nrootranks]-rootoffset[ndrootranks], rootoffset[ndrootranks], bas->rootpackopt, link->rootbuf}};
161 
162   PetscFunctionBegin;
163   ierr = PetscSFPackGetFetchAndOp(sf,(PetscSFPack)link,op,&FetchAndOp);CHKERRQ(ierr);
164   for (i=0; i<2; i++) {if (pinfo[i].count) {ierr = (*FetchAndOp)(pinfo[i].count,rootloc+pinfo[i].offset,link->bs,pinfo[i].opt,rootdata,pinfo[i].buf);CHKERRQ(ierr);}}
165   PetscFunctionReturn(0);
166 }
167 
168 
169 PETSC_STATIC_INLINE PetscErrorCode PetscSFPackSetupOptimization_Basic(PetscSF sf)
170 {
171   PetscErrorCode ierr;
172   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
173 
174   PetscFunctionBegin;
175   ierr = PetscSFPackSetupOptimization(sf->ndranks,               sf->roffset,               sf->rmine,    &sf->selfleafpackopt);CHKERRQ(ierr);
176   ierr = PetscSFPackSetupOptimization(sf->nranks-sf->ndranks,    sf->roffset+sf->ndranks,   sf->rmine,    &sf->leafpackopt);CHKERRQ(ierr);
177   ierr = PetscSFPackSetupOptimization(bas->ndiranks,             bas->ioffset,              bas->irootloc,&bas->selfrootpackopt);CHKERRQ(ierr);
178   ierr = PetscSFPackSetupOptimization(bas->niranks-bas->ndiranks,bas->ioffset+bas->ndiranks,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr);
179   PetscFunctionReturn(0);
180 }
181 
182 PETSC_STATIC_INLINE PetscErrorCode PetscSFPackDestroyOptimization_Basic(PetscSF sf)
183 {
184   PetscErrorCode ierr;
185   PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
186 
187   PetscFunctionBegin;
188   ierr = PetscSFPackDestoryOptimization(&sf->leafpackopt);CHKERRQ(ierr);
189   ierr = PetscSFPackDestoryOptimization(&sf->selfleafpackopt);CHKERRQ(ierr);
190   ierr = PetscSFPackDestoryOptimization(&bas->rootpackopt);CHKERRQ(ierr);
191   ierr = PetscSFPackDestoryOptimization(&bas->selfrootpackopt);CHKERRQ(ierr);
192   PetscFunctionReturn(0);
193 }
194 
195 PETSC_STATIC_INLINE PetscErrorCode PetscSFPackWaitall_Basic(PetscSFPack_Basic link,PetscSFDirection direction)
196 {
197   PetscErrorCode ierr;
198   MPI_Request    *requests = (direction == PETSCSF_LEAF2ROOT_REDUCE) ? link->requests : link->requests  + link->half;
199 
200   PetscFunctionBegin;
201   ierr = MPI_Waitall(link->half,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
202   PetscFunctionReturn(0);
203 }
204 
205 PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF);
206 PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF,PetscViewer);
207 PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF);
208 PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF);
209 PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF,MPI_Datatype,const void*,void*,MPI_Op);
210 PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF,MPI_Datatype,const void*,void*,MPI_Op);
211 PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF,MPI_Datatype,void*,const void*,void*,MPI_Op);
212 PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF,PetscInt,const PetscInt*,PetscSF*);
213 PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF,PetscInt,const PetscInt*,PetscSF*);
214 PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF,PetscInt*,const PetscMPIInt**,const PetscInt**,const PetscInt**);
215 PETSC_INTERN PetscErrorCode PetscSFPackGet_Basic_Common(PetscSF,MPI_Datatype,const void*,const void*,PetscInt,PetscSFPack_Basic*);
216 #endif
217