1 #if !defined(__SFBASIC_H) 2 #define __SFBASIC_H 3 4 #include <../src/vec/is/sf/impls/basic/sfpack.h> 5 6 typedef enum {PETSCSF_LEAF2ROOT_REDUCE=0, PETSCSF_ROOT2LEAF_BCAST=1} PetscSFDirection; 7 8 typedef struct _n_PetscSFPack_Basic *PetscSFPack_Basic; 9 10 11 /* Why do we want to double MPI requests? 12 Note each PetscSFPack link supports either leaf2root or root2leaf communication, but not simultaneously both. 13 We use persistent MPI requests in SFBasic. By doubling the requests, the communications in both direction can 14 shared rootbuf and leafbuf. SFNeighbor etc do not need this since MPI does not support persistent requests for 15 collectives yet. But once MPI adds this feature, SFNeighbor etc can also benefit from this design. 16 */ 17 #define SPPACKBASICHEADER \ 18 SFPACKHEADER; \ 19 PetscMPIInt half; /* Number of MPI_Requests used for either leaf2root or root2leaf communication */ \ 20 MPI_Request *requests /* [2*half] requests arranged in this order: leaf2root root/leaf reqs, root2leaf root/leaf reqs */ 21 22 struct _n_PetscSFPack_Basic { 23 SPPACKBASICHEADER; 24 PetscBool initialized[2]; /* Is the communcation pattern in each direction initialized? [0] for leaf2root, [1] for root2leaf */ 25 }; 26 27 #define SFBASICHEADER \ 28 PetscMPIInt niranks; /* Number of incoming ranks (ranks accessing my roots) */ \ 29 PetscMPIInt ndiranks; /* Number of incoming ranks (ranks accessing my roots) in distinguished set */ \ 30 PetscMPIInt *iranks; /* Array of ranks that reference my roots */ \ 31 PetscInt itotal; /* Total number of graph edges referencing my roots */ \ 32 PetscInt *ioffset; /* Array of length niranks+1 holding offset in irootloc[] for each rank */ \ 33 PetscInt *irootloc; /* Incoming roots referenced by ranks starting at ioffset[rank] */ \ 34 PetscSFPackOpt rootpackopt; /* Optimization plans to (un)pack roots based on patterns in irootloc[]. NULL for no plans */ \ 35 PetscSFPackOpt selfrootpackopt; /* Optimization plans to (un)pack roots connected to local leaves */ \ 36 PetscSFPack avail; /* One or more entries per MPI Datatype, lazily constructed */ \ 37 PetscSFPack inuse /* Buffers being used for transactions that have not yet completed */ 38 39 typedef struct { 40 SFBASICHEADER; 41 } PetscSF_Basic; 42 43 PETSC_STATIC_INLINE PetscErrorCode PetscSFGetRootInfo_Basic(PetscSF sf,PetscInt *nrootranks,PetscInt *ndrootranks,const PetscMPIInt **rootranks,const PetscInt **rootoffset,const PetscInt **rootloc) 44 { 45 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 46 47 PetscFunctionBegin; 48 if (nrootranks) *nrootranks = bas->niranks; 49 if (ndrootranks) *ndrootranks = bas->ndiranks; 50 if (rootranks) *rootranks = bas->iranks; 51 if (rootoffset) *rootoffset = bas->ioffset; 52 if (rootloc) *rootloc = bas->irootloc; 53 PetscFunctionReturn(0); 54 } 55 56 PETSC_STATIC_INLINE PetscErrorCode PetscSFGetLeafInfo_Basic(PetscSF sf,PetscInt *nleafranks,PetscInt *ndleafranks,const PetscMPIInt **leafranks,const PetscInt **leafoffset,const PetscInt **leafloc,const PetscInt **leafrremote) 57 { 58 PetscFunctionBegin; 59 if (nleafranks) *nleafranks = sf->nranks; 60 if (ndleafranks) *ndleafranks = sf->ndranks; 61 if (leafranks) *leafranks = sf->ranks; 62 if (leafoffset) *leafoffset = sf->roffset; 63 if (leafloc) *leafloc = sf->rmine; 64 if (leafrremote) *leafrremote = sf->rremote; 65 PetscFunctionReturn(0); 66 } 67 68 /* Get root locations either on Host (CPU) or Device (GPU) */ 69 PETSC_STATIC_INLINE PetscErrorCode PetscSFGetRootIndicesAtPlace_Basic(PetscSF sf,PetscBool isdevice, const PetscInt **rootloc) 70 { 71 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 72 PetscFunctionBegin; 73 if (rootloc) *rootloc = bas->irootloc; 74 PetscFunctionReturn(0); 75 } 76 77 /* Get leaf locations either on Host (CPU) or Device (GPU) */ 78 PETSC_STATIC_INLINE PetscErrorCode PetscSFGetLeafIndicesAtPlace_Basic(PetscSF sf,PetscBool isdevice, const PetscInt **leafloc) 79 { 80 PetscFunctionBegin; 81 if (leafloc) *leafloc = sf->rmine; 82 PetscFunctionReturn(0); 83 } 84 85 typedef struct { 86 PetscInt count; /* Number of entries to pack, unpack etc. */ 87 PetscInt offset; /* Offset of the first entry */ 88 PetscSFPackOpt opt; /* Pack optimizations */ 89 char *buf; /* The contiguous buffer where we pack to or unpack from */ 90 } PackInfo; 91 92 /* Utility routine to pack selected entries of rootdata into root buffer */ 93 PETSC_STATIC_INLINE PetscErrorCode PetscSFPackRootData(PetscSF sf,PetscSFPack link,PetscInt nrootranks,PetscInt ndrootranks,const PetscInt *rootoffset,const PetscInt *rootloc,const void *rootdata) 94 { 95 PetscErrorCode ierr; 96 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 97 PetscInt i; 98 PackInfo pinfo[2] = {{rootoffset[ndrootranks], 0, bas->selfrootpackopt, link->selfbuf}, {rootoffset[nrootranks]-rootoffset[ndrootranks], rootoffset[ndrootranks], bas->rootpackopt, link->rootbuf}}; 99 100 PetscFunctionBegin; 101 /* Only do packing when count != 0 so that we can avoid invoking CUDA kernels on GPU. */ 102 for (i=0; i<2; i++) {if (pinfo[i].count) {ierr = (*link->Pack)(pinfo[i].count,rootloc+pinfo[i].offset,link->bs,pinfo[i].opt,rootdata,pinfo[i].buf);CHKERRQ(ierr);}} 103 PetscFunctionReturn(0); 104 } 105 106 /* Utility routine to pack selected entries of leafdata into leaf buffer */ 107 PETSC_STATIC_INLINE PetscErrorCode PetscSFPackLeafData(PetscSF sf,PetscSFPack link,PetscInt nleafranks,PetscInt ndleafranks,const PetscInt *leafoffset,const PetscInt *leafloc,const void *leafdata) 108 { 109 PetscErrorCode ierr; 110 PetscInt i; 111 PackInfo pinfo[2] = {{leafoffset[ndleafranks], 0, sf->selfleafpackopt, link->selfbuf}, {leafoffset[nleafranks]-leafoffset[ndleafranks], leafoffset[ndleafranks], sf->leafpackopt, link->leafbuf}}; 112 113 PetscFunctionBegin; 114 for (i=0; i<2; i++) {if (pinfo[i].count) {ierr = (*link->Pack)(pinfo[i].count,leafloc+pinfo[i].offset,link->bs,pinfo[i].opt,leafdata,pinfo[i].buf);CHKERRQ(ierr);}} 115 PetscFunctionReturn(0); 116 } 117 118 /* Utility routine to unpack data from root buffer and Op it into selected entries of rootdata */ 119 PETSC_STATIC_INLINE PetscErrorCode PetscSFUnpackAndOpRootData(PetscSF sf,PetscSFPack link,PetscInt nrootranks,PetscInt ndrootranks,const PetscInt *rootoffset,const PetscInt *rootloc,void *rootdata,MPI_Op op) 120 { 121 PetscErrorCode ierr; 122 PetscInt i,j; 123 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 124 PetscErrorCode (*UnpackAndOp)(PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); 125 PackInfo pinfo[2] = {{rootoffset[ndrootranks], 0, bas->selfrootpackopt, link->selfbuf}, {rootoffset[nrootranks]-rootoffset[ndrootranks], rootoffset[ndrootranks], bas->rootpackopt, link->rootbuf}}; 126 127 PetscFunctionBegin; 128 ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr); 129 for (i=0; i<2; i++) { 130 if (UnpackAndOp && pinfo[i].count) {ierr = (*UnpackAndOp)(pinfo[i].count,rootloc+pinfo[i].offset,link->bs,pinfo[i].opt,rootdata,pinfo[i].buf);CHKERRQ(ierr);} 131 else {for (j=0; j<pinfo[i].count; j++) {ierr = MPI_Reduce_local(pinfo[i].buf+j*link->unitbytes,(char *)rootdata+(rootloc[pinfo[i].offset+j])*link->unitbytes,1,link->unit,op);CHKERRQ(ierr);}} 132 } 133 PetscFunctionReturn(0); 134 } 135 136 /* Utility routine to unpack data from leaf buffer and Op it into selected entries of leafdata */ 137 PETSC_STATIC_INLINE PetscErrorCode PetscSFUnpackAndOpLeafData(PetscSF sf,PetscSFPack link,PetscInt nleafranks,PetscInt ndleafranks,const PetscInt *leafoffset,const PetscInt *leafloc, void *leafdata,MPI_Op op) 138 { 139 PetscErrorCode ierr; 140 PetscInt i,j; 141 PetscErrorCode (*UnpackAndOp)(PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); 142 PackInfo pinfo[2] = {{leafoffset[ndleafranks], 0, sf->selfleafpackopt, link->selfbuf}, {leafoffset[nleafranks]-leafoffset[ndleafranks], leafoffset[ndleafranks], sf->leafpackopt, link->leafbuf}}; 143 144 PetscFunctionBegin; 145 ierr = PetscSFPackGetUnpackAndOp(sf,(PetscSFPack)link,op,&UnpackAndOp);CHKERRQ(ierr); 146 for (i=0; i<2; i++) { 147 if (UnpackAndOp && pinfo[i].count) {ierr = (*UnpackAndOp)(pinfo[i].count,leafloc+pinfo[i].offset,link->bs,pinfo[i].opt,leafdata,pinfo[i].buf);CHKERRQ(ierr);} 148 else {for (j=0; j<pinfo[i].count; j++) {ierr = MPI_Reduce_local(pinfo[i].buf+j*link->unitbytes,(char *)leafdata+(leafloc[pinfo[i].offset+j])*link->unitbytes,1,link->unit,op);CHKERRQ(ierr);}} 149 } 150 PetscFunctionReturn(0); 151 } 152 153 /* Utility routine to fetch and Op selected entries of rootdata */ 154 PETSC_STATIC_INLINE PetscErrorCode PetscSFFetchAndOpRootData(PetscSF sf,PetscSFPack link,PetscInt nrootranks,PetscInt ndrootranks,const PetscInt *rootoffset,const PetscInt *rootloc,void *rootdata,MPI_Op op) 155 { 156 PetscErrorCode ierr; 157 PetscInt i; 158 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 159 PetscErrorCode (*FetchAndOp)(PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); 160 PackInfo pinfo[2] = {{rootoffset[ndrootranks], 0, bas->selfrootpackopt, link->selfbuf}, {rootoffset[nrootranks]-rootoffset[ndrootranks], rootoffset[ndrootranks], bas->rootpackopt, link->rootbuf}}; 161 162 PetscFunctionBegin; 163 ierr = PetscSFPackGetFetchAndOp(sf,(PetscSFPack)link,op,&FetchAndOp);CHKERRQ(ierr); 164 for (i=0; i<2; i++) {if (pinfo[i].count) {ierr = (*FetchAndOp)(pinfo[i].count,rootloc+pinfo[i].offset,link->bs,pinfo[i].opt,rootdata,pinfo[i].buf);CHKERRQ(ierr);}} 165 PetscFunctionReturn(0); 166 } 167 168 169 PETSC_STATIC_INLINE PetscErrorCode PetscSFPackSetupOptimization_Basic(PetscSF sf) 170 { 171 PetscErrorCode ierr; 172 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 173 174 PetscFunctionBegin; 175 ierr = PetscSFPackSetupOptimization(sf->ndranks, sf->roffset, sf->rmine, &sf->selfleafpackopt);CHKERRQ(ierr); 176 ierr = PetscSFPackSetupOptimization(sf->nranks-sf->ndranks, sf->roffset+sf->ndranks, sf->rmine, &sf->leafpackopt);CHKERRQ(ierr); 177 ierr = PetscSFPackSetupOptimization(bas->ndiranks, bas->ioffset, bas->irootloc,&bas->selfrootpackopt);CHKERRQ(ierr); 178 ierr = PetscSFPackSetupOptimization(bas->niranks-bas->ndiranks,bas->ioffset+bas->ndiranks,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr); 179 PetscFunctionReturn(0); 180 } 181 182 PETSC_STATIC_INLINE PetscErrorCode PetscSFPackDestroyOptimization_Basic(PetscSF sf) 183 { 184 PetscErrorCode ierr; 185 PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; 186 187 PetscFunctionBegin; 188 ierr = PetscSFPackDestoryOptimization(&sf->leafpackopt);CHKERRQ(ierr); 189 ierr = PetscSFPackDestoryOptimization(&sf->selfleafpackopt);CHKERRQ(ierr); 190 ierr = PetscSFPackDestoryOptimization(&bas->rootpackopt);CHKERRQ(ierr); 191 ierr = PetscSFPackDestoryOptimization(&bas->selfrootpackopt);CHKERRQ(ierr); 192 PetscFunctionReturn(0); 193 } 194 195 PETSC_STATIC_INLINE PetscErrorCode PetscSFPackWaitall_Basic(PetscSFPack_Basic link,PetscSFDirection direction) 196 { 197 PetscErrorCode ierr; 198 MPI_Request *requests = (direction == PETSCSF_LEAF2ROOT_REDUCE) ? link->requests : link->requests + link->half; 199 200 PetscFunctionBegin; 201 ierr = MPI_Waitall(link->half,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 202 PetscFunctionReturn(0); 203 } 204 205 PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF); 206 PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF,PetscViewer); 207 PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF); 208 PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF); 209 PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic(PetscSF,MPI_Datatype,const void*,void*,MPI_Op); 210 PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic(PetscSF,MPI_Datatype,const void*,void*,MPI_Op); 211 PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF,MPI_Datatype,void*,const void*,void*,MPI_Op); 212 PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF,PetscInt,const PetscInt*,PetscSF*); 213 PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF,PetscInt,const PetscInt*,PetscSF*); 214 PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF,PetscInt*,const PetscMPIInt**,const PetscInt**,const PetscInt**); 215 PETSC_INTERN PetscErrorCode PetscSFPackGet_Basic_Common(PetscSF,MPI_Datatype,const void*,const void*,PetscInt,PetscSFPack_Basic*); 216 #endif 217