#if !defined(__SFBASIC_H)
#define __SFBASIC_H

#include <../src/vec/is/sf/impls/basic/sfpack.h>

/* Direction of an SF communication: leaves reducing into roots, or roots broadcasting to leaves */
typedef enum {PETSCSF_LEAF2ROOT_REDUCE=0, PETSCSF_ROOT2LEAF_BCAST=1} PetscSFDirection;

/* Common fields of SF implementations (SFBasic, SFNeighbor, ...) that keep a sparse root space.
   Ranks in [0,ndiranks) are the "distinguished" set (self/local communication); the remaining
   ranks in [ndiranks,niranks) are remote. irootloc[] is partitioned accordingly by ioffset[]. */
#define SFBASICHEADER \
  PetscMPIInt    niranks;         /* Number of incoming ranks (ranks accessing my roots) */ \
  PetscMPIInt    ndiranks;        /* Number of incoming ranks (ranks accessing my roots) in distinguished set */ \
  PetscMPIInt    *iranks;         /* Array of ranks that reference my roots */ \
  PetscInt       itotal;          /* Total number of graph edges referencing my roots */ \
  PetscInt       *ioffset;        /* Array of length niranks+1 holding offset in irootloc[] for each rank */ \
  PetscInt       *irootloc;       /* Incoming roots referenced by ranks starting at ioffset[rank] */ \
  PetscInt       *irootloc_d;     /* irootloc in device memory when needed */ \
  PetscSFPackOpt rootpackopt;     /* Optimization plans to (un)pack roots based on patterns in irootloc[]. NULL for no plans */ \
  PetscSFPackOpt selfrootpackopt; /* Optimization plans to (un)pack roots connected to local leaves */ \
  PetscSFPack    avail;           /* One or more entries per MPI Datatype, lazily constructed */ \
  PetscSFPack    inuse;           /* Buffers being used for transactions that have not yet completed */ \
  PetscBool      selfrootdups;    /* Indices of roots in irootloc[0,ioffset[ndiranks]) have dups, implying threads working ... */ \
                                  /* ... on these roots in parallel may have data race. */ \
  PetscBool      remoterootdups   /* Indices of roots in irootloc[ioffset[ndiranks],ioffset[niranks]) have dups */

typedef struct {
  SFBASICHEADER;
} PetscSF_Basic;

/* Get root-side communication info of an SFBasic. Any output argument may be NULL if not needed. */
PETSC_STATIC_INLINE PetscErrorCode PetscSFGetRootInfo_Basic(PetscSF sf,PetscInt *nrootranks,PetscInt *ndrootranks,const PetscMPIInt **rootranks,const PetscInt **rootoffset,const PetscInt **rootloc)
{
  PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;

  PetscFunctionBegin;
  if (nrootranks)  *nrootranks  = bas->niranks;
  if (ndrootranks) *ndrootranks = bas->ndiranks;
  if (rootranks)   *rootranks   = bas->iranks;
  if (rootoffset)  *rootoffset  = bas->ioffset;
  if (rootloc)     *rootloc     = bas->irootloc;
  PetscFunctionReturn(0);
}

/* Get leaf-side communication info stored directly on the PetscSF. Any output argument may be NULL if not needed. */
PETSC_STATIC_INLINE PetscErrorCode PetscSFGetLeafInfo_Basic(PetscSF sf,PetscInt *nleafranks,PetscInt *ndleafranks,const PetscMPIInt **leafranks,const PetscInt **leafoffset,const PetscInt **leafloc,const PetscInt **leafrremote)
{
  PetscFunctionBegin;
  if (nleafranks)  *nleafranks  = sf->nranks;
  if (ndleafranks) *ndleafranks = sf->ndranks;
  if (leafranks)   *leafranks   = sf->ranks;
  if (leafoffset)  *leafoffset  = sf->roffset;
  if (leafloc)     *leafloc     = sf->rmine;
  if (leafrremote) *leafrremote = sf->rremote;
  PetscFunctionReturn(0);
}

/* Get root locations either on Host or Device.
   The device copy (irootloc_d) is lazily allocated and filled from the host array on first request. */
PETSC_STATIC_INLINE PetscErrorCode PetscSFGetRootIndicesWithMemType_Basic(PetscSF sf,PetscMemType mtype, const PetscInt **rootloc)
{
  PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
  PetscFunctionBegin;
  if (rootloc) {
    if (mtype == PETSC_MEMTYPE_HOST) *rootloc = bas->irootloc;
#if defined(PETSC_HAVE_CUDA)
    else if (mtype == PETSC_MEMTYPE_DEVICE) {
      if (!bas->irootloc_d) { /* Lazily mirror irootloc[] to device memory */
        cudaError_t err;
        size_t      size = bas->ioffset[bas->niranks]*sizeof(PetscInt);
        err = cudaMalloc((void **)&bas->irootloc_d,size);CHKERRCUDA(err);
        err = cudaMemcpy(bas->irootloc_d,bas->irootloc,size,cudaMemcpyHostToDevice);CHKERRCUDA(err);
      }
      *rootloc = bas->irootloc_d;
    }
#endif
    else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong PetscMemType in getting rootloc %d",(int)mtype);
  }
  PetscFunctionReturn(0);
}

/* Get leaf locations either on Host (CPU) or Device (GPU).
   The device copy (rmine_d) is lazily allocated and filled from the host array on first request. */
PETSC_STATIC_INLINE PetscErrorCode PetscSFGetLeafIndicesWithMemType_Basic(PetscSF sf,PetscMemType mtype, const PetscInt **leafloc)
{
  PetscFunctionBegin;
  if (leafloc) {
    if (mtype == PETSC_MEMTYPE_HOST) *leafloc = sf->rmine;
#if defined(PETSC_HAVE_CUDA)
    else if (mtype == PETSC_MEMTYPE_DEVICE) {
      if (!sf->rmine_d) { /* Lazily mirror rmine[] to device memory */
        cudaError_t err;
        size_t      size = sf->roffset[sf->nranks]*sizeof(PetscInt);
        err = cudaMalloc((void **)&sf->rmine_d,size);CHKERRCUDA(err);
        err = cudaMemcpy(sf->rmine_d,sf->rmine,size,cudaMemcpyHostToDevice);CHKERRCUDA(err);
      }
      *leafloc = sf->rmine_d;
    }
#endif
    else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong PetscMemType in getting leafloc %d",(int)mtype);
  }
  PetscFunctionReturn(0);
}

/* A convenience struct to provide info to the following (un)packing routines so that we can treat packings to self and to remote in one loop. */
typedef struct _n_PackInfo {
  PetscInt       count;  /* Number of entries to pack, unpack etc. */
  const PetscInt *idx;   /* Indices of the entries. NULL for contiguous indices [0,count) */
  PetscSFPackOpt opt;    /* Pack optimizations */
  char           *buf;   /* The contiguous buffer where we pack to or unpack from */
  PetscBool      atomic; /* Whether the unpack routine needs to use atomics */
} PackInfo;

/* Utility routine to pack selected entries of rootdata into root buffer.
   Input Arguments:
  + sf       - The SF this packing works on.
  . link     - The PetscSFPack, which gives the memtype of the roots and also provides root buffer.
  . rootloc  - Indices of the roots, only meaningful if the root space is sparse
  . rootdata - Where to read the roots.
  - sparse   - Is the root space sparse (for SFBasic, SFNeighbor) or dense (for SFAllgatherv etc)
 */
PETSC_STATIC_INLINE PetscErrorCode PetscSFPackRootData(PetscSF sf,PetscSFPack link,const PetscInt *rootloc,const void *rootdata,PetscBool sparse)
{
  PetscErrorCode ierr;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscInt       i;
  PetscErrorCode (*Pack)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,const void*,void*);
  PackInfo       p[2]; /* p[0]: self (distinguished ranks), p[1]: remote */

  PetscFunctionBegin;
  if (sparse) {
    /* For SFBasic and SFNeighbor, whose root space is sparse and have separate buffers for self and remote. */
    p[0].count  = bas->ioffset[bas->ndiranks];        p[1].count  = bas->ioffset[bas->niranks]-bas->ioffset[bas->ndiranks];
    p[0].idx    = rootloc;                            p[1].idx    = rootloc+bas->ioffset[bas->ndiranks];
    p[0].opt    = bas->selfrootpackopt;               p[1].opt    = bas->rootpackopt;
    p[0].buf    = link->selfbuf[link->rootmtype];     p[1].buf    = link->rootbuf[link->rootmtype];
    p[0].atomic = PETSC_FALSE;                        p[1].atomic = PETSC_FALSE;
  } else {
    /* For SFAllgatherv etc collectives, which have a dense root space and do not differentiate self/remote communication. */
    p[0].count  = sf->nroots;
    p[0].idx    = NULL;
    p[0].opt    = NULL;
    p[0].buf    = link->rootbuf[link->rootmtype];
    p[0].atomic = PETSC_FALSE;
    p[1].count  = 0;
  }

  ierr = PetscSFPackGetPack(link,link->rootmtype,&Pack);CHKERRQ(ierr);
  /* Only do packing when count != 0 so that we can avoid invoking CUDA kernels on GPU. */
  for (i=0; i<2; i++) {if (p[i].count) {ierr = (*Pack)(p[i].count,p[i].idx,link,p[i].opt,rootdata,p[i].buf);CHKERRQ(ierr);}}
#if defined(PETSC_HAVE_CUDA)
  /* Let's assume MPI does not do cudaDeviceSynchronize() at entrance, which I do not know.
     Then we have to sync the packing kernel to make rootbuf & selfbuf ready for MPI & self to self copy */
  if (link->rootmtype == PETSC_MEMTYPE_DEVICE && (p[0].count || p[1].count)) {cudaError_t err = cudaStreamSynchronize(link->stream);CHKERRCUDA(err);}
#endif
  PetscFunctionReturn(0);
}

/* Utility routine to pack selected entries of leafdata into leaf buffer */
PETSC_STATIC_INLINE PetscErrorCode PetscSFPackLeafData(PetscSF sf,PetscSFPack link,const PetscInt *leafloc,const void *leafdata,PetscBool sparse)
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscErrorCode (*Pack)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,const void*,void*);
  PackInfo       p[2]; /* p[0]: self (distinguished ranks), p[1]: remote */

  PetscFunctionBegin;
  if (sparse) {
    p[0].count  = sf->roffset[sf->ndranks];           p[1].count  = sf->roffset[sf->nranks]-sf->roffset[sf->ndranks];
    p[0].idx    = leafloc;                            p[1].idx    = leafloc+sf->roffset[sf->ndranks];
    p[0].opt    = sf->selfleafpackopt;                p[1].opt    = sf->leafpackopt;
    p[0].buf    = link->selfbuf[link->leafmtype];     p[1].buf    = link->leafbuf[link->leafmtype];
    p[0].atomic = PETSC_FALSE;                        p[1].atomic = PETSC_FALSE;
  } else {
    p[0].count  = sf->nleaves;
    p[0].idx    = NULL;
    p[0].opt    = NULL;
    p[0].buf    = link->leafbuf[link->leafmtype];
    p[0].atomic = PETSC_FALSE;
    p[1].count  = 0;
  }

  ierr = PetscSFPackGetPack(link,link->leafmtype,&Pack);CHKERRQ(ierr);
  /* Only do packing when count != 0 so that we can avoid invoking CUDA kernels on GPU. */
  for (i=0; i<2; i++) {if (p[i].count) {ierr = (*Pack)(p[i].count,p[i].idx,link,p[i].opt,leafdata,p[i].buf);CHKERRQ(ierr);}}
#if defined(PETSC_HAVE_CUDA)
  /* Sync the packing kernel so the buffers are ready for MPI & self-to-self copy */
  if (link->leafmtype == PETSC_MEMTYPE_DEVICE && (p[0].count || p[1].count)) {cudaError_t err = cudaStreamSynchronize(link->stream);CHKERRCUDA(err);}
#endif
  PetscFunctionReturn(0);
}

/* Utility routine to unpack data from root buffer and Op it into selected entries of rootdata */
PETSC_STATIC_INLINE PetscErrorCode PetscSFUnpackAndOpRootData(PetscSF sf,PetscSFPack link,const PetscInt *rootloc,void *rootdata,MPI_Op op,PetscBool sparse)
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscErrorCode (*UnpackAndOp)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
  PackInfo       p[2]; /* p[0]: self (distinguished ranks), p[1]: remote */

  PetscFunctionBegin;
  if (sparse) {
    p[0].count  = bas->ioffset[bas->ndiranks];        p[1].count  = bas->ioffset[bas->niranks]-bas->ioffset[bas->ndiranks];
    p[0].idx    = rootloc;                            p[1].idx    = rootloc+bas->ioffset[bas->ndiranks];
    p[0].opt    = bas->selfrootpackopt;               p[1].opt    = bas->rootpackopt;
    p[0].buf    = link->selfbuf[link->rootmtype];     p[1].buf    = link->rootbuf[link->rootmtype];
    p[0].atomic = bas->selfrootdups;                  p[1].atomic = bas->remoterootdups; /* dup indices imply possible data race, so use atomics */
  } else {
    p[0].count  = sf->nroots;
    p[0].idx    = NULL;
    p[0].opt    = NULL;
    p[0].buf    = link->rootbuf[link->rootmtype];
    p[0].atomic = PETSC_FALSE;
    p[1].count  = 0;
  }

  for (i=0; i<2; i++) {
    if (p[i].count) {
      ierr = PetscSFPackGetUnpackAndOp(link,link->rootmtype,op,p[i].atomic,&UnpackAndOp);CHKERRQ(ierr);
      if (UnpackAndOp) {ierr = (*UnpackAndOp)(p[i].count,p[i].idx,link,p[i].opt,rootdata,p[i].buf);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
      else { /* No native routine for this op; fall back to MPI_Reduce_local */
        PetscInt    j;
        PetscMPIInt n;
        if (p[i].idx) {
          /* Note if done on GPU, this can be very slow due to the huge number of kernel calls. The op is likely user-defined. We must
             use link->unit (instead of link->basicunit) as the datatype and 1 (instead of link->bs) as the count in MPI_Reduce_local.
           */
          for (j=0; j<p[i].count; j++) {ierr = MPI_Reduce_local(p[i].buf+j*link->unitbytes,(char *)rootdata+p[i].idx[j]*link->unitbytes,1,link->unit,op);CHKERRQ(ierr);}
        } else {
          ierr = PetscMPIIntCast(p[i].count,&n);CHKERRQ(ierr);
          ierr = MPI_Reduce_local(p[i].buf,rootdata,n,link->unit,op);CHKERRQ(ierr);
        }
      }
#else
      else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
#endif
    }
  }
#if defined(PETSC_HAVE_CUDA)
  /* Sync the unpack kernel so rootdata is ready for the caller */
  if (link->rootmtype == PETSC_MEMTYPE_DEVICE && (p[0].count || p[1].count)) {cudaError_t err = cudaStreamSynchronize(link->stream);CHKERRCUDA(err);}
#endif
  PetscFunctionReturn(0);
}

/* Utility routine to unpack data from leaf buffer and Op it into selected entries of leafdata */
PETSC_STATIC_INLINE PetscErrorCode PetscSFUnpackAndOpLeafData(PetscSF sf,PetscSFPack link,const PetscInt *leafloc,void *leafdata,MPI_Op op,PetscBool sparse)
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscErrorCode (*UnpackAndOp)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
  PackInfo       p[2]; /* p[0]: self (distinguished ranks), p[1]: remote */

  PetscFunctionBegin;
  if (sparse) {
    p[0].count  = sf->roffset[sf->ndranks];           p[1].count  = sf->roffset[sf->nranks]-sf->roffset[sf->ndranks];
    p[0].idx    = leafloc;                            p[1].idx    = leafloc+sf->roffset[sf->ndranks];
    p[0].opt    = sf->selfleafpackopt;                p[1].opt    = sf->leafpackopt;
    p[0].buf    = link->selfbuf[link->leafmtype];     p[1].buf    = link->leafbuf[link->leafmtype];
    p[0].atomic = sf->selfleafdups;                   p[1].atomic = sf->remoteleafdups; /* dup indices imply possible data race, so use atomics */
  } else {
    p[0].count  = sf->nleaves;
    p[0].idx    = NULL;
    p[0].opt    = NULL;
    p[0].buf    = link->leafbuf[link->leafmtype];
    p[0].atomic = PETSC_FALSE;
    p[1].count  = 0;
  }

  for (i=0; i<2; i++) {
    if (p[i].count) {
      ierr = PetscSFPackGetUnpackAndOp(link,link->leafmtype,op,p[i].atomic,&UnpackAndOp);CHKERRQ(ierr);
      if (UnpackAndOp) {ierr = (*UnpackAndOp)(p[i].count,p[i].idx,link,p[i].opt,leafdata,p[i].buf);CHKERRQ(ierr);}
#if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
      else { /* No native routine for this op; fall back to MPI_Reduce_local (see note in PetscSFUnpackAndOpRootData) */
        PetscInt    j;
        PetscMPIInt n;
        if (p[i].idx) {
          for (j=0; j<p[i].count; j++) {ierr = MPI_Reduce_local(p[i].buf+j*link->unitbytes,(char *)leafdata+p[i].idx[j]*link->unitbytes,1,link->unit,op);CHKERRQ(ierr);}
        } else {
          ierr = PetscMPIIntCast(p[i].count,&n);CHKERRQ(ierr);
          ierr = MPI_Reduce_local(p[i].buf,leafdata,n,link->unit,op);CHKERRQ(ierr);
        }
      }
#else
      else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
#endif
    }
  }
#if defined(PETSC_HAVE_CUDA)
  /* Sync the unpack kernel so leafdata is ready for the caller */
  if (link->leafmtype == PETSC_MEMTYPE_DEVICE && (p[0].count || p[1].count)) {cudaError_t err = cudaStreamSynchronize(link->stream);CHKERRCUDA(err);}
#endif
  PetscFunctionReturn(0);
}

/* Utility routine to fetch and Op selected entries of rootdata */
PETSC_STATIC_INLINE PetscErrorCode PetscSFFetchAndOpRootData(PetscSF sf,PetscSFPack link,const PetscInt *rootloc,void *rootdata,MPI_Op op,PetscBool sparse)
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;
  PetscErrorCode (*FetchAndOp)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,void*);
  PackInfo       p[2]; /* p[0]: self (distinguished ranks), p[1]: remote */

  PetscFunctionBegin;
  if (sparse) {
    /* For SFBasic and SFNeighbor, whose root space is sparse and have separate buffers for self and remote. */
    p[0].count  = bas->ioffset[bas->ndiranks];        p[1].count  = bas->ioffset[bas->niranks]-bas->ioffset[bas->ndiranks];
    p[0].idx    = rootloc;                            p[1].idx    = rootloc+bas->ioffset[bas->ndiranks];
    p[0].opt    = bas->selfrootpackopt;               p[1].opt    = bas->rootpackopt;
    p[0].buf    = link->selfbuf[link->rootmtype];     p[1].buf    = link->rootbuf[link->rootmtype];
    p[0].atomic = bas->selfrootdups;                  p[1].atomic = bas->remoterootdups; /* dup indices imply possible data race, so use atomics */
  } else {
    p[0].count  = sf->nroots;
    p[0].idx    = NULL;
    p[0].opt    = NULL;
    p[0].buf    = link->rootbuf[link->rootmtype];
    p[0].atomic = PETSC_FALSE;
    p[1].count  = 0;
  }

  for (i=0; i<2; i++) {
    if (p[i].count) {
      ierr = PetscSFPackGetFetchAndOp(link,link->rootmtype,op,p[i].atomic,&FetchAndOp);CHKERRQ(ierr);
      ierr = (*FetchAndOp)(p[i].count,p[i].idx,link,p[i].opt,rootdata,p[i].buf);CHKERRQ(ierr);
    }
  }
#if defined(PETSC_HAVE_CUDA)
  /* Sync the fetch-and-op kernel so rootdata and the buffers are ready for the caller */
  if (link->rootmtype == PETSC_MEMTYPE_DEVICE && (p[0].count || p[1].count)) {cudaError_t err = cudaStreamSynchronize(link->stream);CHKERRCUDA(err);}
#endif
  PetscFunctionReturn(0);
}

/* Build (un)packing optimization plans for the self/remote leaf and root index lists,
   and (with CUDA) precompute whether those lists contain duplicate indices. */
PETSC_STATIC_INLINE PetscErrorCode PetscSFPackSetupOptimizations_Basic(PetscSF sf)
{
  PetscErrorCode ierr;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;

  PetscFunctionBegin;
  ierr = PetscSFPackOptCreate(sf->ndranks,            sf->roffset,             sf->rmine,   &sf->selfleafpackopt);CHKERRQ(ierr);
  ierr = PetscSFPackOptCreate(sf->nranks-sf->ndranks, sf->roffset+sf->ndranks, sf->rmine,   &sf->leafpackopt);CHKERRQ(ierr);
  ierr = PetscSFPackOptCreate(bas->ndiranks,          bas->ioffset,            bas->irootloc,&bas->selfrootpackopt);CHKERRQ(ierr);
  ierr = PetscSFPackOptCreate(bas->niranks-bas->ndiranks,bas->ioffset+bas->ndiranks,bas->irootloc,&bas->rootpackopt);CHKERRQ(ierr);

#if defined(PETSC_HAVE_CUDA)
  /* Check duplicates in irootloc[] so CUDA packing kernels can use cheaper regular operations
     instead of atomics to unpack data on leaves/roots, when they know there is no data race.
   */
  ierr = PetscCheckDupsInt(sf->roffset[sf->ndranks],  sf->rmine,  &sf->selfleafdups);CHKERRQ(ierr);
  ierr = PetscCheckDupsInt(sf->roffset[sf->nranks]-sf->roffset[sf->ndranks],  sf->rmine+sf->roffset[sf->ndranks],  &sf->remoteleafdups);CHKERRQ(ierr);
  ierr = PetscCheckDupsInt(bas->ioffset[bas->ndiranks],  bas->irootloc,  &bas->selfrootdups);CHKERRQ(ierr);
  ierr = PetscCheckDupsInt(bas->ioffset[bas->niranks]-bas->ioffset[bas->ndiranks],bas->irootloc+bas->ioffset[bas->ndiranks],&bas->remoterootdups);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

/* Destroy the (un)packing optimization plans and reset the dup flags to their conservative defaults. */
PETSC_STATIC_INLINE PetscErrorCode PetscSFPackDestroyOptimizations_Basic(PetscSF sf)
{
  PetscErrorCode ierr;
  PetscSF_Basic  *bas = (PetscSF_Basic*)sf->data;

  PetscFunctionBegin;
  ierr = PetscSFPackOptDestroy(&sf->leafpackopt);CHKERRQ(ierr);
  ierr = PetscSFPackOptDestroy(&sf->selfleafpackopt);CHKERRQ(ierr);
  ierr = PetscSFPackOptDestroy(&bas->rootpackopt);CHKERRQ(ierr);
  ierr = PetscSFPackOptDestroy(&bas->selfrootpackopt);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  sf->selfleafdups    = PETSC_TRUE;
  sf->remoteleafdups  = PETSC_TRUE;
  bas->selfrootdups   = PETSC_TRUE; /* The default is assuming there are dups so that atomics are used. */
  bas->remoterootdups = PETSC_TRUE;
#endif
  PetscFunctionReturn(0);
}

/* Wait for all outstanding root and leaf MPI requests of the given direction on this link. */
PETSC_STATIC_INLINE PetscErrorCode PetscSFPackWaitall_Basic(PetscSFPack link,PetscSFDirection direction)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MPI_Waitall(link->nrootreqs,link->rootreqs[direction][link->rootmtype],MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  ierr = MPI_Waitall(link->nleafreqs,link->leafreqs[direction][link->leafmtype],MPI_STATUSES_IGNORE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode PetscSFSetUp_Basic(PetscSF);
PETSC_INTERN PetscErrorCode PetscSFView_Basic(PetscSF,PetscViewer);
PETSC_INTERN PetscErrorCode PetscSFReset_Basic(PetscSF);
PETSC_INTERN PetscErrorCode PetscSFDestroy_Basic(PetscSF);
PETSC_INTERN PetscErrorCode PetscSFBcastAndOpEnd_Basic (PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType, void*, MPI_Op);
PETSC_INTERN PetscErrorCode PetscSFReduceEnd_Basic     (PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType, void*, MPI_Op);
PETSC_INTERN PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF,MPI_Datatype,PetscMemType, void*,PetscMemType,const void*,void*,MPI_Op);
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedSF_Basic(PetscSF,PetscInt,const PetscInt*,PetscSF*);
PETSC_INTERN PetscErrorCode PetscSFCreateEmbeddedLeafSF_Basic(PetscSF,PetscInt,const PetscInt*,PetscSF*);
PETSC_INTERN PetscErrorCode PetscSFGetLeafRanks_Basic(PetscSF,PetscInt*,const PetscMPIInt**,const PetscInt**,const PetscInt**);
PETSC_INTERN PetscErrorCode PetscSFPackGet_Basic_Common(PetscSF,MPI_Datatype,PetscMemType,const void*,PetscMemType,const void*,PetscInt,PetscInt,PetscSFPack*);
#endif