140e23c03SJunchao Zhang #if !defined(__SFPACK_H) 240e23c03SJunchao Zhang #define __SFPACK_H 340e23c03SJunchao Zhang 440e23c03SJunchao Zhang #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 540e23c03SJunchao Zhang 6b23bfdefSJunchao Zhang /* Optimization plans in packing & unpacking for destination ranks. 740e23c03SJunchao Zhang 8b23bfdefSJunchao Zhang Suppose there are count indices stored in idx[], and two addresses u, p. We want to do packing: 9b23bfdefSJunchao Zhang p[i] = u[idx[i]], for i in [0,count) 1040e23c03SJunchao Zhang 11b23bfdefSJunchao Zhang Often, the indices are associated with n ranks. Each rank's indices are stored consecutively in idx[]. 12b23bfdefSJunchao Zhang We analyze indices for each rank and see if they are patterns that can be used to optimize the packing. 13*eb02082bSJunchao Zhang The result is stored in PetscSFPackOpt. Packing for a rank might be non-optimizable, or optimized into 14*eb02082bSJunchao Zhang a small number of contiguous memory copies or one strided memory copy. 1540e23c03SJunchao Zhang */ 16b23bfdefSJunchao Zhang typedef enum {PETSCSF_PACKOPT_NONE=0, PETSCSF_PACKOPT_MULTICOPY, PETSCSF_PACKOPT_STRIDE} PetscSFPackOptType; 17b23bfdefSJunchao Zhang 1840e23c03SJunchao Zhang struct _n_PetscSFPackOpt { 19b23bfdefSJunchao Zhang PetscInt n; /* Number of destination ranks */ 20b23bfdefSJunchao Zhang PetscSFPackOptType *type; /* [n] Optimization types for the n ranks */ 21b23bfdefSJunchao Zhang PetscInt *offset; /* [n+1] Indices for i-th rank are in [offset[i],offset[i+1]) of idx[] */ 22b23bfdefSJunchao Zhang PetscInt *copy_offset; /* [n+1] If type[i] = PETSCSF_PACKOPT_MULTICOPY, packing for i-th rank is optimized into copies numbered between [copy_offset[i],copy_offset[i+1]) */ 23b23bfdefSJunchao Zhang PetscInt *copy_start; /* [*] j-th copy starts at copy_start[j] in idx[]. In other words, there are copy_length[j] contiguous indices */ 24*eb02082bSJunchao Zhang PetscInt *copy_length; /* [*] starting at idx[copy_start[j]] */ 25b23bfdefSJunchao Zhang PetscInt *stride_step; /* [n] If type[i] = PETSCSF_PACKOPT_STRIDE, then packing for i-th rank is strided, with first index being idx[offset[i]] and step stride_step[i], */ 2640e23c03SJunchao Zhang PetscInt *stride_n; /* [n] and total stride_n[i] steps */ 2740e23c03SJunchao Zhang }; 2840e23c03SJunchao Zhang 2940e23c03SJunchao Zhang typedef struct _n_PetscSFPack* PetscSFPack; 3040e23c03SJunchao Zhang 31*eb02082bSJunchao Zhang /* An abstract class that defines a communication link, which includes how to pack/unpack data and send/recv buffers 3240e23c03SJunchao Zhang */ 3340e23c03SJunchao Zhang struct _n_PetscSFPack { 34*eb02082bSJunchao Zhang PetscErrorCode (*h_Pack) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,const void*,void*); 35*eb02082bSJunchao Zhang PetscErrorCode (*h_UnpackAndInsert) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 36*eb02082bSJunchao Zhang PetscErrorCode (*h_UnpackAndAdd) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 37*eb02082bSJunchao Zhang PetscErrorCode (*h_UnpackAndMin) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 38*eb02082bSJunchao Zhang PetscErrorCode (*h_UnpackAndMax) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 39*eb02082bSJunchao Zhang PetscErrorCode (*h_UnpackAndMinloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 40*eb02082bSJunchao Zhang PetscErrorCode (*h_UnpackAndMaxloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 41*eb02082bSJunchao Zhang PetscErrorCode (*h_UnpackAndMult) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 42*eb02082bSJunchao Zhang PetscErrorCode (*h_UnpackAndLAND) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 43*eb02082bSJunchao Zhang PetscErrorCode (*h_UnpackAndBAND) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 44*eb02082bSJunchao Zhang PetscErrorCode (*h_UnpackAndLOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 45*eb02082bSJunchao Zhang PetscErrorCode (*h_UnpackAndBOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 46*eb02082bSJunchao Zhang PetscErrorCode (*h_UnpackAndLXOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 47*eb02082bSJunchao Zhang PetscErrorCode (*h_UnpackAndBXOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 48*eb02082bSJunchao Zhang PetscErrorCode (*h_FetchAndInsert) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 49*eb02082bSJunchao Zhang PetscErrorCode (*h_FetchAndAdd) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 50*eb02082bSJunchao Zhang PetscErrorCode (*h_FetchAndMin) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 51*eb02082bSJunchao Zhang PetscErrorCode (*h_FetchAndMax) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 52*eb02082bSJunchao Zhang PetscErrorCode (*h_FetchAndMinloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 53*eb02082bSJunchao Zhang PetscErrorCode (*h_FetchAndMaxloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 54*eb02082bSJunchao Zhang PetscErrorCode (*h_FetchAndMult) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 55*eb02082bSJunchao Zhang PetscErrorCode (*h_FetchAndLAND) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 56*eb02082bSJunchao Zhang PetscErrorCode (*h_FetchAndBAND) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 57*eb02082bSJunchao Zhang PetscErrorCode (*h_FetchAndLOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 58*eb02082bSJunchao Zhang PetscErrorCode (*h_FetchAndBOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 59*eb02082bSJunchao Zhang PetscErrorCode (*h_FetchAndLXOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 60*eb02082bSJunchao Zhang PetscErrorCode (*h_FetchAndBXOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 61*eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA) 62*eb02082bSJunchao Zhang /* These fields are lazily initialized in a sense that only when device pointers are passed to an SF, the SF 63*eb02082bSJunchao Zhang will set them, otherwise it just leaves them alone even though PETSC_HAVE_CUDA. Packing routines using 64*eb02082bSJunchao Zhang regular ops when there are no data race chances. 65*eb02082bSJunchao Zhang */ 66*eb02082bSJunchao Zhang PetscBool deviceinited; /* Are device related fields initialized? */ 67*eb02082bSJunchao Zhang PetscErrorCode (*d_Pack) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,const void*,void*); 68*eb02082bSJunchao Zhang 69*eb02082bSJunchao Zhang PetscErrorCode (*d_UnpackAndInsert) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 70*eb02082bSJunchao Zhang PetscErrorCode (*d_UnpackAndAdd) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 71*eb02082bSJunchao Zhang PetscErrorCode (*d_UnpackAndMin) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 72*eb02082bSJunchao Zhang PetscErrorCode (*d_UnpackAndMax) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 73*eb02082bSJunchao Zhang PetscErrorCode (*d_UnpackAndMinloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 74*eb02082bSJunchao Zhang PetscErrorCode (*d_UnpackAndMaxloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 75*eb02082bSJunchao Zhang PetscErrorCode (*d_UnpackAndMult) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 76*eb02082bSJunchao Zhang PetscErrorCode (*d_UnpackAndLAND) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 77*eb02082bSJunchao Zhang PetscErrorCode (*d_UnpackAndBAND) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 78*eb02082bSJunchao Zhang PetscErrorCode (*d_UnpackAndLOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 79*eb02082bSJunchao Zhang PetscErrorCode (*d_UnpackAndBOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 80*eb02082bSJunchao Zhang PetscErrorCode (*d_UnpackAndLXOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 81*eb02082bSJunchao Zhang PetscErrorCode (*d_UnpackAndBXOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 82*eb02082bSJunchao Zhang PetscErrorCode (*d_FetchAndInsert) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 83*eb02082bSJunchao Zhang PetscErrorCode (*d_FetchAndAdd) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 84*eb02082bSJunchao Zhang PetscErrorCode (*d_FetchAndMin) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 85*eb02082bSJunchao Zhang PetscErrorCode (*d_FetchAndMax) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 86*eb02082bSJunchao Zhang PetscErrorCode (*d_FetchAndMinloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 87*eb02082bSJunchao Zhang PetscErrorCode (*d_FetchAndMaxloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 88*eb02082bSJunchao Zhang PetscErrorCode (*d_FetchAndMult) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 89*eb02082bSJunchao Zhang PetscErrorCode (*d_FetchAndLAND) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 90*eb02082bSJunchao Zhang PetscErrorCode (*d_FetchAndBAND) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 91*eb02082bSJunchao Zhang PetscErrorCode (*d_FetchAndLOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 92*eb02082bSJunchao Zhang PetscErrorCode (*d_FetchAndBOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 93*eb02082bSJunchao Zhang PetscErrorCode (*d_FetchAndLXOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 94*eb02082bSJunchao Zhang PetscErrorCode (*d_FetchAndBXOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 95*eb02082bSJunchao Zhang 96*eb02082bSJunchao Zhang /* Packing routines using atomics when there are data race chances */ 97*eb02082bSJunchao Zhang PetscErrorCode (*da_UnpackAndInsert)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 98*eb02082bSJunchao Zhang PetscErrorCode (*da_UnpackAndAdd) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 99*eb02082bSJunchao Zhang PetscErrorCode (*da_UnpackAndMin) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 100*eb02082bSJunchao Zhang PetscErrorCode (*da_UnpackAndMax) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 101*eb02082bSJunchao Zhang PetscErrorCode (*da_UnpackAndMinloc)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 102*eb02082bSJunchao Zhang PetscErrorCode (*da_UnpackAndMaxloc)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 103*eb02082bSJunchao Zhang PetscErrorCode (*da_UnpackAndMult) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 104*eb02082bSJunchao Zhang PetscErrorCode (*da_UnpackAndLAND) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 105*eb02082bSJunchao Zhang PetscErrorCode (*da_UnpackAndBAND) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 106*eb02082bSJunchao Zhang PetscErrorCode (*da_UnpackAndLOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 107*eb02082bSJunchao Zhang PetscErrorCode (*da_UnpackAndBOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 108*eb02082bSJunchao Zhang PetscErrorCode (*da_UnpackAndLXOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 109*eb02082bSJunchao Zhang PetscErrorCode (*da_UnpackAndBXOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*); 110*eb02082bSJunchao Zhang PetscErrorCode (*da_FetchAndInsert) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 111*eb02082bSJunchao Zhang PetscErrorCode (*da_FetchAndAdd) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 112*eb02082bSJunchao Zhang PetscErrorCode (*da_FetchAndMin) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 113*eb02082bSJunchao Zhang PetscErrorCode (*da_FetchAndMax) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 114*eb02082bSJunchao Zhang PetscErrorCode (*da_FetchAndMinloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 115*eb02082bSJunchao Zhang PetscErrorCode (*da_FetchAndMaxloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 116*eb02082bSJunchao Zhang PetscErrorCode (*da_FetchAndMult) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 117*eb02082bSJunchao Zhang PetscErrorCode (*da_FetchAndLAND) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 118*eb02082bSJunchao Zhang PetscErrorCode (*da_FetchAndBAND) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 119*eb02082bSJunchao Zhang PetscErrorCode (*da_FetchAndLOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 120*eb02082bSJunchao Zhang PetscErrorCode (*da_FetchAndBOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 121*eb02082bSJunchao Zhang PetscErrorCode (*da_FetchAndLXOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 122*eb02082bSJunchao Zhang PetscErrorCode (*da_FetchAndBXOR) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*, void*); 123*eb02082bSJunchao Zhang 124*eb02082bSJunchao Zhang PetscInt MAX_CORESIDENT_THREADS; /* It is a copy from SF for convenience. */ 125*eb02082bSJunchao Zhang cudaStream_t stream; /* Stream to launch pack/unapck kernels if not using the default stream */ 126*eb02082bSJunchao Zhang #endif 127*eb02082bSJunchao Zhang PetscMPIInt tag; /* Each link has a tag so we can perform multiple SF ops at the same time */ 128*eb02082bSJunchao Zhang MPI_Datatype unit; 129*eb02082bSJunchao Zhang MPI_Datatype basicunit; /* unit is made of MPI builtin dataype basicunit */ 130*eb02082bSJunchao Zhang PetscBool isbuiltin; /* Is unit an MPI builtin datatype? If it is true, basicunit=unit, bs=1 */ 131*eb02082bSJunchao Zhang size_t unitbytes; /* Number of bytes in a unit */ 132*eb02082bSJunchao Zhang PetscInt bs; /* Number of basic units in a unit */ 133*eb02082bSJunchao Zhang const void *rkey,*lkey; /* rootdata and leafdata used as keys for operation */ 134*eb02082bSJunchao Zhang char *rootbuf[2]; /* Buffer for packed roots on Host (0 or PETSC_MEMTYPE_HOST) or Device (1 or PETSC_MEMTYPE_DEVICE) */ 135*eb02082bSJunchao Zhang char *leafbuf[2]; /* Buffer for packed leaves on Host (0) or Device (1) */ 136*eb02082bSJunchao Zhang char *selfbuf[2]; /* Buffer for roots in self to self communication on Host (0) or Device (1) */ 137*eb02082bSJunchao Zhang PetscInt rootbuflen; /* Length of root buffer in <unit> */ 138*eb02082bSJunchao Zhang PetscInt leafbuflen; /* Length of leaf buffer in <unit> */ 139*eb02082bSJunchao Zhang PetscInt selfbuflen; /* Length of self buffer in <unit> */ 140*eb02082bSJunchao Zhang PetscMemType rootmtype; /* rootdata's memory type */ 141*eb02082bSJunchao Zhang PetscMemType leafmtype; /* leafdata's memory type */ 142*eb02082bSJunchao Zhang PetscMPIInt nrootreqs; /* Number of root requests */ 143*eb02082bSJunchao Zhang PetscMPIInt nleafreqs; /* Number of leaf requests */ 144*eb02082bSJunchao Zhang MPI_Request *rootreqs[2][2]; /* Pointers to root requests in this layout [PETSCSF_DIRECTION][PETSC_MEMTYPE] */ 145*eb02082bSJunchao Zhang MPI_Request *leafreqs[2][2]; /* Pointers to leaf requests in this layout [PETSCSF_DIRECTION][PETSC_MEMTYPE] */ 146*eb02082bSJunchao Zhang PetscBool rootreqsinited[2][2]; /* Are root requests initialized? Also in layout of [PETSCSF_DIRECTION][PETSC_MEMTYPE]*/ 147*eb02082bSJunchao Zhang PetscBool leafreqsinited[2][2]; /* Are leaf requests initialized? Also in layout of [PETSCSF_DIRECTION][PETSC_MEMTYPE]*/ 148*eb02082bSJunchao Zhang MPI_Request *reqs; /* An array of length (nrootreqs+nleafreqs)*4. Pointers in rootreqs[][] and leafreqs[][] point here */ 149*eb02082bSJunchao Zhang PetscSFPack next; 15040e23c03SJunchao Zhang }; 15140e23c03SJunchao Zhang 1529d1c8addSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetInUse(PetscSF,MPI_Datatype,const void*,const void*,PetscCopyMode,PetscSFPack*); 15340e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackReclaim(PetscSF,PetscSFPack*); 154*eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackDestoryAvailable(PetscSFPack*); 155*eb02082bSJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscSFPackGetPack(PetscSFPack link,PetscMemType mtype,PetscErrorCode (**Pack)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,const void*,void*)) 156*eb02082bSJunchao Zhang { 157*eb02082bSJunchao Zhang PetscFunctionBegin; 158*eb02082bSJunchao Zhang *Pack = NULL; 159*eb02082bSJunchao Zhang if (mtype == PETSC_MEMTYPE_HOST) *Pack = link->h_Pack; 160*eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA) 161*eb02082bSJunchao Zhang else if (mtype == PETSC_MEMTYPE_DEVICE) *Pack = link->d_Pack; 162*eb02082bSJunchao Zhang #endif 163*eb02082bSJunchao Zhang else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong PetscMemType %D",mtype); 164*eb02082bSJunchao Zhang PetscFunctionReturn(0); 165*eb02082bSJunchao Zhang } 166*eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetUnpackAndOp(PetscSFPack,PetscMemType,MPI_Op,PetscBool,PetscErrorCode (**UnpackAndOp)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*)); 167*eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetFetchAndOp (PetscSFPack,PetscMemType,MPI_Op,PetscBool,PetscErrorCode (**FetchAndOp) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,void*)); 1689d1c8addSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetErrorOnUnsupportedOverlap(PetscSF,MPI_Datatype,const void*,const void*); 169*eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetUp_Host(PetscSF,PetscSFPack,MPI_Datatype); 170*eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA) 171*eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetUp_Device(PetscSF,PetscSFPack,MPI_Datatype); 172*eb02082bSJunchao Zhang #endif 173*eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackOptCreate(PetscInt,const PetscInt*,const PetscInt*,PetscSFPackOpt*); 174*eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackOptDestory(PetscSFPackOpt *out); 17540e23c03SJunchao Zhang #endif 176