/* xref: /petsc/src/vec/is/sf/impls/basic/sfpack.h (revision 5ad154601493f05a65d6299cdb7ac3e5ec86a07d) */
#if !defined(__SFPACK_H)
#define __SFPACK_H

#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/

/* Optimization plans in packing & unpacking for destination ranks.

  Suppose there are count indices stored in idx[], and two addresses u, p. We want to do packing:
     p[i] = u[idx[i]], for i in [0,count)

  Often, the indices are associated with n ranks. Each rank's indices are stored consecutively in idx[].
  We analyze the indices for each rank to see if there are patterns that can be used to optimize the packing.
  The result is stored in PetscSFPackOpt. Packing for a rank might be non-optimizable, or optimized into
  a small number of contiguous memory copies or one strided memory copy.
 */
/* How the packing of one rank's indices is optimized: not at all (NONE), as a small number of
   contiguous memory copies (MULTICOPY), or as one strided memory copy (STRIDE) */
typedef enum {PETSCSF_PACKOPT_NONE=0, PETSCSF_PACKOPT_MULTICOPY, PETSCSF_PACKOPT_STRIDE} PetscSFPackOptType;

1840e23c03SJunchao Zhang struct _n_PetscSFPackOpt {
19b23bfdefSJunchao Zhang   PetscInt           n;             /* Number of destination ranks */
20b23bfdefSJunchao Zhang   PetscSFPackOptType *type;         /* [n] Optimization types for the n ranks */
21b23bfdefSJunchao Zhang   PetscInt           *offset;       /* [n+1] Indices for i-th rank are in [offset[i],offset[i+1]) of idx[] */
22b23bfdefSJunchao Zhang   PetscInt           *copy_offset;  /* [n+1] If type[i] = PETSCSF_PACKOPT_MULTICOPY, packing for i-th rank is optimized into copies numbered between [copy_offset[i],copy_offset[i+1]) */
23b23bfdefSJunchao Zhang   PetscInt           *copy_start;   /* [*]     j-th copy starts at copy_start[j] in idx[]. In other words, there are copy_length[j] contiguous indices */
24eb02082bSJunchao Zhang   PetscInt           *copy_length;  /* [*]     starting at idx[copy_start[j]] */
25b23bfdefSJunchao Zhang   PetscInt           *stride_step;  /* [n]   If type[i] = PETSCSF_PACKOPT_STRIDE, then packing for i-th rank is strided, with first index being idx[offset[i]] and step stride_step[i], */
2640e23c03SJunchao Zhang   PetscInt           *stride_n;     /* [n]     and total stride_n[i] steps */
2740e23c03SJunchao Zhang };
2840e23c03SJunchao Zhang 
/* Handle to a communication link (pointer to struct _n_PetscSFPack) */
typedef struct _n_PetscSFPack* PetscSFPack;

31eb02082bSJunchao Zhang /* An abstract class that defines a communication link, which includes how to pack/unpack data and send/recv buffers
3240e23c03SJunchao Zhang  */
3340e23c03SJunchao Zhang struct _n_PetscSFPack {
34eb02082bSJunchao Zhang   PetscErrorCode (*h_Pack)            (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,const void*,void*);
35eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndInsert) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
36eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndAdd)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
37eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndMin)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
38eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndMax)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
39eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndMinloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
40eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndMaxloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
41eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndMult)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
42eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndLAND)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
43eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndBAND)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
44eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndLOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
45eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndBOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
46eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndLXOR)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
47eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndBXOR)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
48eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndInsert)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
49eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndAdd)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
50eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndMin)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
51eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndMax)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
52eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndMinloc)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
53eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndMaxloc)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
54eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndMult)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
55eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndLAND)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
56eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndBAND)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
57eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndLOR)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
58eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndBOR)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
59eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndLXOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
60eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndBXOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
61eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
62eb02082bSJunchao Zhang   /* These fields are lazily initialized in a sense that only when device pointers are passed to an SF, the SF
63eb02082bSJunchao Zhang      will set them, otherwise it just leaves them alone even though PETSC_HAVE_CUDA. Packing routines using
64eb02082bSJunchao Zhang      regular ops when there are no data race chances.
65eb02082bSJunchao Zhang   */
66eb02082bSJunchao Zhang   PetscBool      deviceinited;        /* Are device related fields initialized? */
67eb02082bSJunchao Zhang   PetscErrorCode (*d_Pack)            (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,const void*,void*);
68eb02082bSJunchao Zhang 
69eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndInsert) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
70eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndAdd)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
71eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndMin)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
72eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndMax)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
73eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndMinloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
74eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndMaxloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
75eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndMult)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
76eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndLAND)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
77eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndBAND)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
78eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndLOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
79eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndBOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
80eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndLXOR)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
81eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndBXOR)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
82eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndInsert)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
83eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndAdd)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
84eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndMin)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
85eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndMax)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
86eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndMinloc)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
87eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndMaxloc)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
88eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndMult)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
89eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndLAND)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
90eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndBAND)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
91eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndLOR)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
92eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndBOR)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
93eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndLXOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
94eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndBXOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
95eb02082bSJunchao Zhang 
96eb02082bSJunchao Zhang   /* Packing routines using atomics when there are data race chances */
97eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndInsert)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
98eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndAdd)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
99eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndMin)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
100eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndMax)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
101eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndMinloc)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
102eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndMaxloc)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
103eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndMult)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
104eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndLAND)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
105eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndBAND)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
106eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndLOR)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
107eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndBOR)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
108eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndLXOR)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
109eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndBXOR)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
110eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndInsert) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
111eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndAdd)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
112eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndMin)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
113eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndMax)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
114eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndMinloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
115eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndMaxloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
116eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndMult)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
117eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndLAND)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
118eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndBAND)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
119eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndLOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
120eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndBOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
121eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndLXOR)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
122eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndBXOR)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
123eb02082bSJunchao Zhang 
124eb02082bSJunchao Zhang   PetscInt       MAX_CORESIDENT_THREADS; /* It is a copy from SF for convenience. */
125eb02082bSJunchao Zhang   cudaStream_t   stream;                 /* Stream to launch pack/unapck kernels if not using the default stream */
126eb02082bSJunchao Zhang #endif
127eb02082bSJunchao Zhang   PetscMPIInt    tag;                    /* Each link has a tag so we can perform multiple SF ops at the same time */
128*5ad15460SJunchao Zhang   MPI_Datatype   unit;                   /* The MPI datatype this PetscSFPack is built for */
129eb02082bSJunchao Zhang   MPI_Datatype   basicunit;              /* unit is made of MPI builtin dataype basicunit */
130*5ad15460SJunchao Zhang   PetscBool      isbuiltin;              /* Is unit an MPI/PETSc builtin datatype? If it is true, basicunit=unit, bs=1 */
131eb02082bSJunchao Zhang   size_t         unitbytes;              /* Number of bytes in a unit */
132eb02082bSJunchao Zhang   PetscInt       bs;                     /* Number of basic units in a unit */
133eb02082bSJunchao Zhang   const void     *rkey,*lkey;            /* rootdata and leafdata used as keys for operation */
134eb02082bSJunchao Zhang   char           *rootbuf[2];            /* Buffer for packed roots on Host (0 or PETSC_MEMTYPE_HOST) or Device (1 or PETSC_MEMTYPE_DEVICE) */
135eb02082bSJunchao Zhang   char           *leafbuf[2];            /* Buffer for packed leaves on Host (0) or Device (1) */
136eb02082bSJunchao Zhang   char           *selfbuf[2];            /* Buffer for roots in self to self communication on Host (0) or Device (1) */
137eb02082bSJunchao Zhang   PetscInt       rootbuflen;             /* Length of root buffer in <unit> */
138eb02082bSJunchao Zhang   PetscInt       leafbuflen;             /* Length of leaf buffer in <unit> */
139eb02082bSJunchao Zhang   PetscInt       selfbuflen;             /* Length of self buffer in <unit> */
140eb02082bSJunchao Zhang   PetscMemType   rootmtype;              /* rootdata's memory type */
141eb02082bSJunchao Zhang   PetscMemType   leafmtype;              /* leafdata's memory type */
142eb02082bSJunchao Zhang   PetscMPIInt    nrootreqs;              /* Number of root requests */
143eb02082bSJunchao Zhang   PetscMPIInt    nleafreqs;              /* Number of leaf requests */
144eb02082bSJunchao Zhang   MPI_Request    *rootreqs[2][2];        /* Pointers to root requests in this layout [PETSCSF_DIRECTION][PETSC_MEMTYPE] */
145eb02082bSJunchao Zhang   MPI_Request    *leafreqs[2][2];        /* Pointers to leaf requests in this layout [PETSCSF_DIRECTION][PETSC_MEMTYPE] */
146eb02082bSJunchao Zhang   PetscBool      rootreqsinited[2][2];   /* Are root requests initialized? Also in layout of [PETSCSF_DIRECTION][PETSC_MEMTYPE]*/
147eb02082bSJunchao Zhang   PetscBool      leafreqsinited[2][2];   /* Are leaf requests initialized? Also in layout of [PETSCSF_DIRECTION][PETSC_MEMTYPE]*/
148eb02082bSJunchao Zhang   MPI_Request    *reqs;                  /* An array of length (nrootreqs+nleafreqs)*4. Pointers in rootreqs[][] and leafreqs[][] point here */
149eb02082bSJunchao Zhang   PetscSFPack    next;
15040e23c03SJunchao Zhang };
15140e23c03SJunchao Zhang 
1529d1c8addSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetInUse(PetscSF,MPI_Datatype,const void*,const void*,PetscCopyMode,PetscSFPack*);
15340e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackReclaim(PetscSF,PetscSFPack*);
154eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackDestoryAvailable(PetscSFPack*);
155eb02082bSJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscSFPackGetPack(PetscSFPack link,PetscMemType mtype,PetscErrorCode (**Pack)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,const void*,void*))
156eb02082bSJunchao Zhang {
157eb02082bSJunchao Zhang   PetscFunctionBegin;
158eb02082bSJunchao Zhang   *Pack = NULL;
159eb02082bSJunchao Zhang   if (mtype == PETSC_MEMTYPE_HOST)        *Pack = link->h_Pack;
160eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
161eb02082bSJunchao Zhang   else if (mtype == PETSC_MEMTYPE_DEVICE) *Pack = link->d_Pack;
162eb02082bSJunchao Zhang #endif
163eb02082bSJunchao Zhang   else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong PetscMemType %D",mtype);
164eb02082bSJunchao Zhang   PetscFunctionReturn(0);
165eb02082bSJunchao Zhang }
166eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetUnpackAndOp(PetscSFPack,PetscMemType,MPI_Op,PetscBool,PetscErrorCode (**UnpackAndOp)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*));
167eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetFetchAndOp (PetscSFPack,PetscMemType,MPI_Op,PetscBool,PetscErrorCode (**FetchAndOp) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,void*));
1689d1c8addSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetErrorOnUnsupportedOverlap(PetscSF,MPI_Datatype,const void*,const void*);
169eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetUp_Host(PetscSF,PetscSFPack,MPI_Datatype);
170eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
171eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetUp_Device(PetscSF,PetscSFPack,MPI_Datatype);
172eb02082bSJunchao Zhang #endif
173eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackOptCreate(PetscInt,const PetscInt*,const PetscInt*,PetscSFPackOpt*);
174eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackOptDestory(PetscSFPackOpt *out);
#endif