xref: /petsc/src/vec/is/sf/impls/basic/sfpack.h (revision eb02082b21b627ae704d1b64176a7a4b8db6ed4b)
140e23c03SJunchao Zhang #if !defined(__SFPACK_H)
240e23c03SJunchao Zhang #define __SFPACK_H
340e23c03SJunchao Zhang 
440e23c03SJunchao Zhang #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
540e23c03SJunchao Zhang 
/* Optimization plans in packing & unpacking for destination ranks.

  Suppose there are count indices stored in idx[], and two addresses u, p. We want to do packing:
     p[i] = u[idx[i]], for i in [0,count)

  Often, the indices are associated with n ranks. Each rank's indices are stored consecutively in idx[].
  We analyze the indices of each rank to see whether they form patterns that can be used to optimize the packing.
  The result is stored in PetscSFPackOpt. Packing for a rank might be non-optimizable, or optimized into
  a small number of contiguous memory copies or one strided memory copy.
 */
/* Kind of packing optimization selected for the indices of one rank */
typedef enum {
  PETSCSF_PACKOPT_NONE=0,     /* Packing for the rank is not optimized */
  PETSCSF_PACKOPT_MULTICOPY,  /* Packing is optimized into a small number of contiguous memory copies */
  PETSCSF_PACKOPT_STRIDE      /* Packing is optimized into one strided memory copy */
} PetscSFPackOptType;
17b23bfdefSJunchao Zhang 
1840e23c03SJunchao Zhang struct _n_PetscSFPackOpt {
19b23bfdefSJunchao Zhang   PetscInt           n;             /* Number of destination ranks */
20b23bfdefSJunchao Zhang   PetscSFPackOptType *type;         /* [n] Optimization types for the n ranks */
21b23bfdefSJunchao Zhang   PetscInt           *offset;       /* [n+1] Indices for i-th rank are in [offset[i],offset[i+1]) of idx[] */
22b23bfdefSJunchao Zhang   PetscInt           *copy_offset;  /* [n+1] If type[i] = PETSCSF_PACKOPT_MULTICOPY, packing for i-th rank is optimized into copies numbered between [copy_offset[i],copy_offset[i+1]) */
23b23bfdefSJunchao Zhang   PetscInt           *copy_start;   /* [*]     j-th copy starts at copy_start[j] in idx[]. In other words, there are copy_length[j] contiguous indices */
24*eb02082bSJunchao Zhang   PetscInt           *copy_length;  /* [*]     starting at idx[copy_start[j]] */
25b23bfdefSJunchao Zhang   PetscInt           *stride_step;  /* [n]   If type[i] = PETSCSF_PACKOPT_STRIDE, then packing for i-th rank is strided, with first index being idx[offset[i]] and step stride_step[i], */
2640e23c03SJunchao Zhang   PetscInt           *stride_n;     /* [n]     and total stride_n[i] steps */
2740e23c03SJunchao Zhang };
2840e23c03SJunchao Zhang 
2940e23c03SJunchao Zhang typedef struct _n_PetscSFPack* PetscSFPack;
3040e23c03SJunchao Zhang 
31*eb02082bSJunchao Zhang /* An abstract class that defines a communication link, which includes how to pack/unpack data and send/recv buffers
3240e23c03SJunchao Zhang  */
3340e23c03SJunchao Zhang struct _n_PetscSFPack {
34*eb02082bSJunchao Zhang   PetscErrorCode (*h_Pack)            (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,const void*,void*);
35*eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndInsert) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
36*eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndAdd)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
37*eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndMin)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
38*eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndMax)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
39*eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndMinloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
40*eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndMaxloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
41*eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndMult)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
42*eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndLAND)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
43*eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndBAND)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
44*eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndLOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
45*eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndBOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
46*eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndLXOR)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
47*eb02082bSJunchao Zhang   PetscErrorCode (*h_UnpackAndBXOR)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
48*eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndInsert)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
49*eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndAdd)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
50*eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndMin)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
51*eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndMax)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
52*eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndMinloc)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
53*eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndMaxloc)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
54*eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndMult)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
55*eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndLAND)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
56*eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndBAND)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
57*eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndLOR)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
58*eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndBOR)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
59*eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndLXOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
60*eb02082bSJunchao Zhang   PetscErrorCode (*h_FetchAndBXOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
61*eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
62*eb02082bSJunchao Zhang   /* These fields are lazily initialized in a sense that only when device pointers are passed to an SF, the SF
63*eb02082bSJunchao Zhang      will set them, otherwise it just leaves them alone even though PETSC_HAVE_CUDA. Packing routines using
64*eb02082bSJunchao Zhang      regular ops when there are no data race chances.
65*eb02082bSJunchao Zhang   */
66*eb02082bSJunchao Zhang   PetscBool      deviceinited;        /* Are device related fields initialized? */
67*eb02082bSJunchao Zhang   PetscErrorCode (*d_Pack)            (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,const void*,void*);
68*eb02082bSJunchao Zhang 
69*eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndInsert) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
70*eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndAdd)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
71*eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndMin)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
72*eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndMax)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
73*eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndMinloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
74*eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndMaxloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
75*eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndMult)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
76*eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndLAND)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
77*eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndBAND)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
78*eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndLOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
79*eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndBOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
80*eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndLXOR)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
81*eb02082bSJunchao Zhang   PetscErrorCode (*d_UnpackAndBXOR)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
82*eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndInsert)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
83*eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndAdd)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
84*eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndMin)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
85*eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndMax)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
86*eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndMinloc)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
87*eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndMaxloc)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
88*eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndMult)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
89*eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndLAND)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
90*eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndBAND)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
91*eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndLOR)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
92*eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndBOR)     (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
93*eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndLXOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
94*eb02082bSJunchao Zhang   PetscErrorCode (*d_FetchAndBXOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
95*eb02082bSJunchao Zhang 
96*eb02082bSJunchao Zhang   /* Packing routines using atomics when there are data race chances */
97*eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndInsert)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
98*eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndAdd)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
99*eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndMin)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
100*eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndMax)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
101*eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndMinloc)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
102*eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndMaxloc)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
103*eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndMult)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
104*eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndLAND)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
105*eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndBAND)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
106*eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndLOR)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
107*eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndBOR)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
108*eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndLXOR)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
109*eb02082bSJunchao Zhang   PetscErrorCode (*da_UnpackAndBXOR)  (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*);
110*eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndInsert) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
111*eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndAdd)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
112*eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndMin)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
113*eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndMax)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
114*eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndMinloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
115*eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndMaxloc) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
116*eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndMult)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
117*eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndLAND)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
118*eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndBAND)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
119*eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndLOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
120*eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndBOR)    (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
121*eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndLXOR)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
122*eb02082bSJunchao Zhang   PetscErrorCode (*da_FetchAndBXOR)   (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,      void*);
123*eb02082bSJunchao Zhang 
124*eb02082bSJunchao Zhang   PetscInt       MAX_CORESIDENT_THREADS; /* It is a copy from SF for convenience. */
125*eb02082bSJunchao Zhang   cudaStream_t   stream;                 /* Stream to launch pack/unapck kernels if not using the default stream */
126*eb02082bSJunchao Zhang #endif
127*eb02082bSJunchao Zhang   PetscMPIInt    tag;                    /* Each link has a tag so we can perform multiple SF ops at the same time */
128*eb02082bSJunchao Zhang   MPI_Datatype   unit;
129*eb02082bSJunchao Zhang   MPI_Datatype   basicunit;              /* unit is made of MPI builtin dataype basicunit */
130*eb02082bSJunchao Zhang   PetscBool      isbuiltin;              /* Is unit an MPI builtin datatype? If it is true, basicunit=unit, bs=1 */
131*eb02082bSJunchao Zhang   size_t         unitbytes;              /* Number of bytes in a unit */
132*eb02082bSJunchao Zhang   PetscInt       bs;                     /* Number of basic units in a unit */
133*eb02082bSJunchao Zhang   const void     *rkey,*lkey;            /* rootdata and leafdata used as keys for operation */
134*eb02082bSJunchao Zhang   char           *rootbuf[2];            /* Buffer for packed roots on Host (0 or PETSC_MEMTYPE_HOST) or Device (1 or PETSC_MEMTYPE_DEVICE) */
135*eb02082bSJunchao Zhang   char           *leafbuf[2];            /* Buffer for packed leaves on Host (0) or Device (1) */
136*eb02082bSJunchao Zhang   char           *selfbuf[2];            /* Buffer for roots in self to self communication on Host (0) or Device (1) */
137*eb02082bSJunchao Zhang   PetscInt       rootbuflen;             /* Length of root buffer in <unit> */
138*eb02082bSJunchao Zhang   PetscInt       leafbuflen;             /* Length of leaf buffer in <unit> */
139*eb02082bSJunchao Zhang   PetscInt       selfbuflen;             /* Length of self buffer in <unit> */
140*eb02082bSJunchao Zhang   PetscMemType   rootmtype;              /* rootdata's memory type */
141*eb02082bSJunchao Zhang   PetscMemType   leafmtype;              /* leafdata's memory type */
142*eb02082bSJunchao Zhang   PetscMPIInt    nrootreqs;              /* Number of root requests */
143*eb02082bSJunchao Zhang   PetscMPIInt    nleafreqs;              /* Number of leaf requests */
144*eb02082bSJunchao Zhang   MPI_Request    *rootreqs[2][2];        /* Pointers to root requests in this layout [PETSCSF_DIRECTION][PETSC_MEMTYPE] */
145*eb02082bSJunchao Zhang   MPI_Request    *leafreqs[2][2];        /* Pointers to leaf requests in this layout [PETSCSF_DIRECTION][PETSC_MEMTYPE] */
146*eb02082bSJunchao Zhang   PetscBool      rootreqsinited[2][2];   /* Are root requests initialized? Also in layout of [PETSCSF_DIRECTION][PETSC_MEMTYPE]*/
147*eb02082bSJunchao Zhang   PetscBool      leafreqsinited[2][2];   /* Are leaf requests initialized? Also in layout of [PETSCSF_DIRECTION][PETSC_MEMTYPE]*/
148*eb02082bSJunchao Zhang   MPI_Request    *reqs;                  /* An array of length (nrootreqs+nleafreqs)*4. Pointers in rootreqs[][] and leafreqs[][] point here */
149*eb02082bSJunchao Zhang   PetscSFPack    next;
15040e23c03SJunchao Zhang };
15140e23c03SJunchao Zhang 
1529d1c8addSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetInUse(PetscSF,MPI_Datatype,const void*,const void*,PetscCopyMode,PetscSFPack*);
15340e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackReclaim(PetscSF,PetscSFPack*);
154*eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackDestoryAvailable(PetscSFPack*);
155*eb02082bSJunchao Zhang PETSC_STATIC_INLINE PetscErrorCode PetscSFPackGetPack(PetscSFPack link,PetscMemType mtype,PetscErrorCode (**Pack)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,const void*,void*))
156*eb02082bSJunchao Zhang {
157*eb02082bSJunchao Zhang   PetscFunctionBegin;
158*eb02082bSJunchao Zhang   *Pack = NULL;
159*eb02082bSJunchao Zhang   if (mtype == PETSC_MEMTYPE_HOST)        *Pack = link->h_Pack;
160*eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
161*eb02082bSJunchao Zhang   else if (mtype == PETSC_MEMTYPE_DEVICE) *Pack = link->d_Pack;
162*eb02082bSJunchao Zhang #endif
163*eb02082bSJunchao Zhang   else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong PetscMemType %D",mtype);
164*eb02082bSJunchao Zhang   PetscFunctionReturn(0);
165*eb02082bSJunchao Zhang }
166*eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetUnpackAndOp(PetscSFPack,PetscMemType,MPI_Op,PetscBool,PetscErrorCode (**UnpackAndOp)(PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,const void*));
167*eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetFetchAndOp (PetscSFPack,PetscMemType,MPI_Op,PetscBool,PetscErrorCode (**FetchAndOp) (PetscInt,const PetscInt*,PetscSFPack,PetscSFPackOpt,void*,void*));
1689d1c8addSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetErrorOnUnsupportedOverlap(PetscSF,MPI_Datatype,const void*,const void*);
169*eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetUp_Host(PetscSF,PetscSFPack,MPI_Datatype);
170*eb02082bSJunchao Zhang #if defined(PETSC_HAVE_CUDA)
171*eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetUp_Device(PetscSF,PetscSFPack,MPI_Datatype);
172*eb02082bSJunchao Zhang #endif
173*eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackOptCreate(PetscInt,const PetscInt*,const PetscInt*,PetscSFPackOpt*);
174*eb02082bSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackOptDestory(PetscSFPackOpt *out);
17540e23c03SJunchao Zhang #endif
176