xref: /petsc/src/vec/is/sf/impls/basic/sfpack.h (revision b23bfdefca792e3d9f47034521b8d4c437693123)
140e23c03SJunchao Zhang #if !defined(__SFPACK_H)
240e23c03SJunchao Zhang #define __SFPACK_H
340e23c03SJunchao Zhang 
440e23c03SJunchao Zhang #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
540e23c03SJunchao Zhang 
6*b23bfdefSJunchao Zhang /* Optimization plans in packing & unpacking for destination ranks.
740e23c03SJunchao Zhang 
8*b23bfdefSJunchao Zhang   Suppose there are count indices stored in idx[], and two addresses u, p. We want to do packing:
9*b23bfdefSJunchao Zhang      p[i] = u[idx[i]], for i in [0,count)
1040e23c03SJunchao Zhang 
11*b23bfdefSJunchao Zhang   Often, the indices are associated with n ranks. Each rank's indices are stored consecutively in idx[].
12*b23bfdefSJunchao Zhang   We analyze the indices for each rank and see if there are patterns that can be used to optimize the packing.
13*b23bfdefSJunchao Zhang   The result is stored in PetscSFPackOpt. Packing for a rank might not be optimizable, or it might be optimized
14*b23bfdefSJunchao Zhang   into a small number of contiguous memory copies or one strided memory copy.
1540e23c03SJunchao Zhang  */
/* Kind of packing optimization chosen for one destination rank:
     PETSCSF_PACKOPT_NONE      - indices of the rank are not optimizable
     PETSCSF_PACKOPT_MULTICOPY - packing is done as a small number of contiguous memory copies
     PETSCSF_PACKOPT_STRIDE    - packing is done as one strided memory copy
 */
16*b23bfdefSJunchao Zhang typedef enum {PETSCSF_PACKOPT_NONE=0, PETSCSF_PACKOPT_MULTICOPY, PETSCSF_PACKOPT_STRIDE} PetscSFPackOptType;
17*b23bfdefSJunchao Zhang 
/* Per-rank packing optimization plan for an index array idx[].
   Arrays marked [n] or [n+1] are indexed by rank; copy_start/copy_length are indexed by copy number j.
   Which of the copy_* or stride_* entries are meaningful for rank i depends on type[i].
 */
1840e23c03SJunchao Zhang struct _n_PetscSFPackOpt {
19*b23bfdefSJunchao Zhang   PetscInt           n;             /* Number of destination ranks */
20*b23bfdefSJunchao Zhang   PetscSFPackOptType *type;         /* [n] Optimization type for each of the n ranks */
21*b23bfdefSJunchao Zhang   PetscInt           *offset;       /* [n+1] Indices for i-th rank are in [offset[i],offset[i+1]) of idx[] */
22*b23bfdefSJunchao Zhang   PetscInt           *copy_offset;  /* [n+1] If type[i] = PETSCSF_PACKOPT_MULTICOPY, packing for i-th rank is optimized into copies numbered in [copy_offset[i],copy_offset[i+1]) */
23*b23bfdefSJunchao Zhang   PetscInt           *copy_start;   /* [*] j-th copy starts at position copy_start[j] of idx[]; length of this array is presumably copy_offset[n] - TODO confirm */
24*b23bfdefSJunchao Zhang   PetscInt           *copy_length;  /* [*] j-th copy covers copy_length[j] contiguous indices starting from idx[copy_start[j]] */
25*b23bfdefSJunchao Zhang   PetscInt           *stride_step;  /* [n] If type[i] = PETSCSF_PACKOPT_STRIDE, packing for i-th rank is strided: first index is idx[offset[i]], step is stride_step[i] */
2640e23c03SJunchao Zhang   PetscInt           *stride_n;     /* [n] Total number of steps, stride_n[i], of the strided access for i-th rank */
2740e23c03SJunchao Zhang };
2840e23c03SJunchao Zhang 
/* Pointer handle to a communication link (struct _n_PetscSFPack); also the type of the link's 'next' field */
2940e23c03SJunchao Zhang typedef struct _n_PetscSFPack* PetscSFPack;
3040e23c03SJunchao Zhang 
/* SFPACKHEADER: the list of fields placed at the start of struct _n_PetscSFPack.

   Contents, in order:
     - Pack:           callback whose first data pointer is read-only (const void*) and whose second is written (void*).
     - UnpackAnd<Op>:  one callback per operation (Insert, Add, Min, Max, Minloc, Maxloc, Mult, LAND, BAND, LOR, BOR,
                       LXOR, BXOR); the first data pointer is written and the second is read-only.
     - FetchAnd<Op>:   same set of operations; both data pointers are writable.
     - tag, unit, basicunit, isbuiltin, unitbytes, bs: MPI tag and datatype description of the link.
     - rkey, lkey:     root/leaf data addresses used as keys.
     - rootbuf, leafbuf, selfbuf: packed-data buffers.
     - next:           another link of the same type (presumably a singly linked list - TODO confirm).

   NOTE(review): every callback takes (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,...); the leading arguments are
   presumably (count, idx, bs, opt) - confirm against the implementations.
   The macro ends without a semicolon, so the user writes "SFPACKHEADER;" inside a struct. Field order must not be
   changed, because it determines the struct layout.
 */
3140e23c03SJunchao Zhang #define SFPACKHEADER \
32*b23bfdefSJunchao Zhang   PetscErrorCode (*Pack)           (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,const void*,void*);  \
33*b23bfdefSJunchao Zhang   PetscErrorCode (*UnpackAndInsert)(PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
34*b23bfdefSJunchao Zhang   PetscErrorCode (*UnpackAndAdd)   (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
35*b23bfdefSJunchao Zhang   PetscErrorCode (*UnpackAndMin)   (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
36*b23bfdefSJunchao Zhang   PetscErrorCode (*UnpackAndMax)   (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
37*b23bfdefSJunchao Zhang   PetscErrorCode (*UnpackAndMinloc)(PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
38*b23bfdefSJunchao Zhang   PetscErrorCode (*UnpackAndMaxloc)(PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
39*b23bfdefSJunchao Zhang   PetscErrorCode (*UnpackAndMult)  (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
40*b23bfdefSJunchao Zhang   PetscErrorCode (*UnpackAndLAND)  (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
41*b23bfdefSJunchao Zhang   PetscErrorCode (*UnpackAndBAND)  (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
42*b23bfdefSJunchao Zhang   PetscErrorCode (*UnpackAndLOR)   (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
43*b23bfdefSJunchao Zhang   PetscErrorCode (*UnpackAndBOR)   (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
44*b23bfdefSJunchao Zhang   PetscErrorCode (*UnpackAndLXOR)  (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
45*b23bfdefSJunchao Zhang   PetscErrorCode (*UnpackAndBXOR)  (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
46*b23bfdefSJunchao Zhang   PetscErrorCode (*FetchAndInsert) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
47*b23bfdefSJunchao Zhang   PetscErrorCode (*FetchAndAdd)    (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
48*b23bfdefSJunchao Zhang   PetscErrorCode (*FetchAndMin)    (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
49*b23bfdefSJunchao Zhang   PetscErrorCode (*FetchAndMax)    (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
50*b23bfdefSJunchao Zhang   PetscErrorCode (*FetchAndMinloc) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
51*b23bfdefSJunchao Zhang   PetscErrorCode (*FetchAndMaxloc) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
52*b23bfdefSJunchao Zhang   PetscErrorCode (*FetchAndMult)   (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
53*b23bfdefSJunchao Zhang   PetscErrorCode (*FetchAndLAND)   (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
54*b23bfdefSJunchao Zhang   PetscErrorCode (*FetchAndBAND)   (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
55*b23bfdefSJunchao Zhang   PetscErrorCode (*FetchAndLOR)    (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
56*b23bfdefSJunchao Zhang   PetscErrorCode (*FetchAndBOR)    (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
57*b23bfdefSJunchao Zhang   PetscErrorCode (*FetchAndLXOR)   (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
58*b23bfdefSJunchao Zhang   PetscErrorCode (*FetchAndBXOR)   (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
5940e23c03SJunchao Zhang   PetscMPIInt    tag;         /* Each link has a tag so we can perform multiple SF ops at the same time */\
6040e23c03SJunchao Zhang   MPI_Datatype   unit;                                                                                    \
6140e23c03SJunchao Zhang   MPI_Datatype   basicunit;   /* unit is made of MPI builtin datatype basicunit */                         \
6240e23c03SJunchao Zhang   PetscBool      isbuiltin;   /* Is unit an MPI builtin datatype? If it is true, basicunit=unit, bs=1 */  \
6340e23c03SJunchao Zhang   size_t         unitbytes;   /* Number of bytes in a unit */                                             \
6440e23c03SJunchao Zhang   PetscInt       bs;          /* Number of basic units in a unit */                                       \
659d1c8addSJunchao Zhang   const void     *rkey,*lkey; /* rootdata and leafdata used as keys for operation */                      \
66*b23bfdefSJunchao Zhang   char           *rootbuf;       /* Buffer for packed roots in send/recv */                                                         \
67*b23bfdefSJunchao Zhang   char           *leafbuf;       /* Buffer for packed leaves in send/recv */                                                        \
68*b23bfdefSJunchao Zhang   char           *selfbuf;       /* If self communication does not use MPI, this is the shared buffer for packed roots or leaves */ \
6940e23c03SJunchao Zhang   PetscSFPack    next
7040e23c03SJunchao Zhang 
7140e23c03SJunchao Zhang /* An abstract class that defines a communication link, which includes how to
7240e23c03SJunchao Zhang    pack/unpack data. Subclasses may further contain fields for send/recv buffers,
7340e23c03SJunchao Zhang    MPI_Requests etc used in communication.
7440e23c03SJunchao Zhang  */
7540e23c03SJunchao Zhang struct _n_PetscSFPack {
7640e23c03SJunchao Zhang   SFPACKHEADER; /* Expands to the pack/unpack/fetch callbacks, MPI datatype info, keys, buffers and the 'next' link; the base struct has no other fields */
7740e23c03SJunchao Zhang };
7840e23c03SJunchao Zhang 
/* Library-internal (PETSC_INTERN) routines operating on PetscSFPack links and PetscSFPackOpt plans.
   Their definitions are not in this header; the notes below are derived from the signatures only and
   anything marked "presumably" should be confirmed against the implementation.
 */
/* Looks up a link that is in use and returns it through the last argument; the two const void* are presumably
   the rootdata/leafdata keys (rkey/lkey) - TODO confirm */
799d1c8addSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetInUse(PetscSF,MPI_Datatype,const void*,const void*,PetscCopyMode,PetscSFPack*);
/* Presumably returns a link to the SF for reuse - TODO confirm */
8040e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackReclaim(PetscSF,PetscSFPack*);
/* Presumably fills in the datatype fields and callbacks of a link for the given MPI datatype - TODO confirm */
8140e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetupType(PetscSFPack,MPI_Datatype);
/* Output argument receives a function pointer with the same signature as the UnpackAnd<Op> fields, selected by the MPI_Op */
82*b23bfdefSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetUnpackAndOp(PetscSF,PetscSFPack,MPI_Op,PetscErrorCode (**UnpackAndOp)(PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*));
/* Output argument receives a function pointer with the same signature as the FetchAnd<Op> fields, selected by the MPI_Op */
83*b23bfdefSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetFetchAndOp (PetscSF,PetscSFPack,MPI_Op,PetscErrorCode (**FetchAndOp) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,      void*));
/* Produces a PetscSFPackOpt through the last argument; the leading arguments are presumably (n, offset[], idx[]) - TODO confirm */
8440e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetupOptimization(PetscInt,const PetscInt*,const PetscInt*,PetscSFPackOpt*);
/* NOTE(review): "Destory" is a misspelling of "Destroy", but it is the declared symbol name; renaming it requires
   changing the definition and all callers at the same time */
8540e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackDestoryOptimization(PetscSFPackOpt *out);
/* Presumably makes the SF raise an error when root/leaf data overlap in a way it does not support - TODO confirm */
869d1c8addSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetErrorOnUnsupportedOverlap(PetscSF,MPI_Datatype,const void*,const void*);
879d1c8addSJunchao Zhang 
8840e23c03SJunchao Zhang #endif
89