/* xref: /petsc/src/vec/is/sf/impls/basic/sfpack.h (revision 9d1c8add3f910d1f6c9c3b06f1a2a7ffcd567655) */
140e23c03SJunchao Zhang #if !defined(__SFPACK_H)
240e23c03SJunchao Zhang #define __SFPACK_H
340e23c03SJunchao Zhang 
440e23c03SJunchao Zhang #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/
540e23c03SJunchao Zhang 
640e23c03SJunchao Zhang /* Optimization plans in packing(unpacking) for target processors.
740e23c03SJunchao Zhang 
840e23c03SJunchao Zhang    Indirect accesses in packing like p[i] = u[idx[i]] are expensive and are not vectorization friendly. We
940e23c03SJunchao Zhang    try to optimize them if we found cenrtain patterns among indices in idx[]. As a result, a pack might be
1040e23c03SJunchao Zhang    optimized into 1) a small number of contiguous memory copies; OR 2) one strided memory copy.
1140e23c03SJunchao Zhang 
1240e23c03SJunchao Zhang    Each target has its own plan. n, the number of target processors, is nranks or niranks depending on the context.
1340e23c03SJunchao Zhang  */
1440e23c03SJunchao Zhang struct _n_PetscSFPackOpt {
1540e23c03SJunchao Zhang   PetscBool *optimized;      /* [n]   Is the packing to i-th target optimized? If yes, other fields give the opt plan */
1640e23c03SJunchao Zhang   PetscInt  *copy_offset;    /* [n+1] We number all memory copies. Packing for target i is optimized into copies in [copy_offset[i],copy_offset[i+1]) */
1740e23c03SJunchao Zhang   PetscInt  *copy_start;     /* [*]   j-th copy starts at index copy_start[j] */
1840e23c03SJunchao Zhang   PetscInt  *copy_length;    /* [*]     with length copy_length[j] in unit of the <unit> used in for example, PetscSFReduceBegin(sf,unit,...) */
1940e23c03SJunchao Zhang   PetscInt  *stride_first;   /* [n]   If optimized[i] is TRUE but copy_offset[i] == copy_offset[i+1], then packing for remote i is strided. The first */
2040e23c03SJunchao Zhang   PetscInt  *stride_step;    /* [n]     index is stride_first[i], step is stride_step[i], */
2140e23c03SJunchao Zhang   PetscInt  *stride_n;       /* [n]     and total stride_n[i] steps */
2240e23c03SJunchao Zhang };
2340e23c03SJunchao Zhang 
/* Handle to a communication link. Declared before struct _n_PetscSFPack is
   defined because SFPACKHEADER itself contains a PetscSFPack member (next). */
typedef struct _n_PetscSFPack* PetscSFPack;

/* Fields shared by every communication link. The macro is meant to be expanded as the first
   member list of a link struct, written as `SFPACKHEADER;` -- the expansion deliberately ends
   WITHOUT a semicolon after `next`, so the user of the macro supplies it.

   The function pointers fall into three families, distinguished by the constness of their last
   two arguments:
     Pack         (..., const void*, void*)   the first buffer is read-only, the second is written
     UnpackAndXxx (..., void*, const void*)   the first buffer is written, the second is read-only
     FetchAndXxx  (..., void*, void*)         both buffers may be written
   NOTE(review): the meaning of the leading PetscInt/const PetscInt* arguments is not visible in
   this header; confirm against the pack/unpack implementations before relying on it.
 */
#define SFPACKHEADER \
  PetscErrorCode (*Pack)           (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,const void*,void*);  \
  PetscErrorCode (*UnpackAndInsert)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndAdd)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndMin)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndMax)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndMinloc)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndMaxloc)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndMult)  (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndLAND)  (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndBAND)  (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndLOR)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndBOR)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndLXOR)  (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndBXOR)  (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*FetchAndInsert) (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndAdd)    (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndMin)    (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndMax)    (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndMinloc) (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndMaxloc) (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndMult)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndLAND)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndBAND)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndLOR)    (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndBOR)    (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndLXOR)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndBXOR)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscMPIInt    tag;         /* Each link has a tag so we can perform multiple SF ops at the same time */         \
  MPI_Datatype   unit;                                                                                             \
  MPI_Datatype   basicunit;   /* unit is made of MPI builtin datatype basicunit */                                 \
  PetscBool      isbuiltin;   /* Is unit an MPI builtin datatype? If it is true, basicunit=unit, bs=1 */           \
  size_t         unitbytes;   /* Number of bytes in a unit */                                                      \
  PetscInt       bs;          /* Number of basic units in a unit */                                                \
  const void     *rkey,*lkey; /* rootdata and leafdata used as keys for operation */                               \
  PetscSFPack    next

6340e23c03SJunchao Zhang /* An abstract class that defines a communication link, which includes how to
6440e23c03SJunchao Zhang    pack/unpack data. Subclasses may further contain fields for send/recv buffers,
6540e23c03SJunchao Zhang    MPI_Requests etc used in communication.
6640e23c03SJunchao Zhang  */
6740e23c03SJunchao Zhang struct _n_PetscSFPack {
6840e23c03SJunchao Zhang   SFPACKHEADER;
6940e23c03SJunchao Zhang };
7040e23c03SJunchao Zhang 
71*9d1c8addSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetInUse(PetscSF,MPI_Datatype,const void*,const void*,PetscCopyMode,PetscSFPack*);
7240e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackReclaim(PetscSF,PetscSFPack*);
7340e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetupType(PetscSFPack,MPI_Datatype);
7440e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetUnpackAndOp(PetscSF,PetscSFPack,MPI_Op,PetscErrorCode (**UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*));
7540e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetFetchAndOp(PetscSF,PetscSFPack,MPI_Op,PetscErrorCode (**FetchAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*));
7640e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetupOptimization(PetscInt,const PetscInt*,const PetscInt*,PetscSFPackOpt*);
7740e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackDestoryOptimization(PetscSFPackOpt *out);
78*9d1c8addSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetErrorOnUnsupportedOverlap(PetscSF,MPI_Datatype,const void*,const void*);
79*9d1c8addSJunchao Zhang 
8040e23c03SJunchao Zhang #endif
81