140e23c03SJunchao Zhang #if !defined(__SFPACK_H) 240e23c03SJunchao Zhang #define __SFPACK_H 340e23c03SJunchao Zhang 440e23c03SJunchao Zhang #include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/ 540e23c03SJunchao Zhang 6*b23bfdefSJunchao Zhang /* Optimization plans in packing & unpacking for destination ranks. 740e23c03SJunchao Zhang 8*b23bfdefSJunchao Zhang Suppose there are count indices stored in idx[], and two addresses u, p. We want to do packing: 9*b23bfdefSJunchao Zhang p[i] = u[idx[i]], for i in [0,count) 1040e23c03SJunchao Zhang 11*b23bfdefSJunchao Zhang Often, the indices are associated with n ranks. Each rank's indices are stored consecutively in idx[]. 12*b23bfdefSJunchao Zhang We analyze indices for each rank and see if they are patterns that can be used to optimize the packing. 13*b23bfdefSJunchao Zhang The result is stored in PetscSFPackOpt. Packing for a rank might be not optimizable, or optimized in 14*b23bfdefSJunchao Zhang to a small number of contiguous memory copies or one strided memory copy. 1540e23c03SJunchao Zhang */ 16*b23bfdefSJunchao Zhang typedef enum {PETSCSF_PACKOPT_NONE=0, PETSCSF_PACKOPT_MULTICOPY, PETSCSF_PACKOPT_STRIDE} PetscSFPackOptType; 17*b23bfdefSJunchao Zhang 1840e23c03SJunchao Zhang struct _n_PetscSFPackOpt { 19*b23bfdefSJunchao Zhang PetscInt n; /* Number of destination ranks */ 20*b23bfdefSJunchao Zhang PetscSFPackOptType *type; /* [n] Optimization types for the n ranks */ 21*b23bfdefSJunchao Zhang PetscInt *offset; /* [n+1] Indices for i-th rank are in [offset[i],offset[i+1]) of idx[] */ 22*b23bfdefSJunchao Zhang PetscInt *copy_offset; /* [n+1] If type[i] = PETSCSF_PACKOPT_MULTICOPY, packing for i-th rank is optimized into copies numbered between [copy_offset[i],copy_offset[i+1]) */ 23*b23bfdefSJunchao Zhang PetscInt *copy_start; /* [*] j-th copy starts at copy_start[j] in idx[]. In other words, there are copy_length[j] contiguous indices */ 24*b23bfdefSJunchao Zhang PetscInt *copy_length; /* [*] starting from idx[copy_start[j]] */ 25*b23bfdefSJunchao Zhang PetscInt *stride_step; /* [n] If type[i] = PETSCSF_PACKOPT_STRIDE, then packing for i-th rank is strided, with first index being idx[offset[i]] and step stride_step[i], */ 2640e23c03SJunchao Zhang PetscInt *stride_n; /* [n] and total stride_n[i] steps */ 2740e23c03SJunchao Zhang }; 2840e23c03SJunchao Zhang 2940e23c03SJunchao Zhang typedef struct _n_PetscSFPack* PetscSFPack; 3040e23c03SJunchao Zhang 3140e23c03SJunchao Zhang #define SFPACKHEADER \ 32*b23bfdefSJunchao Zhang PetscErrorCode (*Pack) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,const void*,void*); \ 33*b23bfdefSJunchao Zhang PetscErrorCode (*UnpackAndInsert)(PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); \ 34*b23bfdefSJunchao Zhang PetscErrorCode (*UnpackAndAdd) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); \ 35*b23bfdefSJunchao Zhang PetscErrorCode (*UnpackAndMin) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); \ 36*b23bfdefSJunchao Zhang PetscErrorCode (*UnpackAndMax) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); \ 37*b23bfdefSJunchao Zhang PetscErrorCode (*UnpackAndMinloc)(PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); \ 38*b23bfdefSJunchao Zhang PetscErrorCode (*UnpackAndMaxloc)(PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); \ 39*b23bfdefSJunchao Zhang PetscErrorCode (*UnpackAndMult) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); \ 40*b23bfdefSJunchao Zhang PetscErrorCode (*UnpackAndLAND) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); \ 41*b23bfdefSJunchao Zhang PetscErrorCode (*UnpackAndBAND) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); \ 42*b23bfdefSJunchao Zhang PetscErrorCode (*UnpackAndLOR) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); \ 43*b23bfdefSJunchao Zhang PetscErrorCode (*UnpackAndBOR) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); \ 44*b23bfdefSJunchao Zhang PetscErrorCode (*UnpackAndLXOR) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); \ 45*b23bfdefSJunchao Zhang PetscErrorCode (*UnpackAndBXOR) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*); \ 46*b23bfdefSJunchao Zhang PetscErrorCode (*FetchAndInsert) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); \ 47*b23bfdefSJunchao Zhang PetscErrorCode (*FetchAndAdd) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); \ 48*b23bfdefSJunchao Zhang PetscErrorCode (*FetchAndMin) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); \ 49*b23bfdefSJunchao Zhang PetscErrorCode (*FetchAndMax) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); \ 50*b23bfdefSJunchao Zhang PetscErrorCode (*FetchAndMinloc) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); \ 51*b23bfdefSJunchao Zhang PetscErrorCode (*FetchAndMaxloc) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); \ 52*b23bfdefSJunchao Zhang PetscErrorCode (*FetchAndMult) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); \ 53*b23bfdefSJunchao Zhang PetscErrorCode (*FetchAndLAND) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); \ 54*b23bfdefSJunchao Zhang PetscErrorCode (*FetchAndBAND) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); \ 55*b23bfdefSJunchao Zhang PetscErrorCode (*FetchAndLOR) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); \ 56*b23bfdefSJunchao Zhang PetscErrorCode (*FetchAndBOR) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); \ 57*b23bfdefSJunchao Zhang PetscErrorCode (*FetchAndLXOR) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); \ 58*b23bfdefSJunchao Zhang PetscErrorCode (*FetchAndBXOR) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*); \ 5940e23c03SJunchao Zhang PetscMPIInt tag; /* Each link has a tag so we can perform multiple SF ops at the same time */\ 6040e23c03SJunchao Zhang MPI_Datatype unit; \ 6140e23c03SJunchao Zhang MPI_Datatype basicunit; /* unit is made of MPI builtin dataype basicunit */ \ 6240e23c03SJunchao Zhang PetscBool isbuiltin; /* Is unit an MPI builtin datatype? If it is true, basicunit=unit, bs=1 */ \ 6340e23c03SJunchao Zhang size_t unitbytes; /* Number of bytes in a unit */ \ 6440e23c03SJunchao Zhang PetscInt bs; /* Number of basic units in a unit */ \ 659d1c8addSJunchao Zhang const void *rkey,*lkey; /* rootdata and leafdata used as keys for operation */ \ 66*b23bfdefSJunchao Zhang char *rootbuf; /* Buffer for packed roots in send/recv */ \ 67*b23bfdefSJunchao Zhang char *leafbuf; /* Buffer for packed leaves in send/recv */ \ 68*b23bfdefSJunchao Zhang char *selfbuf; /* If self communication does not use MPI, this is the shared buffer for packed roots or leaves */ \ 6940e23c03SJunchao Zhang PetscSFPack next 7040e23c03SJunchao Zhang 7140e23c03SJunchao Zhang /* An abstract class that defines a communication link, which includes how to 7240e23c03SJunchao Zhang pack/unpack data. Subclasses may further contain fields for send/recv buffers, 7340e23c03SJunchao Zhang MPI_Requests etc used in communication. 7440e23c03SJunchao Zhang */ 7540e23c03SJunchao Zhang struct _n_PetscSFPack { 7640e23c03SJunchao Zhang SFPACKHEADER; 7740e23c03SJunchao Zhang }; 7840e23c03SJunchao Zhang 799d1c8addSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetInUse(PetscSF,MPI_Datatype,const void*,const void*,PetscCopyMode,PetscSFPack*); 8040e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackReclaim(PetscSF,PetscSFPack*); 8140e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetupType(PetscSFPack,MPI_Datatype); 82*b23bfdefSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetUnpackAndOp(PetscSF,PetscSFPack,MPI_Op,PetscErrorCode (**UnpackAndOp)(PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*)); 83*b23bfdefSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackGetFetchAndOp (PetscSF,PetscSFPack,MPI_Op,PetscErrorCode (**FetchAndOp) (PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*, void*)); 8440e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetupOptimization(PetscInt,const PetscInt*,const PetscInt*,PetscSFPackOpt*); 8540e23c03SJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackDestoryOptimization(PetscSFPackOpt *out); 869d1c8addSJunchao Zhang PETSC_INTERN PetscErrorCode PetscSFPackSetErrorOnUnsupportedOverlap(PetscSF,MPI_Datatype,const void*,const void*); 879d1c8addSJunchao Zhang 8840e23c03SJunchao Zhang #endif 89