#if !defined(__SFPACK_H)
#define __SFPACK_H

#include <petsc/private/sfimpl.h> /*I "petscsf.h" I*/

/* Optimization plans in packing(unpacking) for destination ranks.

  Indirect accesses in packing like p[i] = u[idx[i]] are expensive and are not vectorization friendly. We
  try to optimize them if we find certain patterns among the indices in idx[]. As a result, a pack might be
  optimized into 1) a small number of contiguous memory copies; OR 2) one strided memory copy.

  Each target has its own plan. n, the number of destination ranks, is nranks or niranks depending on the context.

  Array extents are given in brackets: [n] and [n+1] are indexed by destination rank, [*] is indexed by
  copy number (see copy_offset).
 */
struct _n_PetscSFPackOpt {
  PetscInt  n;             /* The number of destination ranks */
  PetscBool *optimized;    /* [n]   Is the packing to i-th rank optimized? If yes, other fields give the opt plan */
  PetscInt  *copy_offset;  /* [n+1] We number all memory copies. Packing for i-th rank is optimized into copies in [copy_offset[i],copy_offset[i+1]) */
  PetscInt  *copy_start;   /* [*]   j-th copy starts at index copy_start[j] */
  PetscInt  *copy_length;  /* [*]   with length copy_length[j] in unit of the <unit> used in for example, PetscSFReduceBegin(sf,unit,...) */
  /* Strided plan: if optimized[i] is TRUE but copy_offset[i] == copy_offset[i+1] (i.e., no copies are
     recorded for rank i), then packing for i-th rank is strided and is described by the three fields below. */
  PetscInt  *stride_first; /* [n]   The first index of the strided access for i-th rank */
  PetscInt  *stride_step;  /* [n]   The step between consecutive indices */
  PetscInt  *stride_n;     /* [n]   The total number of steps */
};

/* Handle to a communication link; the pointed-to struct is declared further down in this header */
typedef struct _n_PetscSFPack* PetscSFPack;

/* Common fields of every communication link.

  The macro expands to a list of member declarations WITHOUT a trailing semicolon, so it must be used as
  "SFPACKHEADER;" inside a struct definition (see struct _n_PetscSFPack in this header).

  The routine pointers come in three families that share their first five argument types
  (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt) and differ in the const-ness of the two
  trailing buffers:
    Pack          (..., const void*, void*)
    UnpackAndXxx  (..., void*, const void*)
    FetchAndXxx   (..., void*, void*)
  The meaning of each individual argument is fixed by the implementations, which are not in this header.
  NOTE(review): the Xxx suffixes look like they mirror MPI reduction ops (MPI_SUM, MPI_MINLOC, ...) as
  selected through PetscSFPackGetUnpackAndOp()/PetscSFPackGetFetchAndOp() - confirm in the implementation.

  Do not reorder or insert fields casually: NOTE(review) subclasses presumably embed this header as their
  leading members so that they can be accessed through a PetscSFPack - confirm before changing the layout.
 */
#define SFPACKHEADER \
  PetscErrorCode (*Pack)           (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,const void*,void*);  \
  PetscErrorCode (*UnpackAndInsert)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndAdd)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndMin)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndMax)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndMinloc)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndMaxloc)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndMult)  (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndLAND)  (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndBAND)  (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndLOR)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndBOR)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndLXOR)  (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*UnpackAndBXOR)  (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*);  \
  PetscErrorCode (*FetchAndInsert) (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndAdd)    (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndMin)    (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndMax)    (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndMinloc) (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndMaxloc) (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndMult)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndLAND)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndBAND)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndLOR)    (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndBOR)    (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndLXOR)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscErrorCode (*FetchAndBXOR)   (PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*);        \
  PetscMPIInt    tag;          /* Each link has a tag so we can perform multiple SF ops at the same time */        \
  MPI_Datatype   unit;         /* The datatype this link was set up for */                                         \
  MPI_Datatype   basicunit;    /* unit is made of MPI builtin datatype basicunit */                                \
  PetscBool      isbuiltin;    /* Is unit an MPI builtin datatype? If it is true, basicunit=unit, bs=1 */          \
  size_t         unitbytes;    /* Number of bytes in a unit */                                                     \
  PetscInt       bs;           /* Number of basic units in a unit */                                               \
  const void     *rkey,*lkey;  /* rootdata and leafdata used as keys for operation */                              \
  PetscSFPack    next          /* Another link; NOTE(review): presumably chains links into a list - confirm */

/* An abstract class that defines a communication link, which includes how to
   pack/unpack data. Subclasses may further contain fields for send/recv buffers,
   MPI_Requests etc used in communication.
*/
struct _n_PetscSFPack {
  SFPACKHEADER;
};

/* Library-internal (PETSC_INTERN) routines working on links and pack optimization plans. They are only
   declared here; their definitions and exact argument semantics live in the implementation files.

   NOTE(review): "Destory" in PetscSFPackDestoryOptimization is the actual spelling of the declared symbol
   (presumably a typo for "Destroy"); fixing it requires renaming the definition and all callers together.
 */
PETSC_INTERN PetscErrorCode PetscSFPackGetInUse(PetscSF,MPI_Datatype,const void*,const void*,PetscCopyMode,PetscSFPack*);
PETSC_INTERN PetscErrorCode PetscSFPackReclaim(PetscSF,PetscSFPack*);
PETSC_INTERN PetscErrorCode PetscSFPackSetupType(PetscSFPack,MPI_Datatype);
/* The last argument of the next two routines is a pointer to a routine pointer with the same signature as
   the UnpackAndXxx (resp. FetchAndXxx) members of SFPACKHEADER */
PETSC_INTERN PetscErrorCode PetscSFPackGetUnpackAndOp(PetscSF,PetscSFPack,MPI_Op,PetscErrorCode (**UnpackAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,const void*));
PETSC_INTERN PetscErrorCode PetscSFPackGetFetchAndOp(PetscSF,PetscSFPack,MPI_Op,PetscErrorCode (**FetchAndOp)(PetscInt,PetscInt,const PetscInt*,PetscInt,PetscSFPackOpt,void*,void*));
PETSC_INTERN PetscErrorCode PetscSFPackSetupOptimization(PetscInt,const PetscInt*,const PetscInt*,PetscSFPackOpt*);
PETSC_INTERN PetscErrorCode PetscSFPackDestoryOptimization(PetscSFPackOpt *out);
PETSC_INTERN PetscErrorCode PetscSFPackSetErrorOnUnsupportedOverlap(PetscSF,MPI_Datatype,const void*,const void*);

#endif