1 #if !defined(PETSCLANDAU_H) 2 #define PETSCLANDAU_H 3 4 #include <petscdmplex.h> /*I "petscdmplex.h" I*/ 5 #include <petscts.h> 6 7 PETSC_EXTERN PetscErrorCode DMPlexLandauPrintNorms(Vec, PetscInt); 8 PETSC_EXTERN PetscErrorCode DMPlexLandauCreateVelocitySpace(MPI_Comm,PetscInt,const char[],Vec*,Mat*,DM*); 9 PETSC_EXTERN PetscErrorCode DMPlexLandauDestroyVelocitySpace(DM*); 10 PETSC_EXTERN PetscErrorCode DMPlexLandauAddToFunction(DM,Vec,PetscErrorCode (*)(DM, Vec, PetscInt, PetscInt, PetscInt, void*), void*); 11 PETSC_EXTERN PetscErrorCode DMPlexLandauAddMaxwellians(DM, Vec, PetscReal, PetscReal[], PetscReal[], PetscInt, PetscInt, PetscInt, void *); 12 PETSC_EXTERN PetscErrorCode DMPlexLandauCreateMassMatrix(DM dm, Mat *Amat); 13 PETSC_EXTERN PetscErrorCode DMPlexLandauIFunction(TS, PetscReal,Vec,Vec,Vec,void *); 14 PETSC_EXTERN PetscErrorCode DMPlexLandauIJacobian(TS, PetscReal,Vec,Vec,PetscReal,Mat,Mat,void *); 15 16 typedef int LandauIdx; 17 18 /* the Fokker-Planck-Landau context */ 19 #if !defined(LANDAU_MAX_SPECIES) 20 #if defined(PETSC_USE_DMLANDAU_2D) 21 #define LANDAU_MAX_SPECIES 10 22 #define LANDAU_MAX_GRIDS 3 23 #else 24 #define LANDAU_MAX_SPECIES 10 25 #define LANDAU_MAX_GRIDS 3 26 #endif 27 #else 28 #define LANDAU_MAX_GRIDS 3 29 #endif 30 31 #if !defined(LANDAU_MAX_Q) 32 #if defined(LANDAU_MAX_NQ) 33 #error"LANDAU_MAX_NQ but not LANDAU_MAX_Q. Use -DLANDAU_MAX_Q=4 for Q3 elements" 34 #endif 35 #if defined(PETSC_USE_DMLANDAU_2D) 36 #define LANDAU_MAX_Q 5 37 #else 38 // 3D CUDA fails with > 3 (KK-CUDA is OK) 39 #define LANDAU_MAX_Q 3 40 #endif 41 #else 42 #undef LANDAU_MAX_NQ 43 #endif 44 45 #if defined(PETSC_USE_DMLANDAU_2D) 46 #define LANDAU_MAX_Q_FACE LANDAU_MAX_Q 47 #define LANDAU_MAX_NQ (LANDAU_MAX_Q*LANDAU_MAX_Q) 48 #define LANDAU_MAX_BATCH_SZ 1024 49 #define LANDAU_DIM 2 50 #else 51 #define LANDAU_MAX_Q_FACE (LANDAU_MAX_Q*LANDAU_MAX_Q) 52 #define LANDAU_MAX_NQ (LANDAU_MAX_Q*LANDAU_MAX_Q*LANDAU_MAX_Q) 53 #define LANDAU_MAX_BATCH_SZ 64 54 #define LANDAU_DIM 3 55 #endif 56 57 typedef enum {LANDAU_CUDA, LANDAU_KOKKOS, LANDAU_CPU} LandauDeviceType; 58 59 // static data - will be "device" data 60 typedef struct { 61 void *invJ; // nip*dim*dim 62 void *D; // nq*nb*dim 63 void *B; // nq*nb 64 void *alpha; // ns 65 void *beta; // ns 66 void *invMass; // ns 67 void *w; // nip 68 void *x; // nip 69 void *y; // nip 70 void *z; // nip 71 void *Eq_m; // ns - dynamic 72 void *f; // nip*Nf - dynamic (IP) 73 void *dfdx; // nip*Nf - dynamic (IP) 74 void *dfdy; // nip*Nf - dynamic (IP) 75 void *dfdz; // nip*Nf - dynamic (IP) 76 int dim_,ns_,nip_,nq_,nb_; 77 void *NCells; // remove and ise elem_offset - TODO 78 void *species_offset; // for each grid, but same for all batched vertices 79 void *mat_offset; // for each grid, but same for all batched vertices 80 void *elem_offset; // for each grid, but same for all batched vertices 81 void *ip_offset; // for each grid, but same for all batched vertices 82 void *ipf_offset; // for each grid, but same for all batched vertices 83 void *ipfdf_data; // for each grid, but same for all batched vertices 84 void *maps; // for each grid, but same for all batched vertices 85 // COO 86 void *coo_elem_offsets; 87 void *coo_elem_point_offsets; 88 void *coo_elem_fullNb; 89 void *coo_vals; 90 LandauIdx coo_n_cellsTot; 91 LandauIdx coo_size; 92 LandauIdx coo_max_fullnb; 93 } LandauStaticData; 94 95 typedef enum {LANDAU_EX2_TSSOLVE, LANDAU_MATRIX_TOTAL, LANDAU_OPERATOR, LANDAU_JACOBIAN_COUNT, LANDAU_JACOBIAN, LANDAU_MASS, LANDAU_F_DF, LANDAU_KERNEL, KSP_FACTOR, KSP_SOLVE, LANDAU_NUM_TIMERS} LandauOMPTimers; 96 97 typedef struct { 98 PetscBool interpolate; /* Generate intermediate mesh elements */ 99 PetscBool gpu_assembly; 100 MPI_Comm comm; /* global communicator to use for errors and diagnostics */ 101 double times[LANDAU_NUM_TIMERS]; 102 PetscBool use_matrix_mass; 103 /* FE */ 104 PetscFE fe[LANDAU_MAX_SPECIES]; 105 /* geometry */ 106 PetscReal radius[LANDAU_MAX_GRIDS]; 107 PetscReal re_radius; /* RE: radius of refinement along v_perp=0, z>0 */ 108 PetscReal vperp0_radius1; /* RE: radius of refinement along v_perp=0 */ 109 PetscReal vperp0_radius2; /* RE: radius of refinement along v_perp=0 after origin AMR refinement */ 110 PetscBool sphere; // not used 111 PetscBool inflate; // not used 112 PetscReal i_radius[LANDAU_MAX_GRIDS]; // not used 113 PetscReal e_radius; // not used 114 PetscInt num_sections; // not used 115 /* AMR */ 116 PetscBool use_p4est; 117 PetscInt numRERefine; /* RE: refinement along v_perp=0, z > 0 */ 118 PetscInt nZRefine1; /* RE: origin refinement after v_perp=0 refinement */ 119 PetscInt nZRefine2; /* RE: origin refinement after origin AMR refinement */ 120 PetscInt numAMRRefine[LANDAU_MAX_GRIDS]; /* normal AMR - refine from origin */ 121 PetscInt postAMRRefine[LANDAU_MAX_GRIDS]; /* uniform refinement of AMR */ 122 /* relativistic */ 123 PetscBool use_energy_tensor_trick; 124 PetscBool use_relativistic_corrections; 125 /* physics */ 126 PetscReal thermal_temps[LANDAU_MAX_SPECIES]; 127 PetscReal masses[LANDAU_MAX_SPECIES]; /* mass of each species */ 128 PetscReal charges[LANDAU_MAX_SPECIES]; /* charge of each species */ 129 PetscReal n[LANDAU_MAX_SPECIES]; /* number density of each species */ 130 PetscReal m_0; /* reference mass */ 131 PetscReal v_0; /* reference velocity */ 132 PetscReal n_0; /* reference number density */ 133 PetscReal t_0; /* reference time */ 134 PetscReal Ez; 135 PetscReal epsilon0; 136 PetscReal k; 137 PetscReal lnLam; 138 PetscReal electronShift; 139 PetscInt num_species; 140 PetscInt num_grids; 141 PetscInt species_offset[LANDAU_MAX_GRIDS+1]; // for each grid, but same for all batched vertices 142 PetscInt mat_offset[LANDAU_MAX_GRIDS+1]; // for each grid, but same for all batched vertices 143 // batching 144 PetscBool jacobian_field_major_order; // this could be a type but lets not get pedantic 145 VecScatter plex_batch; 146 Vec work_vec; 147 IS batch_is; 148 PetscErrorCode (*seqaij_mult)(Mat,Vec,Vec); 149 PetscErrorCode (*seqaij_multtranspose)(Mat,Vec,Vec); 150 PetscErrorCode (*seqaij_solve)(Mat,Vec,Vec); 151 PetscErrorCode (*seqaij_getdiagonal)(Mat,Vec); 152 /* COO */ 153 PetscBool coo_assembly; 154 /* cache */ 155 Mat J; 156 Mat M; 157 Vec X; 158 /* derived type */ 159 void *data; 160 /* computing */ 161 LandauDeviceType deviceType; 162 DM pack; 163 DM plex[LANDAU_MAX_GRIDS]; 164 LandauStaticData SData_d; /* static geometric data on device */ 165 /* diagnostics */ 166 PetscInt verbose; 167 PetscLogEvent events[20]; 168 PetscLogStage stage; 169 PetscObjectState norm_state; 170 PetscInt batch_sz; 171 PetscInt batch_view_idx; 172 } LandauCtx; 173 174 #define LANDAU_SPECIES_MAJOR 175 #if !defined(LANDAU_SPECIES_MAJOR) 176 #define LAND_PACK_IDX(_b,_g) (_b*ctx->num_grids + _g) 177 #define LAND_MOFFSET(_b,_g,_nbch,_ngrid,_mat_off) (_b*_mat_off[_ngrid] + _mat_off[_g]) 178 #else 179 #define LAND_PACK_IDX(_b,_g) (_g*ctx->batch_sz + _b) 180 #define LAND_MOFFSET(_b,_g,_nbch,_ngrid,_mat_off) (_nbch*_mat_off[_g] + _b*(_mat_off[_g+1] - _mat_off[_g])) 181 #endif 182 183 typedef struct { 184 PetscReal scale; 185 LandauIdx gid; // Landau matrix index (<10,000) 186 } pointInterpolationP4est; 187 typedef struct _lP4estVertexMaps { 188 LandauIdx (*gIdx)[LANDAU_MAX_SPECIES][LANDAU_MAX_NQ]; // #elems * LANDAU_MAX_NQ (spoof for max , Nb) on device, 189 LandauIdx num_elements; 190 LandauIdx num_reduced; 191 LandauIdx num_face; // (Q or Q^2 for 3D) 192 LandauDeviceType deviceType; 193 PetscInt Nf; 194 pointInterpolationP4est (*c_maps)[LANDAU_MAX_Q_FACE]; 195 struct _lP4estVertexMaps*d_self; 196 void *vp1,*vp2,*vp3; 197 PetscInt numgrids; 198 } P4estVertexMaps; 199 200 PETSC_EXTERN PetscErrorCode LandauCreateColoring(Mat, DM, PetscContainer *); 201 #if defined(PETSC_HAVE_CUDA) 202 PETSC_EXTERN PetscErrorCode LandauCUDAJacobian(DM[], const PetscInt, const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[], const PetscScalar[], 203 const LandauStaticData *, const PetscReal, const PetscLogEvent[],const PetscInt[], const PetscInt[], Mat[], Mat); 204 PETSC_EXTERN PetscErrorCode LandauCUDACreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt, PetscInt); 205 PETSC_EXTERN PetscErrorCode LandauCUDADestroyMatMaps(P4estVertexMaps *, PetscInt); 206 PETSC_EXTERN PetscErrorCode LandauCUDAStaticDataSet(DM, const PetscInt, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal [], PetscReal [], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], 207 PetscReal[], LandauStaticData *); 208 PETSC_EXTERN PetscErrorCode LandauCUDAStaticDataClear(LandauStaticData *); 209 #endif 210 #if defined(PETSC_HAVE_KOKKOS) 211 PETSC_EXTERN PetscErrorCode LandauKokkosJacobian(DM[], const PetscInt, const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[], const PetscScalar[], 212 const LandauStaticData *, const PetscReal, const PetscLogEvent[], const PetscInt[], const PetscInt[], Mat[], Mat); 213 PETSC_EXTERN PetscErrorCode LandauKokkosCreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt, PetscInt); 214 PETSC_EXTERN PetscErrorCode LandauKokkosDestroyMatMaps(P4estVertexMaps *, PetscInt); 215 PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataSet(DM, const PetscInt, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal [], PetscReal [], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[], 216 PetscReal[], LandauStaticData *); 217 PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataClear(LandauStaticData*); 218 #endif 219 220 #endif /* PETSCLANDAU_H */ 221