xref: /petsc/include/petsclandau.h (revision ed5e4e85792117e4bf875eb7efb50a9b8564ac26)
1 #if !defined(PETSCLANDAU_H)
2 #define PETSCLANDAU_H
3 
4 #include <petscdmplex.h> /*I      "petscdmplex.h"    I*/
5 #include <petscts.h>
6 
7 PETSC_EXTERN PetscErrorCode LandauPrintNorms(Vec, PetscInt);
8 PETSC_EXTERN PetscErrorCode LandauCreateVelocitySpace(MPI_Comm,PetscInt,const char[],Vec*,Mat*,DM*);
9 PETSC_EXTERN PetscErrorCode LandauDestroyVelocitySpace(DM*);
10 PETSC_EXTERN PetscErrorCode LandauAddMaxwellians(DM, Vec, PetscReal, PetscReal[], PetscReal[], PetscInt, PetscInt, void *);
11 PETSC_EXTERN PetscErrorCode LandauCreateMassMatrix(DM dm, Mat *Amat);
12 PETSC_EXTERN PetscErrorCode LandauIFunction(TS, PetscReal,Vec,Vec,Vec,void *);
13 PETSC_EXTERN PetscErrorCode LandauIJacobian(TS, PetscReal,Vec,Vec,PetscReal,Mat,Mat,void *);
14 
15 /* the Fokker-Planck-Landau context */
16 #if !defined(LANDAU_DIM)
17 #define LANDAU_DIM 2
18 #endif
19 
20 #if !defined(LANDAU_MAX_SPECIES)
21 #if LANDAU_DIM==2
22 #define LANDAU_MAX_SPECIES 10
23 #define LANDAU_MAX_GRIDS 3
24 #else
25 #define LANDAU_MAX_SPECIES 3
26 #define LANDAU_MAX_GRIDS 2
27 #endif
28 #else
29 #define LANDAU_MAX_GRIDS 3
30 #endif
31 
32 #define LANDAU_MAX_BATCH_SZ 320
33 
34 #if !defined(LANDAU_MAX_Q)
35 #if defined(LANDAU_MAX_NQ)
36 #error"LANDAU_MAX_NQ but not LANDAU_MAX_Q. Use -DLANDAU_MAX_Q=4 for Q3 elements"
37 #endif
38 #if LANDAU_DIM==2
39 #define LANDAU_MAX_Q 5
40 #else
41 #define LANDAU_MAX_Q 3
42 #endif
43 #else
44 #undef LANDAU_MAX_NQ
45 #endif
46 
47 #if LANDAU_DIM==2
48 #define LANDAU_MAX_Q_FACE LANDAU_MAX_Q
49 #define LANDAU_MAX_NQ (LANDAU_MAX_Q*LANDAU_MAX_Q)
50 #else
51 #define LANDAU_MAX_Q_FACE (LANDAU_MAX_Q*LANDAU_MAX_Q)
52 #define LANDAU_MAX_NQ (LANDAU_MAX_Q*LANDAU_MAX_Q*LANDAU_MAX_Q)
53 #endif
54 
55 typedef enum {LANDAU_CUDA, LANDAU_KOKKOS, LANDAU_CPU} LandauDeviceType;
56 
57 // static data - will be "device" data
58 typedef struct {
59   void  *invJ;  // nip*dim*dim
60   void  *D;     // nq*nb*dim
61   void  *B;     // nq*nb
62   void  *alpha; // ns
63   void  *beta;  // ns
64   void  *invMass; // ns
65   void  *w; // nip
66   void  *x; // nip
67   void  *y; // nip
68   void  *z; // nip
69   void  *Eq_m; // ns - dynamic
70   void  *f; //  nip*Nf - dynamic (IP)
71   void  *dfdx; // nip*Nf - dynamic (IP)
72   void  *dfdy; // nip*Nf - dynamic (IP)
73   void  *dfdz; // nip*Nf - dynamic (IP)
74   int   dim_,ns_,nip_,nq_,nb_;
75   void  *NCells; // remove and ise elem_offset - TODO
76   void  *species_offset; // for each grid, but same for all batched vertices
77   void  *mat_offset; // for each grid, but same for all batched vertices
78   void  *elem_offset; // for each grid, but same for all batched vertices
79   void  *ip_offset; // for each grid, but same for all batched vertices
80   void  *ipf_offset; // for each grid, but same for all batched vertices
81   void  *ipfdf_data; // for each grid, but same for all batched vertices
82   void  *maps; // for each grid, but same for all batched vertices
83 } LandauStaticData;
84 
85 typedef enum {LANDAU_EX2_TSSOLVE, LANDAU_MATRIX_TOTAL, LANDAU_OPERATOR, LANDAU_JACOBIAN_COUNT, LANDAU_JACOBIAN, LANDAU_MASS, LANDAU_F_DF, LANDAU_KERNEL, KSP_FACTOR, KSP_SOLVE, LANDAU_NUM_TIMERS} LandauOMPTimers;
86 
87 typedef struct {
88   PetscBool      interpolate;                  /* Generate intermediate mesh elements */
89   PetscBool      gpu_assembly;
90   MPI_Comm       comm; /* global communicator to use for errors and diagnostics */
91   double         times[LANDAU_NUM_TIMERS];
92   PetscBool      initialized;
93   PetscBool      use_matrix_mass;
94   /* FE */
95   PetscFE        fe[LANDAU_MAX_SPECIES];
96   /* geometry  */
97   PetscReal      radius[LANDAU_MAX_GRIDS];
98   PetscReal      re_radius;           /* RE: radius of refinement along v_perp=0, z>0 */
99   PetscReal      vperp0_radius1;      /* RE: radius of refinement along v_perp=0 */
100   PetscReal      vperp0_radius2;      /* RE: radius of refinement along v_perp=0 after origin AMR refinement */
101   PetscBool      sphere; // not used
102   PetscBool      inflate; // not used
103   PetscReal      i_radius[LANDAU_MAX_GRIDS]; // not used
104   PetscReal      e_radius; // not used
105   PetscInt       num_sections; // not used
106   /* AMR */
107   PetscBool      use_p4est;
108   PetscInt       numRERefine;        /* RE: refinement along v_perp=0, z > 0 */
109   PetscInt       nZRefine1;          /* RE: origin refinement after v_perp=0 refinement */
110   PetscInt       nZRefine2;          /* RE: origin refinement after origin AMR refinement */
111   PetscInt       numAMRRefine[LANDAU_MAX_GRIDS];  /* normal AMR - refine from origin */
112   PetscInt       postAMRRefine[LANDAU_MAX_GRIDS]; /* uniform refinement of AMR */
113   /* relativistic */
114   PetscBool      use_energy_tensor_trick;
115   PetscBool      use_relativistic_corrections;
116   /* physics */
117   PetscReal      thermal_temps[LANDAU_MAX_SPECIES];
118   PetscReal      masses[LANDAU_MAX_SPECIES];  /* mass of each species  */
119   PetscReal      charges[LANDAU_MAX_SPECIES]; /* charge of each species  */
120   PetscReal      n[LANDAU_MAX_SPECIES];       /* number density of each species  */
121   PetscReal      m_0;      /* reference mass */
122   PetscReal      v_0;      /* reference velocity */
123   PetscReal      n_0;      /* reference number density */
124   PetscReal      t_0;      /* reference time */
125   PetscReal      Ez;
126   PetscReal      epsilon0;
127   PetscReal      k;
128   PetscReal      lnLam;
129   PetscReal      electronShift;
130   PetscInt       num_species;
131   PetscInt       num_grids;
132   PetscInt       species_offset[LANDAU_MAX_GRIDS+1]; // for each grid, but same for all batched vertices
133   PetscInt       mat_offset[LANDAU_MAX_GRIDS+1]; // for each grid, but same for all batched vertices
134   /* cache */
135   Mat              J;
136   Mat              M;
137   Vec              X;
138   /* derived type */
139   void             *data;
140   /* computing */
141   LandauDeviceType deviceType;
142   PetscInt         subThreadBlockSize;
143   PetscInt         numConcurrency; /* number of SMs in Cuda to use */
144   DM               pack;
145   DM               plex[LANDAU_MAX_GRIDS];
146   LandauStaticData SData_d; /* static geometric data on device */
147   /* diagnostics */
148   PetscInt         verbose;
149   PetscLogEvent    events[20];
150   PetscLogStage    stage;
151   PetscBool        aux_bool;  /* helper */
152   PetscInt         batch_sz;
153   PetscInt         batch_view_idx;
154 } LandauCtx;
155 
156 #define LANDAU_SPECIES_MAJOR
157 #if !defined(LANDAU_SPECIES_MAJOR)
158 #define LAND_PACK_IDX(_b,_g) (_b*ctx->num_grids + _g)
159 #define LAND_MOFFSET(_b,_g,_nbch,_ngrid,_mat_off) (_b*_mat_off[_ngrid] + _mat_off[_g])
160 #else
161 #define LAND_PACK_IDX(_b,_g) (_g*ctx->batch_sz + _b)
162 #define LAND_MOFFSET(_b,_g,_nbch,_ngrid,_mat_off) (_nbch*_mat_off[_g] + _b*(_mat_off[_g+1] - _mat_off[_g]))
163 #endif
164 
165 typedef int LandauIdx;
166 typedef struct {
167   PetscReal scale;
168   LandauIdx gid;   // Lanadu matrix index (<10,000)
169 } pointInterpolationP4est;
170 typedef struct _lP4estVertexMaps {
171   LandauIdx                (*gIdx)[LANDAU_MAX_SPECIES][LANDAU_MAX_NQ]; // #elems *  LANDAU_MAX_NQ (spoof for max , Nb) on device,
172   LandauIdx                num_elements;
173   LandauIdx                num_reduced;
174   LandauIdx                num_face;  // (Q or Q^2 for 3D)
175   LandauDeviceType         deviceType;
176   PetscInt                 Nf;
177   PetscInt                 Nq;
178   pointInterpolationP4est (*c_maps)[LANDAU_MAX_Q_FACE];
179   struct _lP4estVertexMaps*d_self;
180   void                    *vp1,*vp2,*vp3;
181   PetscInt                numgrids;
182 } P4estVertexMaps;
183 
184 PETSC_EXTERN PetscErrorCode LandauCreateColoring(Mat, DM, PetscContainer *);
185 #if defined(PETSC_HAVE_CUDA)
186 PETSC_EXTERN PetscErrorCode LandauCUDAJacobian(DM[], const PetscInt, const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[], const PetscInt, const PetscScalar[],
187                                                const LandauStaticData *, const PetscInt, const PetscReal, const PetscLogEvent[],const PetscInt[], const PetscInt[], Mat[], Mat);
188 PETSC_EXTERN PetscErrorCode LandauCUDACreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt, PetscInt);
189 PETSC_EXTERN PetscErrorCode LandauCUDADestroyMatMaps(P4estVertexMaps *, PetscInt);
190 PETSC_EXTERN PetscErrorCode LandauCUDAStaticDataSet(DM, const PetscInt, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal [], PetscReal [], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[],
191                                                     PetscReal[], LandauStaticData *);
192 PETSC_EXTERN PetscErrorCode LandauCUDAStaticDataClear(LandauStaticData *);
193 #endif
194 #if defined(PETSC_HAVE_KOKKOS)
195 PETSC_EXTERN PetscErrorCode LandauKokkosJacobian(DM[], const PetscInt, const PetscInt, const PetscInt, const PetscInt[], PetscReal[], PetscScalar[],  const PetscInt, const PetscScalar[],
196                                                  const LandauStaticData *, const PetscInt, const PetscReal, const PetscLogEvent[], const PetscInt[], const PetscInt[], Mat[], Mat);
197 PETSC_EXTERN PetscErrorCode LandauKokkosCreateMatMaps(P4estVertexMaps *, pointInterpolationP4est (*)[LANDAU_MAX_Q_FACE], PetscInt[], PetscInt, PetscInt);
198 PETSC_EXTERN PetscErrorCode LandauKokkosDestroyMatMaps(P4estVertexMaps *, PetscInt);
199 PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataSet(DM, const PetscInt, const PetscInt, const PetscInt, PetscInt[], PetscInt[], PetscInt[], PetscReal [], PetscReal [], PetscReal[], PetscReal[], PetscReal[], PetscReal[], PetscReal[],
200                                                       PetscReal[], LandauStaticData *);
201 PETSC_EXTERN PetscErrorCode LandauKokkosStaticDataClear(LandauStaticData*);
202 #endif
203 
204 #endif /* PETSCLANDAU_H */
205