xref: /libCEED/examples/fluids/qfunctions/newtonian.h (revision 871db79f2f3e2ed5b6659a8b21aea7cbbaad5cb5)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 /// @file
9 /// Operator for Navier-Stokes example using PETSc
10 
11 
12 #ifndef newtonian_h
13 #define newtonian_h
14 
15 #include <math.h>
16 #include <ceed.h>
17 
18 #ifndef M_PI
19 #define M_PI    3.14159265358979323846
20 #endif
21 
22 #ifndef setup_context_struct
23 #define setup_context_struct
24 typedef struct SetupContext_ *SetupContext;
25 struct SetupContext_ {
26   CeedScalar theta0;
27   CeedScalar thetaC;
28   CeedScalar P0;
29   CeedScalar N;
30   CeedScalar cv;
31   CeedScalar cp;
32   CeedScalar g[3];
33   CeedScalar rc;
34   CeedScalar lx;
35   CeedScalar ly;
36   CeedScalar lz;
37   CeedScalar center[3];
38   CeedScalar dc_axis[3];
39   CeedScalar wind[3];
40   CeedScalar time;
41   int wind_type;              // See WindType: 0=ROTATION, 1=TRANSLATION
42   int bubble_type;            // See BubbleType: 0=SPHERE, 1=CYLINDER
43   int bubble_continuity_type; // See BubbleContinuityType: 0=SMOOTH, 1=BACK_SHARP 2=THICK
44 };
45 #endif
46 
47 #ifndef newtonian_context_struct
48 #define newtonian_context_struct
49 typedef enum {
50   STAB_NONE = 0,
51   STAB_SU   = 1, // Streamline Upwind
52   STAB_SUPG = 2, // Streamline Upwind Petrov-Galerkin
53 } StabilizationType;
54 
55 typedef struct NewtonianIdealGasContext_ *NewtonianIdealGasContext;
56 struct NewtonianIdealGasContext_ {
57   CeedScalar lambda;
58   CeedScalar mu;
59   CeedScalar k;
60   CeedScalar cv;
61   CeedScalar cp;
62   CeedScalar g[3];
63   CeedScalar c_tau;
64   CeedScalar Ctau_t;
65   CeedScalar Ctau_v;
66   CeedScalar Ctau_C;
67   CeedScalar Ctau_M;
68   CeedScalar Ctau_E;
69   CeedScalar dt;
70   StabilizationType stabilization;
71 };
72 #endif
73 
74 // *****************************************************************************
75 // Helper function for computing flux Jacobian
76 // *****************************************************************************
77 CEED_QFUNCTION_HELPER void computeFluxJacobian_NS(CeedScalar dF[3][5][5],
78     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
79     const CeedScalar gamma, const CeedScalar g[3], const CeedScalar x[3]) {
80   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
81   CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
82   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
83     for (CeedInt j=0; j<3; j++) { // Rows of each Jacobian matrix
84       dF[i][j+1][0] = ((i==j) ? ((gamma-1.)*(u_sq/2. - e_potential)) : 0.) -
85                       u[i]*u[j];
86       for (CeedInt k=0; k<3; k++) { // Columns of each Jacobian matrix
87         dF[i][0][k+1]   = ((i==k) ? 1. : 0.);
88         dF[i][j+1][k+1] = ((j==k) ? u[i] : 0.) +
89                           ((i==k) ? u[j] : 0.) -
90                           ((i==j) ? u[k] : 0.) * (gamma-1.);
91         dF[i][4][k+1]   = ((i==k) ? (E*gamma/rho - (gamma-1.)*u_sq/2.) : 0.) -
92                           (gamma-1.)*u[i]*u[k];
93       }
94       dF[i][j+1][4] = ((i==j) ? (gamma-1.) : 0.);
95     }
96     dF[i][4][0] = u[i] * ((gamma-1.)*u_sq - E*gamma/rho);
97     dF[i][4][4] = u[i] * gamma;
98   }
99 }
100 
101 // *****************************************************************************
102 // Helper function for computing flux Jacobian of Primitive variables
103 // *****************************************************************************
104 CEED_QFUNCTION_HELPER void computeFluxJacobian_NSp(CeedScalar dF[3][5][5],
105     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
106     const CeedScalar Rd, const CeedScalar cv) {
107   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
108   // TODO Add in gravity's contribution
109 
110   CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
111   CeedScalar drdT = -rho / T;
112   CeedScalar drdP = 1. / ( Rd * T);
113   CeedScalar etot =  E / rho ;
114   CeedScalar e2p  = drdP * etot + 1. ;
115   CeedScalar e3p  = ( E  + rho * Rd * T );
116   CeedScalar e4p  = drdT * etot + rho * cv ;
117 
118   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
119     for (CeedInt j=0; j<3; j++) { // j counts F^{m_j}
120 //        [row][col] of A_i
121       dF[i][j+1][0] = drdP * u[i] * u[j] + ((i==j) ? 1. : 0.); // F^{{m_j} wrt p
122       for (CeedInt k=0; k<3; k++) { // k counts the wrt vel_k
123         dF[i][0][k+1]   =  ((i==k) ? rho  : 0.);   // F^c wrt u_k
124         dF[i][j+1][k+1] = (((j==k) ? u[i] : 0.) +  // F^m_j wrt u_k
125                            ((i==k) ? u[j] : 0.) ) * rho;
126         dF[i][4][k+1]   = rho * u[i] * u[k]
127                           + ((i==k) ? e3p  : 0.) ; // F^e wrt u_k
128       }
129       dF[i][j+1][4] = drdT * u[i] * u[j]; // F^{m_j} wrt T
130     }
131     dF[i][4][0] = u[i] * e2p; // F^e wrt p
132     dF[i][4][4] = u[i] * e4p; // F^e wrt T
133     dF[i][0][0] = u[i] * drdP; // F^c wrt p
134     dF[i][0][4] = u[i] * drdT; // F^c wrt T
135   }
136 }
137 
138 CEED_QFUNCTION_HELPER void PrimitiveToConservative_fwd(const CeedScalar rho,
139     const CeedScalar u[3], const CeedScalar E, const CeedScalar Rd,
140     const CeedScalar cv, const CeedScalar dY[5], CeedScalar dU[5]) {
141   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2];
142   CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
143   CeedScalar drdT = -rho / T;
144   CeedScalar drdP = 1. / ( Rd * T);
145   dU[0] = drdP * dY[0] + drdT * dY[4];
146   CeedScalar de_kinetic = 0;
147   for (int i=0; i<3; i++) {
148     dU[1+i] = dU[0] * u[i] + rho * dY[1+i];
149     de_kinetic += u[i] * dY[1+i];
150   }
151   dU[4] = rho * cv * dY[4] + dU[0] * cv * T // internal energy: rho * e
152           + rho * de_kinetic + .5 * dU[0] * u_sq; // kinetic energy: .5 * rho * |u|^2
153 }
154 
155 // *****************************************************************************
156 // Helper function for computing Tau elements (stabilization constant)
157 //   Model from:
158 //     PHASTA
159 //
160 //   Tau[i] = itau=0 which is diagonal-Shakib (3 values still but not spatial)
161 //
162 // Where NOT UPDATED YET
163 // *****************************************************************************
164 CEED_QFUNCTION_HELPER void Tau_diagPrim(CeedScalar Tau_d[3],
165                                         const CeedScalar dXdx[3][3], const CeedScalar u[3],
166                                         const CeedScalar cv, const NewtonianIdealGasContext newt_ctx,
167                                         const CeedScalar mu, const CeedScalar dt,
168                                         const CeedScalar rho) {
169   // Context
170   const CeedScalar Ctau_t = newt_ctx->Ctau_t;
171   const CeedScalar Ctau_v = newt_ctx->Ctau_v;
172   const CeedScalar Ctau_C = newt_ctx->Ctau_C;
173   const CeedScalar Ctau_M = newt_ctx->Ctau_M;
174   const CeedScalar Ctau_E = newt_ctx->Ctau_E;
175   CeedScalar gijd[6];
176   CeedScalar tau;
177   CeedScalar dts;
178   CeedScalar fact;
179 
180   //*INDENT-OFF*
181   gijd[0] =   dXdx[0][0] * dXdx[0][0]
182             + dXdx[1][0] * dXdx[1][0]
183             + dXdx[2][0] * dXdx[2][0];
184 
185   gijd[1] =   dXdx[0][0] * dXdx[0][1]
186             + dXdx[1][0] * dXdx[1][1]
187             + dXdx[2][0] * dXdx[2][1];
188 
189   gijd[2] =   dXdx[0][1] * dXdx[0][1]
190             + dXdx[1][1] * dXdx[1][1]
191             + dXdx[2][1] * dXdx[2][1];
192 
193   gijd[3] =   dXdx[0][0] * dXdx[0][2]
194             + dXdx[1][0] * dXdx[1][2]
195             + dXdx[2][0] * dXdx[2][2];
196 
197   gijd[4] =   dXdx[0][1] * dXdx[0][2]
198             + dXdx[1][1] * dXdx[1][2]
199             + dXdx[2][1] * dXdx[2][2];
200 
201   gijd[5] =   dXdx[0][2] * dXdx[0][2]
202             + dXdx[1][2] * dXdx[1][2]
203             + dXdx[2][2] * dXdx[2][2];
204   //*INDENT-ON*
205 
206   dts = Ctau_t / dt ;
207 
208   tau = rho*rho*((4. * dts * dts)
209                  + u[0] * ( u[0] * gijd[0] + 2. * ( u[1] * gijd[1] + u[2] * gijd[3]))
210                  + u[1] * ( u[1] * gijd[2] + 2. *   u[2] * gijd[4])
211                  + u[2] *   u[2] * gijd[5])
212         + Ctau_v* mu * mu *
213         (gijd[0]*gijd[0] + gijd[2]*gijd[2] + gijd[5]*gijd[5] +
214          + 2. * (gijd[1]*gijd[1] + gijd[3]*gijd[3] + gijd[4]*gijd[4]));
215 
216   fact=sqrt(tau);
217 
218   Tau_d[0] = Ctau_C * fact / (rho*(gijd[0] + gijd[2] + gijd[5]))*0.125;
219 
220   Tau_d[1] = Ctau_M / fact;
221   Tau_d[2] = Ctau_E / ( fact * cv );
222 
223 // consider putting back the way I initially had it  Ctau_E * Tau_d[1] /cv
224 //  to avoid a division if the compiler is smart enough to see that cv IS
225 // a constant that it could invert once for all elements
226 // but in that case energy tau is scaled by the product of Ctau_E * Ctau_M
227 // OR we could absorb cv into Ctau_E but this puts more burden on user to
228 // know how to change constants with a change of fluid or units.  Same for
229 // Ctau_v * mu * mu IF AND ONLY IF we don't add viscosity law =f(T)
230 }
231 
232 // *****************************************************************************
233 // Helper function for computing Tau elements (stabilization constant)
234 //   Model from:
235 //     Stabilized Methods for Compressible Flows, Hughes et al 2010
236 //
237 //   Spatial criterion #2 - Tau is a 3x3 diagonal matrix
238 //   Tau[i] = c_tau h[i] Xi(Pe) / rho(A[i]) (no sum)
239 //
240 // Where
241 //   c_tau     = stabilization constant (0.5 is reported as "optimal")
242 //   h[i]      = 2 length(dxdX[i])
243 //   Pe        = Peclet number ( Pe = sqrt(u u) / dot(dXdx,u) diffusivity )
244 //   Xi(Pe)    = coth Pe - 1. / Pe (1. at large local Peclet number )
245 //   rho(A[i]) = spectral radius of the convective flux Jacobian i,
246 //               wave speed in direction i
247 // *****************************************************************************
248 CEED_QFUNCTION_HELPER void Tau_spatial(CeedScalar Tau_x[3],
249                                        const CeedScalar dXdx[3][3], const CeedScalar u[3],
250                                        /* const CeedScalar sound_speed, const CeedScalar c_tau) { */
251                                        const CeedScalar sound_speed, const CeedScalar c_tau,
252                                        const CeedScalar viscosity) {
253   const CeedScalar mag_u_visc = sqrt(u[0]*u[0] +u[1]*u[1] +u[2]*u[2]) /
254                                 (2*viscosity);
255   for (int i=0; i<3; i++) {
256     // length of element in direction i
257     CeedScalar h = 2 / sqrt(dXdx[0][i]*dXdx[0][i] + dXdx[1][i]*dXdx[1][i] +
258                             dXdx[2][i]*dXdx[2][i]);
259     CeedScalar Pe = mag_u_visc*h;
260     CeedScalar Xi = 1/tanh(Pe) - 1/Pe;
261     // fastest wave in direction i
262     CeedScalar fastest_wave = fabs(u[i]) + sound_speed;
263     Tau_x[i] = c_tau * h * Xi / fastest_wave;
264   }
265 }
266 
267 // *****************************************************************************
268 // This QFunction sets a "still" initial condition for generic Newtonian IG problems
269 // *****************************************************************************
270 CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q,
271                                const CeedScalar *const *in, CeedScalar *const *out) {
272   // Inputs
273   const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0];
274 
275   // Outputs
276   CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
277 
278   // Context
279   const SetupContext context = (SetupContext)ctx;
280   const CeedScalar theta0    = context->theta0;
281   const CeedScalar P0        = context->P0;
282   const CeedScalar cv        = context->cv;
283   const CeedScalar cp        = context->cp;
284   const CeedScalar *g        = context->g;
285   const CeedScalar Rd        = cp - cv;
286 
287   // Quadrature Point Loop
288   CeedPragmaSIMD
289   for (CeedInt i=0; i<Q; i++) {
290     CeedScalar q[5] = {0.};
291 
292     // Setup
293     // -- Coordinates
294     const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]};
295     const CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
296 
297     // -- Density
298     const CeedScalar rho = P0 / (Rd*theta0);
299 
300     // Initial Conditions
301     q[0] = rho;
302     q[1] = 0.0;
303     q[2] = 0.0;
304     q[3] = 0.0;
305     q[4] = rho * (cv*theta0 + e_potential);
306 
307     for (CeedInt j=0; j<5; j++)
308       q0[j][i] = q[j];
309   } // End of Quadrature Point Loop
310   return 0;
311 }
312 
313 // *****************************************************************************
314 // This QFunction implements the following formulation of Navier-Stokes with
315 //   explicit time stepping method
316 //
317 // This is 3D compressible Navier-Stokes in conservation form with state
318 //   variables of density, momentum density, and total energy density.
319 //
320 // State Variables: q = ( rho, U1, U2, U3, E )
321 //   rho - Mass Density
322 //   Ui  - Momentum Density,      Ui = rho ui
323 //   E   - Total Energy Density,  E  = rho (cv T + (u u)/2 + g z)
324 //
325 // Navier-Stokes Equations:
326 //   drho/dt + div( U )                               = 0
327 //   dU/dt   + div( rho (u x u) + P I3 ) + rho g khat = div( Fu )
328 //   dE/dt   + div( (E + P) u )                       = div( Fe )
329 //
330 // Viscous Stress:
331 //   Fu = mu (grad( u ) + grad( u )^T + lambda div ( u ) I3)
332 //
333 // Thermal Stress:
334 //   Fe = u Fu + k grad( T )
335 // Equation of State
336 //   P = (gamma - 1) (E - rho (u u) / 2 - rho g z)
337 //
338 // Stabilization:
339 //   Tau = diag(TauC, TauM, TauM, TauM, TauE)
340 //     f1 = rho  sqrt(ui uj gij)
341 //     gij = dXi/dX * dXi/dX
342 //     TauC = Cc f1 / (8 gii)
343 //     TauM = min( 1 , 1 / f1 )
344 //     TauE = TauM / (Ce cv)
345 //
346 //  SU   = Galerkin + grad(v) . ( Ai^T * Tau * (Aj q,j) )
347 //
348 // Constants:
349 //   lambda = - 2 / 3,  From Stokes hypothesis
350 //   mu              ,  Dynamic viscosity
351 //   k               ,  Thermal conductivity
352 //   cv              ,  Specific heat, constant volume
353 //   cp              ,  Specific heat, constant pressure
354 //   g               ,  Gravity
355 //   gamma  = cp / cv,  Specific heat ratio
356 //
357 // We require the product of the inverse of the Jacobian (dXdx_j,k) and
358 // its transpose (dXdx_k,j) to properly compute integrals of the form:
359 // int( gradv gradu )
360 //
361 // *****************************************************************************
362 CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
363                           const CeedScalar *const *in, CeedScalar *const *out) {
364   // *INDENT-OFF*
365   // Inputs
366   const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
367                    (*dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
368                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
369                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3];
370   // Outputs
371   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
372              (*dv)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
373   // *INDENT-ON*
374 
375   // Context
376   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
377   const CeedScalar lambda = context->lambda;
378   const CeedScalar mu     = context->mu;
379   const CeedScalar k      = context->k;
380   const CeedScalar cv     = context->cv;
381   const CeedScalar cp     = context->cp;
382   const CeedScalar *g     = context->g;
383   const CeedScalar dt     = context->dt;
384   const CeedScalar gamma  = cp / cv;
385   const CeedScalar Rd     = cp - cv;
386 
387   CeedPragmaSIMD
388   // Quadrature Point Loop
389   for (CeedInt i=0; i<Q; i++) {
390     // *INDENT-OFF*
391     // Setup
392     // -- Interp in
393     const CeedScalar rho        =   q[0][i];
394     const CeedScalar u[3]       =  {q[1][i] / rho,
395                                     q[2][i] / rho,
396                                     q[3][i] / rho
397                                    };
398     const CeedScalar E          =   q[4][i];
399     // -- Grad in
400     const CeedScalar drho[3]    =  {dq[0][0][i],
401                                     dq[1][0][i],
402                                     dq[2][0][i]
403                                    };
404     const CeedScalar dU[3][3]   = {{dq[0][1][i],
405                                     dq[1][1][i],
406                                     dq[2][1][i]},
407                                    {dq[0][2][i],
408                                     dq[1][2][i],
409                                     dq[2][2][i]},
410                                    {dq[0][3][i],
411                                     dq[1][3][i],
412                                     dq[2][3][i]}
413                                   };
414     const CeedScalar dE[3]      =  {dq[0][4][i],
415                                     dq[1][4][i],
416                                     dq[2][4][i]
417                                    };
418     // -- Interp-to-Interp q_data
419     const CeedScalar wdetJ      =   q_data[0][i];
420     // -- Interp-to-Grad q_data
421     // ---- Inverse of change of coordinate matrix: X_i,j
422     // *INDENT-OFF*
423     const CeedScalar dXdx[3][3] = {{q_data[1][i],
424                                     q_data[2][i],
425                                     q_data[3][i]},
426                                    {q_data[4][i],
427                                     q_data[5][i],
428                                     q_data[6][i]},
429                                    {q_data[7][i],
430                                     q_data[8][i],
431                                     q_data[9][i]}
432                                   };
433     const CeedScalar x_i[3]       = {x[0][i], x[1][i], x[2][i]};
434     // *INDENT-ON*
435     // -- Grad-to-Grad q_data
436     // dU/dx
437     CeedScalar du[3][3] = {{0}};
438     CeedScalar drhodx[3] = {0};
439     CeedScalar dEdx[3] = {0};
440     CeedScalar dUdx[3][3] = {{0}};
441     CeedScalar dXdxdXdxT[3][3] = {{0}};
442     for (int j=0; j<3; j++) {
443       for (int k=0; k<3; k++) {
444         du[j][k] = (dU[j][k] - drho[k]*u[j]) / rho;
445         drhodx[j] += drho[k] * dXdx[k][j];
446         dEdx[j] += dE[k] * dXdx[k][j];
447         for (int l=0; l<3; l++) {
448           dUdx[j][k] += dU[j][l] * dXdx[l][k];
449           dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  //dXdx_j,k * dXdx_k,j
450         }
451       }
452     }
453     CeedScalar dudx[3][3] = {{0}};
454     for (int j=0; j<3; j++)
455       for (int k=0; k<3; k++)
456         for (int l=0; l<3; l++)
457           dudx[j][k] += du[j][l] * dXdx[l][k];
458     // -- grad_T
459     const CeedScalar grad_T[3]  = {(dEdx[0]/rho - E*drhodx[0]/(rho*rho) - /* *NOPAD* */
460                                     (u[0]*dudx[0][0] + u[1]*dudx[1][0] + u[2]*dudx[2][0]) + g[0])/cv,
461                                    (dEdx[1]/rho - E*drhodx[1]/(rho*rho) - /* *NOPAD* */
462                                     (u[0]*dudx[0][1] + u[1]*dudx[1][1] + u[2]*dudx[2][1]) + g[1])/cv,
463                                    (dEdx[2]/rho - E*drhodx[2]/(rho*rho) - /* *NOPAD* */
464                                     (u[0]*dudx[0][2] + u[1]*dudx[1][2] + u[2]*dudx[2][2]) + g[2])/cv
465                                   };
466 
467     // -- Fuvisc
468     // ---- Symmetric 3x3 matrix
469     const CeedScalar Fu[6]     =  {mu*(dudx[0][0] * (2 + lambda) + /* *NOPAD* */
470                                        lambda * (dudx[1][1] + dudx[2][2])),
471                                    mu*(dudx[0][1] + dudx[1][0]), /* *NOPAD* */
472                                    mu*(dudx[0][2] + dudx[2][0]), /* *NOPAD* */
473                                    mu*(dudx[1][1] * (2 + lambda) + /* *NOPAD* */
474                                        lambda * (dudx[0][0] + dudx[2][2])),
475                                    mu*(dudx[1][2] + dudx[2][1]), /* *NOPAD* */
476                                    mu*(dudx[2][2] * (2 + lambda) + /* *NOPAD* */
477                                        lambda * (dudx[0][0] + dudx[1][1]))
478                                   };
479     // -- Fevisc
480     const CeedScalar Fe[3]     =  {u[0]*Fu[0] + u[1]*Fu[1] + u[2]*Fu[2] + /* *NOPAD* */
481                                    k*grad_T[0], /* *NOPAD* */
482                                    u[0]*Fu[1] + u[1]*Fu[3] + u[2]*Fu[4] + /* *NOPAD* */
483                                    k*grad_T[1], /* *NOPAD* */
484                                    u[0]*Fu[2] + u[1]*Fu[4] + u[2]*Fu[5] + /* *NOPAD* */
485                                    k*grad_T[2] /* *NOPAD* */
486                                   };
487     // Pressure
488     const CeedScalar
489     E_kinetic   = 0.5 * rho * (u[0]*u[0] + u[1]*u[1] + u[2]*u[2]),
490     E_potential = -rho*(g[0]*x_i[0] + g[1]*x_i[1] + g[2]*x_i[2]),
491     E_internal  = E - E_kinetic - E_potential,
492     P           = E_internal * (gamma - 1.); // P = pressure
493 
494     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
495     CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
496     computeFluxJacobian_NS(jacob_F_conv, rho, u, E, gamma, g, x_i);
497 
498     // dqdx collects drhodx, dUdx and dEdx in one vector
499     CeedScalar dqdx[5][3];
500     for (int j=0; j<3; j++) {
501       dqdx[0][j] = drhodx[j];
502       dqdx[4][j] = dEdx[j];
503       for (int k=0; k<3; k++)
504         dqdx[k+1][j] = dUdx[k][j];
505     }
506 
507     // strong_conv = dF/dq * dq/dx    (Strong convection)
508     CeedScalar strong_conv[5] = {0};
509     for (int j=0; j<3; j++)
510       for (int k=0; k<5; k++)
511         for (int l=0; l<5; l++)
512           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
513 
514     // Body force
515     const CeedScalar body_force[5] = {0, rho *g[0], rho *g[1], rho *g[2], 0};
516 
517     // The Physics
518     // Zero dv so all future terms can safely sum into it
519     for (int j=0; j<5; j++)
520       for (int k=0; k<3; k++)
521         dv[k][j][i] = 0;
522 
523     // -- Density
524     // ---- u rho
525     for (int j=0; j<3; j++)
526       dv[j][0][i]  += wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
527                              rho*u[2]*dXdx[j][2]);
528     // -- Momentum
529     // ---- rho (u x u) + P I3
530     for (int j=0; j<3; j++)
531       for (int k=0; k<3; k++)
532         dv[k][j+1][i]  += wdetJ*((rho*u[j]*u[0] + (j==0?P:0))*dXdx[k][0] +
533                                  (rho*u[j]*u[1] + (j==1?P:0))*dXdx[k][1] +
534                                  (rho*u[j]*u[2] + (j==2?P:0))*dXdx[k][2]);
535     // ---- Fuvisc
536     const CeedInt Fuviscidx[3][3] = {{0, 1, 2}, {1, 3, 4}, {2, 4, 5}}; // symmetric matrix indices
537     for (int j=0; j<3; j++)
538       for (int k=0; k<3; k++)
539         dv[k][j+1][i] -= wdetJ*(Fu[Fuviscidx[j][0]]*dXdx[k][0] +
540                                 Fu[Fuviscidx[j][1]]*dXdx[k][1] +
541                                 Fu[Fuviscidx[j][2]]*dXdx[k][2]);
542     // -- Total Energy Density
543     // ---- (E + P) u
544     for (int j=0; j<3; j++)
545       dv[j][4][i]  += wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
546                                          u[2]*dXdx[j][2]);
547     // ---- Fevisc
548     for (int j=0; j<3; j++)
549       dv[j][4][i] -= wdetJ * (Fe[0]*dXdx[j][0] + Fe[1]*dXdx[j][1] +
550                               Fe[2]*dXdx[j][2]);
551     // Body Force
552     for (int j=0; j<5; j++)
553       v[j][i] = wdetJ * body_force[j];
554 
555     // Spatial Stabilization
556     // -- Not used in favor of diagonal tau. Kept for future testing
557     // const CeedScalar sound_speed = sqrt(gamma * P / rho);
558     // CeedScalar Tau_x[3] = {0.};
559     // Tau_spatial(Tau_x, dXdx, u, sound_speed, context->c_tau, mu);
560 
561     // -- Stabilization method: none, SU, or SUPG
562     CeedScalar stab[5][3] = {{0.}};
563     CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
564     CeedScalar Tau_d[3] = {0.};
565     switch (context->stabilization) {
566     case STAB_NONE:        // Galerkin
567       break;
568     case STAB_SU:        // SU
569       Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
570       tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
571       tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
572       tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
573       tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
574       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
575       PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_conv,
576                                   tau_strong_conv_conservative);
577       for (int j=0; j<3; j++)
578         for (int k=0; k<5; k++)
579           for (int l=0; l<5; l++)
580             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
581 
582       for (int j=0; j<5; j++)
583         for (int k=0; k<3; k++)
584           dv[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] +
585                                 stab[j][1] * dXdx[k][1] +
586                                 stab[j][2] * dXdx[k][2]);
587       break;
588     case STAB_SUPG:        // SUPG is not implemented for explicit scheme
589       break;
590     }
591 
592   } // End Quadrature Point Loop
593 
594   // Return
595   return 0;
596 }
597 
598 // *****************************************************************************
599 // This QFunction implements the Navier-Stokes equations (mentioned above) with
600 //   implicit time stepping method
601 //
602 //  SU   = Galerkin + grad(v) . ( Ai^T * Tau * (Aj q,j) )
603 //  SUPG = Galerkin + grad(v) . ( Ai^T * Tau * (q_dot + Aj q,j - body force) )
604 //                                       (diffussive terms will be added later)
605 //
606 // *****************************************************************************
607 CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
608                                     const CeedScalar *const *in,
609                                     CeedScalar *const *out) {
610   // *INDENT-OFF*
611   // Inputs
612   const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
613                    (*dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
614                    (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
615                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3],
616                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
617   // Outputs
618   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
619              (*dv)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
620   // *INDENT-ON*
621   // Context
622   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
623   const CeedScalar lambda = context->lambda;
624   const CeedScalar mu     = context->mu;
625   const CeedScalar k      = context->k;
626   const CeedScalar cv     = context->cv;
627   const CeedScalar cp     = context->cp;
628   const CeedScalar *g     = context->g;
629   const CeedScalar dt     = context->dt;
630   const CeedScalar gamma  = cp / cv;
631   const CeedScalar Rd     = cp-cv;
632 
633   CeedPragmaSIMD
634   // Quadrature Point Loop
635   for (CeedInt i=0; i<Q; i++) {
636     // Setup
637     // -- Interp in
638     const CeedScalar rho        =   q[0][i];
639     const CeedScalar u[3]       =  {q[1][i] / rho,
640                                     q[2][i] / rho,
641                                     q[3][i] / rho
642                                    };
643     const CeedScalar E          =   q[4][i];
644     // -- Grad in
645     const CeedScalar drho[3]    =  {dq[0][0][i],
646                                     dq[1][0][i],
647                                     dq[2][0][i]
648                                    };
649     // *INDENT-OFF*
650     const CeedScalar dU[3][3]   = {{dq[0][1][i],
651                                     dq[1][1][i],
652                                     dq[2][1][i]},
653                                    {dq[0][2][i],
654                                     dq[1][2][i],
655                                     dq[2][2][i]},
656                                    {dq[0][3][i],
657                                     dq[1][3][i],
658                                     dq[2][3][i]}
659                                   };
660     // *INDENT-ON*
661     const CeedScalar dE[3]      =  {dq[0][4][i],
662                                     dq[1][4][i],
663                                     dq[2][4][i]
664                                    };
665     // -- Interp-to-Interp q_data
666     const CeedScalar wdetJ      =   q_data[0][i];
667     // -- Interp-to-Grad q_data
668     // ---- Inverse of change of coordinate matrix: X_i,j
669     // *INDENT-OFF*
670     const CeedScalar dXdx[3][3] = {{q_data[1][i],
671                                     q_data[2][i],
672                                     q_data[3][i]},
673                                    {q_data[4][i],
674                                     q_data[5][i],
675                                     q_data[6][i]},
676                                    {q_data[7][i],
677                                     q_data[8][i],
678                                     q_data[9][i]}
679                                   };
680     const CeedScalar x_i[3]     = {x[0][i], x[1][i], x[2][i]};
681     // *INDENT-ON*
682     // -- Grad-to-Grad q_data
683     // dU/dx
684     CeedScalar du[3][3] = {{0}};
685     CeedScalar drhodx[3] = {0};
686     CeedScalar dEdx[3] = {0};
687     CeedScalar dUdx[3][3] = {{0}};
688     CeedScalar dXdxdXdxT[3][3] = {{0}};
689     for (int j=0; j<3; j++) {
690       for (int k=0; k<3; k++) {
691         du[j][k] = (dU[j][k] - drho[k]*u[j]) / rho;
692         drhodx[j] += drho[k] * dXdx[k][j];
693         dEdx[j] += dE[k] * dXdx[k][j];
694         for (int l=0; l<3; l++) {
695           dUdx[j][k] += dU[j][l] * dXdx[l][k];
696           dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  //dXdx_j,k * dXdx_k,j
697         }
698       }
699     }
700     CeedScalar dudx[3][3] = {{0}};
701     for (int j=0; j<3; j++)
702       for (int k=0; k<3; k++)
703         for (int l=0; l<3; l++)
704           dudx[j][k] += du[j][l] * dXdx[l][k];
705     // -- grad_T
706     const CeedScalar grad_T[3]  = {(dEdx[0]/rho - E*drhodx[0]/(rho*rho) - /* *NOPAD* */
707                                     (u[0]*dudx[0][0] + u[1]*dudx[1][0] + u[2]*dudx[2][0]) + g[0])/cv,
708                                    (dEdx[1]/rho - E*drhodx[1]/(rho*rho) - /* *NOPAD* */
709                                     (u[0]*dudx[0][1] + u[1]*dudx[1][1] + u[2]*dudx[2][1]) + g[1])/cv,
710                                    (dEdx[2]/rho - E*drhodx[2]/(rho*rho) - /* *NOPAD* */
711                                     (u[0]*dudx[0][2] + u[1]*dudx[1][2] + u[2]*dudx[2][2]) + g[2])/cv
712                                   };
713     // -- Fuvisc
714     // ---- Symmetric 3x3 matrix
715     const CeedScalar Fu[6]     =  {mu*(dudx[0][0] * (2 + lambda) + /* *NOPAD* */
716                                        lambda * (dudx[1][1] + dudx[2][2])),
717                                    mu*(dudx[0][1] + dudx[1][0]), /* *NOPAD* */
718                                    mu*(dudx[0][2] + dudx[2][0]), /* *NOPAD* */
719                                    mu*(dudx[1][1] * (2 + lambda) + /* *NOPAD* */
720                                        lambda * (dudx[0][0] + dudx[2][2])),
721                                    mu*(dudx[1][2] + dudx[2][1]), /* *NOPAD* */
722                                    mu*(dudx[2][2] * (2 + lambda) + /* *NOPAD* */
723                                        lambda * (dudx[0][0] + dudx[1][1]))
724                                   };
725     // -- Fevisc
726     const CeedScalar Fe[3]     =  {u[0]*Fu[0] + u[1]*Fu[1] + u[2]*Fu[2] + /* *NOPAD* */
727                                    k*grad_T[0], /* *NOPAD* */
728                                    u[0]*Fu[1] + u[1]*Fu[3] + u[2]*Fu[4] + /* *NOPAD* */
729                                    k*grad_T[1], /* *NOPAD* */
730                                    u[0]*Fu[2] + u[1]*Fu[4] + u[2]*Fu[5] + /* *NOPAD* */
731                                    k*grad_T[2] /* *NOPAD* */
732                                   };
733     // Pressure
734     const CeedScalar
735     E_kinetic   = 0.5 * rho * (u[0]*u[0] + u[1]*u[1] + u[2]*u[2]),
736     E_potential = -rho*(g[0]*x_i[0] + g[1]*x_i[1] + g[2]*x_i[2]),
737     E_internal  = E - E_kinetic - E_potential,
738     P           = E_internal * (gamma - 1.); // P = pressure
739 
740     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
741     CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
742     computeFluxJacobian_NS(jacob_F_conv, rho, u, E, gamma, g, x_i);
743 
744     // dqdx collects drhodx, dUdx and dEdx in one vector
745     CeedScalar dqdx[5][3];
746     for (int j=0; j<3; j++) {
747       dqdx[0][j] = drhodx[j];
748       dqdx[4][j] = dEdx[j];
749       for (int k=0; k<3; k++)
750         dqdx[k+1][j] = dUdx[k][j];
751     }
752     // strong_conv = dF/dq * dq/dx    (Strong convection)
753     CeedScalar strong_conv[5] = {0};
754     for (int j=0; j<3; j++)
755       for (int k=0; k<5; k++)
756         for (int l=0; l<5; l++)
757           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
758 
759     // Body force
760     const CeedScalar body_force[5] = {0, rho *g[0], rho *g[1], rho *g[2], 0};
761 
762     // Strong residual
763     CeedScalar strong_res[5];
764     for (int j=0; j<5; j++)
765       strong_res[j] = q_dot[j][i] + strong_conv[j] - body_force[j];
766 
767     // The Physics
768     //-----mass matrix
769     for (int j=0; j<5; j++)
770       v[j][i] = wdetJ*q_dot[j][i];
771 
772     // Zero dv so all future terms can safely sum into it
773     for (int j=0; j<5; j++)
774       for (int k=0; k<3; k++)
775         dv[k][j][i] = 0;
776 
777     // -- Density
778     // ---- u rho
779     for (int j=0; j<3; j++)
780       dv[j][0][i]  -= wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
781                              rho*u[2]*dXdx[j][2]);
782     // -- Momentum
783     // ---- rho (u x u) + P I3
784     for (int j=0; j<3; j++)
785       for (int k=0; k<3; k++)
786         dv[k][j+1][i]  -= wdetJ*((rho*u[j]*u[0] + (j==0?P:0))*dXdx[k][0] +
787                                  (rho*u[j]*u[1] + (j==1?P:0))*dXdx[k][1] +
788                                  (rho*u[j]*u[2] + (j==2?P:0))*dXdx[k][2]);
789     // ---- Fuvisc
790     const CeedInt Fuviscidx[3][3] = {{0, 1, 2}, {1, 3, 4}, {2, 4, 5}}; // symmetric matrix indices
791     for (int j=0; j<3; j++)
792       for (int k=0; k<3; k++)
793         dv[k][j+1][i] += wdetJ*(Fu[Fuviscidx[j][0]]*dXdx[k][0] +
794                                 Fu[Fuviscidx[j][1]]*dXdx[k][1] +
795                                 Fu[Fuviscidx[j][2]]*dXdx[k][2]);
796     // -- Total Energy Density
797     // ---- (E + P) u
798     for (int j=0; j<3; j++)
799       dv[j][4][i]  -= wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
800                                          u[2]*dXdx[j][2]);
801     // ---- Fevisc
802     for (int j=0; j<3; j++)
803       dv[j][4][i] += wdetJ * (Fe[0]*dXdx[j][0] + Fe[1]*dXdx[j][1] +
804                               Fe[2]*dXdx[j][2]);
805     // Body Force
806     for (int j=0; j<5; j++)
807       v[j][i] -= wdetJ*body_force[j];
808 
809     // Spatial Stabilization
810     // -- Not used in favor of diagonal tau. Kept for future testing
811     // const CeedScalar sound_speed = sqrt(gamma * P / rho);
812     // CeedScalar Tau_x[3] = {0.};
813     // Tau_spatial(Tau_x, dXdx, u, sound_speed, c_tau, mu);
814 
815     // -- Stabilization method: none, SU, or SUPG
816     CeedScalar stab[5][3] = {{0.}};
817     CeedScalar tau_strong_res[5] = {0.}, tau_strong_res_conservative[5] = {0};
818     CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
819     CeedScalar Tau_d[3] = {0.};
820     switch (context->stabilization) {
821     case STAB_NONE:        // Galerkin
822       break;
823     case STAB_SU:        // SU
824       Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
825       tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
826       tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
827       tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
828       tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
829       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
830       PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_conv,
831                                   tau_strong_conv_conservative);
832       for (int j=0; j<3; j++)
833         for (int k=0; k<5; k++)
834           for (int l=0; l<5; l++)
835             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
836 
837       for (int j=0; j<5; j++)
838         for (int k=0; k<3; k++)
839           dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
840                                 stab[j][1] * dXdx[k][1] +
841                                 stab[j][2] * dXdx[k][2]);
842       break;
843     case STAB_SUPG:        // SUPG
844       Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
845       tau_strong_res[0] = Tau_d[0] * strong_res[0];
846       tau_strong_res[1] = Tau_d[1] * strong_res[1];
847       tau_strong_res[2] = Tau_d[1] * strong_res[2];
848       tau_strong_res[3] = Tau_d[1] * strong_res[3];
849       tau_strong_res[4] = Tau_d[2] * strong_res[4];
850 // Alternate route (useful later with primitive variable code)
851 // this function was verified against PHASTA for as IC that was as close as possible
852 //    computeFluxJacobian_NSp(jacob_F_conv_p, rho, u, E, Rd, cv);
853 // it has also been verified to compute a correct through the following
854 //   stab[k][j] += jacob_F_conv_p[j][k][l] * tau_strong_res[l] // flux Jacobian wrt primitive
855 // applied in the triple loop below
856 //  However, it is more flops than using the existing Jacobian wrt q after q_{,Y} viz
857       PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_res,
858                                   tau_strong_res_conservative);
859       for (int j=0; j<3; j++)
860         for (int k=0; k<5; k++)
861           for (int l=0; l<5; l++)
862             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_res_conservative[l];
863 
864       for (int j=0; j<5; j++)
865         for (int k=0; k<3; k++)
866           dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
867                                 stab[j][1] * dXdx[k][1] +
868                                 stab[j][2] * dXdx[k][2]);
869       break;
870     }
871 
872   } // End Quadrature Point Loop
873 
874   // Return
875   return 0;
876 }
877 // *****************************************************************************
878 #endif // newtonian_h
879