xref: /libCEED/examples/fluids/qfunctions/newtonian.h (revision c6e8c5707530fe7560ff4e372ff2ebcb18e91603)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 /// @file
9 /// Operator for Navier-Stokes example using PETSc
10 
11 
12 #ifndef newtonian_h
13 #define newtonian_h
14 
15 #include <math.h>
16 #include <ceed.h>
17 #include "newtonian_types.h"
18 #include "newtonian_state.h"
19 
20 #ifndef M_PI
21 #define M_PI    3.14159265358979323846
22 #endif
23 
24 // *****************************************************************************
25 // Helper function for computing flux Jacobian
26 // *****************************************************************************
27 CEED_QFUNCTION_HELPER void computeFluxJacobian_NS(CeedScalar dF[3][5][5],
28     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
29     const CeedScalar gamma, const CeedScalar g[3], const CeedScalar x[3]) {
30   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
31   CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
32   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
33     for (CeedInt j=0; j<3; j++) { // Rows of each Jacobian matrix
34       dF[i][j+1][0] = ((i==j) ? ((gamma-1.)*(u_sq/2. - e_potential)) : 0.) -
35                       u[i]*u[j];
36       for (CeedInt k=0; k<3; k++) { // Columns of each Jacobian matrix
37         dF[i][0][k+1]   = ((i==k) ? 1. : 0.);
38         dF[i][j+1][k+1] = ((j==k) ? u[i] : 0.) +
39                           ((i==k) ? u[j] : 0.) -
40                           ((i==j) ? u[k] : 0.) * (gamma-1.);
41         dF[i][4][k+1]   = ((i==k) ? (E*gamma/rho - (gamma-1.)*u_sq/2.) : 0.) -
42                           (gamma-1.)*u[i]*u[k];
43       }
44       dF[i][j+1][4] = ((i==j) ? (gamma-1.) : 0.);
45     }
46     dF[i][4][0] = u[i] * ((gamma-1.)*u_sq - E*gamma/rho);
47     dF[i][4][4] = u[i] * gamma;
48   }
49 }
50 
51 // *****************************************************************************
52 // Helper function for computing flux Jacobian of Primitive variables
53 // *****************************************************************************
54 CEED_QFUNCTION_HELPER void computeFluxJacobian_NSp(CeedScalar dF[3][5][5],
55     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
56     const CeedScalar Rd, const CeedScalar cv) {
57   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
58   // TODO Add in gravity's contribution
59 
60   CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
61   CeedScalar drdT = -rho / T;
62   CeedScalar drdP = 1. / ( Rd * T);
63   CeedScalar etot =  E / rho ;
64   CeedScalar e2p  = drdP * etot + 1. ;
65   CeedScalar e3p  = ( E  + rho * Rd * T );
66   CeedScalar e4p  = drdT * etot + rho * cv ;
67 
68   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
69     for (CeedInt j=0; j<3; j++) { // j counts F^{m_j}
70 //        [row][col] of A_i
71       dF[i][j+1][0] = drdP * u[i] * u[j] + ((i==j) ? 1. : 0.); // F^{{m_j} wrt p
72       for (CeedInt k=0; k<3; k++) { // k counts the wrt vel_k
73         dF[i][0][k+1]   =  ((i==k) ? rho  : 0.);   // F^c wrt u_k
74         dF[i][j+1][k+1] = (((j==k) ? u[i] : 0.) +  // F^m_j wrt u_k
75                            ((i==k) ? u[j] : 0.) ) * rho;
76         dF[i][4][k+1]   = rho * u[i] * u[k]
77                           + ((i==k) ? e3p  : 0.) ; // F^e wrt u_k
78       }
79       dF[i][j+1][4] = drdT * u[i] * u[j]; // F^{m_j} wrt T
80     }
81     dF[i][4][0] = u[i] * e2p; // F^e wrt p
82     dF[i][4][4] = u[i] * e4p; // F^e wrt T
83     dF[i][0][0] = u[i] * drdP; // F^c wrt p
84     dF[i][0][4] = u[i] * drdT; // F^c wrt T
85   }
86 }
87 
88 CEED_QFUNCTION_HELPER void PrimitiveToConservative_fwd(const CeedScalar rho,
89     const CeedScalar u[3], const CeedScalar E, const CeedScalar Rd,
90     const CeedScalar cv, const CeedScalar dY[5], CeedScalar dU[5]) {
91   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2];
92   CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
93   CeedScalar drdT = -rho / T;
94   CeedScalar drdP = 1. / ( Rd * T);
95   dU[0] = drdP * dY[0] + drdT * dY[4];
96   CeedScalar de_kinetic = 0;
97   for (CeedInt i=0; i<3; i++) {
98     dU[1+i] = dU[0] * u[i] + rho * dY[1+i];
99     de_kinetic += u[i] * dY[1+i];
100   }
101   dU[4] = rho * cv * dY[4] + dU[0] * cv * T // internal energy: rho * e
102           + rho * de_kinetic + .5 * dU[0] * u_sq; // kinetic energy: .5 * rho * |u|^2
103 }
104 
105 // *****************************************************************************
106 // Helper function for computing Tau elements (stabilization constant)
107 //   Model from:
108 //     PHASTA
109 //
110 //   Tau[i] = itau=0 which is diagonal-Shakib (3 values still but not spatial)
111 //
112 // Where NOT UPDATED YET
113 // *****************************************************************************
114 CEED_QFUNCTION_HELPER void Tau_diagPrim(CeedScalar Tau_d[3],
115                                         const CeedScalar dXdx[3][3], const CeedScalar u[3],
116                                         const CeedScalar cv, const NewtonianIdealGasContext newt_ctx,
117                                         const CeedScalar mu, const CeedScalar dt,
118                                         const CeedScalar rho) {
119   // Context
120   const CeedScalar Ctau_t = newt_ctx->Ctau_t;
121   const CeedScalar Ctau_v = newt_ctx->Ctau_v;
122   const CeedScalar Ctau_C = newt_ctx->Ctau_C;
123   const CeedScalar Ctau_M = newt_ctx->Ctau_M;
124   const CeedScalar Ctau_E = newt_ctx->Ctau_E;
125   CeedScalar gijd[6];
126   CeedScalar tau;
127   CeedScalar dts;
128   CeedScalar fact;
129 
130   //*INDENT-OFF*
131   gijd[0] =   dXdx[0][0] * dXdx[0][0]
132             + dXdx[1][0] * dXdx[1][0]
133             + dXdx[2][0] * dXdx[2][0];
134 
135   gijd[1] =   dXdx[0][0] * dXdx[0][1]
136             + dXdx[1][0] * dXdx[1][1]
137             + dXdx[2][0] * dXdx[2][1];
138 
139   gijd[2] =   dXdx[0][1] * dXdx[0][1]
140             + dXdx[1][1] * dXdx[1][1]
141             + dXdx[2][1] * dXdx[2][1];
142 
143   gijd[3] =   dXdx[0][0] * dXdx[0][2]
144             + dXdx[1][0] * dXdx[1][2]
145             + dXdx[2][0] * dXdx[2][2];
146 
147   gijd[4] =   dXdx[0][1] * dXdx[0][2]
148             + dXdx[1][1] * dXdx[1][2]
149             + dXdx[2][1] * dXdx[2][2];
150 
151   gijd[5] =   dXdx[0][2] * dXdx[0][2]
152             + dXdx[1][2] * dXdx[1][2]
153             + dXdx[2][2] * dXdx[2][2];
154   //*INDENT-ON*
155 
156   dts = Ctau_t / dt ;
157 
158   tau = rho*rho*((4. * dts * dts)
159                  + u[0] * ( u[0] * gijd[0] + 2. * ( u[1] * gijd[1] + u[2] * gijd[3]))
160                  + u[1] * ( u[1] * gijd[2] + 2. *   u[2] * gijd[4])
161                  + u[2] *   u[2] * gijd[5])
162         + Ctau_v* mu * mu *
163         (gijd[0]*gijd[0] + gijd[2]*gijd[2] + gijd[5]*gijd[5] +
164          + 2. * (gijd[1]*gijd[1] + gijd[3]*gijd[3] + gijd[4]*gijd[4]));
165 
166   fact=sqrt(tau);
167 
168   Tau_d[0] = Ctau_C * fact / (rho*(gijd[0] + gijd[2] + gijd[5]))*0.125;
169 
170   Tau_d[1] = Ctau_M / fact;
171   Tau_d[2] = Ctau_E / ( fact * cv );
172 
173 // consider putting back the way I initially had it  Ctau_E * Tau_d[1] /cv
174 //  to avoid a division if the compiler is smart enough to see that cv IS
175 // a constant that it could invert once for all elements
176 // but in that case energy tau is scaled by the product of Ctau_E * Ctau_M
177 // OR we could absorb cv into Ctau_E but this puts more burden on user to
178 // know how to change constants with a change of fluid or units.  Same for
179 // Ctau_v * mu * mu IF AND ONLY IF we don't add viscosity law =f(T)
180 }
181 
182 // *****************************************************************************
183 // This QFunction sets a "still" initial condition for generic Newtonian IG problems
184 // *****************************************************************************
185 CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q,
186                                const CeedScalar *const *in, CeedScalar *const *out) {
187   // Inputs
188   const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0];
189 
190   // Outputs
191   CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
192 
193   // Context
194   const SetupContext context = (SetupContext)ctx;
195   const CeedScalar theta0    = context->theta0;
196   const CeedScalar P0        = context->P0;
197   const CeedScalar cv        = context->cv;
198   const CeedScalar cp        = context->cp;
199   const CeedScalar *g        = context->g;
200   const CeedScalar Rd        = cp - cv;
201 
202   // Quadrature Point Loop
203   CeedPragmaSIMD
204   for (CeedInt i=0; i<Q; i++) {
205     CeedScalar q[5] = {0.};
206 
207     // Setup
208     // -- Coordinates
209     const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]};
210     const CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
211 
212     // -- Density
213     const CeedScalar rho = P0 / (Rd*theta0);
214 
215     // Initial Conditions
216     q[0] = rho;
217     q[1] = 0.0;
218     q[2] = 0.0;
219     q[3] = 0.0;
220     q[4] = rho * (cv*theta0 + e_potential);
221 
222     for (CeedInt j=0; j<5; j++)
223       q0[j][i] = q[j];
224   } // End of Quadrature Point Loop
225   return 0;
226 }
227 
228 // *****************************************************************************
229 // This QFunction implements the following formulation of Navier-Stokes with
230 //   explicit time stepping method
231 //
232 // This is 3D compressible Navier-Stokes in conservation form with state
233 //   variables of density, momentum density, and total energy density.
234 //
235 // State Variables: q = ( rho, U1, U2, U3, E )
236 //   rho - Mass Density
237 //   Ui  - Momentum Density,      Ui = rho ui
238 //   E   - Total Energy Density,  E  = rho (cv T + (u u)/2 + g z)
239 //
240 // Navier-Stokes Equations:
241 //   drho/dt + div( U )                               = 0
242 //   dU/dt   + div( rho (u x u) + P I3 ) + rho g khat = div( Fu )
243 //   dE/dt   + div( (E + P) u )                       = div( Fe )
244 //
245 // Viscous Stress:
246 //   Fu = mu (grad( u ) + grad( u )^T + lambda div ( u ) I3)
247 //
248 // Thermal Stress:
249 //   Fe = u Fu + k grad( T )
250 // Equation of State
251 //   P = (gamma - 1) (E - rho (u u) / 2 - rho g z)
252 //
253 // Stabilization:
254 //   Tau = diag(TauC, TauM, TauM, TauM, TauE)
255 //     f1 = rho  sqrt(ui uj gij)
256 //     gij = dXi/dX * dXi/dX
257 //     TauC = Cc f1 / (8 gii)
258 //     TauM = min( 1 , 1 / f1 )
259 //     TauE = TauM / (Ce cv)
260 //
261 //  SU   = Galerkin + grad(v) . ( Ai^T * Tau * (Aj q,j) )
262 //
263 // Constants:
264 //   lambda = - 2 / 3,  From Stokes hypothesis
265 //   mu              ,  Dynamic viscosity
266 //   k               ,  Thermal conductivity
267 //   cv              ,  Specific heat, constant volume
268 //   cp              ,  Specific heat, constant pressure
269 //   g               ,  Gravity
270 //   gamma  = cp / cv,  Specific heat ratio
271 //
272 // We require the product of the inverse of the Jacobian (dXdx_j,k) and
273 // its transpose (dXdx_k,j) to properly compute integrals of the form:
274 // int( gradv gradu )
275 //
276 // *****************************************************************************
277 CEED_QFUNCTION(RHSFunction_Newtonian)(void *ctx, CeedInt Q,
278                                       const CeedScalar *const *in, CeedScalar *const *out) {
279   // *INDENT-OFF*
280   // Inputs
281   const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
282                    (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
283                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
284                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3];
285   // Outputs
286   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
287              (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
288   // *INDENT-ON*
289 
290   // Context
291   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
292   const CeedScalar mu     = context->mu;
293   const CeedScalar cv     = context->cv;
294   const CeedScalar cp     = context->cp;
295   const CeedScalar *g     = context->g;
296   const CeedScalar dt     = context->dt;
297   const CeedScalar gamma  = cp / cv;
298   const CeedScalar Rd     = cp - cv;
299 
300   CeedPragmaSIMD
301   // Quadrature Point Loop
302   for (CeedInt i=0; i<Q; i++) {
303     CeedScalar U[5];
304     for (int j=0; j<5; j++) U[j] = q[j][i];
305     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
306     State s = StateFromU(context, U, x_i);
307 
308     // -- Interp-to-Interp q_data
309     const CeedScalar wdetJ      =   q_data[0][i];
310     // -- Interp-to-Grad q_data
311     // ---- Inverse of change of coordinate matrix: X_i,j
312     // *INDENT-OFF*
313     const CeedScalar dXdx[3][3] = {{q_data[1][i],
314                                     q_data[2][i],
315                                     q_data[3][i]},
316                                    {q_data[4][i],
317                                     q_data[5][i],
318                                     q_data[6][i]},
319                                    {q_data[7][i],
320                                     q_data[8][i],
321                                     q_data[9][i]}
322                                   };
323     // *INDENT-ON*
324 
325     State grad_s[3];
326     for (CeedInt j=0; j<3; j++) {
327       CeedScalar dx_i[3] = {0}, dU[5];
328       for (CeedInt k=0; k<5; k++)
329         dU[k] = Grad_q[0][k][i] * dXdx[0][j] +
330                 Grad_q[1][k][i] * dXdx[1][j] +
331                 Grad_q[2][k][i] * dXdx[2][j];
332       dx_i[j] = 1.;
333       grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i);
334     }
335 
336     CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3];
337     KMStrainRate(grad_s, strain_rate);
338     NewtonianStress(context, strain_rate, kmstress);
339     KMUnpack(kmstress, stress);
340     ViscousEnergyFlux(context, s.Y, grad_s, stress, Fe);
341 
342     StateConservative F_inviscid[3];
343     FluxInviscid(context, s, F_inviscid);
344 
345     // Total flux
346     CeedScalar Flux[5][3];
347     for (CeedInt j=0; j<3; j++) {
348       Flux[0][j] = F_inviscid[j].density;
349       for (CeedInt k=0; k<3; k++)
350         Flux[k+1][j] = F_inviscid[j].momentum[k] - stress[k][j];
351       Flux[4][j] = F_inviscid[j].E_total + Fe[j];
352     }
353 
354     for (CeedInt j=0; j<3; j++) {
355       for (CeedInt k=0; k<5; k++) {
356         Grad_v[j][k][i] = wdetJ * (dXdx[j][0] * Flux[k][0] +
357                                    dXdx[j][1] * Flux[k][1] +
358                                    dXdx[j][2] * Flux[k][2]);
359       }
360     }
361 
362     const CeedScalar body_force[5] = {0, s.U.density *g[0], s.U.density *g[1], s.U.density *g[2], 0};
363     for (int j=0; j<5; j++)
364       v[j][i] = wdetJ * body_force[j];
365 
366     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
367     CeedScalar jacob_F_conv[3][5][5] = {0};
368     computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total,
369                            gamma, g, x_i);
370     CeedScalar grad_U[5][3];
371     for (CeedInt j=0; j<3; j++) {
372       grad_U[0][j] = grad_s[j].U.density;
373       for (CeedInt k=0; k<3; k++) grad_U[k+1][j] = grad_s[j].U.momentum[k];
374       grad_U[4][j] = grad_s[j].U.E_total;
375     }
376 
377     // strong_conv = dF/dq * dq/dx    (Strong convection)
378     CeedScalar strong_conv[5] = {0};
379     for (CeedInt j=0; j<3; j++)
380       for (CeedInt k=0; k<5; k++)
381         for (CeedInt l=0; l<5; l++)
382           strong_conv[k] += jacob_F_conv[j][k][l] * grad_U[l][j];
383 
384     // -- Stabilization method: none, SU, or SUPG
385     CeedScalar stab[5][3] = {{0.}};
386     CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
387     CeedScalar Tau_d[3] = {0.};
388     switch (context->stabilization) {
389     case STAB_NONE:        // Galerkin
390       break;
391     case STAB_SU:        // SU
392       Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density);
393       tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
394       tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
395       tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
396       tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
397       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
398       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
399                                   tau_strong_conv,
400                                   tau_strong_conv_conservative);
401       for (CeedInt j=0; j<3; j++)
402         for (CeedInt k=0; k<5; k++)
403           for (CeedInt l=0; l<5; l++)
404             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
405 
406       for (CeedInt j=0; j<5; j++)
407         for (CeedInt k=0; k<3; k++)
408           Grad_v[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] +
409                                     stab[j][1] * dXdx[k][1] +
410                                     stab[j][2] * dXdx[k][2]);
411       break;
412     case STAB_SUPG:        // SUPG is not implemented for explicit scheme
413       break;
414     }
415 
416   } // End Quadrature Point Loop
417 
418   // Return
419   return 0;
420 }
421 
422 // *****************************************************************************
423 // This QFunction implements the Navier-Stokes equations (mentioned above) with
424 //   implicit time stepping method
425 //
426 //  SU   = Galerkin + grad(v) . ( Ai^T * Tau * (Aj q,j) )
427 //  SUPG = Galerkin + grad(v) . ( Ai^T * Tau * (q_dot + Aj q,j - body force) )
428 //                                       (diffussive terms will be added later)
429 //
430 // *****************************************************************************
431 CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
432                                     const CeedScalar *const *in,
433                                     CeedScalar *const *out) {
434   // *INDENT-OFF*
435   // Inputs
436   const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
437                    (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
438                    (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
439                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3],
440                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
441   // Outputs
442   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
443              (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1],
444              (*jac_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[2];
445   // *INDENT-ON*
446   // Context
447   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
448   const CeedScalar mu     = context->mu;
449   const CeedScalar cv     = context->cv;
450   const CeedScalar cp     = context->cp;
451   const CeedScalar *g     = context->g;
452   const CeedScalar dt     = context->dt;
453   const CeedScalar gamma  = cp / cv;
454   const CeedScalar Rd     = cp-cv;
455 
456   CeedPragmaSIMD
457   // Quadrature Point Loop
458   for (CeedInt i=0; i<Q; i++) {
459     CeedScalar U[5];
460     for (CeedInt j=0; j<5; j++) U[j] = q[j][i];
461     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
462     State s = StateFromU(context, U, x_i);
463 
464     // -- Interp-to-Interp q_data
465     const CeedScalar wdetJ      =   q_data[0][i];
466     // -- Interp-to-Grad q_data
467     // ---- Inverse of change of coordinate matrix: X_i,j
468     // *INDENT-OFF*
469     const CeedScalar dXdx[3][3] = {{q_data[1][i],
470                                     q_data[2][i],
471                                     q_data[3][i]},
472                                    {q_data[4][i],
473                                     q_data[5][i],
474                                     q_data[6][i]},
475                                    {q_data[7][i],
476                                     q_data[8][i],
477                                     q_data[9][i]}
478                                   };
479     // *INDENT-ON*
480     State grad_s[3];
481     for (CeedInt j=0; j<3; j++) {
482       CeedScalar dx_i[3] = {0}, dU[5];
483       for (CeedInt k=0; k<5; k++)
484         dU[k] = Grad_q[0][k][i] * dXdx[0][j] +
485                 Grad_q[1][k][i] * dXdx[1][j] +
486                 Grad_q[2][k][i] * dXdx[2][j];
487       dx_i[j] = 1.;
488       grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i);
489     }
490 
491     CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3];
492     KMStrainRate(grad_s, strain_rate);
493     NewtonianStress(context, strain_rate, kmstress);
494     KMUnpack(kmstress, stress);
495     ViscousEnergyFlux(context, s.Y, grad_s, stress, Fe);
496 
497     StateConservative F_inviscid[3];
498     FluxInviscid(context, s, F_inviscid);
499 
500 
501     // Total flux
502     CeedScalar Flux[5][3];
503     for (CeedInt j=0; j<3; j++) {
504       Flux[0][j] = F_inviscid[j].density;
505       for (CeedInt k=0; k<3; k++)
506         Flux[k+1][j] = F_inviscid[j].momentum[k] - stress[k][j];
507       Flux[4][j] = F_inviscid[j].E_total + Fe[j];
508     }
509 
510     for (CeedInt j=0; j<3; j++) {
511       for (CeedInt k=0; k<5; k++) {
512         Grad_v[j][k][i] = -wdetJ * (dXdx[j][0] * Flux[k][0] +
513                                     dXdx[j][1] * Flux[k][1] +
514                                     dXdx[j][2] * Flux[k][2]);
515       }
516     }
517 
518     const CeedScalar body_force[5] = {0, s.U.density *g[0], s.U.density *g[1], s.U.density *g[2], 0};
519     for (CeedInt j=0; j<5; j++)
520       v[j][i] = wdetJ * (q_dot[j][i] - body_force[j]);
521 
522     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
523     CeedScalar jacob_F_conv[3][5][5] = {0};
524     computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total,
525                            gamma, g, x_i);
526     CeedScalar grad_U[5][3];
527     for (CeedInt j=0; j<3; j++) {
528       grad_U[0][j] = grad_s[j].U.density;
529       for (CeedInt k=0; k<3; k++) grad_U[k+1][j] = grad_s[j].U.momentum[k];
530       grad_U[4][j] = grad_s[j].U.E_total;
531     }
532 
533     // strong_conv = dF/dq * dq/dx    (Strong convection)
534     CeedScalar strong_conv[5] = {0};
535     for (CeedInt j=0; j<3; j++)
536       for (CeedInt k=0; k<5; k++)
537         for (CeedInt l=0; l<5; l++)
538           strong_conv[k] += jacob_F_conv[j][k][l] * grad_U[l][j];
539 
540     // Strong residual
541     CeedScalar strong_res[5];
542     for (CeedInt j=0; j<5; j++)
543       strong_res[j] = q_dot[j][i] + strong_conv[j] - body_force[j];
544 
545     // -- Stabilization method: none, SU, or SUPG
546     CeedScalar stab[5][3] = {{0.}};
547     CeedScalar tau_strong_res[5] = {0.}, tau_strong_res_conservative[5] = {0};
548     CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
549     CeedScalar Tau_d[3] = {0.};
550     switch (context->stabilization) {
551     case STAB_NONE:        // Galerkin
552       break;
553     case STAB_SU:        // SU
554       Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density);
555       tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
556       tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
557       tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
558       tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
559       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
560       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
561                                   tau_strong_conv, tau_strong_conv_conservative);
562       for (CeedInt j=0; j<3; j++)
563         for (CeedInt k=0; k<5; k++)
564           for (CeedInt l=0; l<5; l++)
565             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
566 
567       for (CeedInt j=0; j<5; j++)
568         for (CeedInt k=0; k<3; k++)
569           Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
570                                     stab[j][1] * dXdx[k][1] +
571                                     stab[j][2] * dXdx[k][2]);
572 
573       break;
574     case STAB_SUPG:        // SUPG
575       Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density);
576       tau_strong_res[0] = Tau_d[0] * strong_res[0];
577       tau_strong_res[1] = Tau_d[1] * strong_res[1];
578       tau_strong_res[2] = Tau_d[1] * strong_res[2];
579       tau_strong_res[3] = Tau_d[1] * strong_res[3];
580       tau_strong_res[4] = Tau_d[2] * strong_res[4];
581 // Alternate route (useful later with primitive variable code)
582 // this function was verified against PHASTA for as IC that was as close as possible
583 //    computeFluxJacobian_NSp(jacob_F_conv_p, rho, u, E, Rd, cv);
584 // it has also been verified to compute a correct through the following
585 //   stab[k][j] += jacob_F_conv_p[j][k][l] * tau_strong_res[l] // flux Jacobian wrt primitive
586 // applied in the triple loop below
587 //  However, it is more flops than using the existing Jacobian wrt q after q_{,Y} viz
588       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
589                                   tau_strong_res, tau_strong_res_conservative);
590       for (CeedInt j=0; j<3; j++)
591         for (CeedInt k=0; k<5; k++)
592           for (CeedInt l=0; l<5; l++)
593             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_res_conservative[l];
594 
595       for (CeedInt j=0; j<5; j++)
596         for (CeedInt k=0; k<3; k++)
597           Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
598                                     stab[j][1] * dXdx[k][1] +
599                                     stab[j][2] * dXdx[k][2]);
600       break;
601     }
602     for (CeedInt j=0; j<5; j++) jac_data[j][i] = U[j];
603     for (CeedInt j=0; j<6; j++) jac_data[5+j][i] = kmstress[j];
604     for (CeedInt j=0; j<3; j++) jac_data[5+6+j][i] = Tau_d[j];
605 
606   } // End Quadrature Point Loop
607 
608   // Return
609   return 0;
610 }
611 
612 CEED_QFUNCTION(IJacobian_Newtonian)(void *ctx, CeedInt Q,
613                                     const CeedScalar *const *in,
614                                     CeedScalar *const *out) {
615   // *INDENT-OFF*
616   // Inputs
617   const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
618                    (*Grad_dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
619                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
620                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3],
621                    (*jac_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
622   // Outputs
623   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
624              (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
625   // *INDENT-ON*
626   // Context
627   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
628   const CeedScalar *g = context->g;
629   const CeedScalar cp = context->cp;
630   const CeedScalar cv = context->cv;
631   const CeedScalar Rd = cp - cv;
632   const CeedScalar gamma = cp / cv;
633 
634   CeedPragmaSIMD
635   // Quadrature Point Loop
636   for (CeedInt i=0; i<Q; i++) {
637     // -- Interp-to-Interp q_data
638     const CeedScalar wdetJ      =   q_data[0][i];
639     // -- Interp-to-Grad q_data
640     // ---- Inverse of change of coordinate matrix: X_i,j
641     // *INDENT-OFF*
642     const CeedScalar dXdx[3][3] = {{q_data[1][i],
643                                     q_data[2][i],
644                                     q_data[3][i]},
645                                    {q_data[4][i],
646                                     q_data[5][i],
647                                     q_data[6][i]},
648                                    {q_data[7][i],
649                                     q_data[8][i],
650                                     q_data[9][i]}
651                                   };
652     // *INDENT-ON*
653 
654     CeedScalar U[5], kmstress[6], Tau_d[3] __attribute((unused));
655     for (int j=0; j<5; j++) U[j] = jac_data[j][i];
656     for (int j=0; j<6; j++) kmstress[j] = jac_data[5+j][i];
657     for (int j=0; j<3; j++) Tau_d[j] = jac_data[5+6+j][i];
658     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
659     State s = StateFromU(context, U, x_i);
660 
661     CeedScalar dU[5], dx0[3] = {0};
662     for (int j=0; j<5; j++) dU[j] = dq[j][i];
663     State ds = StateFromU_fwd(context, s, dU, x_i, dx0);
664 
665     State grad_ds[3];
666     for (int j=0; j<3; j++) {
667       CeedScalar dUj[5];
668       for (int k=0; k<5; k++) dUj[k] = Grad_dq[0][k][i] * dXdx[0][j]
669                                          + Grad_dq[1][k][i] * dXdx[1][j]
670                                          + Grad_dq[2][k][i] * dXdx[2][j];
671       grad_ds[j] = StateFromU_fwd(context, s, dUj, x_i, dx0);
672     }
673 
674     CeedScalar dstrain_rate[6], dkmstress[6], stress[3][3], dstress[3][3], dFe[3];
675     KMStrainRate(grad_ds, dstrain_rate);
676     NewtonianStress(context, dstrain_rate, dkmstress);
677     KMUnpack(dkmstress, dstress);
678     KMUnpack(kmstress, stress);
679     ViscousEnergyFlux_fwd(context, s.Y, ds.Y, grad_ds, stress, dstress, dFe);
680 
681     StateConservative dF_inviscid[3];
682     FluxInviscid_fwd(context, s, ds, dF_inviscid);
683 
684     // Total flux
685     CeedScalar dFlux[5][3];
686     for (int j=0; j<3; j++) {
687       dFlux[0][j] = dF_inviscid[j].density;
688       for (int k=0; k<3; k++)
689         dFlux[k+1][j] = dF_inviscid[j].momentum[k] - dstress[k][j];
690       dFlux[4][j] = dF_inviscid[j].E_total + dFe[j];
691     }
692 
693     for (int j=0; j<3; j++) {
694       for (int k=0; k<5; k++) {
695         Grad_v[j][k][i] = -wdetJ * (dXdx[j][0] * dFlux[k][0] +
696                                     dXdx[j][1] * dFlux[k][1] +
697                                     dXdx[j][2] * dFlux[k][2]);
698       }
699     }
700 
701     const CeedScalar dbody_force[5] = {0, ds.U.density *g[0], ds.U.density *g[1], ds.U.density *g[2], 0};
702     for (int j=0; j<5; j++)
703       v[j][i] = wdetJ * (context->ijacobian_time_shift * dU[j] - dbody_force[j]);
704 
705     if (1) {
706       CeedScalar jacob_F_conv[3][5][5] = {0};
707       computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total,
708                              gamma, g, x_i);
709       CeedScalar grad_dU[5][3];
710       for (int j=0; j<3; j++) {
711         grad_dU[0][j] = grad_ds[j].U.density;
712         for (int k=0; k<3; k++) grad_dU[k+1][j] = grad_ds[j].U.momentum[k];
713         grad_dU[4][j] = grad_ds[j].U.E_total;
714       }
715       CeedScalar dstrong_conv[5] = {0};
716       for (int j=0; j<3; j++)
717         for (int k=0; k<5; k++)
718           for (int l=0; l<5; l++)
719             dstrong_conv[k] += jacob_F_conv[j][k][l] * grad_dU[l][j];
720       CeedScalar dstrong_res[5];
721       for (int j=0; j<5; j++)
722         dstrong_res[j] = context->ijacobian_time_shift * dU[j] + dstrong_conv[j] -
723                          dbody_force[j];
724       CeedScalar dtau_strong_res[5] = {0.}, dtau_strong_res_conservative[5] = {0};
725       dtau_strong_res[0] = Tau_d[0] * dstrong_res[0];
726       dtau_strong_res[1] = Tau_d[1] * dstrong_res[1];
727       dtau_strong_res[2] = Tau_d[1] * dstrong_res[2];
728       dtau_strong_res[3] = Tau_d[1] * dstrong_res[3];
729       dtau_strong_res[4] = Tau_d[2] * dstrong_res[4];
730       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
731                                   dtau_strong_res, dtau_strong_res_conservative);
732       CeedScalar dstab[5][3] = {0};
733       for (int j=0; j<3; j++)
734         for (int k=0; k<5; k++)
735           for (int l=0; l<5; l++)
736             dstab[k][j] += jacob_F_conv[j][k][l] * dtau_strong_res_conservative[l];
737       for (int j=0; j<5; j++)
738         for (int k=0; k<3; k++)
739           Grad_v[k][j][i] += wdetJ*(dstab[j][0] * dXdx[k][0] +
740                                     dstab[j][1] * dXdx[k][1] +
741                                     dstab[j][2] * dXdx[k][2]);
742 
743     }
744   } // End Quadrature Point Loop
745   return 0;
746 }
747 // *****************************************************************************
748 #endif // newtonian_h
749