xref: /libCEED/examples/fluids/qfunctions/newtonian.h (revision efe9d856d652f85e9e9f33171e1b615db5aa2505)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 /// @file
9 /// Operator for Navier-Stokes example using PETSc
10 
11 
12 #ifndef newtonian_h
13 #define newtonian_h
14 
15 #include <math.h>
16 #include <ceed.h>
17 #include "newtonian_types.h"
18 #include "newtonian_state.h"
19 #include "utils.h"
20 
21 // *****************************************************************************
22 // Helper function for computing flux Jacobian
23 // *****************************************************************************
24 CEED_QFUNCTION_HELPER void computeFluxJacobian_NS(CeedScalar dF[3][5][5],
25     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
26     const CeedScalar gamma, const CeedScalar g[3], const CeedScalar x[3]) {
27   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
28   CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
29   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
30     for (CeedInt j=0; j<3; j++) { // Rows of each Jacobian matrix
31       dF[i][j+1][0] = ((i==j) ? ((gamma-1.)*(u_sq/2. - e_potential)) : 0.) -
32                       u[i]*u[j];
33       for (CeedInt k=0; k<3; k++) { // Columns of each Jacobian matrix
34         dF[i][0][k+1]   = ((i==k) ? 1. : 0.);
35         dF[i][j+1][k+1] = ((j==k) ? u[i] : 0.) +
36                           ((i==k) ? u[j] : 0.) -
37                           ((i==j) ? u[k] : 0.) * (gamma-1.);
38         dF[i][4][k+1]   = ((i==k) ? (E*gamma/rho - (gamma-1.)*u_sq/2.) : 0.) -
39                           (gamma-1.)*u[i]*u[k];
40       }
41       dF[i][j+1][4] = ((i==j) ? (gamma-1.) : 0.);
42     }
43     dF[i][4][0] = u[i] * ((gamma-1.)*u_sq - E*gamma/rho);
44     dF[i][4][4] = u[i] * gamma;
45   }
46 }
47 
48 // *****************************************************************************
49 // Helper function for computing flux Jacobian of Primitive variables
50 // *****************************************************************************
51 CEED_QFUNCTION_HELPER void computeFluxJacobian_NSp(CeedScalar dF[3][5][5],
52     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
53     const CeedScalar Rd, const CeedScalar cv) {
54   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
55   // TODO Add in gravity's contribution
56 
57   CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
58   CeedScalar drdT = -rho / T;
59   CeedScalar drdP = 1. / ( Rd * T);
60   CeedScalar etot =  E / rho ;
61   CeedScalar e2p  = drdP * etot + 1. ;
62   CeedScalar e3p  = ( E  + rho * Rd * T );
63   CeedScalar e4p  = drdT * etot + rho * cv ;
64 
65   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
66     for (CeedInt j=0; j<3; j++) { // j counts F^{m_j}
67 //        [row][col] of A_i
68       dF[i][j+1][0] = drdP * u[i] * u[j] + ((i==j) ? 1. : 0.); // F^{{m_j} wrt p
69       for (CeedInt k=0; k<3; k++) { // k counts the wrt vel_k
70         dF[i][0][k+1]   =  ((i==k) ? rho  : 0.);   // F^c wrt u_k
71         dF[i][j+1][k+1] = (((j==k) ? u[i] : 0.) +  // F^m_j wrt u_k
72                            ((i==k) ? u[j] : 0.) ) * rho;
73         dF[i][4][k+1]   = rho * u[i] * u[k]
74                           + ((i==k) ? e3p  : 0.) ; // F^e wrt u_k
75       }
76       dF[i][j+1][4] = drdT * u[i] * u[j]; // F^{m_j} wrt T
77     }
78     dF[i][4][0] = u[i] * e2p; // F^e wrt p
79     dF[i][4][4] = u[i] * e4p; // F^e wrt T
80     dF[i][0][0] = u[i] * drdP; // F^c wrt p
81     dF[i][0][4] = u[i] * drdT; // F^c wrt T
82   }
83 }
84 
85 CEED_QFUNCTION_HELPER void PrimitiveToConservative_fwd(const CeedScalar rho,
86     const CeedScalar u[3], const CeedScalar E, const CeedScalar Rd,
87     const CeedScalar cv, const CeedScalar dY[5], CeedScalar dU[5]) {
88   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2];
89   CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
90   CeedScalar drdT = -rho / T;
91   CeedScalar drdP = 1. / ( Rd * T);
92   dU[0] = drdP * dY[0] + drdT * dY[4];
93   CeedScalar de_kinetic = 0;
94   for (CeedInt i=0; i<3; i++) {
95     dU[1+i] = dU[0] * u[i] + rho * dY[1+i];
96     de_kinetic += u[i] * dY[1+i];
97   }
98   dU[4] = rho * cv * dY[4] + dU[0] * cv * T // internal energy: rho * e
99           + rho * de_kinetic + .5 * dU[0] * u_sq; // kinetic energy: .5 * rho * |u|^2
100 }
101 
102 // *****************************************************************************
103 // Helper function for computing Tau elements (stabilization constant)
104 //   Model from:
105 //     PHASTA
106 //
107 //   Tau[i] = itau=0 which is diagonal-Shakib (3 values still but not spatial)
108 //
109 // Where NOT UPDATED YET
110 // *****************************************************************************
111 CEED_QFUNCTION_HELPER void Tau_diagPrim(CeedScalar Tau_d[3],
112                                         const CeedScalar dXdx[3][3], const CeedScalar u[3],
113                                         const CeedScalar cv, const NewtonianIdealGasContext newt_ctx,
114                                         const CeedScalar mu, const CeedScalar dt,
115                                         const CeedScalar rho) {
116   // Context
117   const CeedScalar Ctau_t = newt_ctx->Ctau_t;
118   const CeedScalar Ctau_v = newt_ctx->Ctau_v;
119   const CeedScalar Ctau_C = newt_ctx->Ctau_C;
120   const CeedScalar Ctau_M = newt_ctx->Ctau_M;
121   const CeedScalar Ctau_E = newt_ctx->Ctau_E;
122   CeedScalar gijd[6];
123   CeedScalar tau;
124   CeedScalar dts;
125   CeedScalar fact;
126 
127   //*INDENT-OFF*
128   gijd[0] =   dXdx[0][0] * dXdx[0][0]
129             + dXdx[1][0] * dXdx[1][0]
130             + dXdx[2][0] * dXdx[2][0];
131 
132   gijd[1] =   dXdx[0][0] * dXdx[0][1]
133             + dXdx[1][0] * dXdx[1][1]
134             + dXdx[2][0] * dXdx[2][1];
135 
136   gijd[2] =   dXdx[0][1] * dXdx[0][1]
137             + dXdx[1][1] * dXdx[1][1]
138             + dXdx[2][1] * dXdx[2][1];
139 
140   gijd[3] =   dXdx[0][0] * dXdx[0][2]
141             + dXdx[1][0] * dXdx[1][2]
142             + dXdx[2][0] * dXdx[2][2];
143 
144   gijd[4] =   dXdx[0][1] * dXdx[0][2]
145             + dXdx[1][1] * dXdx[1][2]
146             + dXdx[2][1] * dXdx[2][2];
147 
148   gijd[5] =   dXdx[0][2] * dXdx[0][2]
149             + dXdx[1][2] * dXdx[1][2]
150             + dXdx[2][2] * dXdx[2][2];
151   //*INDENT-ON*
152 
153   dts = Ctau_t / dt ;
154 
155   tau = rho*rho*((4. * dts * dts)
156                  + u[0] * ( u[0] * gijd[0] + 2. * ( u[1] * gijd[1] + u[2] * gijd[3]))
157                  + u[1] * ( u[1] * gijd[2] + 2. *   u[2] * gijd[4])
158                  + u[2] *   u[2] * gijd[5])
159         + Ctau_v* mu * mu *
160         (gijd[0]*gijd[0] + gijd[2]*gijd[2] + gijd[5]*gijd[5] +
161          + 2. * (gijd[1]*gijd[1] + gijd[3]*gijd[3] + gijd[4]*gijd[4]));
162 
163   fact=sqrt(tau);
164 
165   Tau_d[0] = Ctau_C * fact / (rho*(gijd[0] + gijd[2] + gijd[5]))*0.125;
166 
167   Tau_d[1] = Ctau_M / fact;
168   Tau_d[2] = Ctau_E / ( fact * cv );
169 
170 // consider putting back the way I initially had it  Ctau_E * Tau_d[1] /cv
171 //  to avoid a division if the compiler is smart enough to see that cv IS
172 // a constant that it could invert once for all elements
173 // but in that case energy tau is scaled by the product of Ctau_E * Ctau_M
174 // OR we could absorb cv into Ctau_E but this puts more burden on user to
175 // know how to change constants with a change of fluid or units.  Same for
176 // Ctau_v * mu * mu IF AND ONLY IF we don't add viscosity law =f(T)
177 }
178 
179 // *****************************************************************************
180 // This QFunction sets a "still" initial condition for generic Newtonian IG problems
181 // *****************************************************************************
182 CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q,
183                                const CeedScalar *const *in, CeedScalar *const *out) {
184   // Inputs
185   const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0];
186 
187   // Outputs
188   CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
189 
190   // Context
191   const SetupContext context = (SetupContext)ctx;
192   const CeedScalar theta0    = context->theta0;
193   const CeedScalar P0        = context->P0;
194   const CeedScalar cv        = context->cv;
195   const CeedScalar cp        = context->cp;
196   const CeedScalar *g        = context->g;
197   const CeedScalar Rd        = cp - cv;
198 
199   // Quadrature Point Loop
200   CeedPragmaSIMD
201   for (CeedInt i=0; i<Q; i++) {
202     CeedScalar q[5] = {0.};
203 
204     // Setup
205     // -- Coordinates
206     const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]};
207     const CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
208 
209     // -- Density
210     const CeedScalar rho = P0 / (Rd*theta0);
211 
212     // Initial Conditions
213     q[0] = rho;
214     q[1] = 0.0;
215     q[2] = 0.0;
216     q[3] = 0.0;
217     q[4] = rho * (cv*theta0 + e_potential);
218 
219     for (CeedInt j=0; j<5; j++)
220       q0[j][i] = q[j];
221   } // End of Quadrature Point Loop
222   return 0;
223 }
224 
225 // *****************************************************************************
226 // This QFunction sets a "still" initial condition for generic Newtonian IG
227 //   problems in primitive variables
228 // *****************************************************************************
229 CEED_QFUNCTION(ICsNewtonianIG_Prim)(void *ctx, CeedInt Q,
230                                     const CeedScalar *const *in, CeedScalar *const *out) {
231   // Outputs
232   CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
233 
234   // Context
235   const SetupContext context = (SetupContext)ctx;
236   const CeedScalar theta0    = context->theta0;
237   const CeedScalar P0        = context->P0;
238 
239   // Quadrature Point Loop
240   CeedPragmaSIMD
241   for (CeedInt i=0; i<Q; i++) {
242     CeedScalar q[5] = {0.};
243 
244     // Initial Conditions
245     q[0] = P0;
246     q[1] = 0.0;
247     q[2] = 0.0;
248     q[3] = 0.0;
249     q[4] = theta0;
250 
251     for (CeedInt j=0; j<5; j++)
252       q0[j][i] = q[j];
253 
254   } // End of Quadrature Point Loop
255   return 0;
256 }
257 
258 // *****************************************************************************
259 // This QFunction implements the following formulation of Navier-Stokes with
260 //   explicit time stepping method
261 //
262 // This is 3D compressible Navier-Stokes in conservation form with state
263 //   variables of density, momentum density, and total energy density.
264 //
265 // State Variables: q = ( rho, U1, U2, U3, E )
266 //   rho - Mass Density
267 //   Ui  - Momentum Density,      Ui = rho ui
268 //   E   - Total Energy Density,  E  = rho (cv T + (u u)/2 + g z)
269 //
270 // Navier-Stokes Equations:
271 //   drho/dt + div( U )                               = 0
272 //   dU/dt   + div( rho (u x u) + P I3 ) + rho g khat = div( Fu )
273 //   dE/dt   + div( (E + P) u )                       = div( Fe )
274 //
275 // Viscous Stress:
276 //   Fu = mu (grad( u ) + grad( u )^T + lambda div ( u ) I3)
277 //
278 // Thermal Stress:
279 //   Fe = u Fu + k grad( T )
280 // Equation of State
281 //   P = (gamma - 1) (E - rho (u u) / 2 - rho g z)
282 //
283 // Stabilization:
284 //   Tau = diag(TauC, TauM, TauM, TauM, TauE)
285 //     f1 = rho  sqrt(ui uj gij)
286 //     gij = dXi/dX * dXi/dX
287 //     TauC = Cc f1 / (8 gii)
288 //     TauM = min( 1 , 1 / f1 )
289 //     TauE = TauM / (Ce cv)
290 //
291 //  SU   = Galerkin + grad(v) . ( Ai^T * Tau * (Aj q,j) )
292 //
293 // Constants:
294 //   lambda = - 2 / 3,  From Stokes hypothesis
295 //   mu              ,  Dynamic viscosity
296 //   k               ,  Thermal conductivity
297 //   cv              ,  Specific heat, constant volume
298 //   cp              ,  Specific heat, constant pressure
299 //   g               ,  Gravity
300 //   gamma  = cp / cv,  Specific heat ratio
301 //
302 // We require the product of the inverse of the Jacobian (dXdx_j,k) and
303 // its transpose (dXdx_k,j) to properly compute integrals of the form:
304 // int( gradv gradu )
305 //
306 // *****************************************************************************
307 CEED_QFUNCTION(RHSFunction_Newtonian)(void *ctx, CeedInt Q,
308                                       const CeedScalar *const *in, CeedScalar *const *out) {
309   // *INDENT-OFF*
310   // Inputs
311   const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
312                    (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
313                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
314                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3];
315   // Outputs
316   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
317              (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
318   // *INDENT-ON*
319 
320   // Context
321   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
322   const CeedScalar mu     = context->mu;
323   const CeedScalar cv     = context->cv;
324   const CeedScalar cp     = context->cp;
325   const CeedScalar *g     = context->g;
326   const CeedScalar dt     = context->dt;
327   const CeedScalar gamma  = cp / cv;
328   const CeedScalar Rd     = cp - cv;
329 
330   CeedPragmaSIMD
331   // Quadrature Point Loop
332   for (CeedInt i=0; i<Q; i++) {
333     CeedScalar U[5];
334     for (int j=0; j<5; j++) U[j] = q[j][i];
335     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
336     State s = StateFromU(context, U, x_i);
337 
338     // -- Interp-to-Interp q_data
339     const CeedScalar wdetJ      =   q_data[0][i];
340     // -- Interp-to-Grad q_data
341     // ---- Inverse of change of coordinate matrix: X_i,j
342     // *INDENT-OFF*
343     const CeedScalar dXdx[3][3] = {{q_data[1][i],
344                                     q_data[2][i],
345                                     q_data[3][i]},
346                                    {q_data[4][i],
347                                     q_data[5][i],
348                                     q_data[6][i]},
349                                    {q_data[7][i],
350                                     q_data[8][i],
351                                     q_data[9][i]}
352                                   };
353     // *INDENT-ON*
354     State grad_s[3];
355     for (CeedInt j=0; j<3; j++) {
356       CeedScalar dx_i[3] = {0}, dU[5];
357       for (CeedInt k=0; k<5; k++)
358         dU[k] = Grad_q[0][k][i] * dXdx[0][j] +
359                 Grad_q[1][k][i] * dXdx[1][j] +
360                 Grad_q[2][k][i] * dXdx[2][j];
361       dx_i[j] = 1.;
362       grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i);
363     }
364 
365     CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3];
366     KMStrainRate(grad_s, strain_rate);
367     NewtonianStress(context, strain_rate, kmstress);
368     KMUnpack(kmstress, stress);
369     ViscousEnergyFlux(context, s.Y, grad_s, stress, Fe);
370 
371     StateConservative F_inviscid[3];
372     FluxInviscid(context, s, F_inviscid);
373 
374     // Total flux
375     CeedScalar Flux[5][3];
376     for (CeedInt j=0; j<3; j++) {
377       Flux[0][j] = F_inviscid[j].density;
378       for (CeedInt k=0; k<3; k++)
379         Flux[k+1][j] = F_inviscid[j].momentum[k] - stress[k][j];
380       Flux[4][j] = F_inviscid[j].E_total + Fe[j];
381     }
382 
383     for (CeedInt j=0; j<3; j++) {
384       for (CeedInt k=0; k<5; k++) {
385         Grad_v[j][k][i] = wdetJ * (dXdx[j][0] * Flux[k][0] +
386                                    dXdx[j][1] * Flux[k][1] +
387                                    dXdx[j][2] * Flux[k][2]);
388       }
389     }
390 
391     const CeedScalar body_force[5] = {0, s.U.density *g[0], s.U.density *g[1], s.U.density *g[2], 0};
392     for (int j=0; j<5; j++)
393       v[j][i] = wdetJ * body_force[j];
394 
395     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
396     CeedScalar jacob_F_conv[3][5][5] = {0};
397     computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total,
398                            gamma, g, x_i);
399     CeedScalar grad_U[5][3];
400     for (CeedInt j=0; j<3; j++) {
401       grad_U[0][j] = grad_s[j].U.density;
402       for (CeedInt k=0; k<3; k++) grad_U[k+1][j] = grad_s[j].U.momentum[k];
403       grad_U[4][j] = grad_s[j].U.E_total;
404     }
405 
406     // strong_conv = dF/dq * dq/dx    (Strong convection)
407     CeedScalar strong_conv[5] = {0};
408     for (CeedInt j=0; j<3; j++)
409       for (CeedInt k=0; k<5; k++)
410         for (CeedInt l=0; l<5; l++)
411           strong_conv[k] += jacob_F_conv[j][k][l] * grad_U[l][j];
412 
413     // -- Stabilization method: none, SU, or SUPG
414     CeedScalar stab[5][3] = {{0.}};
415     CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
416     CeedScalar Tau_d[3] = {0.};
417     switch (context->stabilization) {
418     case STAB_NONE:        // Galerkin
419       break;
420     case STAB_SU:        // SU
421       Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density);
422       tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
423       tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
424       tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
425       tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
426       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
427       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
428                                   tau_strong_conv,
429                                   tau_strong_conv_conservative);
430       for (CeedInt j=0; j<3; j++)
431         for (CeedInt k=0; k<5; k++)
432           for (CeedInt l=0; l<5; l++)
433             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
434 
435       for (CeedInt j=0; j<5; j++)
436         for (CeedInt k=0; k<3; k++)
437           Grad_v[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] +
438                                     stab[j][1] * dXdx[k][1] +
439                                     stab[j][2] * dXdx[k][2]);
440       break;
441     case STAB_SUPG:        // SUPG is not implemented for explicit scheme
442       break;
443     }
444 
445   } // End Quadrature Point Loop
446 
447   // Return
448   return 0;
449 }
450 
451 // *****************************************************************************
452 // This QFunction implements the Navier-Stokes equations (mentioned above) with
453 //   implicit time stepping method
454 //
455 //  SU   = Galerkin + grad(v) . ( Ai^T * Tau * (Aj q,j) )
456 //  SUPG = Galerkin + grad(v) . ( Ai^T * Tau * (q_dot + Aj q,j - body force) )
457 //                                       (diffussive terms will be added later)
458 //
459 // *****************************************************************************
460 CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
461                                     const CeedScalar *const *in, CeedScalar *const *out) {
462   // *INDENT-OFF*
463   // Inputs
464   const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
465                    (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
466                    (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
467                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3],
468                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
469   // Outputs
470   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
471              (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1],
472              (*jac_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[2];
473   // *INDENT-ON*
474   // Context
475   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
476   const CeedScalar mu     = context->mu;
477   const CeedScalar cv     = context->cv;
478   const CeedScalar cp     = context->cp;
479   const CeedScalar *g     = context->g;
480   const CeedScalar dt     = context->dt;
481   const CeedScalar gamma  = cp / cv;
482   const CeedScalar Rd     = cp - cv;
483 
484   CeedPragmaSIMD
485   // Quadrature Point Loop
486   for (CeedInt i=0; i<Q; i++) {
487     CeedScalar U[5];
488     for (CeedInt j=0; j<5; j++) U[j] = q[j][i];
489     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
490     State s = StateFromU(context, U, x_i);
491 
492     // -- Interp-to-Interp q_data
493     const CeedScalar wdetJ      =   q_data[0][i];
494     // -- Interp-to-Grad q_data
495     // ---- Inverse of change of coordinate matrix: X_i,j
496     // *INDENT-OFF*
497     const CeedScalar dXdx[3][3] = {{q_data[1][i],
498                                     q_data[2][i],
499                                     q_data[3][i]},
500                                    {q_data[4][i],
501                                     q_data[5][i],
502                                     q_data[6][i]},
503                                    {q_data[7][i],
504                                     q_data[8][i],
505                                     q_data[9][i]}
506                                   };
507     // *INDENT-ON*
508     State grad_s[3];
509     for (CeedInt j=0; j<3; j++) {
510       CeedScalar dx_i[3] = {0}, dU[5];
511       for (CeedInt k=0; k<5; k++)
512         dU[k] = Grad_q[0][k][i] * dXdx[0][j] +
513                 Grad_q[1][k][i] * dXdx[1][j] +
514                 Grad_q[2][k][i] * dXdx[2][j];
515       dx_i[j] = 1.;
516       grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i);
517     }
518 
519     CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3];
520     KMStrainRate(grad_s, strain_rate);
521     NewtonianStress(context, strain_rate, kmstress);
522     KMUnpack(kmstress, stress);
523     ViscousEnergyFlux(context, s.Y, grad_s, stress, Fe);
524 
525     StateConservative F_inviscid[3];
526     FluxInviscid(context, s, F_inviscid);
527 
528 
529     // Total flux
530     CeedScalar Flux[5][3];
531     for (CeedInt j=0; j<3; j++) {
532       Flux[0][j] = F_inviscid[j].density;
533       for (CeedInt k=0; k<3; k++)
534         Flux[k+1][j] = F_inviscid[j].momentum[k] - stress[k][j];
535       Flux[4][j] = F_inviscid[j].E_total + Fe[j];
536     }
537 
538     for (CeedInt j=0; j<3; j++) {
539       for (CeedInt k=0; k<5; k++) {
540         Grad_v[j][k][i] = -wdetJ * (dXdx[j][0] * Flux[k][0] +
541                                     dXdx[j][1] * Flux[k][1] +
542                                     dXdx[j][2] * Flux[k][2]);
543       }
544     }
545 
546     const CeedScalar body_force[5] = {0, s.U.density *g[0], s.U.density *g[1], s.U.density *g[2], 0};
547     for (CeedInt j=0; j<5; j++)
548       v[j][i] = wdetJ * (q_dot[j][i] - body_force[j]);
549 
550     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
551     CeedScalar jacob_F_conv[3][5][5] = {0};
552     computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total,
553                            gamma, g, x_i);
554     CeedScalar grad_U[5][3];
555     for (CeedInt j=0; j<3; j++) {
556       grad_U[0][j] = grad_s[j].U.density;
557       for (CeedInt k=0; k<3; k++) grad_U[k+1][j] = grad_s[j].U.momentum[k];
558       grad_U[4][j] = grad_s[j].U.E_total;
559     }
560 
561     // strong_conv = dF/dq * dq/dx    (Strong convection)
562     CeedScalar strong_conv[5] = {0};
563     for (CeedInt j=0; j<3; j++)
564       for (CeedInt k=0; k<5; k++)
565         for (CeedInt l=0; l<5; l++)
566           strong_conv[k] += jacob_F_conv[j][k][l] * grad_U[l][j];
567 
568     // Strong residual
569     CeedScalar strong_res[5];
570     for (CeedInt j=0; j<5; j++)
571       strong_res[j] = q_dot[j][i] + strong_conv[j] - body_force[j];
572 
573     // -- Stabilization method: none, SU, or SUPG
574     CeedScalar stab[5][3] = {{0.}};
575     CeedScalar tau_strong_res[5] = {0.}, tau_strong_res_conservative[5] = {0};
576     CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
577     CeedScalar Tau_d[3] = {0.};
578     switch (context->stabilization) {
579     case STAB_NONE:        // Galerkin
580       break;
581     case STAB_SU:        // SU
582       Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density);
583       tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
584       tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
585       tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
586       tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
587       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
588       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
589                                   tau_strong_conv, tau_strong_conv_conservative);
590       for (CeedInt j=0; j<3; j++)
591         for (CeedInt k=0; k<5; k++)
592           for (CeedInt l=0; l<5; l++)
593             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
594 
595       for (CeedInt j=0; j<5; j++)
596         for (CeedInt k=0; k<3; k++)
597           Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
598                                     stab[j][1] * dXdx[k][1] +
599                                     stab[j][2] * dXdx[k][2]);
600 
601       break;
602     case STAB_SUPG:        // SUPG
603       Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density);
604       tau_strong_res[0] = Tau_d[0] * strong_res[0];
605       tau_strong_res[1] = Tau_d[1] * strong_res[1];
606       tau_strong_res[2] = Tau_d[1] * strong_res[2];
607       tau_strong_res[3] = Tau_d[1] * strong_res[3];
608       tau_strong_res[4] = Tau_d[2] * strong_res[4];
609 
610       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
611                                   tau_strong_res, tau_strong_res_conservative);
612       for (CeedInt j=0; j<3; j++)
613         for (CeedInt k=0; k<5; k++)
614           for (CeedInt l=0; l<5; l++)
615             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_res_conservative[l];
616 
617       for (CeedInt j=0; j<5; j++)
618         for (CeedInt k=0; k<3; k++)
619           Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
620                                     stab[j][1] * dXdx[k][1] +
621                                     stab[j][2] * dXdx[k][2]);
622       break;
623     }
624     for (CeedInt j=0; j<5; j++) jac_data[j][i] = U[j];
625     for (CeedInt j=0; j<6; j++) jac_data[5+j][i] = kmstress[j];
626     for (CeedInt j=0; j<3; j++) jac_data[5+6+j][i] = Tau_d[j];
627 
628   } // End Quadrature Point Loop
629 
630   // Return
631   return 0;
632 }
633 
634 // *****************************************************************************
635 // This QFunction implements the jacobean of the Navier-Stokes equations
636 //   for implicit time stepping method.
637 //
638 // *****************************************************************************
639 CEED_QFUNCTION(IJacobian_Newtonian)(void *ctx, CeedInt Q,
640                                     const CeedScalar *const *in,
641                                     CeedScalar *const *out) {
642   // *INDENT-OFF*
643   // Inputs
644   const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
645                    (*Grad_dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
646                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
647                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3],
648                    (*jac_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
649   // Outputs
650   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
651              (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
652   // *INDENT-ON*
653   // Context
654   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
655   const CeedScalar *g = context->g;
656   const CeedScalar cp = context->cp;
657   const CeedScalar cv = context->cv;
658   const CeedScalar Rd = cp - cv;
659   const CeedScalar gamma = cp / cv;
660 
661   CeedPragmaSIMD
662   // Quadrature Point Loop
663   for (CeedInt i=0; i<Q; i++) {
664     // -- Interp-to-Interp q_data
665     const CeedScalar wdetJ      =   q_data[0][i];
666     // -- Interp-to-Grad q_data
667     // ---- Inverse of change of coordinate matrix: X_i,j
668     // *INDENT-OFF*
669     const CeedScalar dXdx[3][3] = {{q_data[1][i],
670                                     q_data[2][i],
671                                     q_data[3][i]},
672                                    {q_data[4][i],
673                                     q_data[5][i],
674                                     q_data[6][i]},
675                                    {q_data[7][i],
676                                     q_data[8][i],
677                                     q_data[9][i]}
678                                   };
679     // *INDENT-ON*
680 
681     CeedScalar U[5], kmstress[6], Tau_d[3] __attribute((unused));
682     for (int j=0; j<5; j++) U[j] = jac_data[j][i];
683     for (int j=0; j<6; j++) kmstress[j] = jac_data[5+j][i];
684     for (int j=0; j<3; j++) Tau_d[j] = jac_data[5+6+j][i];
685     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
686     State s = StateFromU(context, U, x_i);
687 
688     CeedScalar dU[5], dx0[3] = {0};
689     for (int j=0; j<5; j++) dU[j] = dq[j][i];
690     State ds = StateFromU_fwd(context, s, dU, x_i, dx0);
691 
692     State grad_ds[3];
693     for (int j=0; j<3; j++) {
694       CeedScalar dUj[5];
695       for (int k=0; k<5; k++) dUj[k] = Grad_dq[0][k][i] * dXdx[0][j]
696                                          + Grad_dq[1][k][i] * dXdx[1][j]
697                                          + Grad_dq[2][k][i] * dXdx[2][j];
698       grad_ds[j] = StateFromU_fwd(context, s, dUj, x_i, dx0);
699     }
700 
701     CeedScalar dstrain_rate[6], dkmstress[6], stress[3][3], dstress[3][3], dFe[3];
702     KMStrainRate(grad_ds, dstrain_rate);
703     NewtonianStress(context, dstrain_rate, dkmstress);
704     KMUnpack(dkmstress, dstress);
705     KMUnpack(kmstress, stress);
706     ViscousEnergyFlux_fwd(context, s.Y, ds.Y, grad_ds, stress, dstress, dFe);
707 
708     StateConservative dF_inviscid[3];
709     FluxInviscid_fwd(context, s, ds, dF_inviscid);
710 
711     // Total flux
712     CeedScalar dFlux[5][3];
713     for (int j=0; j<3; j++) {
714       dFlux[0][j] = dF_inviscid[j].density;
715       for (int k=0; k<3; k++)
716         dFlux[k+1][j] = dF_inviscid[j].momentum[k] - dstress[k][j];
717       dFlux[4][j] = dF_inviscid[j].E_total + dFe[j];
718     }
719 
720     for (int j=0; j<3; j++) {
721       for (int k=0; k<5; k++) {
722         Grad_v[j][k][i] = -wdetJ * (dXdx[j][0] * dFlux[k][0] +
723                                     dXdx[j][1] * dFlux[k][1] +
724                                     dXdx[j][2] * dFlux[k][2]);
725       }
726     }
727 
728     const CeedScalar dbody_force[5] = {0, ds.U.density *g[0], ds.U.density *g[1], ds.U.density *g[2], 0};
729     for (int j=0; j<5; j++)
730       v[j][i] = wdetJ * (context->ijacobian_time_shift * dU[j] - dbody_force[j]);
731 
732     if (1) {
733       CeedScalar jacob_F_conv[3][5][5] = {0};
734       computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total,
735                              gamma, g, x_i);
736       CeedScalar grad_dU[5][3];
737       for (int j=0; j<3; j++) {
738         grad_dU[0][j] = grad_ds[j].U.density;
739         for (int k=0; k<3; k++) grad_dU[k+1][j] = grad_ds[j].U.momentum[k];
740         grad_dU[4][j] = grad_ds[j].U.E_total;
741       }
742       CeedScalar dstrong_conv[5] = {0};
743       for (int j=0; j<3; j++)
744         for (int k=0; k<5; k++)
745           for (int l=0; l<5; l++)
746             dstrong_conv[k] += jacob_F_conv[j][k][l] * grad_dU[l][j];
747       CeedScalar dstrong_res[5];
748       for (int j=0; j<5; j++)
749         dstrong_res[j] = context->ijacobian_time_shift * dU[j] + dstrong_conv[j] -
750                          dbody_force[j];
751       CeedScalar dtau_strong_res[5] = {0.}, dtau_strong_res_conservative[5] = {0};
752       dtau_strong_res[0] = Tau_d[0] * dstrong_res[0];
753       dtau_strong_res[1] = Tau_d[1] * dstrong_res[1];
754       dtau_strong_res[2] = Tau_d[1] * dstrong_res[2];
755       dtau_strong_res[3] = Tau_d[1] * dstrong_res[3];
756       dtau_strong_res[4] = Tau_d[2] * dstrong_res[4];
757       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
758                                   dtau_strong_res, dtau_strong_res_conservative);
759       CeedScalar dstab[5][3] = {0};
760       for (int j=0; j<3; j++)
761         for (int k=0; k<5; k++)
762           for (int l=0; l<5; l++)
763             dstab[k][j] += jacob_F_conv[j][k][l] * dtau_strong_res_conservative[l];
764       for (int j=0; j<5; j++)
765         for (int k=0; k<3; k++)
766           Grad_v[k][j][i] += wdetJ*(dstab[j][0] * dXdx[k][0] +
767                                     dstab[j][1] * dXdx[k][1] +
768                                     dstab[j][2] * dXdx[k][2]);
769 
770     }
771   } // End Quadrature Point Loop
772   return 0;
773 }
774 
775 // Compute boundary integral (ie. for strongly set inflows)
776 CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q,
777                                  const CeedScalar *const *in,
778                                  CeedScalar *const *out) {
779 
780   //*INDENT-OFF*
781   const CeedScalar (*q)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[0],
782                    (*Grad_q)[5][CEED_Q_VLA]  = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
783                    (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
784                    (*x)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[3];
785 
786   CeedScalar (*v)[CEED_Q_VLA]            = (CeedScalar(*)[CEED_Q_VLA]) out[0],
787              (*jac_data_sur)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[1];
788 
789   //*INDENT-ON*
790 
791   const NewtonianIdealGasContext context = (NewtonianIdealGasContext) ctx;
792   const bool is_implicit  = context->is_implicit;
793   State (*StateFromQi)(NewtonianIdealGasContext gas,
794                        const CeedScalar qi[5], const CeedScalar x[3]);
795   State (*StateFromQi_fwd)(NewtonianIdealGasContext gas,
796                            State s, const CeedScalar dqi[5],
797                            const CeedScalar x[3], const CeedScalar dx[3]);
798   StateFromQi     = context->is_primitive ? &StateFromY     : &StateFromU;
799   StateFromQi_fwd = context->is_primitive ? &StateFromY_fwd : &StateFromU_fwd;
800 
801 
802   CeedPragmaSIMD
803   for(CeedInt i=0; i<Q; i++) {
804     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
805     const CeedScalar qi[5]  = {q[0][i], q[1][i], q[2][i], q[3][i], q[4][i]};
806     State s = StateFromQi(context, qi, x_i);
807 
808     const CeedScalar wdetJb  = (is_implicit ? -1. : 1.) * q_data_sur[0][i];
809     // ---- Normal vect
810     const CeedScalar norm[3] = {q_data_sur[1][i],
811                                 q_data_sur[2][i],
812                                 q_data_sur[3][i]
813                                };
814 
815     const CeedScalar dXdx[2][3] = {
816       {q_data_sur[4][i], q_data_sur[5][i], q_data_sur[6][i]},
817       {q_data_sur[7][i], q_data_sur[8][i], q_data_sur[9][i]}
818     };
819 
820     State grad_s[3];
821     for (CeedInt j=0; j<3; j++) {
822       CeedScalar dx_i[3] = {0}, dqi[5];
823       for (CeedInt k=0; k<5; k++)
824         dqi[k] = Grad_q[0][k][i] * dXdx[0][j] +
825                  Grad_q[1][k][i] * dXdx[1][j];
826       dx_i[j] = 1.;
827       grad_s[j] = StateFromQi_fwd(context, s, dqi, x_i, dx_i);
828     }
829 
830     CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3];
831     KMStrainRate(grad_s, strain_rate);
832     NewtonianStress(context, strain_rate, kmstress);
833     KMUnpack(kmstress, stress);
834     ViscousEnergyFlux(context, s.Y, grad_s, stress, Fe);
835 
836     StateConservative F_inviscid[3];
837     FluxInviscid(context, s, F_inviscid);
838 
839     CeedScalar Flux[5] = {0.};
840     for (int j=0; j<3; j++) {
841       Flux[0] += F_inviscid[j].density * norm[j];
842       for (int k=0; k<3; k++)
843         Flux[k+1] += (F_inviscid[j].momentum[k] - stress[k][j]) * norm[j];
844       Flux[4] += (F_inviscid[j].E_total + Fe[j])*norm[j];
845     }
846 
847     // -- Density
848     v[0][i] = -wdetJb * Flux[0];
849 
850     // -- Momentum
851     for (CeedInt j=0; j<3; j++)
852       v[j+1][i] = -wdetJb * Flux[j+1];
853 
854     // -- Total Energy Density
855     v[4][i] = -wdetJb * Flux[4];
856 
857     if (context->is_primitive) {
858       jac_data_sur[0][i] = s.Y.pressure;
859       for (int j=0; j<3; j++) jac_data_sur[j+1][i] = s.Y.velocity[j];
860       jac_data_sur[4][i] = s.Y.temperature;
861     } else {
862       jac_data_sur[0][i] = s.U.density;
863       for (int j=0; j<3; j++) jac_data_sur[j+1][i] = s.U.momentum[j];
864       jac_data_sur[4][i] = s.U.E_total;
865     }
866     for (int j=0; j<6; j++) jac_data_sur[5+j][i] = kmstress[j];
867   }
868   return 0;
869 }
870 
871 // Jacobian for "set nothing" boundary integral
872 CEED_QFUNCTION(BoundaryIntegral_Jacobian)(void *ctx, CeedInt Q,
873     const CeedScalar *const *in,
874     CeedScalar *const *out) {
875   // *INDENT-OFF*
876   // Inputs
877   const CeedScalar (*dq)[CEED_Q_VLA]           = (const CeedScalar(*)[CEED_Q_VLA])in[0],
878                    (*Grad_dq)[5][CEED_Q_VLA]   = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
879                    (*q_data_sur)[CEED_Q_VLA]   = (const CeedScalar(*)[CEED_Q_VLA])in[2],
880                    (*x)[CEED_Q_VLA]            = (const CeedScalar(*)[CEED_Q_VLA])in[3],
881                    (*jac_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
882   // Outputs
883   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
884   // *INDENT-ON*
885 
886   const NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
887   const bool implicit     = context->is_implicit;
888   State (*StateFromQi)(NewtonianIdealGasContext gas,
889                        const CeedScalar qi[5], const CeedScalar x[3]);
890   State (*StateFromQi_fwd)(NewtonianIdealGasContext gas,
891                            State s, const CeedScalar dqi[5],
892                            const CeedScalar x[3], const CeedScalar dx[3]);
893   StateFromQi     = context->is_primitive ? &StateFromY     : &StateFromU;
894   StateFromQi_fwd = context->is_primitive ? &StateFromY_fwd : &StateFromU_fwd;
895 
896   CeedPragmaSIMD
897   // Quadrature Point Loop
898   for (CeedInt i=0; i<Q; i++) {
899     const CeedScalar x_i[3]  = {x[0][i], x[1][i], x[2][i]};
900     const CeedScalar wdetJb  = (implicit ? -1. : 1.) * q_data_sur[0][i];
901     const CeedScalar norm[3] = {q_data_sur[1][i],
902                                 q_data_sur[2][i],
903                                 q_data_sur[3][i]
904                                };
905     const CeedScalar dXdx[2][3] = {
906       {q_data_sur[4][i], q_data_sur[5][i], q_data_sur[6][i]},
907       {q_data_sur[7][i], q_data_sur[8][i], q_data_sur[9][i]}
908     };
909 
910     CeedScalar qi[5], kmstress[6], dqi[5], dx_i[3] = {0.};
911     for (int j=0; j<5; j++) qi[j]    = jac_data_sur[j][i];
912     for (int j=0; j<6; j++) kmstress[j] = jac_data_sur[5+j][i];
913     for (int j=0; j<5; j++) dqi[j]   = dq[j][i];
914 
915     State s  = StateFromQi(context, qi, x_i);
916     State ds = StateFromQi_fwd(context, s, dqi, x_i, dx_i);
917 
918     State grad_ds[3];
919     for (CeedInt j=0; j<3; j++) {
920       CeedScalar dx_i[3] = {0}, dqi_j[5];
921       for (CeedInt k=0; k<5; k++)
922         dqi_j[k] = Grad_dq[0][k][i] * dXdx[0][j] +
923                    Grad_dq[1][k][i] * dXdx[1][j];
924       dx_i[j] = 1.;
925       grad_ds[j] = StateFromQi_fwd(context, s, dqi_j, x_i, dx_i);
926     }
927 
928     CeedScalar dstrain_rate[6], dkmstress[6], stress[3][3], dstress[3][3], dFe[3];
929     KMStrainRate(grad_ds, dstrain_rate);
930     NewtonianStress(context, dstrain_rate, dkmstress);
931     KMUnpack(dkmstress, dstress);
932     KMUnpack(kmstress, stress);
933     ViscousEnergyFlux_fwd(context, s.Y, ds.Y, grad_ds, stress, dstress, dFe);
934 
935     StateConservative dF_inviscid[3];
936     FluxInviscid_fwd(context, s, ds, dF_inviscid);
937 
938     CeedScalar dFlux[5] = {0.};
939     for (int j=0; j<3; j++) {
940       dFlux[0] += dF_inviscid[j].density * norm[j];
941       for (int k=0; k<3; k++)
942         dFlux[k+1] += (dF_inviscid[j].momentum[k] - dstress[k][j]) * norm[j];
943       dFlux[4] += (dF_inviscid[j].E_total + dFe[j]) * norm[j];
944     }
945 
946     for (int j=0; j<5; j++)
947       v[j][i] = -wdetJb * dFlux[j];
948   } // End Quadrature Point Loop
949   return 0;
950 }
951 
952 // Outflow boundary condition, weakly setting a constant pressure
953 CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q,
954                                 const CeedScalar *const *in,
955                                 CeedScalar *const *out) {
956   // *INDENT-OFF*
957   // Inputs
958   const CeedScalar (*q)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[0],
959                    (*Grad_q)[5][CEED_Q_VLA]  = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
960                    (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
961                    (*x)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[3];
962   // Outputs
963   CeedScalar (*v)[CEED_Q_VLA]            = (CeedScalar(*)[CEED_Q_VLA])out[0],
964              (*jac_data_sur)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[1];
965   // *INDENT-ON*
966 
967   const NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
968   const bool       implicit = context->is_implicit;
969   const CeedScalar P0       = context->P0;
970 
971   State (*StateFromQi)(NewtonianIdealGasContext gas,
972                        const CeedScalar qi[5], const CeedScalar x[3]);
973   State (*StateFromQi_fwd)(NewtonianIdealGasContext gas,
974                            State s, const CeedScalar dqi[5],
975                            const CeedScalar x[3], const CeedScalar dx[3]);
976   StateFromQi     = context->is_primitive ? &StateFromY     : &StateFromU;
977   StateFromQi_fwd = context->is_primitive ? &StateFromY_fwd : &StateFromU_fwd;
978 
979   CeedPragmaSIMD
980   // Quadrature Point Loop
981   for (CeedInt i=0; i<Q; i++) {
982     // Setup
983     // -- Interp in
984     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
985     const CeedScalar qi[5]  = {q[0][i], q[1][i], q[2][i], q[3][i], q[4][i]};
986     State s = StateFromQi(context, qi, x_i);
987     s.Y.pressure = P0;
988 
989     // -- Interp-to-Interp q_data
990     // For explicit mode, the surface integral is on the RHS of ODE q_dot = f(q).
991     // For implicit mode, it gets pulled to the LHS of implicit ODE/DAE g(q_dot, q).
992     // We can effect this by swapping the sign on this weight
993     const CeedScalar wdetJb  = (implicit ? -1. : 1.) * q_data_sur[0][i];
994 
995     // ---- Normal vect
996     const CeedScalar norm[3] = {q_data_sur[1][i],
997                                 q_data_sur[2][i],
998                                 q_data_sur[3][i]
999                                };
1000 
1001     const CeedScalar dXdx[2][3] = {
1002       {q_data_sur[4][i], q_data_sur[5][i], q_data_sur[6][i]},
1003       {q_data_sur[7][i], q_data_sur[8][i], q_data_sur[9][i]}
1004     };
1005 
1006     State grad_s[3];
1007     for (CeedInt j=0; j<3; j++) {
1008       CeedScalar dx_i[3] = {0}, dqi[5];
1009       for (CeedInt k=0; k<5; k++)
1010         dqi[k] = Grad_q[0][k][i] * dXdx[0][j] +
1011                  Grad_q[1][k][i] * dXdx[1][j];
1012       dx_i[j] = 1.;
1013       grad_s[j] = StateFromQi_fwd(context, s, dqi, x_i, dx_i);
1014     }
1015 
1016     CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3];
1017     KMStrainRate(grad_s, strain_rate);
1018     NewtonianStress(context, strain_rate, kmstress);
1019     KMUnpack(kmstress, stress);
1020     ViscousEnergyFlux(context, s.Y, grad_s, stress, Fe);
1021 
1022     StateConservative F_inviscid[3];
1023     FluxInviscid(context, s, F_inviscid);
1024 
1025     CeedScalar Flux[5] = {0.};
1026     for (int j=0; j<3; j++) {
1027       Flux[0] += F_inviscid[j].density * norm[j];
1028       for (int k=0; k<3; k++)
1029         Flux[k+1] += (F_inviscid[j].momentum[k] - stress[k][j]) * norm[j];
1030       Flux[4] += (F_inviscid[j].E_total + Fe[j])*norm[j];
1031     }
1032 
1033     // -- Density
1034     v[0][i] = -wdetJb * Flux[0];
1035 
1036     // -- Momentum
1037     for (CeedInt j=0; j<3; j++)
1038       v[j+1][i] = -wdetJb * Flux[j+1];
1039 
1040     // -- Total Energy Density
1041     v[4][i] = -wdetJb * Flux[4];
1042 
1043     // Save values for Jacobian
1044     if (context->is_primitive) {
1045       jac_data_sur[0][i] = s.Y.pressure;
1046       for (int j=0; j<3; j++) jac_data_sur[j+1][i] = s.Y.velocity[j];
1047       jac_data_sur[4][i] = s.Y.temperature;
1048     } else {
1049       jac_data_sur[0][i] = s.U.density;
1050       for (int j=0; j<3; j++) jac_data_sur[j+1][i] = s.U.momentum[j];
1051       jac_data_sur[4][i] = s.U.E_total;
1052     }
1053     for (int j=0; j<6; j++) jac_data_sur[5+j][i] = kmstress[j];
1054   } // End Quadrature Point Loop
1055   return 0;
1056 }
1057 
1058 // Jacobian for weak-pressure outflow boundary condition
1059 CEED_QFUNCTION(PressureOutflow_Jacobian)(void *ctx, CeedInt Q,
1060     const CeedScalar *const *in, CeedScalar *const *out) {
1061   // *INDENT-OFF*
1062   // Inputs
1063   const CeedScalar (*dq)[CEED_Q_VLA]           = (const CeedScalar(*)[CEED_Q_VLA])in[0],
1064                    (*Grad_dq)[5][CEED_Q_VLA]   = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
1065                    (*q_data_sur)[CEED_Q_VLA]   = (const CeedScalar(*)[CEED_Q_VLA])in[2],
1066                    (*x)[CEED_Q_VLA]            = (const CeedScalar(*)[CEED_Q_VLA])in[3],
1067                    (*jac_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
1068   // Outputs
1069   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
1070   // *INDENT-ON*
1071 
1072   const NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
1073   const bool implicit     = context->is_implicit;
1074 
1075   State (*StateFromQi)(NewtonianIdealGasContext gas,
1076                        const CeedScalar qi[5], const CeedScalar x[3]);
1077   State (*StateFromQi_fwd)(NewtonianIdealGasContext gas,
1078                            State s, const CeedScalar dQi[5],
1079                            const CeedScalar x[3], const CeedScalar dx[3]);
1080   StateFromQi     = context->is_primitive ? &StateFromY     : &StateFromU;
1081   StateFromQi_fwd = context->is_primitive ? &StateFromY_fwd : &StateFromU_fwd;
1082 
1083   CeedPragmaSIMD
1084   // Quadrature Point Loop
1085   for (CeedInt i=0; i<Q; i++) {
1086     const CeedScalar x_i[3]  = {x[0][i], x[1][i], x[2][i]};
1087     const CeedScalar wdetJb  = (implicit ? -1. : 1.) * q_data_sur[0][i];
1088     const CeedScalar norm[3] = {q_data_sur[1][i],
1089                                 q_data_sur[2][i],
1090                                 q_data_sur[3][i]
1091                                };
1092     const CeedScalar dXdx[2][3] = {
1093       {q_data_sur[4][i], q_data_sur[5][i], q_data_sur[6][i]},
1094       {q_data_sur[7][i], q_data_sur[8][i], q_data_sur[9][i]}
1095     };
1096 
1097     CeedScalar qi[5], kmstress[6], dqi[5], dx_i[3] = {0.};
1098     for (int j=0; j<5; j++) qi[j]    = jac_data_sur[j][i];
1099     for (int j=0; j<6; j++) kmstress[j] = jac_data_sur[5+j][i];
1100     for (int j=0; j<5; j++) dqi[j]   = dq[j][i];
1101 
1102     State s  = StateFromQi(context, qi, x_i);
1103     State ds = StateFromQi_fwd(context, s, dqi, x_i, dx_i);
1104     s.Y.pressure  = context->P0;
1105     ds.Y.pressure = 0.;
1106 
1107     State grad_ds[3];
1108     for (CeedInt j=0; j<3; j++) {
1109       CeedScalar dx_i[3] = {0}, dqi_j[5];
1110       for (CeedInt k=0; k<5; k++)
1111         dqi_j[k] = Grad_dq[0][k][i] * dXdx[0][j] +
1112                    Grad_dq[1][k][i] * dXdx[1][j];
1113       dx_i[j] = 1.;
1114       grad_ds[j] = StateFromQi_fwd(context, s, dqi_j, x_i, dx_i);
1115     }
1116 
1117     CeedScalar dstrain_rate[6], dkmstress[6], stress[3][3], dstress[3][3], dFe[3];
1118     KMStrainRate(grad_ds, dstrain_rate);
1119     NewtonianStress(context, dstrain_rate, dkmstress);
1120     KMUnpack(dkmstress, dstress);
1121     KMUnpack(kmstress, stress);
1122     ViscousEnergyFlux_fwd(context, s.Y, ds.Y, grad_ds, stress, dstress, dFe);
1123 
1124     StateConservative dF_inviscid[3];
1125     FluxInviscid_fwd(context, s, ds, dF_inviscid);
1126 
1127     CeedScalar dFlux[5] = {0.};
1128     for (int j=0; j<3; j++) {
1129       dFlux[0] += dF_inviscid[j].density * norm[j];
1130       for (int k=0; k<3; k++)
1131         dFlux[k+1] += (dF_inviscid[j].momentum[k] - dstress[k][j]) * norm[j];
1132       dFlux[4] += (dF_inviscid[j].E_total + dFe[j]) * norm[j];
1133     }
1134 
1135     for (int j=0; j<5; j++)
1136       v[j][i] = -wdetJb * dFlux[j];
1137   } // End Quadrature Point Loop
1138   return 0;
1139 }
1140 
1141 // *****************************************************************************
1142 // This QFunction implements the Navier-Stokes equations (mentioned above) in
1143 //   primitive variables and with implicit time stepping method
1144 //
1145 // *****************************************************************************
1146 CEED_QFUNCTION(IFunction_Newtonian_Prim)(void *ctx, CeedInt Q,
1147     const CeedScalar *const *in, CeedScalar *const *out) {
1148   // *INDENT-OFF*
1149   // Inputs
1150   const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
1151                    (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
1152                    (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
1153                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3],
1154                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
1155   // Outputs
1156   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
1157              (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1],
1158              (*jac_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[2];
1159   // *INDENT-ON*
1160   // Context
1161   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
1162   const CeedScalar mu     = context->mu;
1163   const CeedScalar cv     = context->cv;
1164   const CeedScalar cp     = context->cp;
1165   const CeedScalar *g     = context->g;
1166   const CeedScalar dt     = context->dt;
1167   const CeedScalar gamma  = cp / cv;
1168   const CeedScalar Rd     = cp - cv;
1169 
1170   CeedPragmaSIMD
1171   // Quadrature Point Loop
1172   for (CeedInt i=0; i<Q; i++) {
1173     CeedScalar Y[5];
1174     for (CeedInt j=0; j<5; j++) Y[j] = q[j][i];
1175     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
1176     State s = StateFromY(context, Y, x_i);
1177 
1178     // -- Interp-to-Interp q_data
1179     const CeedScalar wdetJ      =   q_data[0][i];
1180     // -- Interp-to-Grad q_data
1181     // ---- Inverse of change of coordinate matrix: X_i,j
1182     // *INDENT-OFF*
1183     const CeedScalar dXdx[3][3] = {{q_data[1][i],
1184                                     q_data[2][i],
1185                                     q_data[3][i]},
1186                                    {q_data[4][i],
1187                                     q_data[5][i],
1188                                     q_data[6][i]},
1189                                    {q_data[7][i],
1190                                     q_data[8][i],
1191                                     q_data[9][i]}
1192                                   };
1193     // *INDENT-ON*
1194     State grad_s[3];
1195     for (CeedInt j=0; j<3; j++) {
1196       CeedScalar dx_i[3] = {0}, dY[5];
1197       for (CeedInt k=0; k<5; k++)
1198         dY[k] = Grad_q[0][k][i] * dXdx[0][j] +
1199                 Grad_q[1][k][i] * dXdx[1][j] +
1200                 Grad_q[2][k][i] * dXdx[2][j];
1201       dx_i[j] = 1.;
1202       grad_s[j] = StateFromY_fwd(context, s, dY, x_i, dx_i);
1203     }
1204 
1205     CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3];
1206     KMStrainRate(grad_s, strain_rate);
1207     NewtonianStress(context, strain_rate, kmstress);
1208     KMUnpack(kmstress, stress);
1209     ViscousEnergyFlux(context, s.Y, grad_s, stress, Fe);
1210 
1211     StateConservative F_inviscid[3];
1212     FluxInviscid(context, s, F_inviscid);
1213 
1214     // Total flux
1215     CeedScalar Flux[5][3];
1216     for (CeedInt j=0; j<3; j++) {
1217       Flux[0][j] = F_inviscid[j].density;
1218       for (CeedInt k=0; k<3; k++)
1219         Flux[k+1][j] = F_inviscid[j].momentum[k] - stress[k][j];
1220       Flux[4][j] = F_inviscid[j].E_total + Fe[j];
1221     }
1222 
1223     for (CeedInt j=0; j<3; j++) {
1224       for (CeedInt k=0; k<5; k++) {
1225         Grad_v[j][k][i] = -wdetJ * (dXdx[j][0] * Flux[k][0] +
1226                                     dXdx[j][1] * Flux[k][1] +
1227                                     dXdx[j][2] * Flux[k][2]);
1228       }
1229     }
1230 
1231     const CeedScalar body_force[5] = {0, s.U.density *g[0], s.U.density *g[1], s.U.density *g[2], 0};
1232 
1233     CeedScalar Y_dot[5], dx0[3] = {0};
1234     for (int j=0; j<5; j++) Y_dot[j] = q_dot[j][i];
1235     State s_dot = StateFromY_fwd(context, s, Y_dot, x_i, dx0);
1236 
1237     CeedScalar U_dot[5] = {0.};
1238     U_dot[0] = s_dot.U.density;
1239     for (CeedInt j=0; j<3; j++)
1240       U_dot[j+1] = s_dot.U.momentum[j];
1241     U_dot[4] = s_dot.U.E_total;
1242 
1243     for (CeedInt j=0; j<5; j++)
1244       v[j][i] = wdetJ * (U_dot[j] - body_force[j]);
1245 
1246     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
1247     CeedScalar jacob_F_conv[3][5][5] = {0};
1248     computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total,
1249                            gamma, g, x_i);
1250     CeedScalar grad_U[5][3];
1251     for (CeedInt j=0; j<3; j++) {
1252       grad_U[0][j] = grad_s[j].U.density;
1253       for (CeedInt k=0; k<3; k++) grad_U[k+1][j] = grad_s[j].U.momentum[k];
1254       grad_U[4][j] = grad_s[j].U.E_total;
1255     }
1256 
1257     // strong_conv = dF/dq * dq/dx    (Strong convection)
1258     CeedScalar strong_conv[5] = {0};
1259     for (CeedInt j=0; j<3; j++)
1260       for (CeedInt k=0; k<5; k++)
1261         for (CeedInt l=0; l<5; l++)
1262           strong_conv[k] += jacob_F_conv[j][k][l] * grad_U[l][j];
1263 
1264     // Strong residual
1265     CeedScalar strong_res[5];
1266     for (CeedInt j=0; j<5; j++)
1267       strong_res[j] = U_dot[j] + strong_conv[j] - body_force[j];
1268 
1269     // -- Stabilization method: none, SU, or SUPG
1270     CeedScalar stab[5][3] = {{0.}};
1271     CeedScalar tau_strong_res[5] = {0.}, tau_strong_res_conservative[5] = {0};
1272     CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
1273     CeedScalar Tau_d[3] = {0.};
1274     switch (context->stabilization) {
1275     case STAB_NONE:        // Galerkin
1276       break;
1277     case STAB_SU:        // SU
1278       Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density);
1279       tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
1280       tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
1281       tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
1282       tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
1283       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
1284       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
1285                                   tau_strong_conv, tau_strong_conv_conservative);
1286       for (CeedInt j=0; j<3; j++)
1287         for (CeedInt k=0; k<5; k++)
1288           for (CeedInt l=0; l<5; l++)
1289             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
1290 
1291       for (CeedInt j=0; j<5; j++)
1292         for (CeedInt k=0; k<3; k++)
1293           Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
1294                                     stab[j][1] * dXdx[k][1] +
1295                                     stab[j][2] * dXdx[k][2]);
1296 
1297       break;
1298     case STAB_SUPG:        // SUPG
1299       Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density);
1300       tau_strong_res[0] = Tau_d[0] * strong_res[0];
1301       tau_strong_res[1] = Tau_d[1] * strong_res[1];
1302       tau_strong_res[2] = Tau_d[1] * strong_res[2];
1303       tau_strong_res[3] = Tau_d[1] * strong_res[3];
1304       tau_strong_res[4] = Tau_d[2] * strong_res[4];
1305 
1306       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
1307                                   tau_strong_res, tau_strong_res_conservative);
1308       for (CeedInt j=0; j<3; j++)
1309         for (CeedInt k=0; k<5; k++)
1310           for (CeedInt l=0; l<5; l++)
1311             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_res_conservative[l];
1312 
1313       for (CeedInt j=0; j<5; j++)
1314         for (CeedInt k=0; k<3; k++)
1315           Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
1316                                     stab[j][1] * dXdx[k][1] +
1317                                     stab[j][2] * dXdx[k][2]);
1318       break;
1319     }
1320     for (CeedInt j=0; j<5; j++) jac_data[j][i] = Y[j];
1321     for (CeedInt j=0; j<6; j++) jac_data[5+j][i] = kmstress[j];
1322     for (CeedInt j=0; j<3; j++) jac_data[5+6+j][i] = Tau_d[j];
1323 
1324   } // End Quadrature Point Loop
1325 
1326   // Return
1327   return 0;
1328 }
1329 
1330 // *****************************************************************************
1331 // This QFunction implements the jacobean of the Navier-Stokes equations
1332 //   in primitive variables for implicit time stepping method.
1333 //
1334 // *****************************************************************************
1335 CEED_QFUNCTION(IJacobian_Newtonian_Prim)(void *ctx, CeedInt Q,
1336     const CeedScalar *const *in, CeedScalar *const *out) {
1337   // *INDENT-OFF*
1338   // Inputs
1339   const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
1340                    (*Grad_dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
1341                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
1342                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3],
1343                    (*jac_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
1344   // Outputs
1345   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
1346              (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
1347   // *INDENT-ON*
1348   // Context
1349   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
1350   const CeedScalar *g = context->g;
1351   const CeedScalar cp = context->cp;
1352   const CeedScalar cv = context->cv;
1353   const CeedScalar Rd = cp - cv;
1354   const CeedScalar gamma = cp / cv;
1355 
1356   CeedPragmaSIMD
1357   // Quadrature Point Loop
1358   for (CeedInt i=0; i<Q; i++) {
1359     // -- Interp-to-Interp q_data
1360     const CeedScalar wdetJ      =   q_data[0][i];
1361     // -- Interp-to-Grad q_data
1362     // ---- Inverse of change of coordinate matrix: X_i,j
1363     // *INDENT-OFF*
1364     const CeedScalar dXdx[3][3] = {{q_data[1][i],
1365                                     q_data[2][i],
1366                                     q_data[3][i]},
1367                                    {q_data[4][i],
1368                                     q_data[5][i],
1369                                     q_data[6][i]},
1370                                    {q_data[7][i],
1371                                     q_data[8][i],
1372                                     q_data[9][i]}
1373                                   };
1374     // *INDENT-ON*
1375 
1376     CeedScalar Y[5], kmstress[6], Tau_d[3] __attribute((unused));
1377     for (int j=0; j<5; j++) Y[j] = jac_data[j][i];
1378     for (int j=0; j<6; j++) kmstress[j] = jac_data[5+j][i];
1379     for (int j=0; j<3; j++) Tau_d[j] = jac_data[5+6+j][i];
1380     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
1381     State s = StateFromY(context, Y, x_i);
1382 
1383     CeedScalar dY[5], dx0[3] = {0};
1384     for (int j=0; j<5; j++) dY[j] = dq[j][i];
1385     State ds = StateFromY_fwd(context, s, dY, x_i, dx0);
1386 
1387     State grad_ds[3];
1388     for (int j=0; j<3; j++) {
1389       CeedScalar dYj[5];
1390       for (int k=0; k<5; k++)
1391         dYj[k] = Grad_dq[0][k][i] * dXdx[0][j] +
1392                  Grad_dq[1][k][i] * dXdx[1][j] +
1393                  Grad_dq[2][k][i] * dXdx[2][j];
1394       grad_ds[j] = StateFromY_fwd(context, s, dYj, x_i, dx0);
1395     }
1396 
1397     CeedScalar dstrain_rate[6], dkmstress[6], stress[3][3], dstress[3][3], dFe[3];
1398     KMStrainRate(grad_ds, dstrain_rate);
1399     NewtonianStress(context, dstrain_rate, dkmstress);
1400     KMUnpack(dkmstress, dstress);
1401     KMUnpack(kmstress, stress);
1402     ViscousEnergyFlux_fwd(context, s.Y, ds.Y, grad_ds, stress, dstress, dFe);
1403 
1404     StateConservative dF_inviscid[3];
1405     FluxInviscid_fwd(context, s, ds, dF_inviscid);
1406 
1407     // Total flux
1408     CeedScalar dFlux[5][3];
1409     for (int j=0; j<3; j++) {
1410       dFlux[0][j] = dF_inviscid[j].density;
1411       for (int k=0; k<3; k++)
1412         dFlux[k+1][j] = dF_inviscid[j].momentum[k] - dstress[k][j];
1413       dFlux[4][j] = dF_inviscid[j].E_total + dFe[j];
1414     }
1415 
1416     for (int j=0; j<3; j++) {
1417       for (int k=0; k<5; k++) {
1418         Grad_v[j][k][i] = -wdetJ * (dXdx[j][0] * dFlux[k][0] +
1419                                     dXdx[j][1] * dFlux[k][1] +
1420                                     dXdx[j][2] * dFlux[k][2]);
1421       }
1422     }
1423 
1424     const CeedScalar dbody_force[5] = {0,
1425                                        ds.U.density *g[0],
1426                                        ds.U.density *g[1],
1427                                        ds.U.density *g[2],
1428                                        0
1429                                       };
1430     CeedScalar dU[5] = {0.};
1431     dU[0] = ds.U.density;
1432     for (CeedInt j=0; j<3; j++)
1433       dU[j+1] = ds.U.momentum[j];
1434     dU[4] = ds.U.E_total;
1435 
1436     for (int j=0; j<5; j++)
1437       v[j][i] = wdetJ * (context->ijacobian_time_shift * dU[j] - dbody_force[j]);
1438 
1439     if (1) {
1440       CeedScalar jacob_F_conv[3][5][5] = {0};
1441       computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total,
1442                              gamma, g, x_i);
1443       CeedScalar grad_dU[5][3];
1444       for (int j=0; j<3; j++) {
1445         grad_dU[0][j] = grad_ds[j].U.density;
1446         for (int k=0; k<3; k++) grad_dU[k+1][j] = grad_ds[j].U.momentum[k];
1447         grad_dU[4][j] = grad_ds[j].U.E_total;
1448       }
1449       CeedScalar dstrong_conv[5] = {0.};
1450       for (int j=0; j<3; j++)
1451         for (int k=0; k<5; k++)
1452           for (int l=0; l<5; l++)
1453             dstrong_conv[k] += jacob_F_conv[j][k][l] * grad_dU[l][j];
1454 
1455       CeedScalar dstrong_res[5];
1456       for (int j=0; j<5; j++)
1457         dstrong_res[j] = context->ijacobian_time_shift * dU[j] +
1458                          dstrong_conv[j] -
1459                          dbody_force[j];
1460 
1461       CeedScalar dtau_strong_res[5] = {0.},
1462                                       dtau_strong_res_conservative[5] = {0.};
1463       dtau_strong_res[0] = Tau_d[0] * dstrong_res[0];
1464       dtau_strong_res[1] = Tau_d[1] * dstrong_res[1];
1465       dtau_strong_res[2] = Tau_d[1] * dstrong_res[2];
1466       dtau_strong_res[3] = Tau_d[1] * dstrong_res[3];
1467       dtau_strong_res[4] = Tau_d[2] * dstrong_res[4];
1468       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
1469                                   dtau_strong_res, dtau_strong_res_conservative);
1470       CeedScalar dstab[5][3] = {0};
1471       for (int j=0; j<3; j++)
1472         for (int k=0; k<5; k++)
1473           for (int l=0; l<5; l++)
1474             dstab[k][j] += jacob_F_conv[j][k][l] * dtau_strong_res_conservative[l];
1475 
1476       for (int j=0; j<5; j++)
1477         for (int k=0; k<3; k++)
1478           Grad_v[k][j][i] += wdetJ*(dstab[j][0] * dXdx[k][0] +
1479                                     dstab[j][1] * dXdx[k][1] +
1480                                     dstab[j][2] * dXdx[k][2]);
1481 
1482     }
1483   } // End Quadrature Point Loop
1484   return 0;
1485 }
1486 // *****************************************************************************
1487 
1488 #endif // newtonian_h
1489