xref: /libCEED/examples/fluids/qfunctions/newtonian.h (revision 3b0d37b7b48e6b9e226afa94eeb793b3e14bfbcc)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 /// @file
9 /// Operator for Navier-Stokes example using PETSc
10 
11 
12 #ifndef newtonian_h
13 #define newtonian_h
14 
15 #include <math.h>
16 #include <ceed.h>
17 #include "newtonian_types.h"
18 #include "newtonian_state.h"
19 #include "utils.h"
20 
21 // *****************************************************************************
22 // Helper function for computing flux Jacobian
23 // *****************************************************************************
24 CEED_QFUNCTION_HELPER void computeFluxJacobian_NS(CeedScalar dF[3][5][5],
25     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
26     const CeedScalar gamma, const CeedScalar g[3], const CeedScalar x[3]) {
27   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
28   CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
29   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
30     for (CeedInt j=0; j<3; j++) { // Rows of each Jacobian matrix
31       dF[i][j+1][0] = ((i==j) ? ((gamma-1.)*(u_sq/2. - e_potential)) : 0.) -
32                       u[i]*u[j];
33       for (CeedInt k=0; k<3; k++) { // Columns of each Jacobian matrix
34         dF[i][0][k+1]   = ((i==k) ? 1. : 0.);
35         dF[i][j+1][k+1] = ((j==k) ? u[i] : 0.) +
36                           ((i==k) ? u[j] : 0.) -
37                           ((i==j) ? u[k] : 0.) * (gamma-1.);
38         dF[i][4][k+1]   = ((i==k) ? (E*gamma/rho - (gamma-1.)*u_sq/2.) : 0.) -
39                           (gamma-1.)*u[i]*u[k];
40       }
41       dF[i][j+1][4] = ((i==j) ? (gamma-1.) : 0.);
42     }
43     dF[i][4][0] = u[i] * ((gamma-1.)*u_sq - E*gamma/rho);
44     dF[i][4][4] = u[i] * gamma;
45   }
46 }
47 
48 // *****************************************************************************
49 // Helper function for computing flux Jacobian of Primitive variables
50 // *****************************************************************************
51 CEED_QFUNCTION_HELPER void computeFluxJacobian_NSp(CeedScalar dF[3][5][5],
52     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
53     const CeedScalar Rd, const CeedScalar cv) {
54   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
55   // TODO Add in gravity's contribution
56 
57   CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
58   CeedScalar drdT = -rho / T;
59   CeedScalar drdP = 1. / ( Rd * T);
60   CeedScalar etot =  E / rho ;
61   CeedScalar e2p  = drdP * etot + 1. ;
62   CeedScalar e3p  = ( E  + rho * Rd * T );
63   CeedScalar e4p  = drdT * etot + rho * cv ;
64 
65   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
66     for (CeedInt j=0; j<3; j++) { // j counts F^{m_j}
67 //        [row][col] of A_i
68       dF[i][j+1][0] = drdP * u[i] * u[j] + ((i==j) ? 1. : 0.); // F^{{m_j} wrt p
69       for (CeedInt k=0; k<3; k++) { // k counts the wrt vel_k
70         dF[i][0][k+1]   =  ((i==k) ? rho  : 0.);   // F^c wrt u_k
71         dF[i][j+1][k+1] = (((j==k) ? u[i] : 0.) +  // F^m_j wrt u_k
72                            ((i==k) ? u[j] : 0.) ) * rho;
73         dF[i][4][k+1]   = rho * u[i] * u[k]
74                           + ((i==k) ? e3p  : 0.) ; // F^e wrt u_k
75       }
76       dF[i][j+1][4] = drdT * u[i] * u[j]; // F^{m_j} wrt T
77     }
78     dF[i][4][0] = u[i] * e2p; // F^e wrt p
79     dF[i][4][4] = u[i] * e4p; // F^e wrt T
80     dF[i][0][0] = u[i] * drdP; // F^c wrt p
81     dF[i][0][4] = u[i] * drdT; // F^c wrt T
82   }
83 }
84 
85 CEED_QFUNCTION_HELPER void PrimitiveToConservative_fwd(const CeedScalar rho,
86     const CeedScalar u[3], const CeedScalar E, const CeedScalar Rd,
87     const CeedScalar cv, const CeedScalar dY[5], CeedScalar dU[5]) {
88   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2];
89   CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
90   CeedScalar drdT = -rho / T;
91   CeedScalar drdP = 1. / ( Rd * T);
92   dU[0] = drdP * dY[0] + drdT * dY[4];
93   CeedScalar de_kinetic = 0;
94   for (CeedInt i=0; i<3; i++) {
95     dU[1+i] = dU[0] * u[i] + rho * dY[1+i];
96     de_kinetic += u[i] * dY[1+i];
97   }
98   dU[4] = rho * cv * dY[4] + dU[0] * cv * T // internal energy: rho * e
99           + rho * de_kinetic + .5 * dU[0] * u_sq; // kinetic energy: .5 * rho * |u|^2
100 }
101 
102 // *****************************************************************************
103 // Helper function for computing Tau elements (stabilization constant)
104 //   Model from:
105 //     PHASTA
106 //
107 //   Tau[i] = itau=0 which is diagonal-Shakib (3 values still but not spatial)
108 //
109 // Where NOT UPDATED YET
110 // *****************************************************************************
111 CEED_QFUNCTION_HELPER void Tau_diagPrim(CeedScalar Tau_d[3],
112                                         const CeedScalar dXdx[3][3], const CeedScalar u[3],
113                                         const CeedScalar cv, const NewtonianIdealGasContext newt_ctx,
114                                         const CeedScalar mu, const CeedScalar dt,
115                                         const CeedScalar rho) {
116   // Context
117   const CeedScalar Ctau_t = newt_ctx->Ctau_t;
118   const CeedScalar Ctau_v = newt_ctx->Ctau_v;
119   const CeedScalar Ctau_C = newt_ctx->Ctau_C;
120   const CeedScalar Ctau_M = newt_ctx->Ctau_M;
121   const CeedScalar Ctau_E = newt_ctx->Ctau_E;
122   CeedScalar gijd[6];
123   CeedScalar tau;
124   CeedScalar dts;
125   CeedScalar fact;
126 
127   //*INDENT-OFF*
128   gijd[0] =   dXdx[0][0] * dXdx[0][0]
129             + dXdx[1][0] * dXdx[1][0]
130             + dXdx[2][0] * dXdx[2][0];
131 
132   gijd[1] =   dXdx[0][0] * dXdx[0][1]
133             + dXdx[1][0] * dXdx[1][1]
134             + dXdx[2][0] * dXdx[2][1];
135 
136   gijd[2] =   dXdx[0][1] * dXdx[0][1]
137             + dXdx[1][1] * dXdx[1][1]
138             + dXdx[2][1] * dXdx[2][1];
139 
140   gijd[3] =   dXdx[0][0] * dXdx[0][2]
141             + dXdx[1][0] * dXdx[1][2]
142             + dXdx[2][0] * dXdx[2][2];
143 
144   gijd[4] =   dXdx[0][1] * dXdx[0][2]
145             + dXdx[1][1] * dXdx[1][2]
146             + dXdx[2][1] * dXdx[2][2];
147 
148   gijd[5] =   dXdx[0][2] * dXdx[0][2]
149             + dXdx[1][2] * dXdx[1][2]
150             + dXdx[2][2] * dXdx[2][2];
151   //*INDENT-ON*
152 
153   dts = Ctau_t / dt ;
154 
155   tau = rho*rho*((4. * dts * dts)
156                  + u[0] * ( u[0] * gijd[0] + 2. * ( u[1] * gijd[1] + u[2] * gijd[3]))
157                  + u[1] * ( u[1] * gijd[2] + 2. *   u[2] * gijd[4])
158                  + u[2] *   u[2] * gijd[5])
159         + Ctau_v* mu * mu *
160         (gijd[0]*gijd[0] + gijd[2]*gijd[2] + gijd[5]*gijd[5] +
161          + 2. * (gijd[1]*gijd[1] + gijd[3]*gijd[3] + gijd[4]*gijd[4]));
162 
163   fact=sqrt(tau);
164 
165   Tau_d[0] = Ctau_C * fact / (rho*(gijd[0] + gijd[2] + gijd[5]))*0.125;
166 
167   Tau_d[1] = Ctau_M / fact;
168   Tau_d[2] = Ctau_E / ( fact * cv );
169 
170 // consider putting back the way I initially had it  Ctau_E * Tau_d[1] /cv
171 //  to avoid a division if the compiler is smart enough to see that cv IS
172 // a constant that it could invert once for all elements
173 // but in that case energy tau is scaled by the product of Ctau_E * Ctau_M
174 // OR we could absorb cv into Ctau_E but this puts more burden on user to
175 // know how to change constants with a change of fluid or units.  Same for
176 // Ctau_v * mu * mu IF AND ONLY IF we don't add viscosity law =f(T)
177 }
178 
179 // *****************************************************************************
180 // This QFunction sets a "still" initial condition for generic Newtonian IG problems
181 // *****************************************************************************
182 CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q,
183                                const CeedScalar *const *in, CeedScalar *const *out) {
184   // Inputs
185   const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0];
186 
187   // Outputs
188   CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
189 
190   // Context
191   const SetupContext context = (SetupContext)ctx;
192   const CeedScalar theta0    = context->theta0;
193   const CeedScalar P0        = context->P0;
194   const CeedScalar cv        = context->cv;
195   const CeedScalar cp        = context->cp;
196   const CeedScalar *g        = context->g;
197   const CeedScalar Rd        = cp - cv;
198 
199   // Quadrature Point Loop
200   CeedPragmaSIMD
201   for (CeedInt i=0; i<Q; i++) {
202     CeedScalar q[5] = {0.};
203 
204     // Setup
205     // -- Coordinates
206     const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]};
207     const CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
208 
209     // -- Density
210     const CeedScalar rho = P0 / (Rd*theta0);
211 
212     // Initial Conditions
213     q[0] = rho;
214     q[1] = 0.0;
215     q[2] = 0.0;
216     q[3] = 0.0;
217     q[4] = rho * (cv*theta0 + e_potential);
218 
219     for (CeedInt j=0; j<5; j++)
220       q0[j][i] = q[j];
221   } // End of Quadrature Point Loop
222   return 0;
223 }
224 
225 // *****************************************************************************
226 // This QFunction implements the following formulation of Navier-Stokes with
227 //   explicit time stepping method
228 //
229 // This is 3D compressible Navier-Stokes in conservation form with state
230 //   variables of density, momentum density, and total energy density.
231 //
232 // State Variables: q = ( rho, U1, U2, U3, E )
233 //   rho - Mass Density
234 //   Ui  - Momentum Density,      Ui = rho ui
235 //   E   - Total Energy Density,  E  = rho (cv T + (u u)/2 + g z)
236 //
237 // Navier-Stokes Equations:
238 //   drho/dt + div( U )                               = 0
239 //   dU/dt   + div( rho (u x u) + P I3 ) + rho g khat = div( Fu )
240 //   dE/dt   + div( (E + P) u )                       = div( Fe )
241 //
242 // Viscous Stress:
243 //   Fu = mu (grad( u ) + grad( u )^T + lambda div ( u ) I3)
244 //
245 // Thermal Stress:
246 //   Fe = u Fu + k grad( T )
247 // Equation of State
248 //   P = (gamma - 1) (E - rho (u u) / 2 - rho g z)
249 //
250 // Stabilization:
251 //   Tau = diag(TauC, TauM, TauM, TauM, TauE)
252 //     f1 = rho  sqrt(ui uj gij)
253 //     gij = dXi/dX * dXi/dX
254 //     TauC = Cc f1 / (8 gii)
255 //     TauM = min( 1 , 1 / f1 )
256 //     TauE = TauM / (Ce cv)
257 //
258 //  SU   = Galerkin + grad(v) . ( Ai^T * Tau * (Aj q,j) )
259 //
260 // Constants:
261 //   lambda = - 2 / 3,  From Stokes hypothesis
262 //   mu              ,  Dynamic viscosity
263 //   k               ,  Thermal conductivity
264 //   cv              ,  Specific heat, constant volume
265 //   cp              ,  Specific heat, constant pressure
266 //   g               ,  Gravity
267 //   gamma  = cp / cv,  Specific heat ratio
268 //
269 // We require the product of the inverse of the Jacobian (dXdx_j,k) and
270 // its transpose (dXdx_k,j) to properly compute integrals of the form:
271 // int( gradv gradu )
272 //
273 // *****************************************************************************
274 CEED_QFUNCTION(RHSFunction_Newtonian)(void *ctx, CeedInt Q,
275                                       const CeedScalar *const *in, CeedScalar *const *out) {
276   // *INDENT-OFF*
277   // Inputs
278   const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
279                    (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
280                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
281                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3];
282   // Outputs
283   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
284              (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
285   // *INDENT-ON*
286 
287   // Context
288   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
289   const CeedScalar mu     = context->mu;
290   const CeedScalar cv     = context->cv;
291   const CeedScalar cp     = context->cp;
292   const CeedScalar *g     = context->g;
293   const CeedScalar dt     = context->dt;
294   const CeedScalar gamma  = cp / cv;
295   const CeedScalar Rd     = cp - cv;
296 
297   CeedPragmaSIMD
298   // Quadrature Point Loop
299   for (CeedInt i=0; i<Q; i++) {
300     CeedScalar U[5];
301     for (int j=0; j<5; j++) U[j] = q[j][i];
302     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
303     State s = StateFromU(context, U, x_i);
304 
305     // -- Interp-to-Interp q_data
306     const CeedScalar wdetJ      =   q_data[0][i];
307     // -- Interp-to-Grad q_data
308     // ---- Inverse of change of coordinate matrix: X_i,j
309     // *INDENT-OFF*
310     const CeedScalar dXdx[3][3] = {{q_data[1][i],
311                                     q_data[2][i],
312                                     q_data[3][i]},
313                                    {q_data[4][i],
314                                     q_data[5][i],
315                                     q_data[6][i]},
316                                    {q_data[7][i],
317                                     q_data[8][i],
318                                     q_data[9][i]}
319                                   };
320     // *INDENT-ON*
321 
322     State grad_s[3];
323     for (CeedInt j=0; j<3; j++) {
324       CeedScalar dx_i[3] = {0}, dU[5];
325       for (CeedInt k=0; k<5; k++)
326         dU[k] = Grad_q[0][k][i] * dXdx[0][j] +
327                 Grad_q[1][k][i] * dXdx[1][j] +
328                 Grad_q[2][k][i] * dXdx[2][j];
329       dx_i[j] = 1.;
330       grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i);
331     }
332 
333     CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3];
334     KMStrainRate(grad_s, strain_rate);
335     NewtonianStress(context, strain_rate, kmstress);
336     KMUnpack(kmstress, stress);
337     ViscousEnergyFlux(context, s.Y, grad_s, stress, Fe);
338 
339     StateConservative F_inviscid[3];
340     FluxInviscid(context, s, F_inviscid);
341 
342     // Total flux
343     CeedScalar Flux[5][3];
344     for (CeedInt j=0; j<3; j++) {
345       Flux[0][j] = F_inviscid[j].density;
346       for (CeedInt k=0; k<3; k++)
347         Flux[k+1][j] = F_inviscid[j].momentum[k] - stress[k][j];
348       Flux[4][j] = F_inviscid[j].E_total + Fe[j];
349     }
350 
351     for (CeedInt j=0; j<3; j++) {
352       for (CeedInt k=0; k<5; k++) {
353         Grad_v[j][k][i] = wdetJ * (dXdx[j][0] * Flux[k][0] +
354                                    dXdx[j][1] * Flux[k][1] +
355                                    dXdx[j][2] * Flux[k][2]);
356       }
357     }
358 
359     const CeedScalar body_force[5] = {0, s.U.density *g[0], s.U.density *g[1], s.U.density *g[2], 0};
360     for (int j=0; j<5; j++)
361       v[j][i] = wdetJ * body_force[j];
362 
363     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
364     CeedScalar jacob_F_conv[3][5][5] = {0};
365     computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total,
366                            gamma, g, x_i);
367     CeedScalar grad_U[5][3];
368     for (CeedInt j=0; j<3; j++) {
369       grad_U[0][j] = grad_s[j].U.density;
370       for (CeedInt k=0; k<3; k++) grad_U[k+1][j] = grad_s[j].U.momentum[k];
371       grad_U[4][j] = grad_s[j].U.E_total;
372     }
373 
374     // strong_conv = dF/dq * dq/dx    (Strong convection)
375     CeedScalar strong_conv[5] = {0};
376     for (CeedInt j=0; j<3; j++)
377       for (CeedInt k=0; k<5; k++)
378         for (CeedInt l=0; l<5; l++)
379           strong_conv[k] += jacob_F_conv[j][k][l] * grad_U[l][j];
380 
381     // -- Stabilization method: none, SU, or SUPG
382     CeedScalar stab[5][3] = {{0.}};
383     CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
384     CeedScalar Tau_d[3] = {0.};
385     switch (context->stabilization) {
386     case STAB_NONE:        // Galerkin
387       break;
388     case STAB_SU:        // SU
389       Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density);
390       tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
391       tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
392       tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
393       tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
394       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
395       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
396                                   tau_strong_conv,
397                                   tau_strong_conv_conservative);
398       for (CeedInt j=0; j<3; j++)
399         for (CeedInt k=0; k<5; k++)
400           for (CeedInt l=0; l<5; l++)
401             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
402 
403       for (CeedInt j=0; j<5; j++)
404         for (CeedInt k=0; k<3; k++)
405           Grad_v[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] +
406                                     stab[j][1] * dXdx[k][1] +
407                                     stab[j][2] * dXdx[k][2]);
408       break;
409     case STAB_SUPG:        // SUPG is not implemented for explicit scheme
410       break;
411     }
412 
413   } // End Quadrature Point Loop
414 
415   // Return
416   return 0;
417 }
418 
419 // *****************************************************************************
420 // This QFunction implements the Navier-Stokes equations (mentioned above) with
421 //   implicit time stepping method
422 //
423 //  SU   = Galerkin + grad(v) . ( Ai^T * Tau * (Aj q,j) )
424 //  SUPG = Galerkin + grad(v) . ( Ai^T * Tau * (q_dot + Aj q,j - body force) )
425 //                                       (diffussive terms will be added later)
426 //
427 // *****************************************************************************
428 CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
429                                     const CeedScalar *const *in,
430                                     CeedScalar *const *out) {
431   // *INDENT-OFF*
432   // Inputs
433   const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
434                    (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
435                    (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
436                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3],
437                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
438   // Outputs
439   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
440              (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1],
441              (*jac_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[2];
442   // *INDENT-ON*
443   // Context
444   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
445   const CeedScalar mu     = context->mu;
446   const CeedScalar cv     = context->cv;
447   const CeedScalar cp     = context->cp;
448   const CeedScalar *g     = context->g;
449   const CeedScalar dt     = context->dt;
450   const CeedScalar gamma  = cp / cv;
451   const CeedScalar Rd     = cp-cv;
452 
453   CeedPragmaSIMD
454   // Quadrature Point Loop
455   for (CeedInt i=0; i<Q; i++) {
456     CeedScalar U[5];
457     for (CeedInt j=0; j<5; j++) U[j] = q[j][i];
458     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
459     State s = StateFromU(context, U, x_i);
460 
461     // -- Interp-to-Interp q_data
462     const CeedScalar wdetJ      =   q_data[0][i];
463     // -- Interp-to-Grad q_data
464     // ---- Inverse of change of coordinate matrix: X_i,j
465     // *INDENT-OFF*
466     const CeedScalar dXdx[3][3] = {{q_data[1][i],
467                                     q_data[2][i],
468                                     q_data[3][i]},
469                                    {q_data[4][i],
470                                     q_data[5][i],
471                                     q_data[6][i]},
472                                    {q_data[7][i],
473                                     q_data[8][i],
474                                     q_data[9][i]}
475                                   };
476     // *INDENT-ON*
477     State grad_s[3];
478     for (CeedInt j=0; j<3; j++) {
479       CeedScalar dx_i[3] = {0}, dU[5];
480       for (CeedInt k=0; k<5; k++)
481         dU[k] = Grad_q[0][k][i] * dXdx[0][j] +
482                 Grad_q[1][k][i] * dXdx[1][j] +
483                 Grad_q[2][k][i] * dXdx[2][j];
484       dx_i[j] = 1.;
485       grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i);
486     }
487 
488     CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3];
489     KMStrainRate(grad_s, strain_rate);
490     NewtonianStress(context, strain_rate, kmstress);
491     KMUnpack(kmstress, stress);
492     ViscousEnergyFlux(context, s.Y, grad_s, stress, Fe);
493 
494     StateConservative F_inviscid[3];
495     FluxInviscid(context, s, F_inviscid);
496 
497 
498     // Total flux
499     CeedScalar Flux[5][3];
500     for (CeedInt j=0; j<3; j++) {
501       Flux[0][j] = F_inviscid[j].density;
502       for (CeedInt k=0; k<3; k++)
503         Flux[k+1][j] = F_inviscid[j].momentum[k] - stress[k][j];
504       Flux[4][j] = F_inviscid[j].E_total + Fe[j];
505     }
506 
507     for (CeedInt j=0; j<3; j++) {
508       for (CeedInt k=0; k<5; k++) {
509         Grad_v[j][k][i] = -wdetJ * (dXdx[j][0] * Flux[k][0] +
510                                     dXdx[j][1] * Flux[k][1] +
511                                     dXdx[j][2] * Flux[k][2]);
512       }
513     }
514 
515     const CeedScalar body_force[5] = {0, s.U.density *g[0], s.U.density *g[1], s.U.density *g[2], 0};
516     for (CeedInt j=0; j<5; j++)
517       v[j][i] = wdetJ * (q_dot[j][i] - body_force[j]);
518 
519     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
520     CeedScalar jacob_F_conv[3][5][5] = {0};
521     computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total,
522                            gamma, g, x_i);
523     CeedScalar grad_U[5][3];
524     for (CeedInt j=0; j<3; j++) {
525       grad_U[0][j] = grad_s[j].U.density;
526       for (CeedInt k=0; k<3; k++) grad_U[k+1][j] = grad_s[j].U.momentum[k];
527       grad_U[4][j] = grad_s[j].U.E_total;
528     }
529 
530     // strong_conv = dF/dq * dq/dx    (Strong convection)
531     CeedScalar strong_conv[5] = {0};
532     for (CeedInt j=0; j<3; j++)
533       for (CeedInt k=0; k<5; k++)
534         for (CeedInt l=0; l<5; l++)
535           strong_conv[k] += jacob_F_conv[j][k][l] * grad_U[l][j];
536 
537     // Strong residual
538     CeedScalar strong_res[5];
539     for (CeedInt j=0; j<5; j++)
540       strong_res[j] = q_dot[j][i] + strong_conv[j] - body_force[j];
541 
542     // -- Stabilization method: none, SU, or SUPG
543     CeedScalar stab[5][3] = {{0.}};
544     CeedScalar tau_strong_res[5] = {0.}, tau_strong_res_conservative[5] = {0};
545     CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
546     CeedScalar Tau_d[3] = {0.};
547     switch (context->stabilization) {
548     case STAB_NONE:        // Galerkin
549       break;
550     case STAB_SU:        // SU
551       Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density);
552       tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
553       tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
554       tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
555       tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
556       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
557       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
558                                   tau_strong_conv, tau_strong_conv_conservative);
559       for (CeedInt j=0; j<3; j++)
560         for (CeedInt k=0; k<5; k++)
561           for (CeedInt l=0; l<5; l++)
562             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
563 
564       for (CeedInt j=0; j<5; j++)
565         for (CeedInt k=0; k<3; k++)
566           Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
567                                     stab[j][1] * dXdx[k][1] +
568                                     stab[j][2] * dXdx[k][2]);
569 
570       break;
571     case STAB_SUPG:        // SUPG
572       Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density);
573       tau_strong_res[0] = Tau_d[0] * strong_res[0];
574       tau_strong_res[1] = Tau_d[1] * strong_res[1];
575       tau_strong_res[2] = Tau_d[1] * strong_res[2];
576       tau_strong_res[3] = Tau_d[1] * strong_res[3];
577       tau_strong_res[4] = Tau_d[2] * strong_res[4];
578 // Alternate route (useful later with primitive variable code)
579 // this function was verified against PHASTA for as IC that was as close as possible
580 //    computeFluxJacobian_NSp(jacob_F_conv_p, rho, u, E, Rd, cv);
581 // it has also been verified to compute a correct through the following
582 //   stab[k][j] += jacob_F_conv_p[j][k][l] * tau_strong_res[l] // flux Jacobian wrt primitive
583 // applied in the triple loop below
584 //  However, it is more flops than using the existing Jacobian wrt q after q_{,Y} viz
585       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
586                                   tau_strong_res, tau_strong_res_conservative);
587       for (CeedInt j=0; j<3; j++)
588         for (CeedInt k=0; k<5; k++)
589           for (CeedInt l=0; l<5; l++)
590             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_res_conservative[l];
591 
592       for (CeedInt j=0; j<5; j++)
593         for (CeedInt k=0; k<3; k++)
594           Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
595                                     stab[j][1] * dXdx[k][1] +
596                                     stab[j][2] * dXdx[k][2]);
597       break;
598     }
599     for (CeedInt j=0; j<5; j++) jac_data[j][i] = U[j];
600     for (CeedInt j=0; j<6; j++) jac_data[5+j][i] = kmstress[j];
601     for (CeedInt j=0; j<3; j++) jac_data[5+6+j][i] = Tau_d[j];
602 
603   } // End Quadrature Point Loop
604 
605   // Return
606   return 0;
607 }
608 
609 CEED_QFUNCTION(IJacobian_Newtonian)(void *ctx, CeedInt Q,
610                                     const CeedScalar *const *in,
611                                     CeedScalar *const *out) {
612   // *INDENT-OFF*
613   // Inputs
614   const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
615                    (*Grad_dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
616                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
617                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3],
618                    (*jac_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
619   // Outputs
620   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
621              (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
622   // *INDENT-ON*
623   // Context
624   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
625   const CeedScalar *g = context->g;
626   const CeedScalar cp = context->cp;
627   const CeedScalar cv = context->cv;
628   const CeedScalar Rd = cp - cv;
629   const CeedScalar gamma = cp / cv;
630 
631   CeedPragmaSIMD
632   // Quadrature Point Loop
633   for (CeedInt i=0; i<Q; i++) {
634     // -- Interp-to-Interp q_data
635     const CeedScalar wdetJ      =   q_data[0][i];
636     // -- Interp-to-Grad q_data
637     // ---- Inverse of change of coordinate matrix: X_i,j
638     // *INDENT-OFF*
639     const CeedScalar dXdx[3][3] = {{q_data[1][i],
640                                     q_data[2][i],
641                                     q_data[3][i]},
642                                    {q_data[4][i],
643                                     q_data[5][i],
644                                     q_data[6][i]},
645                                    {q_data[7][i],
646                                     q_data[8][i],
647                                     q_data[9][i]}
648                                   };
649     // *INDENT-ON*
650 
651     CeedScalar U[5], kmstress[6], Tau_d[3] __attribute((unused));
652     for (int j=0; j<5; j++) U[j] = jac_data[j][i];
653     for (int j=0; j<6; j++) kmstress[j] = jac_data[5+j][i];
654     for (int j=0; j<3; j++) Tau_d[j] = jac_data[5+6+j][i];
655     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
656     State s = StateFromU(context, U, x_i);
657 
658     CeedScalar dU[5], dx0[3] = {0};
659     for (int j=0; j<5; j++) dU[j] = dq[j][i];
660     State ds = StateFromU_fwd(context, s, dU, x_i, dx0);
661 
662     State grad_ds[3];
663     for (int j=0; j<3; j++) {
664       CeedScalar dUj[5];
665       for (int k=0; k<5; k++) dUj[k] = Grad_dq[0][k][i] * dXdx[0][j]
666                                          + Grad_dq[1][k][i] * dXdx[1][j]
667                                          + Grad_dq[2][k][i] * dXdx[2][j];
668       grad_ds[j] = StateFromU_fwd(context, s, dUj, x_i, dx0);
669     }
670 
671     CeedScalar dstrain_rate[6], dkmstress[6], stress[3][3], dstress[3][3], dFe[3];
672     KMStrainRate(grad_ds, dstrain_rate);
673     NewtonianStress(context, dstrain_rate, dkmstress);
674     KMUnpack(dkmstress, dstress);
675     KMUnpack(kmstress, stress);
676     ViscousEnergyFlux_fwd(context, s.Y, ds.Y, grad_ds, stress, dstress, dFe);
677 
678     StateConservative dF_inviscid[3];
679     FluxInviscid_fwd(context, s, ds, dF_inviscid);
680 
681     // Total flux
682     CeedScalar dFlux[5][3];
683     for (int j=0; j<3; j++) {
684       dFlux[0][j] = dF_inviscid[j].density;
685       for (int k=0; k<3; k++)
686         dFlux[k+1][j] = dF_inviscid[j].momentum[k] - dstress[k][j];
687       dFlux[4][j] = dF_inviscid[j].E_total + dFe[j];
688     }
689 
690     for (int j=0; j<3; j++) {
691       for (int k=0; k<5; k++) {
692         Grad_v[j][k][i] = -wdetJ * (dXdx[j][0] * dFlux[k][0] +
693                                     dXdx[j][1] * dFlux[k][1] +
694                                     dXdx[j][2] * dFlux[k][2]);
695       }
696     }
697 
698     const CeedScalar dbody_force[5] = {0, ds.U.density *g[0], ds.U.density *g[1], ds.U.density *g[2], 0};
699     for (int j=0; j<5; j++)
700       v[j][i] = wdetJ * (context->ijacobian_time_shift * dU[j] - dbody_force[j]);
701 
702     if (1) {
703       CeedScalar jacob_F_conv[3][5][5] = {0};
704       computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total,
705                              gamma, g, x_i);
706       CeedScalar grad_dU[5][3];
707       for (int j=0; j<3; j++) {
708         grad_dU[0][j] = grad_ds[j].U.density;
709         for (int k=0; k<3; k++) grad_dU[k+1][j] = grad_ds[j].U.momentum[k];
710         grad_dU[4][j] = grad_ds[j].U.E_total;
711       }
712       CeedScalar dstrong_conv[5] = {0};
713       for (int j=0; j<3; j++)
714         for (int k=0; k<5; k++)
715           for (int l=0; l<5; l++)
716             dstrong_conv[k] += jacob_F_conv[j][k][l] * grad_dU[l][j];
717       CeedScalar dstrong_res[5];
718       for (int j=0; j<5; j++)
719         dstrong_res[j] = context->ijacobian_time_shift * dU[j] + dstrong_conv[j] -
720                          dbody_force[j];
721       CeedScalar dtau_strong_res[5] = {0.}, dtau_strong_res_conservative[5] = {0};
722       dtau_strong_res[0] = Tau_d[0] * dstrong_res[0];
723       dtau_strong_res[1] = Tau_d[1] * dstrong_res[1];
724       dtau_strong_res[2] = Tau_d[1] * dstrong_res[2];
725       dtau_strong_res[3] = Tau_d[1] * dstrong_res[3];
726       dtau_strong_res[4] = Tau_d[2] * dstrong_res[4];
727       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
728                                   dtau_strong_res, dtau_strong_res_conservative);
729       CeedScalar dstab[5][3] = {0};
730       for (int j=0; j<3; j++)
731         for (int k=0; k<5; k++)
732           for (int l=0; l<5; l++)
733             dstab[k][j] += jacob_F_conv[j][k][l] * dtau_strong_res_conservative[l];
734       for (int j=0; j<5; j++)
735         for (int k=0; k<3; k++)
736           Grad_v[k][j][i] += wdetJ*(dstab[j][0] * dXdx[k][0] +
737                                     dstab[j][1] * dXdx[k][1] +
738                                     dstab[j][2] * dXdx[k][2]);
739 
740     }
741   } // End Quadrature Point Loop
742   return 0;
743 }
744 
745 // Compute boundary integral (ie. for strongly set inflows)
746 CEED_QFUNCTION(BoundaryIntegral)(void *ctx, CeedInt Q,
747                                  const CeedScalar *const *in,
748                                  CeedScalar *const *out) {
749 
750   //*INDENT-OFF*
751   const CeedScalar (*q)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[0],
752                    (*Grad_q)[5][CEED_Q_VLA]  = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
753                    (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
754                    (*x)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[3];
755 
756   CeedScalar (*v)[CEED_Q_VLA]            = (CeedScalar(*)[CEED_Q_VLA]) out[0],
757              (*jac_data_sur)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA]) out[1];
758 
759   //*INDENT-ON*
760 
761   const NewtonianIdealGasContext context = (NewtonianIdealGasContext) ctx;
762   const bool is_implicit  = context->is_implicit;
763 
764   CeedPragmaSIMD
765   for(CeedInt i=0; i<Q; i++) {
766     const CeedScalar U[5]   = {q[0][i], q[1][i], q[2][i], q[3][i], q[4][i]};
767     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
768     const State      s      = StateFromU(context, U, x_i);
769 
770     const CeedScalar wdetJb  = (is_implicit ? -1. : 1.) * q_data_sur[0][i];
771     // ---- Normal vect
772     const CeedScalar norm[3] = {q_data_sur[1][i],
773                                 q_data_sur[2][i],
774                                 q_data_sur[3][i]
775                                };
776 
777     const CeedScalar dXdx[2][3] = {
778       {q_data_sur[4][i], q_data_sur[5][i], q_data_sur[6][i]},
779       {q_data_sur[7][i], q_data_sur[8][i], q_data_sur[9][i]}
780     };
781 
782     State grad_s[3];
783     for (CeedInt j=0; j<3; j++) {
784       CeedScalar dx_i[3] = {0}, dU[5];
785       for (CeedInt k=0; k<5; k++)
786         dU[k] = Grad_q[0][k][i] * dXdx[0][j] +
787                 Grad_q[1][k][i] * dXdx[1][j];
788       dx_i[j] = 1.;
789       grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i);
790     }
791 
792     CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3];
793     KMStrainRate(grad_s, strain_rate);
794     NewtonianStress(context, strain_rate, kmstress);
795     KMUnpack(kmstress, stress);
796     ViscousEnergyFlux(context, s.Y, grad_s, stress, Fe);
797 
798     StateConservative F_inviscid[3];
799     FluxInviscid(context, s, F_inviscid);
800 
801     CeedScalar Flux[5] = {0.};
802     for (int j=0; j<3; j++) {
803       Flux[0] += F_inviscid[j].density * norm[j];
804       for (int k=0; k<3; k++)
805         Flux[k+1] += (F_inviscid[j].momentum[k] - stress[k][j]) * norm[j];
806       Flux[4] += (F_inviscid[j].E_total + Fe[j])*norm[j];
807     }
808 
809     // -- Density
810     v[0][i] = -wdetJb * Flux[0];
811 
812     // -- Momentum
813     for (CeedInt j=0; j<3; j++)
814       v[j+1][i] = -wdetJb * Flux[j+1];
815 
816     // -- Total Energy Density
817     v[4][i] = -wdetJb * Flux[4];
818 
819     jac_data_sur[0][i] = s.U.density;
820     jac_data_sur[1][i] = s.Y.velocity[0];
821     jac_data_sur[2][i] = s.Y.velocity[1];
822     jac_data_sur[3][i] = s.Y.velocity[2];
823     jac_data_sur[4][i] = s.U.E_total;
824     for (int j=0; j<6; j++) jac_data_sur[5+j][i] = kmstress[j];
825   }
826   return 0;
827 }
828 
829 // Jacobian for "set nothing" boundary integral
830 CEED_QFUNCTION(BoundaryIntegral_Jacobian)(void *ctx, CeedInt Q,
831     const CeedScalar *const *in,
832     CeedScalar *const *out) {
833   // *INDENT-OFF*
834   // Inputs
835   const CeedScalar (*dq)[CEED_Q_VLA]           = (const CeedScalar(*)[CEED_Q_VLA])in[0],
836                    (*Grad_dq)[5][CEED_Q_VLA]   = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
837                    (*q_data_sur)[CEED_Q_VLA]   = (const CeedScalar(*)[CEED_Q_VLA])in[2],
838                    (*x)[CEED_Q_VLA]            = (const CeedScalar(*)[CEED_Q_VLA])in[3],
839                    (*jac_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
840   // Outputs
841   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
842   // *INDENT-ON*
843 
844   const NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
845   const bool implicit     = context->is_implicit;
846 
847   CeedPragmaSIMD
848   // Quadrature Point Loop
849   for (CeedInt i=0; i<Q; i++) {
850     const CeedScalar x_i[3]  = {x[0][i], x[1][i], x[2][i]};
851     const CeedScalar wdetJb  = (implicit ? -1. : 1.) * q_data_sur[0][i];
852     const CeedScalar norm[3] = {q_data_sur[1][i],
853                                 q_data_sur[2][i],
854                                 q_data_sur[3][i]
855                                };
856     const CeedScalar dXdx[2][3] = {
857       {q_data_sur[4][i], q_data_sur[5][i], q_data_sur[6][i]},
858       {q_data_sur[7][i], q_data_sur[8][i], q_data_sur[9][i]}
859     };
860 
861     CeedScalar U[5], kmstress[6], dU[5], dx_i[3] = {0.};
862     for (int j=0; j<5; j++) U[j]         = jac_data_sur[j][i];
863     for (int j=0; j<6; j++) kmstress[j]  = jac_data_sur[5+j][i];
864     for (int j=0; j<3; j++) U[j+1]      *= U[0];
865     for (int j=0; j<5; j++) dU[j]        = dq[j][i];
866     State s  = StateFromU(context, U, x_i);
867     State ds = StateFromU_fwd(context, s, dU, x_i, dx_i);
868 
869     State grad_ds[3];
870     for (CeedInt j=0; j<3; j++) {
871       CeedScalar dx_i[3] = {0}, dUj[5];
872       for (CeedInt k=0; k<5; k++)
873         dUj[k] = Grad_dq[0][k][i] * dXdx[0][j] +
874                  Grad_dq[1][k][i] * dXdx[1][j];
875       dx_i[j] = 1.;
876       grad_ds[j] = StateFromU_fwd(context, s, dUj, x_i, dx_i);
877     }
878 
879     CeedScalar dstrain_rate[6], dkmstress[6], stress[3][3], dstress[3][3], dFe[3];
880     KMStrainRate(grad_ds, dstrain_rate);
881     NewtonianStress(context, dstrain_rate, dkmstress);
882     KMUnpack(dkmstress, dstress);
883     KMUnpack(kmstress, stress);
884     ViscousEnergyFlux_fwd(context, s.Y, ds.Y, grad_ds, stress, dstress, dFe);
885 
886     StateConservative dF_inviscid[3];
887     FluxInviscid_fwd(context, s, ds, dF_inviscid);
888 
889     CeedScalar dFlux[5] = {0.};
890     for (int j=0; j<3; j++) {
891       dFlux[0] += dF_inviscid[j].density * norm[j];
892       for (int k=0; k<3; k++)
893         dFlux[k+1] += (dF_inviscid[j].momentum[k] - dstress[k][j]) * norm[j];
894       dFlux[4] += (dF_inviscid[j].E_total + dFe[j]) * norm[j];
895     }
896 
897     for (int j=0; j<5; j++)
898       v[j][i] = -wdetJb * dFlux[j];
899   } // End Quadrature Point Loop
900   return 0;
901 }
902 
903 // Outflow boundary condition, weakly setting a constant pressure
904 CEED_QFUNCTION(PressureOutflow)(void *ctx, CeedInt Q,
905                                 const CeedScalar *const *in,
906                                 CeedScalar *const *out) {
907   // *INDENT-OFF*
908   // Inputs
909   const CeedScalar (*q)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[0],
910                    (*Grad_q)[5][CEED_Q_VLA]  = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
911                    (*q_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
912                    (*x)[CEED_Q_VLA]          = (const CeedScalar(*)[CEED_Q_VLA])in[3];
913   // Outputs
914   CeedScalar (*v)[CEED_Q_VLA]            = (CeedScalar(*)[CEED_Q_VLA])out[0],
915              (*jac_data_sur)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[1];
916   // *INDENT-ON*
917 
918   const NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
919   const bool       implicit = context->is_implicit;
920   const CeedScalar P0       = context->P0;
921 
922   CeedPragmaSIMD
923   // Quadrature Point Loop
924   for (CeedInt i=0; i<Q; i++) {
925     // Setup
926     // -- Interp in
927     const CeedScalar U[5]   = {q[0][i], q[1][i], q[2][i], q[3][i], q[4][i]};
928     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
929     State            s      = StateFromU(context, U, x_i);
930     s.Y.pressure = P0;
931 
932     // -- Interp-to-Interp q_data
933     // For explicit mode, the surface integral is on the RHS of ODE q_dot = f(q).
934     // For implicit mode, it gets pulled to the LHS of implicit ODE/DAE g(q_dot, q).
935     // We can effect this by swapping the sign on this weight
936     const CeedScalar wdetJb  = (implicit ? -1. : 1.) * q_data_sur[0][i];
937 
938     // ---- Normal vect
939     const CeedScalar norm[3] = {q_data_sur[1][i],
940                                 q_data_sur[2][i],
941                                 q_data_sur[3][i]
942                                };
943 
944     const CeedScalar dXdx[2][3] = {
945       {q_data_sur[4][i], q_data_sur[5][i], q_data_sur[6][i]},
946       {q_data_sur[7][i], q_data_sur[8][i], q_data_sur[9][i]}
947     };
948 
949     State grad_s[3];
950     for (CeedInt j=0; j<3; j++) {
951       CeedScalar dx_i[3] = {0}, dU[5];
952       for (CeedInt k=0; k<5; k++)
953         dU[k] = Grad_q[0][k][i] * dXdx[0][j] +
954                 Grad_q[1][k][i] * dXdx[1][j];
955       dx_i[j] = 1.;
956       grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i);
957     }
958 
959     CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3];
960     KMStrainRate(grad_s, strain_rate);
961     NewtonianStress(context, strain_rate, kmstress);
962     KMUnpack(kmstress, stress);
963     ViscousEnergyFlux(context, s.Y, grad_s, stress, Fe);
964 
965     StateConservative F_inviscid[3];
966     FluxInviscid(context, s, F_inviscid);
967 
968     CeedScalar Flux[5] = {0.};
969     for (int j=0; j<3; j++) {
970       Flux[0] += F_inviscid[j].density * norm[j];
971       for (int k=0; k<3; k++)
972         Flux[k+1] += (F_inviscid[j].momentum[k] - stress[k][j]) * norm[j];
973       Flux[4] += (F_inviscid[j].E_total + Fe[j])*norm[j];
974     }
975 
976     // -- Density
977     v[0][i] = -wdetJb * Flux[0];
978 
979     // -- Momentum
980     for (CeedInt j=0; j<3; j++)
981       v[j+1][i] = -wdetJb * Flux[j+1];
982 
983     // -- Total Energy Density
984     v[4][i] = -wdetJb * Flux[4];
985 
986     // Save values for Jacobian
987     jac_data_sur[0][i] = s.U.density;
988     jac_data_sur[1][i] = s.Y.velocity[0];
989     jac_data_sur[2][i] = s.Y.velocity[1];
990     jac_data_sur[3][i] = s.Y.velocity[2];
991     jac_data_sur[4][i] = s.U.E_total;
992     for (int j=0; j<6; j++) jac_data_sur[5+j][i] = kmstress[j];
993   } // End Quadrature Point Loop
994   return 0;
995 }
996 
997 // Jacobian for weak-pressure outflow boundary condition
998 CEED_QFUNCTION(PressureOutflow_Jacobian)(void *ctx, CeedInt Q,
999     const CeedScalar *const *in,
1000     CeedScalar *const *out) {
1001   // *INDENT-OFF*
1002   // Inputs
1003   const CeedScalar (*dq)[CEED_Q_VLA]           = (const CeedScalar(*)[CEED_Q_VLA])in[0],
1004                    (*Grad_dq)[5][CEED_Q_VLA]   = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
1005                    (*q_data_sur)[CEED_Q_VLA]   = (const CeedScalar(*)[CEED_Q_VLA])in[2],
1006                    (*x)[CEED_Q_VLA]            = (const CeedScalar(*)[CEED_Q_VLA])in[3],
1007                    (*jac_data_sur)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
1008   // Outputs
1009   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
1010   // *INDENT-ON*
1011 
1012   const NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
1013   const bool implicit     = context->is_implicit;
1014 
1015   CeedPragmaSIMD
1016   // Quadrature Point Loop
1017   for (CeedInt i=0; i<Q; i++) {
1018     const CeedScalar x_i[3]  = {x[0][i], x[1][i], x[2][i]};
1019     const CeedScalar wdetJb  = (implicit ? -1. : 1.) * q_data_sur[0][i];
1020     const CeedScalar norm[3] = {q_data_sur[1][i],
1021                                 q_data_sur[2][i],
1022                                 q_data_sur[3][i]
1023                                };
1024     const CeedScalar dXdx[2][3] = {
1025       {q_data_sur[4][i], q_data_sur[5][i], q_data_sur[6][i]},
1026       {q_data_sur[7][i], q_data_sur[8][i], q_data_sur[9][i]}
1027     };
1028 
1029     CeedScalar U[5], kmstress[6], dU[5], dx_i[3] = {0.};
1030     for (int j=0; j<5; j++) U[j]         = jac_data_sur[j][i];
1031     for (int j=0; j<6; j++) kmstress[j]  = jac_data_sur[5+j][i];
1032     for (int j=0; j<3; j++) U[j+1]      *= U[0];
1033     for (int j=0; j<5; j++) dU[j]        = dq[j][i];
1034     State s  = StateFromU(context, U, x_i);
1035     State ds = StateFromU_fwd(context, s, dU, x_i, dx_i);
1036     s.Y.pressure  = context->P0;
1037     ds.Y.pressure = 0.;
1038 
1039     State grad_ds[3];
1040     for (CeedInt j=0; j<3; j++) {
1041       CeedScalar dx_i[3] = {0}, dUj[5];
1042       for (CeedInt k=0; k<5; k++)
1043         dUj[k] = Grad_dq[0][k][i] * dXdx[0][j] +
1044                  Grad_dq[1][k][i] * dXdx[1][j];
1045       dx_i[j] = 1.;
1046       grad_ds[j] = StateFromU_fwd(context, s, dUj, x_i, dx_i);
1047     }
1048 
1049     CeedScalar dstrain_rate[6], dkmstress[6], stress[3][3], dstress[3][3], dFe[3];
1050     KMStrainRate(grad_ds, dstrain_rate);
1051     NewtonianStress(context, dstrain_rate, dkmstress);
1052     KMUnpack(dkmstress, dstress);
1053     KMUnpack(kmstress, stress);
1054     ViscousEnergyFlux_fwd(context, s.Y, ds.Y, grad_ds, stress, dstress, dFe);
1055 
1056     StateConservative dF_inviscid[3];
1057     FluxInviscid_fwd(context, s, ds, dF_inviscid);
1058 
1059     CeedScalar dFlux[5] = {0.};
1060     for (int j=0; j<3; j++) {
1061       dFlux[0] += dF_inviscid[j].density * norm[j];
1062       for (int k=0; k<3; k++)
1063         dFlux[k+1] += (dF_inviscid[j].momentum[k] - dstress[k][j]) * norm[j];
1064       dFlux[4] += (dF_inviscid[j].E_total + dFe[j]) * norm[j];
1065     }
1066 
1067     for (int j=0; j<5; j++)
1068       v[j][i] = -wdetJb * dFlux[j];
1069   } // End Quadrature Point Loop
1070   return 0;
1071 }
1072 
1073 // *****************************************************************************
1074 #endif // newtonian_h
1075