xref: /libCEED/examples/fluids/qfunctions/newtonian.h (revision 9a3a46e1d4300a3ad2fa9b1d35445254cd604d16)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 /// @file
9 /// Operator for Navier-Stokes example using PETSc
10 
11 
12 #ifndef newtonian_h
13 #define newtonian_h
14 
15 #include <math.h>
16 #include <ceed.h>
17 #include "newtonian_types.h"
18 
19 #ifndef M_PI
20 #define M_PI    3.14159265358979323846
21 #endif
22 
23 typedef struct {
24   CeedScalar pressure;
25   CeedScalar velocity[3];
26   CeedScalar temperature;
27 } StatePrimitive;
28 
29 typedef struct {
30   CeedScalar density;
31   CeedScalar momentum[3];
32   CeedScalar E_total;
33 } StateConservative;
34 
35 typedef struct {
36   StateConservative U;
37   StatePrimitive Y;
38 } State;
39 
40 CEED_QFUNCTION_HELPER StatePrimitive StatePrimitiveFromConservative(
41   NewtonianIdealGasContext gas, StateConservative U, const CeedScalar x[3]) {
42   StatePrimitive Y;
43   for (int i=0; i<3; i++) Y.velocity[i] = U.momentum[i] / U.density;
44   CeedScalar e_kinetic = .5 * Dot3(Y.velocity, Y.velocity);
45   CeedScalar e_potential = -Dot3(gas->g, x);
46   CeedScalar e_total = U.E_total / U.density;
47   CeedScalar e_internal = e_total - e_kinetic - e_potential;
48   Y.temperature = e_internal / gas->cv;
49   Y.pressure = (gas->cp / gas->cv - 1) * U.density * e_internal;
50   return Y;
51 }
52 
53 CEED_QFUNCTION_HELPER StatePrimitive StatePrimitiveFromConservative_fwd(
54   NewtonianIdealGasContext gas, State s, StateConservative dU,
55   const CeedScalar x[3], const CeedScalar dx[3]) {
56   StatePrimitive dY;
57   for (int i=0; i<3; i++) {
58     dY.velocity[i] = (dU.momentum[i] - s.Y.velocity[i] * dU.density) / s.U.density;
59   }
60   CeedScalar e_kinetic = .5 * Dot3(s.Y.velocity, s.Y.velocity);
61   CeedScalar de_kinetic = Dot3(dY.velocity, s.Y.velocity);
62   CeedScalar e_potential = -Dot3(gas->g, x);
63   CeedScalar de_potential = -Dot3(gas->g, dx);
64   CeedScalar e_total = s.U.E_total / s.U.density;
65   CeedScalar de_total = (dU.E_total - e_total * dU.density) / s.U.density;
66   CeedScalar e_internal = e_total - e_kinetic - e_potential;
67   CeedScalar de_internal = de_total - de_kinetic - de_potential;
68   dY.temperature = de_internal / gas->cv;
69   dY.pressure = (gas->cp / gas->cv - 1)
70                 * (dU.density * e_internal + s.U.density * de_internal);
71   return dY;
72 }
73 
74 CEED_QFUNCTION_HELPER State StateFromU(NewtonianIdealGasContext gas,
75                                        const CeedScalar U[5], const CeedScalar x[3]) {
76   State s;
77   s.U.density = U[0];
78   s.U.momentum[0] = U[1];
79   s.U.momentum[1] = U[2];
80   s.U.momentum[2] = U[3];
81   s.U.E_total = U[4];
82   s.Y = StatePrimitiveFromConservative(gas, s.U, x);
83   return s;
84 }
85 
86 CEED_QFUNCTION_HELPER State StateFromU_fwd(NewtonianIdealGasContext gas,
87     State s, const CeedScalar dU[5],
88     const CeedScalar x[3], const CeedScalar dx[3]) {
89   State ds;
90   ds.U.density = dU[0];
91   ds.U.momentum[0] = dU[1];
92   ds.U.momentum[1] = dU[2];
93   ds.U.momentum[2] = dU[3];
94   ds.U.E_total = dU[4];
95   ds.Y = StatePrimitiveFromConservative_fwd(gas, s, ds.U, x, dx);
96   return ds;
97 }
98 
99 CEED_QFUNCTION_HELPER void FluxInviscid(NewtonianIdealGasContext gas, State s,
100                                         StateConservative Flux[3]) {
101   for (int i=0; i<3; i++) {
102     Flux[i].density = s.U.momentum[i];
103     for (int j=0; j<3; j++)
104       Flux[i].momentum[j] = s.U.momentum[i] * s.Y.velocity[j]
105                             + s.Y.pressure * (i == j);
106     Flux[i].E_total = (s.U.E_total + s.Y.pressure) * s.Y.velocity[i];
107   }
108 }
109 
110 CEED_QFUNCTION_HELPER void FluxInviscid_fwd(NewtonianIdealGasContext gas,
111     State s, State ds, StateConservative dFlux[3]) {
112   for (int i=0; i<3; i++) {
113     dFlux[i].density = ds.U.momentum[i];
114     for (int j=0; j<3; j++)
115       dFlux[i].momentum[j] = ds.U.momentum[i] * s.Y.velocity[j] +
116                              s.U.momentum[i] * ds.Y.velocity[j] + ds.Y.pressure * (i == j);
117     dFlux[i].E_total = (ds.U.E_total + ds.Y.pressure) * s.Y.velocity[i] +
118                        (s.U.E_total + s.Y.pressure) * ds.Y.velocity[i];
119   }
120 }
121 
122 // Kelvin-Mandel notation
123 CEED_QFUNCTION_HELPER void KMStrainRate(const State grad_s[3],
124                                         CeedScalar strain_rate[6]) {
125   const CeedScalar weight = 1 / sqrt(2.);
126   strain_rate[0] = grad_s[0].Y.velocity[0];
127   strain_rate[1] = grad_s[1].Y.velocity[1];
128   strain_rate[2] = grad_s[2].Y.velocity[2];
129   strain_rate[3] = weight * (grad_s[2].Y.velocity[1] + grad_s[1].Y.velocity[2]);
130   strain_rate[4] = weight * (grad_s[2].Y.velocity[0] + grad_s[0].Y.velocity[2]);
131   strain_rate[5] = weight * (grad_s[1].Y.velocity[0] + grad_s[0].Y.velocity[1]);
132 }
133 
134 CEED_QFUNCTION_HELPER void KMUnpack(const CeedScalar v[6], CeedScalar A[3][3]) {
135   const CeedScalar weight = 1 / sqrt(2.);
136   A[0][0] = v[0];
137   A[1][1] = v[1];
138   A[2][2] = v[2];
139   A[2][1] = A[1][2] = weight * v[3];
140   A[2][0] = A[0][2] = weight * v[4];
141   A[1][0] = A[0][1] = weight * v[5];
142 }
143 
144 CEED_QFUNCTION_HELPER void NewtonianStress(NewtonianIdealGasContext gas,
145     const CeedScalar strain_rate[6], CeedScalar stress[6]) {
146   CeedScalar div_u = strain_rate[0] + strain_rate[1] + strain_rate[2];
147   for (int i=0; i<6; i++) {
148     stress[i] = gas->mu * (2 * strain_rate[i] + gas->lambda * div_u * (i < 3));
149   }
150 }
151 
152 CEED_QFUNCTION_HELPER void ViscousEnergyFlux(NewtonianIdealGasContext gas,
153     StatePrimitive Y, const State grad_s[3], const CeedScalar stress[3][3],
154     CeedScalar Fe[3]) {
155   for (int i=0; i<3; i++) {
156     Fe[i] = - Y.velocity[0] * stress[0][i]
157             - Y.velocity[1] * stress[1][i]
158             - Y.velocity[2] * stress[2][i]
159             - gas->k * grad_s[i].Y.temperature;
160   }
161 }
162 
163 CEED_QFUNCTION_HELPER void ViscousEnergyFlux_fwd(NewtonianIdealGasContext gas,
164     StatePrimitive Y, StatePrimitive dY, const State grad_ds[3],
165     const CeedScalar stress[3][3],
166     const CeedScalar dstress[3][3],
167     CeedScalar dFe[3]) {
168   for (int i=0; i<3; i++) {
169     dFe[i] = - Y.velocity[0] * dstress[0][i] - dY.velocity[0] * stress[0][i]
170              - Y.velocity[1] * dstress[1][i] - dY.velocity[1] * stress[1][i]
171              - Y.velocity[2] * dstress[2][i] - dY.velocity[2] * stress[2][i]
172              - gas->k * grad_ds[i].Y.temperature;
173   }
174 }
175 // *****************************************************************************
176 // Helper function for computing flux Jacobian
177 // *****************************************************************************
178 CEED_QFUNCTION_HELPER void computeFluxJacobian_NS(CeedScalar dF[3][5][5],
179     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
180     const CeedScalar gamma, const CeedScalar g[3], const CeedScalar x[3]) {
181   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
182   CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
183   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
184     for (CeedInt j=0; j<3; j++) { // Rows of each Jacobian matrix
185       dF[i][j+1][0] = ((i==j) ? ((gamma-1.)*(u_sq/2. - e_potential)) : 0.) -
186                       u[i]*u[j];
187       for (CeedInt k=0; k<3; k++) { // Columns of each Jacobian matrix
188         dF[i][0][k+1]   = ((i==k) ? 1. : 0.);
189         dF[i][j+1][k+1] = ((j==k) ? u[i] : 0.) +
190                           ((i==k) ? u[j] : 0.) -
191                           ((i==j) ? u[k] : 0.) * (gamma-1.);
192         dF[i][4][k+1]   = ((i==k) ? (E*gamma/rho - (gamma-1.)*u_sq/2.) : 0.) -
193                           (gamma-1.)*u[i]*u[k];
194       }
195       dF[i][j+1][4] = ((i==j) ? (gamma-1.) : 0.);
196     }
197     dF[i][4][0] = u[i] * ((gamma-1.)*u_sq - E*gamma/rho);
198     dF[i][4][4] = u[i] * gamma;
199   }
200 }
201 
202 // *****************************************************************************
203 // Helper function for computing flux Jacobian of Primitive variables
204 // *****************************************************************************
205 CEED_QFUNCTION_HELPER void computeFluxJacobian_NSp(CeedScalar dF[3][5][5],
206     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
207     const CeedScalar Rd, const CeedScalar cv) {
208   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
209   // TODO Add in gravity's contribution
210 
211   CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
212   CeedScalar drdT = -rho / T;
213   CeedScalar drdP = 1. / ( Rd * T);
214   CeedScalar etot =  E / rho ;
215   CeedScalar e2p  = drdP * etot + 1. ;
216   CeedScalar e3p  = ( E  + rho * Rd * T );
217   CeedScalar e4p  = drdT * etot + rho * cv ;
218 
219   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
220     for (CeedInt j=0; j<3; j++) { // j counts F^{m_j}
221 //        [row][col] of A_i
222       dF[i][j+1][0] = drdP * u[i] * u[j] + ((i==j) ? 1. : 0.); // F^{{m_j} wrt p
223       for (CeedInt k=0; k<3; k++) { // k counts the wrt vel_k
224         dF[i][0][k+1]   =  ((i==k) ? rho  : 0.);   // F^c wrt u_k
225         dF[i][j+1][k+1] = (((j==k) ? u[i] : 0.) +  // F^m_j wrt u_k
226                            ((i==k) ? u[j] : 0.) ) * rho;
227         dF[i][4][k+1]   = rho * u[i] * u[k]
228                           + ((i==k) ? e3p  : 0.) ; // F^e wrt u_k
229       }
230       dF[i][j+1][4] = drdT * u[i] * u[j]; // F^{m_j} wrt T
231     }
232     dF[i][4][0] = u[i] * e2p; // F^e wrt p
233     dF[i][4][4] = u[i] * e4p; // F^e wrt T
234     dF[i][0][0] = u[i] * drdP; // F^c wrt p
235     dF[i][0][4] = u[i] * drdT; // F^c wrt T
236   }
237 }
238 
239 CEED_QFUNCTION_HELPER void PrimitiveToConservative_fwd(const CeedScalar rho,
240     const CeedScalar u[3], const CeedScalar E, const CeedScalar Rd,
241     const CeedScalar cv, const CeedScalar dY[5], CeedScalar dU[5]) {
242   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2];
243   CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
244   CeedScalar drdT = -rho / T;
245   CeedScalar drdP = 1. / ( Rd * T);
246   dU[0] = drdP * dY[0] + drdT * dY[4];
247   CeedScalar de_kinetic = 0;
248   for (CeedInt i=0; i<3; i++) {
249     dU[1+i] = dU[0] * u[i] + rho * dY[1+i];
250     de_kinetic += u[i] * dY[1+i];
251   }
252   dU[4] = rho * cv * dY[4] + dU[0] * cv * T // internal energy: rho * e
253           + rho * de_kinetic + .5 * dU[0] * u_sq; // kinetic energy: .5 * rho * |u|^2
254 }
255 
256 // *****************************************************************************
257 // Helper function for computing Tau elements (stabilization constant)
258 //   Model from:
259 //     PHASTA
260 //
261 //   Tau[i] = itau=0 which is diagonal-Shakib (3 values still but not spatial)
262 //
263 // Where NOT UPDATED YET
264 // *****************************************************************************
265 CEED_QFUNCTION_HELPER void Tau_diagPrim(CeedScalar Tau_d[3],
266                                         const CeedScalar dXdx[3][3], const CeedScalar u[3],
267                                         const CeedScalar cv, const NewtonianIdealGasContext newt_ctx,
268                                         const CeedScalar mu, const CeedScalar dt,
269                                         const CeedScalar rho) {
270   // Context
271   const CeedScalar Ctau_t = newt_ctx->Ctau_t;
272   const CeedScalar Ctau_v = newt_ctx->Ctau_v;
273   const CeedScalar Ctau_C = newt_ctx->Ctau_C;
274   const CeedScalar Ctau_M = newt_ctx->Ctau_M;
275   const CeedScalar Ctau_E = newt_ctx->Ctau_E;
276   CeedScalar gijd[6];
277   CeedScalar tau;
278   CeedScalar dts;
279   CeedScalar fact;
280 
281   //*INDENT-OFF*
282   gijd[0] =   dXdx[0][0] * dXdx[0][0]
283             + dXdx[1][0] * dXdx[1][0]
284             + dXdx[2][0] * dXdx[2][0];
285 
286   gijd[1] =   dXdx[0][0] * dXdx[0][1]
287             + dXdx[1][0] * dXdx[1][1]
288             + dXdx[2][0] * dXdx[2][1];
289 
290   gijd[2] =   dXdx[0][1] * dXdx[0][1]
291             + dXdx[1][1] * dXdx[1][1]
292             + dXdx[2][1] * dXdx[2][1];
293 
294   gijd[3] =   dXdx[0][0] * dXdx[0][2]
295             + dXdx[1][0] * dXdx[1][2]
296             + dXdx[2][0] * dXdx[2][2];
297 
298   gijd[4] =   dXdx[0][1] * dXdx[0][2]
299             + dXdx[1][1] * dXdx[1][2]
300             + dXdx[2][1] * dXdx[2][2];
301 
302   gijd[5] =   dXdx[0][2] * dXdx[0][2]
303             + dXdx[1][2] * dXdx[1][2]
304             + dXdx[2][2] * dXdx[2][2];
305   //*INDENT-ON*
306 
307   dts = Ctau_t / dt ;
308 
309   tau = rho*rho*((4. * dts * dts)
310                  + u[0] * ( u[0] * gijd[0] + 2. * ( u[1] * gijd[1] + u[2] * gijd[3]))
311                  + u[1] * ( u[1] * gijd[2] + 2. *   u[2] * gijd[4])
312                  + u[2] *   u[2] * gijd[5])
313         + Ctau_v* mu * mu *
314         (gijd[0]*gijd[0] + gijd[2]*gijd[2] + gijd[5]*gijd[5] +
315          + 2. * (gijd[1]*gijd[1] + gijd[3]*gijd[3] + gijd[4]*gijd[4]));
316 
317   fact=sqrt(tau);
318 
319   Tau_d[0] = Ctau_C * fact / (rho*(gijd[0] + gijd[2] + gijd[5]))*0.125;
320 
321   Tau_d[1] = Ctau_M / fact;
322   Tau_d[2] = Ctau_E / ( fact * cv );
323 
324 // consider putting back the way I initially had it  Ctau_E * Tau_d[1] /cv
325 //  to avoid a division if the compiler is smart enough to see that cv IS
326 // a constant that it could invert once for all elements
327 // but in that case energy tau is scaled by the product of Ctau_E * Ctau_M
328 // OR we could absorb cv into Ctau_E but this puts more burden on user to
329 // know how to change constants with a change of fluid or units.  Same for
330 // Ctau_v * mu * mu IF AND ONLY IF we don't add viscosity law =f(T)
331 }
332 
333 // *****************************************************************************
334 // This QFunction sets a "still" initial condition for generic Newtonian IG problems
335 // *****************************************************************************
336 CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q,
337                                const CeedScalar *const *in, CeedScalar *const *out) {
338   // Inputs
339   const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0];
340 
341   // Outputs
342   CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
343 
344   // Context
345   const SetupContext context = (SetupContext)ctx;
346   const CeedScalar theta0    = context->theta0;
347   const CeedScalar P0        = context->P0;
348   const CeedScalar cv        = context->cv;
349   const CeedScalar cp        = context->cp;
350   const CeedScalar *g        = context->g;
351   const CeedScalar Rd        = cp - cv;
352 
353   // Quadrature Point Loop
354   CeedPragmaSIMD
355   for (CeedInt i=0; i<Q; i++) {
356     CeedScalar q[5] = {0.};
357 
358     // Setup
359     // -- Coordinates
360     const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]};
361     const CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
362 
363     // -- Density
364     const CeedScalar rho = P0 / (Rd*theta0);
365 
366     // Initial Conditions
367     q[0] = rho;
368     q[1] = 0.0;
369     q[2] = 0.0;
370     q[3] = 0.0;
371     q[4] = rho * (cv*theta0 + e_potential);
372 
373     for (CeedInt j=0; j<5; j++)
374       q0[j][i] = q[j];
375   } // End of Quadrature Point Loop
376   return 0;
377 }
378 
379 // *****************************************************************************
380 // This QFunction implements the following formulation of Navier-Stokes with
381 //   explicit time stepping method
382 //
383 // This is 3D compressible Navier-Stokes in conservation form with state
384 //   variables of density, momentum density, and total energy density.
385 //
386 // State Variables: q = ( rho, U1, U2, U3, E )
387 //   rho - Mass Density
388 //   Ui  - Momentum Density,      Ui = rho ui
389 //   E   - Total Energy Density,  E  = rho (cv T + (u u)/2 + g z)
390 //
391 // Navier-Stokes Equations:
392 //   drho/dt + div( U )                               = 0
393 //   dU/dt   + div( rho (u x u) + P I3 ) + rho g khat = div( Fu )
394 //   dE/dt   + div( (E + P) u )                       = div( Fe )
395 //
396 // Viscous Stress:
397 //   Fu = mu (grad( u ) + grad( u )^T + lambda div ( u ) I3)
398 //
399 // Thermal Stress:
400 //   Fe = u Fu + k grad( T )
401 // Equation of State
402 //   P = (gamma - 1) (E - rho (u u) / 2 - rho g z)
403 //
404 // Stabilization:
405 //   Tau = diag(TauC, TauM, TauM, TauM, TauE)
406 //     f1 = rho  sqrt(ui uj gij)
407 //     gij = dXi/dX * dXi/dX
408 //     TauC = Cc f1 / (8 gii)
409 //     TauM = min( 1 , 1 / f1 )
410 //     TauE = TauM / (Ce cv)
411 //
412 //  SU   = Galerkin + grad(v) . ( Ai^T * Tau * (Aj q,j) )
413 //
414 // Constants:
415 //   lambda = - 2 / 3,  From Stokes hypothesis
416 //   mu              ,  Dynamic viscosity
417 //   k               ,  Thermal conductivity
418 //   cv              ,  Specific heat, constant volume
419 //   cp              ,  Specific heat, constant pressure
420 //   g               ,  Gravity
421 //   gamma  = cp / cv,  Specific heat ratio
422 //
423 // We require the product of the inverse of the Jacobian (dXdx_j,k) and
424 // its transpose (dXdx_k,j) to properly compute integrals of the form:
425 // int( gradv gradu )
426 //
427 // *****************************************************************************
428 CEED_QFUNCTION(RHSFunction_Newtonian)(void *ctx, CeedInt Q,
429                                       const CeedScalar *const *in, CeedScalar *const *out) {
430   // *INDENT-OFF*
431   // Inputs
432   const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
433                    (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
434                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
435                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3];
436   // Outputs
437   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
438              (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
439   // *INDENT-ON*
440 
441   // Context
442   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
443   const CeedScalar mu     = context->mu;
444   const CeedScalar cv     = context->cv;
445   const CeedScalar cp     = context->cp;
446   const CeedScalar *g     = context->g;
447   const CeedScalar dt     = context->dt;
448   const CeedScalar gamma  = cp / cv;
449   const CeedScalar Rd     = cp - cv;
450 
451   CeedPragmaSIMD
452   // Quadrature Point Loop
453   for (CeedInt i=0; i<Q; i++) {
454     CeedScalar U[5];
455     for (int j=0; j<5; j++) U[j] = q[j][i];
456     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
457     State s = StateFromU(context, U, x_i);
458 
459     // -- Interp-to-Interp q_data
460     const CeedScalar wdetJ      =   q_data[0][i];
461     // -- Interp-to-Grad q_data
462     // ---- Inverse of change of coordinate matrix: X_i,j
463     // *INDENT-OFF*
464     const CeedScalar dXdx[3][3] = {{q_data[1][i],
465                                     q_data[2][i],
466                                     q_data[3][i]},
467                                    {q_data[4][i],
468                                     q_data[5][i],
469                                     q_data[6][i]},
470                                    {q_data[7][i],
471                                     q_data[8][i],
472                                     q_data[9][i]}
473                                   };
474     // *INDENT-ON*
475 
476     State grad_s[3];
477     for (CeedInt j=0; j<3; j++) {
478       CeedScalar dx_i[3] = {0}, dU[5];
479       for (CeedInt k=0; k<5; k++)
480         dU[k] = Grad_q[0][k][i] * dXdx[0][j] +
481                 Grad_q[1][k][i] * dXdx[1][j] +
482                 Grad_q[2][k][i] * dXdx[2][j];
483       dx_i[j] = 1.;
484       grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i);
485     }
486 
487     CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3];
488     KMStrainRate(grad_s, strain_rate);
489     NewtonianStress(context, strain_rate, kmstress);
490     KMUnpack(kmstress, stress);
491     ViscousEnergyFlux(context, s.Y, grad_s, stress, Fe);
492 
493     StateConservative F_inviscid[3];
494     FluxInviscid(context, s, F_inviscid);
495 
496     // Total flux
497     CeedScalar Flux[5][3];
498     for (CeedInt j=0; j<3; j++) {
499       Flux[0][j] = F_inviscid[j].density;
500       for (CeedInt k=0; k<3; k++)
501         Flux[k+1][j] = F_inviscid[j].momentum[k] - stress[k][j];
502       Flux[4][j] = F_inviscid[j].E_total + Fe[j];
503     }
504 
505     for (CeedInt j=0; j<3; j++) {
506       for (CeedInt k=0; k<5; k++) {
507         Grad_v[j][k][i] = wdetJ * (dXdx[j][0] * Flux[k][0] +
508                                    dXdx[j][1] * Flux[k][1] +
509                                    dXdx[j][2] * Flux[k][2]);
510       }
511     }
512 
513     const CeedScalar body_force[5] = {0, s.U.density *g[0], s.U.density *g[1], s.U.density *g[2], 0};
514     for (int j=0; j<5; j++)
515       v[j][i] = wdetJ * body_force[j];
516 
517     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
518     CeedScalar jacob_F_conv[3][5][5] = {0};
519     computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total,
520                            gamma, g, x_i);
521     CeedScalar grad_U[5][3];
522     for (CeedInt j=0; j<3; j++) {
523       grad_U[0][j] = grad_s[j].U.density;
524       for (CeedInt k=0; k<3; k++) grad_U[k+1][j] = grad_s[j].U.momentum[k];
525       grad_U[4][j] = grad_s[j].U.E_total;
526     }
527 
528     // strong_conv = dF/dq * dq/dx    (Strong convection)
529     CeedScalar strong_conv[5] = {0};
530     for (CeedInt j=0; j<3; j++)
531       for (CeedInt k=0; k<5; k++)
532         for (CeedInt l=0; l<5; l++)
533           strong_conv[k] += jacob_F_conv[j][k][l] * grad_U[l][j];
534 
535     // -- Stabilization method: none, SU, or SUPG
536     CeedScalar stab[5][3] = {{0.}};
537     CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
538     CeedScalar Tau_d[3] = {0.};
539     switch (context->stabilization) {
540     case STAB_NONE:        // Galerkin
541       break;
542     case STAB_SU:        // SU
543       Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density);
544       tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
545       tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
546       tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
547       tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
548       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
549       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
550                                   tau_strong_conv,
551                                   tau_strong_conv_conservative);
552       for (CeedInt j=0; j<3; j++)
553         for (CeedInt k=0; k<5; k++)
554           for (CeedInt l=0; l<5; l++)
555             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
556 
557       for (CeedInt j=0; j<5; j++)
558         for (CeedInt k=0; k<3; k++)
559           Grad_v[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] +
560                                     stab[j][1] * dXdx[k][1] +
561                                     stab[j][2] * dXdx[k][2]);
562       break;
563     case STAB_SUPG:        // SUPG is not implemented for explicit scheme
564       break;
565     }
566 
567   } // End Quadrature Point Loop
568 
569   // Return
570   return 0;
571 }
572 
573 // *****************************************************************************
574 // This QFunction implements the Navier-Stokes equations (mentioned above) with
575 //   implicit time stepping method
576 //
577 //  SU   = Galerkin + grad(v) . ( Ai^T * Tau * (Aj q,j) )
578 //  SUPG = Galerkin + grad(v) . ( Ai^T * Tau * (q_dot + Aj q,j - body force) )
579 //                                       (diffussive terms will be added later)
580 //
581 // *****************************************************************************
582 CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
583                                     const CeedScalar *const *in,
584                                     CeedScalar *const *out) {
585   // *INDENT-OFF*
586   // Inputs
587   const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
588                    (*Grad_q)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
589                    (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
590                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3],
591                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
592   // Outputs
593   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
594              (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1],
595              (*jac_data)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[2];
596   // *INDENT-ON*
597   // Context
598   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
599   const CeedScalar mu     = context->mu;
600   const CeedScalar cv     = context->cv;
601   const CeedScalar cp     = context->cp;
602   const CeedScalar *g     = context->g;
603   const CeedScalar dt     = context->dt;
604   const CeedScalar gamma  = cp / cv;
605   const CeedScalar Rd     = cp-cv;
606 
607   CeedPragmaSIMD
608   // Quadrature Point Loop
609   for (CeedInt i=0; i<Q; i++) {
610     CeedScalar U[5];
611     for (CeedInt j=0; j<5; j++) U[j] = q[j][i];
612     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
613     State s = StateFromU(context, U, x_i);
614 
615     // -- Interp-to-Interp q_data
616     const CeedScalar wdetJ      =   q_data[0][i];
617     // -- Interp-to-Grad q_data
618     // ---- Inverse of change of coordinate matrix: X_i,j
619     // *INDENT-OFF*
620     const CeedScalar dXdx[3][3] = {{q_data[1][i],
621                                     q_data[2][i],
622                                     q_data[3][i]},
623                                    {q_data[4][i],
624                                     q_data[5][i],
625                                     q_data[6][i]},
626                                    {q_data[7][i],
627                                     q_data[8][i],
628                                     q_data[9][i]}
629                                   };
630     // *INDENT-ON*
631     State grad_s[3];
632     for (CeedInt j=0; j<3; j++) {
633       CeedScalar dx_i[3] = {0}, dU[5];
634       for (CeedInt k=0; k<5; k++)
635         dU[k] = Grad_q[0][k][i] * dXdx[0][j] +
636                 Grad_q[1][k][i] * dXdx[1][j] +
637                 Grad_q[2][k][i] * dXdx[2][j];
638       dx_i[j] = 1.;
639       grad_s[j] = StateFromU_fwd(context, s, dU, x_i, dx_i);
640     }
641 
642     CeedScalar strain_rate[6], kmstress[6], stress[3][3], Fe[3];
643     KMStrainRate(grad_s, strain_rate);
644     NewtonianStress(context, strain_rate, kmstress);
645     KMUnpack(kmstress, stress);
646     ViscousEnergyFlux(context, s.Y, grad_s, stress, Fe);
647 
648     StateConservative F_inviscid[3];
649     FluxInviscid(context, s, F_inviscid);
650 
651 
652     // Total flux
653     CeedScalar Flux[5][3];
654     for (CeedInt j=0; j<3; j++) {
655       Flux[0][j] = F_inviscid[j].density;
656       for (CeedInt k=0; k<3; k++)
657         Flux[k+1][j] = F_inviscid[j].momentum[k] - stress[k][j];
658       Flux[4][j] = F_inviscid[j].E_total + Fe[j];
659     }
660 
661     for (CeedInt j=0; j<3; j++) {
662       for (CeedInt k=0; k<5; k++) {
663         Grad_v[j][k][i] = -wdetJ * (dXdx[j][0] * Flux[k][0] +
664                                     dXdx[j][1] * Flux[k][1] +
665                                     dXdx[j][2] * Flux[k][2]);
666       }
667     }
668 
669     const CeedScalar body_force[5] = {0, s.U.density *g[0], s.U.density *g[1], s.U.density *g[2], 0};
670     for (CeedInt j=0; j<5; j++)
671       v[j][i] = wdetJ * (q_dot[j][i] - body_force[j]);
672 
673     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
674     CeedScalar jacob_F_conv[3][5][5] = {0};
675     computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total,
676                            gamma, g, x_i);
677     CeedScalar grad_U[5][3];
678     for (CeedInt j=0; j<3; j++) {
679       grad_U[0][j] = grad_s[j].U.density;
680       for (CeedInt k=0; k<3; k++) grad_U[k+1][j] = grad_s[j].U.momentum[k];
681       grad_U[4][j] = grad_s[j].U.E_total;
682     }
683 
684     // strong_conv = dF/dq * dq/dx    (Strong convection)
685     CeedScalar strong_conv[5] = {0};
686     for (CeedInt j=0; j<3; j++)
687       for (CeedInt k=0; k<5; k++)
688         for (CeedInt l=0; l<5; l++)
689           strong_conv[k] += jacob_F_conv[j][k][l] * grad_U[l][j];
690 
691     // Strong residual
692     CeedScalar strong_res[5];
693     for (CeedInt j=0; j<5; j++)
694       strong_res[j] = q_dot[j][i] + strong_conv[j] - body_force[j];
695 
696     // -- Stabilization method: none, SU, or SUPG
697     CeedScalar stab[5][3] = {{0.}};
698     CeedScalar tau_strong_res[5] = {0.}, tau_strong_res_conservative[5] = {0};
699     CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
700     CeedScalar Tau_d[3] = {0.};
701     switch (context->stabilization) {
702     case STAB_NONE:        // Galerkin
703       break;
704     case STAB_SU:        // SU
705       Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density);
706       tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
707       tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
708       tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
709       tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
710       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
711       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
712                                   tau_strong_conv, tau_strong_conv_conservative);
713       for (CeedInt j=0; j<3; j++)
714         for (CeedInt k=0; k<5; k++)
715           for (CeedInt l=0; l<5; l++)
716             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
717 
718       for (CeedInt j=0; j<5; j++)
719         for (CeedInt k=0; k<3; k++)
720           Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
721                                     stab[j][1] * dXdx[k][1] +
722                                     stab[j][2] * dXdx[k][2]);
723 
724       break;
725     case STAB_SUPG:        // SUPG
726       Tau_diagPrim(Tau_d, dXdx, s.Y.velocity, cv, context, mu, dt, s.U.density);
727       tau_strong_res[0] = Tau_d[0] * strong_res[0];
728       tau_strong_res[1] = Tau_d[1] * strong_res[1];
729       tau_strong_res[2] = Tau_d[1] * strong_res[2];
730       tau_strong_res[3] = Tau_d[1] * strong_res[3];
731       tau_strong_res[4] = Tau_d[2] * strong_res[4];
732 // Alternate route (useful later with primitive variable code)
733 // this function was verified against PHASTA for as IC that was as close as possible
734 //    computeFluxJacobian_NSp(jacob_F_conv_p, rho, u, E, Rd, cv);
735 // it has also been verified to compute a correct through the following
736 //   stab[k][j] += jacob_F_conv_p[j][k][l] * tau_strong_res[l] // flux Jacobian wrt primitive
737 // applied in the triple loop below
738 //  However, it is more flops than using the existing Jacobian wrt q after q_{,Y} viz
739       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
740                                   tau_strong_res, tau_strong_res_conservative);
741       for (CeedInt j=0; j<3; j++)
742         for (CeedInt k=0; k<5; k++)
743           for (CeedInt l=0; l<5; l++)
744             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_res_conservative[l];
745 
746       for (CeedInt j=0; j<5; j++)
747         for (CeedInt k=0; k<3; k++)
748           Grad_v[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
749                                     stab[j][1] * dXdx[k][1] +
750                                     stab[j][2] * dXdx[k][2]);
751       break;
752     }
753     for (CeedInt j=0; j<5; j++) jac_data[j][i] = U[j];
754     for (CeedInt j=0; j<6; j++) jac_data[5+j][i] = kmstress[j];
755     for (CeedInt j=0; j<3; j++) jac_data[5+6+j][i] = Tau_d[j];
756 
757   } // End Quadrature Point Loop
758 
759   // Return
760   return 0;
761 }
762 
763 CEED_QFUNCTION(IJacobian_Newtonian)(void *ctx, CeedInt Q,
764                                     const CeedScalar *const *in,
765                                     CeedScalar *const *out) {
766   // *INDENT-OFF*
767   // Inputs
768   const CeedScalar (*dq)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
769                    (*Grad_dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
770                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
771                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3],
772                    (*jac_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
773   // Outputs
774   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
775              (*Grad_v)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
776   // *INDENT-ON*
777   // Context
778   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
779   const CeedScalar *g = context->g;
780   const CeedScalar cp = context->cp;
781   const CeedScalar cv = context->cv;
782   const CeedScalar Rd = cp - cv;
783   const CeedScalar gamma = cp / cv;
784 
785   CeedPragmaSIMD
786   // Quadrature Point Loop
787   for (CeedInt i=0; i<Q; i++) {
788     // -- Interp-to-Interp q_data
789     const CeedScalar wdetJ      =   q_data[0][i];
790     // -- Interp-to-Grad q_data
791     // ---- Inverse of change of coordinate matrix: X_i,j
792     // *INDENT-OFF*
793     const CeedScalar dXdx[3][3] = {{q_data[1][i],
794                                     q_data[2][i],
795                                     q_data[3][i]},
796                                    {q_data[4][i],
797                                     q_data[5][i],
798                                     q_data[6][i]},
799                                    {q_data[7][i],
800                                     q_data[8][i],
801                                     q_data[9][i]}
802                                   };
803     // *INDENT-ON*
804 
805     CeedScalar U[5], kmstress[6], Tau_d[3] __attribute((unused));
806     for (int j=0; j<5; j++) U[j] = jac_data[j][i];
807     for (int j=0; j<6; j++) kmstress[j] = jac_data[5+j][i];
808     for (int j=0; j<3; j++) Tau_d[j] = jac_data[5+6+j][i];
809     const CeedScalar x_i[3] = {x[0][i], x[1][i], x[2][i]};
810     State s = StateFromU(context, U, x_i);
811 
812     CeedScalar dU[5], dx0[3] = {0};
813     for (int j=0; j<5; j++) dU[j] = dq[j][i];
814     State ds = StateFromU_fwd(context, s, dU, x_i, dx0);
815 
816     State grad_ds[3];
817     for (int j=0; j<3; j++) {
818       CeedScalar dUj[5];
819       for (int k=0; k<5; k++) dUj[k] = Grad_dq[0][k][i] * dXdx[0][j]
820                                          + Grad_dq[1][k][i] * dXdx[1][j]
821                                          + Grad_dq[2][k][i] * dXdx[2][j];
822       grad_ds[j] = StateFromU_fwd(context, s, dUj, x_i, dx0);
823     }
824 
825     CeedScalar dstrain_rate[6], dkmstress[6], stress[3][3], dstress[3][3], dFe[3];
826     KMStrainRate(grad_ds, dstrain_rate);
827     NewtonianStress(context, dstrain_rate, dkmstress);
828     KMUnpack(dkmstress, dstress);
829     KMUnpack(kmstress, stress);
830     ViscousEnergyFlux_fwd(context, s.Y, ds.Y, grad_ds, stress, dstress, dFe);
831 
832     StateConservative dF_inviscid[3];
833     FluxInviscid_fwd(context, s, ds, dF_inviscid);
834 
835     // Total flux
836     CeedScalar dFlux[5][3];
837     for (int j=0; j<3; j++) {
838       dFlux[0][j] = dF_inviscid[j].density;
839       for (int k=0; k<3; k++)
840         dFlux[k+1][j] = dF_inviscid[j].momentum[k] - dstress[k][j];
841       dFlux[4][j] = dF_inviscid[j].E_total + dFe[j];
842     }
843 
844     for (int j=0; j<3; j++) {
845       for (int k=0; k<5; k++) {
846         Grad_v[j][k][i] = -wdetJ * (dXdx[j][0] * dFlux[k][0] +
847                                     dXdx[j][1] * dFlux[k][1] +
848                                     dXdx[j][2] * dFlux[k][2]);
849       }
850     }
851 
852     const CeedScalar dbody_force[5] = {0, ds.U.density *g[0], ds.U.density *g[1], ds.U.density *g[2], 0};
853     for (int j=0; j<5; j++)
854       v[j][i] = wdetJ * (context->ijacobian_time_shift * dU[j] - dbody_force[j]);
855 
856     if (1) {
857       CeedScalar jacob_F_conv[3][5][5] = {0};
858       computeFluxJacobian_NS(jacob_F_conv, s.U.density, s.Y.velocity, s.U.E_total,
859                              gamma, g, x_i);
860       CeedScalar grad_dU[5][3];
861       for (int j=0; j<3; j++) {
862         grad_dU[0][j] = grad_ds[j].U.density;
863         for (int k=0; k<3; k++) grad_dU[k+1][j] = grad_ds[j].U.momentum[k];
864         grad_dU[4][j] = grad_ds[j].U.E_total;
865       }
866       CeedScalar dstrong_conv[5] = {0};
867       for (int j=0; j<3; j++)
868         for (int k=0; k<5; k++)
869           for (int l=0; l<5; l++)
870             dstrong_conv[k] += jacob_F_conv[j][k][l] * grad_dU[l][j];
871       CeedScalar dstrong_res[5];
872       for (int j=0; j<5; j++)
873         dstrong_res[j] = context->ijacobian_time_shift * dU[j] + dstrong_conv[j] -
874                          dbody_force[j];
875       CeedScalar dtau_strong_res[5] = {0.}, dtau_strong_res_conservative[5] = {0};
876       dtau_strong_res[0] = Tau_d[0] * dstrong_res[0];
877       dtau_strong_res[1] = Tau_d[1] * dstrong_res[1];
878       dtau_strong_res[2] = Tau_d[1] * dstrong_res[2];
879       dtau_strong_res[3] = Tau_d[1] * dstrong_res[3];
880       dtau_strong_res[4] = Tau_d[2] * dstrong_res[4];
881       PrimitiveToConservative_fwd(s.U.density, s.Y.velocity, s.U.E_total, Rd, cv,
882                                   dtau_strong_res, dtau_strong_res_conservative);
883       CeedScalar dstab[5][3] = {0};
884       for (int j=0; j<3; j++)
885         for (int k=0; k<5; k++)
886           for (int l=0; l<5; l++)
887             dstab[k][j] += jacob_F_conv[j][k][l] * dtau_strong_res_conservative[l];
888       for (int j=0; j<5; j++)
889         for (int k=0; k<3; k++)
890           Grad_v[k][j][i] += wdetJ*(dstab[j][0] * dXdx[k][0] +
891                                     dstab[j][1] * dXdx[k][1] +
892                                     dstab[j][2] * dXdx[k][2]);
893 
894     }
895   } // End Quadrature Point Loop
896   return 0;
897 }
898 // *****************************************************************************
899 #endif // newtonian_h
900