xref: /libCEED/examples/fluids/qfunctions/newtonian.h (revision 8687e1d445b8fb5c2aba1a76f10bd56e6cda067d)
1 // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors.
2 // All Rights Reserved. See the top-level LICENSE and NOTICE files for details.
3 //
4 // SPDX-License-Identifier: BSD-2-Clause
5 //
6 // This file is part of CEED:  http://github.com/ceed
7 
8 /// @file
9 /// Operator for Navier-Stokes example using PETSc
10 
11 
12 #ifndef newtonian_h
13 #define newtonian_h
14 
15 #include <math.h>
16 #include <ceed.h>
17 #include "newtonian_types.h"
18 
19 #ifndef M_PI
20 #define M_PI    3.14159265358979323846
21 #endif
22 
23 // *****************************************************************************
24 // Helper function for computing flux Jacobian
25 // *****************************************************************************
26 CEED_QFUNCTION_HELPER void computeFluxJacobian_NS(CeedScalar dF[3][5][5],
27     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
28     const CeedScalar gamma, const CeedScalar g[3], const CeedScalar x[3]) {
29   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
30   CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
31   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
32     for (CeedInt j=0; j<3; j++) { // Rows of each Jacobian matrix
33       dF[i][j+1][0] = ((i==j) ? ((gamma-1.)*(u_sq/2. - e_potential)) : 0.) -
34                       u[i]*u[j];
35       for (CeedInt k=0; k<3; k++) { // Columns of each Jacobian matrix
36         dF[i][0][k+1]   = ((i==k) ? 1. : 0.);
37         dF[i][j+1][k+1] = ((j==k) ? u[i] : 0.) +
38                           ((i==k) ? u[j] : 0.) -
39                           ((i==j) ? u[k] : 0.) * (gamma-1.);
40         dF[i][4][k+1]   = ((i==k) ? (E*gamma/rho - (gamma-1.)*u_sq/2.) : 0.) -
41                           (gamma-1.)*u[i]*u[k];
42       }
43       dF[i][j+1][4] = ((i==j) ? (gamma-1.) : 0.);
44     }
45     dF[i][4][0] = u[i] * ((gamma-1.)*u_sq - E*gamma/rho);
46     dF[i][4][4] = u[i] * gamma;
47   }
48 }
49 
50 // *****************************************************************************
51 // Helper function for computing flux Jacobian of Primitive variables
52 // *****************************************************************************
53 CEED_QFUNCTION_HELPER void computeFluxJacobian_NSp(CeedScalar dF[3][5][5],
54     const CeedScalar rho, const CeedScalar u[3], const CeedScalar E,
55     const CeedScalar Rd, const CeedScalar cv) {
56   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2]; // Velocity square
57   // TODO Add in gravity's contribution
58 
59   CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
60   CeedScalar drdT = -rho / T;
61   CeedScalar drdP = 1. / ( Rd * T);
62   CeedScalar etot =  E / rho ;
63   CeedScalar e2p  = drdP * etot + 1. ;
64   CeedScalar e3p  = ( E  + rho * Rd * T );
65   CeedScalar e4p  = drdT * etot + rho * cv ;
66 
67   for (CeedInt i=0; i<3; i++) { // Jacobian matrices for 3 directions
68     for (CeedInt j=0; j<3; j++) { // j counts F^{m_j}
69 //        [row][col] of A_i
70       dF[i][j+1][0] = drdP * u[i] * u[j] + ((i==j) ? 1. : 0.); // F^{{m_j} wrt p
71       for (CeedInt k=0; k<3; k++) { // k counts the wrt vel_k
72         dF[i][0][k+1]   =  ((i==k) ? rho  : 0.);   // F^c wrt u_k
73         dF[i][j+1][k+1] = (((j==k) ? u[i] : 0.) +  // F^m_j wrt u_k
74                            ((i==k) ? u[j] : 0.) ) * rho;
75         dF[i][4][k+1]   = rho * u[i] * u[k]
76                           + ((i==k) ? e3p  : 0.) ; // F^e wrt u_k
77       }
78       dF[i][j+1][4] = drdT * u[i] * u[j]; // F^{m_j} wrt T
79     }
80     dF[i][4][0] = u[i] * e2p; // F^e wrt p
81     dF[i][4][4] = u[i] * e4p; // F^e wrt T
82     dF[i][0][0] = u[i] * drdP; // F^c wrt p
83     dF[i][0][4] = u[i] * drdT; // F^c wrt T
84   }
85 }
86 
87 CEED_QFUNCTION_HELPER void PrimitiveToConservative_fwd(const CeedScalar rho,
88     const CeedScalar u[3], const CeedScalar E, const CeedScalar Rd,
89     const CeedScalar cv, const CeedScalar dY[5], CeedScalar dU[5]) {
90   CeedScalar u_sq = u[0]*u[0] + u[1]*u[1] + u[2]*u[2];
91   CeedScalar T    = ( E / rho - u_sq / 2. ) / cv;
92   CeedScalar drdT = -rho / T;
93   CeedScalar drdP = 1. / ( Rd * T);
94   dU[0] = drdP * dY[0] + drdT * dY[4];
95   CeedScalar de_kinetic = 0;
96   for (CeedInt i=0; i<3; i++) {
97     dU[1+i] = dU[0] * u[i] + rho * dY[1+i];
98     de_kinetic += u[i] * dY[1+i];
99   }
100   dU[4] = rho * cv * dY[4] + dU[0] * cv * T // internal energy: rho * e
101           + rho * de_kinetic + .5 * dU[0] * u_sq; // kinetic energy: .5 * rho * |u|^2
102 }
103 
104 // *****************************************************************************
105 // Helper function for computing Tau elements (stabilization constant)
106 //   Model from:
107 //     PHASTA
108 //
109 //   Tau[i] = itau=0 which is diagonal-Shakib (3 values still but not spatial)
110 //
111 // Where NOT UPDATED YET
112 // *****************************************************************************
113 CEED_QFUNCTION_HELPER void Tau_diagPrim(CeedScalar Tau_d[3],
114                                         const CeedScalar dXdx[3][3], const CeedScalar u[3],
115                                         const CeedScalar cv, const NewtonianIdealGasContext newt_ctx,
116                                         const CeedScalar mu, const CeedScalar dt,
117                                         const CeedScalar rho) {
118   // Context
119   const CeedScalar Ctau_t = newt_ctx->Ctau_t;
120   const CeedScalar Ctau_v = newt_ctx->Ctau_v;
121   const CeedScalar Ctau_C = newt_ctx->Ctau_C;
122   const CeedScalar Ctau_M = newt_ctx->Ctau_M;
123   const CeedScalar Ctau_E = newt_ctx->Ctau_E;
124   CeedScalar gijd[6];
125   CeedScalar tau;
126   CeedScalar dts;
127   CeedScalar fact;
128 
129   //*INDENT-OFF*
130   gijd[0] =   dXdx[0][0] * dXdx[0][0]
131             + dXdx[1][0] * dXdx[1][0]
132             + dXdx[2][0] * dXdx[2][0];
133 
134   gijd[1] =   dXdx[0][0] * dXdx[0][1]
135             + dXdx[1][0] * dXdx[1][1]
136             + dXdx[2][0] * dXdx[2][1];
137 
138   gijd[2] =   dXdx[0][1] * dXdx[0][1]
139             + dXdx[1][1] * dXdx[1][1]
140             + dXdx[2][1] * dXdx[2][1];
141 
142   gijd[3] =   dXdx[0][0] * dXdx[0][2]
143             + dXdx[1][0] * dXdx[1][2]
144             + dXdx[2][0] * dXdx[2][2];
145 
146   gijd[4] =   dXdx[0][1] * dXdx[0][2]
147             + dXdx[1][1] * dXdx[1][2]
148             + dXdx[2][1] * dXdx[2][2];
149 
150   gijd[5] =   dXdx[0][2] * dXdx[0][2]
151             + dXdx[1][2] * dXdx[1][2]
152             + dXdx[2][2] * dXdx[2][2];
153   //*INDENT-ON*
154 
155   dts = Ctau_t / dt ;
156 
157   tau = rho*rho*((4. * dts * dts)
158                  + u[0] * ( u[0] * gijd[0] + 2. * ( u[1] * gijd[1] + u[2] * gijd[3]))
159                  + u[1] * ( u[1] * gijd[2] + 2. *   u[2] * gijd[4])
160                  + u[2] *   u[2] * gijd[5])
161         + Ctau_v* mu * mu *
162         (gijd[0]*gijd[0] + gijd[2]*gijd[2] + gijd[5]*gijd[5] +
163          + 2. * (gijd[1]*gijd[1] + gijd[3]*gijd[3] + gijd[4]*gijd[4]));
164 
165   fact=sqrt(tau);
166 
167   Tau_d[0] = Ctau_C * fact / (rho*(gijd[0] + gijd[2] + gijd[5]))*0.125;
168 
169   Tau_d[1] = Ctau_M / fact;
170   Tau_d[2] = Ctau_E / ( fact * cv );
171 
172 // consider putting back the way I initially had it  Ctau_E * Tau_d[1] /cv
173 //  to avoid a division if the compiler is smart enough to see that cv IS
174 // a constant that it could invert once for all elements
175 // but in that case energy tau is scaled by the product of Ctau_E * Ctau_M
176 // OR we could absorb cv into Ctau_E but this puts more burden on user to
177 // know how to change constants with a change of fluid or units.  Same for
178 // Ctau_v * mu * mu IF AND ONLY IF we don't add viscosity law =f(T)
179 }
180 
181 // *****************************************************************************
182 // Helper function for computing Tau elements (stabilization constant)
183 //   Model from:
184 //     Stabilized Methods for Compressible Flows, Hughes et al 2010
185 //
186 //   Spatial criterion #2 - Tau is a 3x3 diagonal matrix
187 //   Tau[i] = c_tau h[i] Xi(Pe) / rho(A[i]) (no sum)
188 //
189 // Where
190 //   c_tau     = stabilization constant (0.5 is reported as "optimal")
191 //   h[i]      = 2 length(dxdX[i])
192 //   Pe        = Peclet number ( Pe = sqrt(u u) / dot(dXdx,u) diffusivity )
193 //   Xi(Pe)    = coth Pe - 1. / Pe (1. at large local Peclet number )
194 //   rho(A[i]) = spectral radius of the convective flux Jacobian i,
195 //               wave speed in direction i
196 // *****************************************************************************
197 CEED_QFUNCTION_HELPER void Tau_spatial(CeedScalar Tau_x[3],
198                                        const CeedScalar dXdx[3][3], const CeedScalar u[3],
199                                        /* const CeedScalar sound_speed, const CeedScalar c_tau) { */
200                                        const CeedScalar sound_speed, const CeedScalar c_tau,
201                                        const CeedScalar viscosity) {
202   const CeedScalar mag_u_visc = sqrt(u[0]*u[0] +u[1]*u[1] +u[2]*u[2]) /
203                                 (2*viscosity);
204   for (CeedInt i=0; i<3; i++) {
205     // length of element in direction i
206     CeedScalar h = 2 / sqrt(dXdx[0][i]*dXdx[0][i] + dXdx[1][i]*dXdx[1][i] +
207                             dXdx[2][i]*dXdx[2][i]);
208     CeedScalar Pe = mag_u_visc*h;
209     CeedScalar Xi = 1/tanh(Pe) - 1/Pe;
210     // fastest wave in direction i
211     CeedScalar fastest_wave = fabs(u[i]) + sound_speed;
212     Tau_x[i] = c_tau * h * Xi / fastest_wave;
213   }
214 }
215 
216 // *****************************************************************************
217 // This QFunction sets a "still" initial condition for generic Newtonian IG problems
218 // *****************************************************************************
219 CEED_QFUNCTION(ICsNewtonianIG)(void *ctx, CeedInt Q,
220                                const CeedScalar *const *in, CeedScalar *const *out) {
221   // Inputs
222   const CeedScalar (*X)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0];
223 
224   // Outputs
225   CeedScalar (*q0)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0];
226 
227   // Context
228   const SetupContext context = (SetupContext)ctx;
229   const CeedScalar theta0    = context->theta0;
230   const CeedScalar P0        = context->P0;
231   const CeedScalar cv        = context->cv;
232   const CeedScalar cp        = context->cp;
233   const CeedScalar *g        = context->g;
234   const CeedScalar Rd        = cp - cv;
235 
236   // Quadrature Point Loop
237   CeedPragmaSIMD
238   for (CeedInt i=0; i<Q; i++) {
239     CeedScalar q[5] = {0.};
240 
241     // Setup
242     // -- Coordinates
243     const CeedScalar x[3] = {X[0][i], X[1][i], X[2][i]};
244     const CeedScalar e_potential = -(g[0]*x[0] + g[1]*x[1] + g[2]*x[2]);
245 
246     // -- Density
247     const CeedScalar rho = P0 / (Rd*theta0);
248 
249     // Initial Conditions
250     q[0] = rho;
251     q[1] = 0.0;
252     q[2] = 0.0;
253     q[3] = 0.0;
254     q[4] = rho * (cv*theta0 + e_potential);
255 
256     for (CeedInt j=0; j<5; j++)
257       q0[j][i] = q[j];
258   } // End of Quadrature Point Loop
259   return 0;
260 }
261 
262 // *****************************************************************************
263 // This QFunction implements the following formulation of Navier-Stokes with
264 //   explicit time stepping method
265 //
266 // This is 3D compressible Navier-Stokes in conservation form with state
267 //   variables of density, momentum density, and total energy density.
268 //
269 // State Variables: q = ( rho, U1, U2, U3, E )
270 //   rho - Mass Density
271 //   Ui  - Momentum Density,      Ui = rho ui
272 //   E   - Total Energy Density,  E  = rho (cv T + (u u)/2 + g z)
273 //
274 // Navier-Stokes Equations:
275 //   drho/dt + div( U )                               = 0
276 //   dU/dt   + div( rho (u x u) + P I3 ) + rho g khat = div( Fu )
277 //   dE/dt   + div( (E + P) u )                       = div( Fe )
278 //
279 // Viscous Stress:
280 //   Fu = mu (grad( u ) + grad( u )^T + lambda div ( u ) I3)
281 //
282 // Thermal Stress:
283 //   Fe = u Fu + k grad( T )
284 // Equation of State
285 //   P = (gamma - 1) (E - rho (u u) / 2 - rho g z)
286 //
287 // Stabilization:
288 //   Tau = diag(TauC, TauM, TauM, TauM, TauE)
289 //     f1 = rho  sqrt(ui uj gij)
290 //     gij = dXi/dX * dXi/dX
291 //     TauC = Cc f1 / (8 gii)
292 //     TauM = min( 1 , 1 / f1 )
293 //     TauE = TauM / (Ce cv)
294 //
295 //  SU   = Galerkin + grad(v) . ( Ai^T * Tau * (Aj q,j) )
296 //
297 // Constants:
298 //   lambda = - 2 / 3,  From Stokes hypothesis
299 //   mu              ,  Dynamic viscosity
300 //   k               ,  Thermal conductivity
301 //   cv              ,  Specific heat, constant volume
302 //   cp              ,  Specific heat, constant pressure
303 //   g               ,  Gravity
304 //   gamma  = cp / cv,  Specific heat ratio
305 //
306 // We require the product of the inverse of the Jacobian (dXdx_j,k) and
307 // its transpose (dXdx_k,j) to properly compute integrals of the form:
308 // int( gradv gradu )
309 //
310 // *****************************************************************************
311 CEED_QFUNCTION(Newtonian)(void *ctx, CeedInt Q,
312                           const CeedScalar *const *in, CeedScalar *const *out) {
313   // *INDENT-OFF*
314   // Inputs
315   const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
316                    (*dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
317                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
318                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3];
319   // Outputs
320   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
321              (*dv)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
322   // *INDENT-ON*
323 
324   // Context
325   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
326   const CeedScalar lambda = context->lambda;
327   const CeedScalar mu     = context->mu;
328   const CeedScalar k      = context->k;
329   const CeedScalar cv     = context->cv;
330   const CeedScalar cp     = context->cp;
331   const CeedScalar *g     = context->g;
332   const CeedScalar dt     = context->dt;
333   const CeedScalar gamma  = cp / cv;
334   const CeedScalar Rd     = cp - cv;
335 
336   CeedPragmaSIMD
337   // Quadrature Point Loop
338   for (CeedInt i=0; i<Q; i++) {
339     // *INDENT-OFF*
340     // Setup
341     // -- Interp in
342     const CeedScalar rho        =   q[0][i];
343     const CeedScalar u[3]       =  {q[1][i] / rho,
344                                     q[2][i] / rho,
345                                     q[3][i] / rho
346                                    };
347     const CeedScalar E          =   q[4][i];
348     // -- Grad in
349     const CeedScalar drho[3]    =  {dq[0][0][i],
350                                     dq[1][0][i],
351                                     dq[2][0][i]
352                                    };
353     const CeedScalar dU[3][3]   = {{dq[0][1][i],
354                                     dq[1][1][i],
355                                     dq[2][1][i]},
356                                    {dq[0][2][i],
357                                     dq[1][2][i],
358                                     dq[2][2][i]},
359                                    {dq[0][3][i],
360                                     dq[1][3][i],
361                                     dq[2][3][i]}
362                                   };
363     const CeedScalar dE[3]      =  {dq[0][4][i],
364                                     dq[1][4][i],
365                                     dq[2][4][i]
366                                    };
367     // -- Interp-to-Interp q_data
368     const CeedScalar wdetJ      =   q_data[0][i];
369     // -- Interp-to-Grad q_data
370     // ---- Inverse of change of coordinate matrix: X_i,j
371     // *INDENT-OFF*
372     const CeedScalar dXdx[3][3] = {{q_data[1][i],
373                                     q_data[2][i],
374                                     q_data[3][i]},
375                                    {q_data[4][i],
376                                     q_data[5][i],
377                                     q_data[6][i]},
378                                    {q_data[7][i],
379                                     q_data[8][i],
380                                     q_data[9][i]}
381                                   };
382     const CeedScalar x_i[3]       = {x[0][i], x[1][i], x[2][i]};
383     // *INDENT-ON*
384     // -- Grad-to-Grad q_data
385     // dU/dx
386     CeedScalar du[3][3] = {{0}};
387     CeedScalar drhodx[3] = {0};
388     CeedScalar dEdx[3] = {0};
389     CeedScalar dUdx[3][3] = {{0}};
390     CeedScalar dXdxdXdxT[3][3] = {{0}};
391     for (CeedInt j=0; j<3; j++) {
392       for (CeedInt k=0; k<3; k++) {
393         du[j][k] = (dU[j][k] - drho[k]*u[j]) / rho;
394         drhodx[j] += drho[k] * dXdx[k][j];
395         dEdx[j] += dE[k] * dXdx[k][j];
396         for (CeedInt l=0; l<3; l++) {
397           dUdx[j][k] += dU[j][l] * dXdx[l][k];
398           dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  //dXdx_j,k * dXdx_k,j
399         }
400       }
401     }
402     CeedScalar dudx[3][3] = {{0}};
403     for (CeedInt j=0; j<3; j++)
404       for (CeedInt k=0; k<3; k++)
405         for (CeedInt l=0; l<3; l++)
406           dudx[j][k] += du[j][l] * dXdx[l][k];
407     // -- grad_T
408     const CeedScalar grad_T[3]  = {(dEdx[0]/rho - E*drhodx[0]/(rho*rho) - /* *NOPAD* */
409                                     (u[0]*dudx[0][0] + u[1]*dudx[1][0] + u[2]*dudx[2][0]) + g[0])/cv,
410                                    (dEdx[1]/rho - E*drhodx[1]/(rho*rho) - /* *NOPAD* */
411                                     (u[0]*dudx[0][1] + u[1]*dudx[1][1] + u[2]*dudx[2][1]) + g[1])/cv,
412                                    (dEdx[2]/rho - E*drhodx[2]/(rho*rho) - /* *NOPAD* */
413                                     (u[0]*dudx[0][2] + u[1]*dudx[1][2] + u[2]*dudx[2][2]) + g[2])/cv
414                                   };
415 
416     // -- Fuvisc
417     // ---- Symmetric 3x3 matrix
418     const CeedScalar Fu[6]     =  {mu*(dudx[0][0] * (2 + lambda) + /* *NOPAD* */
419                                        lambda * (dudx[1][1] + dudx[2][2])),
420                                    mu*(dudx[0][1] + dudx[1][0]), /* *NOPAD* */
421                                    mu*(dudx[0][2] + dudx[2][0]), /* *NOPAD* */
422                                    mu*(dudx[1][1] * (2 + lambda) + /* *NOPAD* */
423                                        lambda * (dudx[0][0] + dudx[2][2])),
424                                    mu*(dudx[1][2] + dudx[2][1]), /* *NOPAD* */
425                                    mu*(dudx[2][2] * (2 + lambda) + /* *NOPAD* */
426                                        lambda * (dudx[0][0] + dudx[1][1]))
427                                   };
428     // -- Fevisc
429     const CeedScalar Fe[3]     =  {u[0]*Fu[0] + u[1]*Fu[1] + u[2]*Fu[2] + /* *NOPAD* */
430                                    k*grad_T[0], /* *NOPAD* */
431                                    u[0]*Fu[1] + u[1]*Fu[3] + u[2]*Fu[4] + /* *NOPAD* */
432                                    k*grad_T[1], /* *NOPAD* */
433                                    u[0]*Fu[2] + u[1]*Fu[4] + u[2]*Fu[5] + /* *NOPAD* */
434                                    k*grad_T[2] /* *NOPAD* */
435                                   };
436     // Pressure
437     const CeedScalar
438     E_kinetic   = 0.5 * rho * (u[0]*u[0] + u[1]*u[1] + u[2]*u[2]),
439     E_potential = -rho*(g[0]*x_i[0] + g[1]*x_i[1] + g[2]*x_i[2]),
440     E_internal  = E - E_kinetic - E_potential,
441     P           = E_internal * (gamma - 1.); // P = pressure
442 
443     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
444     CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
445     computeFluxJacobian_NS(jacob_F_conv, rho, u, E, gamma, g, x_i);
446 
447     // dqdx collects drhodx, dUdx and dEdx in one vector
448     CeedScalar dqdx[5][3];
449     for (CeedInt j=0; j<3; j++) {
450       dqdx[0][j] = drhodx[j];
451       dqdx[4][j] = dEdx[j];
452       for (CeedInt k=0; k<3; k++)
453         dqdx[k+1][j] = dUdx[k][j];
454     }
455 
456     // strong_conv = dF/dq * dq/dx    (Strong convection)
457     CeedScalar strong_conv[5] = {0};
458     for (CeedInt j=0; j<3; j++)
459       for (CeedInt k=0; k<5; k++)
460         for (CeedInt l=0; l<5; l++)
461           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
462 
463     // Body force
464     const CeedScalar body_force[5] = {0, rho *g[0], rho *g[1], rho *g[2], 0};
465 
466     // The Physics
467     // Zero dv so all future terms can safely sum into it
468     for (CeedInt j=0; j<5; j++)
469       for (CeedInt k=0; k<3; k++)
470         dv[k][j][i] = 0;
471 
472     // -- Density
473     // ---- u rho
474     for (CeedInt j=0; j<3; j++)
475       dv[j][0][i]  += wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
476                              rho*u[2]*dXdx[j][2]);
477     // -- Momentum
478     // ---- rho (u x u) + P I3
479     for (CeedInt j=0; j<3; j++)
480       for (CeedInt k=0; k<3; k++)
481         dv[k][j+1][i]  += wdetJ*((rho*u[j]*u[0] + (j==0?P:0))*dXdx[k][0] +
482                                  (rho*u[j]*u[1] + (j==1?P:0))*dXdx[k][1] +
483                                  (rho*u[j]*u[2] + (j==2?P:0))*dXdx[k][2]);
484     // ---- Fuvisc
485     const CeedInt Fuviscidx[3][3] = {{0, 1, 2}, {1, 3, 4}, {2, 4, 5}}; // symmetric matrix indices
486     for (CeedInt j=0; j<3; j++)
487       for (CeedInt k=0; k<3; k++)
488         dv[k][j+1][i] -= wdetJ*(Fu[Fuviscidx[j][0]]*dXdx[k][0] +
489                                 Fu[Fuviscidx[j][1]]*dXdx[k][1] +
490                                 Fu[Fuviscidx[j][2]]*dXdx[k][2]);
491     // -- Total Energy Density
492     // ---- (E + P) u
493     for (CeedInt j=0; j<3; j++)
494       dv[j][4][i]  += wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
495                                          u[2]*dXdx[j][2]);
496     // ---- Fevisc
497     for (CeedInt j=0; j<3; j++)
498       dv[j][4][i] -= wdetJ * (Fe[0]*dXdx[j][0] + Fe[1]*dXdx[j][1] +
499                               Fe[2]*dXdx[j][2]);
500     // Body Force
501     for (CeedInt j=0; j<5; j++)
502       v[j][i] = wdetJ * body_force[j];
503 
504     // Spatial Stabilization
505     // -- Not used in favor of diagonal tau. Kept for future testing
506     // const CeedScalar sound_speed = sqrt(gamma * P / rho);
507     // CeedScalar Tau_x[3] = {0.};
508     // Tau_spatial(Tau_x, dXdx, u, sound_speed, context->c_tau, mu);
509 
510     // -- Stabilization method: none, SU, or SUPG
511     CeedScalar stab[5][3] = {{0.}};
512     CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
513     CeedScalar Tau_d[3] = {0.};
514     switch (context->stabilization) {
515     case STAB_NONE:        // Galerkin
516       break;
517     case STAB_SU:        // SU
518       Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
519       tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
520       tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
521       tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
522       tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
523       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
524       PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_conv,
525                                   tau_strong_conv_conservative);
526       for (CeedInt j=0; j<3; j++)
527         for (CeedInt k=0; k<5; k++)
528           for (CeedInt l=0; l<5; l++)
529             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
530 
531       for (CeedInt j=0; j<5; j++)
532         for (CeedInt k=0; k<3; k++)
533           dv[k][j][i] -= wdetJ*(stab[j][0] * dXdx[k][0] +
534                                 stab[j][1] * dXdx[k][1] +
535                                 stab[j][2] * dXdx[k][2]);
536       break;
537     case STAB_SUPG:        // SUPG is not implemented for explicit scheme
538       break;
539     }
540 
541   } // End Quadrature Point Loop
542 
543   // Return
544   return 0;
545 }
546 
547 // *****************************************************************************
548 // This QFunction implements the Navier-Stokes equations (mentioned above) with
549 //   implicit time stepping method
550 //
551 //  SU   = Galerkin + grad(v) . ( Ai^T * Tau * (Aj q,j) )
552 //  SUPG = Galerkin + grad(v) . ( Ai^T * Tau * (q_dot + Aj q,j - body force) )
553 //                                       (diffussive terms will be added later)
554 //
555 // *****************************************************************************
556 CEED_QFUNCTION(IFunction_Newtonian)(void *ctx, CeedInt Q,
557                                     const CeedScalar *const *in,
558                                     CeedScalar *const *out) {
559   // *INDENT-OFF*
560   // Inputs
561   const CeedScalar (*q)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[0],
562                    (*dq)[5][CEED_Q_VLA] = (const CeedScalar(*)[5][CEED_Q_VLA])in[1],
563                    (*q_dot)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[2],
564                    (*q_data)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[3],
565                    (*x)[CEED_Q_VLA] = (const CeedScalar(*)[CEED_Q_VLA])in[4];
566   // Outputs
567   CeedScalar (*v)[CEED_Q_VLA] = (CeedScalar(*)[CEED_Q_VLA])out[0],
568              (*dv)[5][CEED_Q_VLA] = (CeedScalar(*)[5][CEED_Q_VLA])out[1];
569   // *INDENT-ON*
570   // Context
571   NewtonianIdealGasContext context = (NewtonianIdealGasContext)ctx;
572   const CeedScalar lambda = context->lambda;
573   const CeedScalar mu     = context->mu;
574   const CeedScalar k      = context->k;
575   const CeedScalar cv     = context->cv;
576   const CeedScalar cp     = context->cp;
577   const CeedScalar *g     = context->g;
578   const CeedScalar dt     = context->dt;
579   const CeedScalar gamma  = cp / cv;
580   const CeedScalar Rd     = cp-cv;
581 
582   CeedPragmaSIMD
583   // Quadrature Point Loop
584   for (CeedInt i=0; i<Q; i++) {
585     // Setup
586     // -- Interp in
587     const CeedScalar rho        =   q[0][i];
588     const CeedScalar u[3]       =  {q[1][i] / rho,
589                                     q[2][i] / rho,
590                                     q[3][i] / rho
591                                    };
592     const CeedScalar E          =   q[4][i];
593     // -- Grad in
594     const CeedScalar drho[3]    =  {dq[0][0][i],
595                                     dq[1][0][i],
596                                     dq[2][0][i]
597                                    };
598     // *INDENT-OFF*
599     const CeedScalar dU[3][3]   = {{dq[0][1][i],
600                                     dq[1][1][i],
601                                     dq[2][1][i]},
602                                    {dq[0][2][i],
603                                     dq[1][2][i],
604                                     dq[2][2][i]},
605                                    {dq[0][3][i],
606                                     dq[1][3][i],
607                                     dq[2][3][i]}
608                                   };
609     // *INDENT-ON*
610     const CeedScalar dE[3]      =  {dq[0][4][i],
611                                     dq[1][4][i],
612                                     dq[2][4][i]
613                                    };
614     // -- Interp-to-Interp q_data
615     const CeedScalar wdetJ      =   q_data[0][i];
616     // -- Interp-to-Grad q_data
617     // ---- Inverse of change of coordinate matrix: X_i,j
618     // *INDENT-OFF*
619     const CeedScalar dXdx[3][3] = {{q_data[1][i],
620                                     q_data[2][i],
621                                     q_data[3][i]},
622                                    {q_data[4][i],
623                                     q_data[5][i],
624                                     q_data[6][i]},
625                                    {q_data[7][i],
626                                     q_data[8][i],
627                                     q_data[9][i]}
628                                   };
629     const CeedScalar x_i[3]     = {x[0][i], x[1][i], x[2][i]};
630     // *INDENT-ON*
631     // -- Grad-to-Grad q_data
632     // dU/dx
633     CeedScalar du[3][3] = {{0}};
634     CeedScalar drhodx[3] = {0};
635     CeedScalar dEdx[3] = {0};
636     CeedScalar dUdx[3][3] = {{0}};
637     CeedScalar dXdxdXdxT[3][3] = {{0}};
638     for (CeedInt j=0; j<3; j++) {
639       for (CeedInt k=0; k<3; k++) {
640         du[j][k] = (dU[j][k] - drho[k]*u[j]) / rho;
641         drhodx[j] += drho[k] * dXdx[k][j];
642         dEdx[j] += dE[k] * dXdx[k][j];
643         for (CeedInt l=0; l<3; l++) {
644           dUdx[j][k] += dU[j][l] * dXdx[l][k];
645           dXdxdXdxT[j][k] += dXdx[j][l]*dXdx[k][l];  //dXdx_j,k * dXdx_k,j
646         }
647       }
648     }
649     CeedScalar dudx[3][3] = {{0}};
650     for (CeedInt j=0; j<3; j++)
651       for (CeedInt k=0; k<3; k++)
652         for (CeedInt l=0; l<3; l++)
653           dudx[j][k] += du[j][l] * dXdx[l][k];
654     // -- grad_T
655     const CeedScalar grad_T[3]  = {(dEdx[0]/rho - E*drhodx[0]/(rho*rho) - /* *NOPAD* */
656                                     (u[0]*dudx[0][0] + u[1]*dudx[1][0] + u[2]*dudx[2][0]) + g[0])/cv,
657                                    (dEdx[1]/rho - E*drhodx[1]/(rho*rho) - /* *NOPAD* */
658                                     (u[0]*dudx[0][1] + u[1]*dudx[1][1] + u[2]*dudx[2][1]) + g[1])/cv,
659                                    (dEdx[2]/rho - E*drhodx[2]/(rho*rho) - /* *NOPAD* */
660                                     (u[0]*dudx[0][2] + u[1]*dudx[1][2] + u[2]*dudx[2][2]) + g[2])/cv
661                                   };
662     // -- Fuvisc
663     // ---- Symmetric 3x3 matrix
664     const CeedScalar Fu[6]     =  {mu*(dudx[0][0] * (2 + lambda) + /* *NOPAD* */
665                                        lambda * (dudx[1][1] + dudx[2][2])),
666                                    mu*(dudx[0][1] + dudx[1][0]), /* *NOPAD* */
667                                    mu*(dudx[0][2] + dudx[2][0]), /* *NOPAD* */
668                                    mu*(dudx[1][1] * (2 + lambda) + /* *NOPAD* */
669                                        lambda * (dudx[0][0] + dudx[2][2])),
670                                    mu*(dudx[1][2] + dudx[2][1]), /* *NOPAD* */
671                                    mu*(dudx[2][2] * (2 + lambda) + /* *NOPAD* */
672                                        lambda * (dudx[0][0] + dudx[1][1]))
673                                   };
674     // -- Fevisc
675     const CeedScalar Fe[3]     =  {u[0]*Fu[0] + u[1]*Fu[1] + u[2]*Fu[2] + /* *NOPAD* */
676                                    k*grad_T[0], /* *NOPAD* */
677                                    u[0]*Fu[1] + u[1]*Fu[3] + u[2]*Fu[4] + /* *NOPAD* */
678                                    k*grad_T[1], /* *NOPAD* */
679                                    u[0]*Fu[2] + u[1]*Fu[4] + u[2]*Fu[5] + /* *NOPAD* */
680                                    k*grad_T[2] /* *NOPAD* */
681                                   };
682     // Pressure
683     const CeedScalar
684     E_kinetic   = 0.5 * rho * (u[0]*u[0] + u[1]*u[1] + u[2]*u[2]),
685     E_potential = -rho*(g[0]*x_i[0] + g[1]*x_i[1] + g[2]*x_i[2]),
686     E_internal  = E - E_kinetic - E_potential,
687     P           = E_internal * (gamma - 1.); // P = pressure
688 
689     // jacob_F_conv[3][5][5] = dF(convective)/dq at each direction
690     CeedScalar jacob_F_conv[3][5][5] = {{{0.}}};
691     computeFluxJacobian_NS(jacob_F_conv, rho, u, E, gamma, g, x_i);
692 
693     // dqdx collects drhodx, dUdx and dEdx in one vector
694     CeedScalar dqdx[5][3];
695     for (CeedInt j=0; j<3; j++) {
696       dqdx[0][j] = drhodx[j];
697       dqdx[4][j] = dEdx[j];
698       for (CeedInt k=0; k<3; k++)
699         dqdx[k+1][j] = dUdx[k][j];
700     }
701     // strong_conv = dF/dq * dq/dx    (Strong convection)
702     CeedScalar strong_conv[5] = {0};
703     for (CeedInt j=0; j<3; j++)
704       for (CeedInt k=0; k<5; k++)
705         for (CeedInt l=0; l<5; l++)
706           strong_conv[k] += jacob_F_conv[j][k][l] * dqdx[l][j];
707 
708     // Body force
709     const CeedScalar body_force[5] = {0, rho *g[0], rho *g[1], rho *g[2], 0};
710 
711     // Strong residual
712     CeedScalar strong_res[5];
713     for (CeedInt j=0; j<5; j++)
714       strong_res[j] = q_dot[j][i] + strong_conv[j] - body_force[j];
715 
716     // The Physics
717     //-----mass matrix
718     for (CeedInt j=0; j<5; j++)
719       v[j][i] = wdetJ*q_dot[j][i];
720 
721     // Zero dv so all future terms can safely sum into it
722     for (CeedInt j=0; j<5; j++)
723       for (CeedInt k=0; k<3; k++)
724         dv[k][j][i] = 0;
725 
726     // -- Density
727     // ---- u rho
728     for (CeedInt j=0; j<3; j++)
729       dv[j][0][i]  -= wdetJ*(rho*u[0]*dXdx[j][0] + rho*u[1]*dXdx[j][1] +
730                              rho*u[2]*dXdx[j][2]);
731     // -- Momentum
732     // ---- rho (u x u) + P I3
733     for (CeedInt j=0; j<3; j++)
734       for (CeedInt k=0; k<3; k++)
735         dv[k][j+1][i]  -= wdetJ*((rho*u[j]*u[0] + (j==0?P:0))*dXdx[k][0] +
736                                  (rho*u[j]*u[1] + (j==1?P:0))*dXdx[k][1] +
737                                  (rho*u[j]*u[2] + (j==2?P:0))*dXdx[k][2]);
738     // ---- Fuvisc
739     const CeedInt Fuviscidx[3][3] = {{0, 1, 2}, {1, 3, 4}, {2, 4, 5}}; // symmetric matrix indices
740     for (CeedInt j=0; j<3; j++)
741       for (CeedInt k=0; k<3; k++)
742         dv[k][j+1][i] += wdetJ*(Fu[Fuviscidx[j][0]]*dXdx[k][0] +
743                                 Fu[Fuviscidx[j][1]]*dXdx[k][1] +
744                                 Fu[Fuviscidx[j][2]]*dXdx[k][2]);
745     // -- Total Energy Density
746     // ---- (E + P) u
747     for (CeedInt j=0; j<3; j++)
748       dv[j][4][i]  -= wdetJ * (E + P) * (u[0]*dXdx[j][0] + u[1]*dXdx[j][1] +
749                                          u[2]*dXdx[j][2]);
750     // ---- Fevisc
751     for (CeedInt j=0; j<3; j++)
752       dv[j][4][i] += wdetJ * (Fe[0]*dXdx[j][0] + Fe[1]*dXdx[j][1] +
753                               Fe[2]*dXdx[j][2]);
754     // Body Force
755     for (CeedInt j=0; j<5; j++)
756       v[j][i] -= wdetJ*body_force[j];
757 
758     // Spatial Stabilization
759     // -- Not used in favor of diagonal tau. Kept for future testing
760     // const CeedScalar sound_speed = sqrt(gamma * P / rho);
761     // CeedScalar Tau_x[3] = {0.};
762     // Tau_spatial(Tau_x, dXdx, u, sound_speed, c_tau, mu);
763 
764     // -- Stabilization method: none, SU, or SUPG
765     CeedScalar stab[5][3] = {{0.}};
766     CeedScalar tau_strong_res[5] = {0.}, tau_strong_res_conservative[5] = {0};
767     CeedScalar tau_strong_conv[5] = {0.}, tau_strong_conv_conservative[5] = {0};
768     CeedScalar Tau_d[3] = {0.};
769     switch (context->stabilization) {
770     case STAB_NONE:        // Galerkin
771       break;
772     case STAB_SU:        // SU
773       Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
774       tau_strong_conv[0] = Tau_d[0] * strong_conv[0];
775       tau_strong_conv[1] = Tau_d[1] * strong_conv[1];
776       tau_strong_conv[2] = Tau_d[1] * strong_conv[2];
777       tau_strong_conv[3] = Tau_d[1] * strong_conv[3];
778       tau_strong_conv[4] = Tau_d[2] * strong_conv[4];
779       PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_conv,
780                                   tau_strong_conv_conservative);
781       for (CeedInt j=0; j<3; j++)
782         for (CeedInt k=0; k<5; k++)
783           for (CeedInt l=0; l<5; l++)
784             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_conv_conservative[l];
785 
786       for (CeedInt j=0; j<5; j++)
787         for (CeedInt k=0; k<3; k++)
788           dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
789                                 stab[j][1] * dXdx[k][1] +
790                                 stab[j][2] * dXdx[k][2]);
791       break;
792     case STAB_SUPG:        // SUPG
793       Tau_diagPrim(Tau_d, dXdx, u, cv, context, mu, dt, rho);
794       tau_strong_res[0] = Tau_d[0] * strong_res[0];
795       tau_strong_res[1] = Tau_d[1] * strong_res[1];
796       tau_strong_res[2] = Tau_d[1] * strong_res[2];
797       tau_strong_res[3] = Tau_d[1] * strong_res[3];
798       tau_strong_res[4] = Tau_d[2] * strong_res[4];
799 // Alternate route (useful later with primitive variable code)
800 // this function was verified against PHASTA for as IC that was as close as possible
801 //    computeFluxJacobian_NSp(jacob_F_conv_p, rho, u, E, Rd, cv);
802 // it has also been verified to compute a correct through the following
803 //   stab[k][j] += jacob_F_conv_p[j][k][l] * tau_strong_res[l] // flux Jacobian wrt primitive
804 // applied in the triple loop below
805 //  However, it is more flops than using the existing Jacobian wrt q after q_{,Y} viz
806       PrimitiveToConservative_fwd(rho, u, E, Rd, cv, tau_strong_res,
807                                   tau_strong_res_conservative);
808       for (CeedInt j=0; j<3; j++)
809         for (CeedInt k=0; k<5; k++)
810           for (CeedInt l=0; l<5; l++)
811             stab[k][j] += jacob_F_conv[j][k][l] * tau_strong_res_conservative[l];
812 
813       for (CeedInt j=0; j<5; j++)
814         for (CeedInt k=0; k<3; k++)
815           dv[k][j][i] += wdetJ*(stab[j][0] * dXdx[k][0] +
816                                 stab[j][1] * dXdx[k][1] +
817                                 stab[j][2] * dXdx[k][2]);
818       break;
819     }
820 
821   } // End Quadrature Point Loop
822 
823   // Return
824   return 0;
825 }
826 // *****************************************************************************
827 #endif // newtonian_h
828