xref: /libCEED/examples/fluids/navierstokes.c (revision d3630711bf9eeb1e21a8a5563055877605d64a80)
1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3 // reserved. See files LICENSE and NOTICE for details.
4 //
5 // This file is part of CEED, a collection of benchmarks, miniapps, software
6 // libraries and APIs for efficient high-order finite element and spectral
7 // element discretizations for exascale applications. For more information and
8 // source code availability see http://github.com/ceed.
9 //
10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
11 // a collaborative effort of two U.S. Department of Energy organizations (Office
12 // of Science and the National Nuclear Security Administration) responsible for
13 // the planning and preparation of a capable exascale ecosystem, including
14 // software, applications, hardware, advanced system engineering and early
15 // testbed platforms, in support of the nation's exascale computing imperative.
16 
17 //                        libCEED + PETSc Example: Navier-Stokes
18 //
19 // This example demonstrates a simple usage of libCEED with PETSc to solve a
20 // Navier-Stokes problem.
21 //
22 // The code is intentionally "raw", using only low-level communication
23 // primitives.
24 //
25 // Build with:
26 //
27 //     make [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>] navierstokes
28 //
29 // Sample runs:
30 //
31 //     ./navierstokes -ceed /cpu/self -problem density_current -degree 1
32 //     ./navierstokes -ceed /gpu/occa -problem advection -degree 1
33 //
34 //TESTARGS -ceed {ceed_resource} -test explicit -degree 3 -dm_plex_box_faces 1,1,2 -units_kilogram 1e-9 -lx 125 -ly 125 -lz 250 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3
35 //TESTARGS -ceed {ceed_resource} -test implicit_stab_none -degree 3 -dm_plex_box_faces 1,1,2 -units_kilogram 1e-9 -lx 125 -ly 125 -lz 250 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha
36 //TESTARGS -ceed {ceed_resource} -test implicit_stab_supg -degree 3 -dm_plex_box_faces 1,1,2 -units_kilogram 1e-9 -lx 125 -ly 125 -lz 250 -center 62.5,62.5,187.5 -rc 100. -thetaC -35. -ksp_atol 1e-4 -ksp_rtol 1e-3 -ksp_type bcgs -snes_atol 1e-3 -snes_lag_jacobian 100 -snes_lag_jacobian_persists -snes_mf_operator -ts_dt 1e-3 -implicit -ts_type alpha -stab supg
37 
38 /// @file
39 /// Navier-Stokes example using PETSc
40 
41 const char help[] = "Solve Navier-Stokes using PETSc and libCEED\n";
42 
43 #include <petscts.h>
44 #include <petscdmplex.h>
45 #include <ceed.h>
46 #include <stdbool.h>
47 #include <petscsys.h>
48 #include "common.h"
49 #include "setup-boundary.h"
50 #include "advection.h"
51 #include "advection2d.h"
52 #include "densitycurrent.h"
53 
54 #if PETSC_VERSION_LT(3,14,0)
55 #  define DMPlexGetClosureIndices(a,b,c,d,e,f,g,h,i) DMPlexGetClosureIndices(a,b,c,d,f,g,i)
56 #  define DMPlexRestoreClosureIndices(a,b,c,d,e,f,g,h,i) DMPlexRestoreClosureIndices(a,b,c,d,f,g,i)
57 #endif
58 
59 // MemType Options
60 static const char *const memTypes[] = {
61   "host",
62   "device",
63   "memType", "CEED_MEM_", NULL
64 };
65 
66 // Problem Options
67 typedef enum {
68   NS_DENSITY_CURRENT = 0,
69   NS_ADVECTION = 1,
70   NS_ADVECTION2D = 2,
71 } problemType;
72 static const char *const problemTypes[] = {
73   "density_current",
74   "advection",
75   "advection2d",
76   "problemType", "NS_", NULL
77 };
78 
79 typedef enum {
80   STAB_NONE = 0,
81   STAB_SU = 1,   // Streamline Upwind
82   STAB_SUPG = 2, // Streamline Upwind Petrov-Galerkin
83 } StabilizationType;
84 static const char *const StabilizationTypes[] = {
85   "none",
86   "SU",
87   "SUPG",
88   "StabilizationType", "STAB_", NULL
89 };
90 
91 // Test Options
92 typedef enum {
93   TEST_NONE = 0,               // Non test mode
94   TEST_EXPLICIT = 1,           // Explicit test
95   TEST_IMPLICIT_STAB_NONE = 2, // Implicit test no stab
96   TEST_IMPLICIT_STAB_SUPG = 3, // Implicit test supg stab
97 } testType;
98 static const char *const testTypes[] = {
99   "none",
100   "explicit",
101   "implicit_stab_none",
102   "implicit_stab_supg",
103   "testType", "TEST_", NULL
104 };
105 
106 // Tests specific data
107 typedef struct {
108   PetscScalar testtol;
109   const char *filepath;
110 } testData;
111 
112 testData testOptions[] = {
113   [TEST_NONE] = {
114     .testtol = 0.,
115     .filepath = NULL
116   },
117   [TEST_EXPLICIT] = {
118     .testtol = 1E-5,
119     .filepath = "examples/fluids/tests-output/fluids-navierstokes-explicit.bin"
120   },
121   [TEST_IMPLICIT_STAB_NONE] = {
122     .testtol = 5E-4,
123     .filepath = "examples/fluids/tests-output/fluids-navierstokes-implicit-stab-none.bin"
124   },
125   [TEST_IMPLICIT_STAB_SUPG] = {
126     .testtol = 5E-4,
127     .filepath = "examples/fluids/tests-output/fluids-navierstokes-implicit-stab-supg.bin"
128   }
129 };
130 
131 // Problem specific data
132 typedef struct {
133   CeedInt dim, qdatasizeVol, qdatasizeSur;
134   CeedQFunctionUser setupVol, setupSur, ics, applyVol_rhs, applySur_rhs,
135                     applyVol_ifunction, applySur_ifunction;
136   PetscErrorCode (*bc)(PetscInt, PetscReal, const PetscReal[], PetscInt,
137                        PetscScalar[], void *);
138   const char *setupVol_loc, *setupSur_loc, *ics_loc, *applyVol_rhs_loc,
139              *applySur_rhs_loc, *applyVol_ifunction_loc, *applySur_ifunction_loc;
140   const bool non_zero_time;
141 } problemData;
142 
143 problemData problemOptions[] = {
144   [NS_DENSITY_CURRENT] = {
145     .dim                       = 3,
146     .qdatasizeVol              = 10,
147     .qdatasizeSur              = 4,
148     .setupVol                  = Setup,
149     .setupVol_loc              = Setup_loc,
150     .setupSur                  = SetupBoundary,
151     .setupSur_loc              = SetupBoundary_loc,
152     .ics                       = ICsDC,
153     .ics_loc                   = ICsDC_loc,
154     .applyVol_rhs              = DC,
155     .applyVol_rhs_loc          = DC_loc,
156   //.applySur_rhs              = DC_Sur,
157   //.applySur_rhs_loc          = DC_Sur_loc,
158     .applyVol_ifunction        = IFunction_DC,
159     .applyVol_ifunction_loc    = IFunction_DC_loc,
160   //.applySur_ifunction        = IFunction_DC_Sur,
161   //.applySur_ifunction_loc    = IFunction_DC_Sur_loc,
162     .bc                        = Exact_DC,
163     .non_zero_time             = PETSC_FALSE,
164   },
165   [NS_ADVECTION] = {
166     .dim                       = 3,
167     .qdatasizeVol              = 10,
168     .qdatasizeSur              = 4,
169     .setupVol                  = Setup,
170     .setupVol_loc              = Setup_loc,
171     .setupSur                  = SetupBoundary,
172     .setupSur_loc              = SetupBoundary_loc,
173     .ics                       = ICsAdvection,
174     .ics_loc                   = ICsAdvection_loc,
175     .applyVol_rhs              = Advection,
176     .applyVol_rhs_loc          = Advection_loc,
177     .applySur_rhs              = Advection_Sur,
178     .applySur_rhs_loc          = Advection_Sur_loc,
179     .applyVol_ifunction        = IFunction_Advection,
180     .applyVol_ifunction_loc    = IFunction_Advection_loc,
181   //.applySur_ifunction        = IFunction_Advection_Sur,
182   //.applySur_ifunction_loc    = IFunction_Advection_Sur_loc,
183     .bc                        = Exact_Advection,
184     .non_zero_time             = PETSC_FALSE,
185   },
186   [NS_ADVECTION2D] = {
187     .dim                       = 2,
188     .qdatasizeVol              = 5,
189     .qdatasizeSur              = 3,
190     .setupVol                  = Setup2d,
191     .setupVol_loc              = Setup2d_loc,
192     .setupSur                  = SetupBoundary2d,
193     .setupSur_loc              = SetupBoundary2d_loc,
194     .ics                       = ICsAdvection2d,
195     .ics_loc                   = ICsAdvection2d_loc,
196     .applyVol_rhs              = Advection2d,
197     .applyVol_rhs_loc          = Advection2d_loc,
198     .applySur_rhs              = Advection2d_Sur,
199     .applySur_rhs_loc          = Advection2d_Sur_loc,
200     .applyVol_ifunction        = IFunction_Advection2d,
201     .applyVol_ifunction_loc    = IFunction_Advection2d_loc,
202   //.applySur_ifunction        = IFunction_Advection2d_Sur,
203   //.applySur_ifunction_loc    = IFunction_Advection2d_Sur_loc,
204     .bc                        = Exact_Advection2d,
205     .non_zero_time             = PETSC_TRUE,
206   },
207 };
208 
209 // PETSc user data
210 typedef struct User_ *User;
211 typedef struct Units_ *Units;
212 
213 struct User_ {
214   MPI_Comm comm;
215   PetscInt outputfreq;
216   DM dm;
217   DM dmviz;
218   Mat interpviz;
219   Ceed ceed;
220   Units units;
221   CeedVector qceed, qdotceed, gceed;
222   CeedOperator op_rhs_vol, op_rhs_sur[6], op_rhs,
223                op_ifunction_vol, op_ifunction_sur[6], op_ifunction;
224   Vec M;
225   char outputfolder[PETSC_MAX_PATH_LEN];
226   PetscInt contsteps;
227 };
228 
229 struct Units_ {
230   // fundamental units
231   PetscScalar meter;
232   PetscScalar kilogram;
233   PetscScalar second;
234   PetscScalar Kelvin;
235   // derived units
236   PetscScalar Pascal;
237   PetscScalar JperkgK;
238   PetscScalar mpersquareds;
239   PetscScalar WpermK;
240   PetscScalar kgpercubicm;
241   PetscScalar kgpersquaredms;
242   PetscScalar Joulepercubicm;
243 };
244 
245 typedef struct SimpleBC_ *SimpleBC;
246 struct SimpleBC_ {
247   PetscInt nwall, nslip[3], noutflow;
248   PetscInt walls[6], slips[3][6], outflow[6];
249   PetscBool userbc;
250 };
251 
252 // Essential BC dofs are encoded in closure indices as -(i+1).
253 static PetscInt Involute(PetscInt i) {
254   return i >= 0 ? i : -(i+1);
255 }
256 
257 // Utility function to create local CEED restriction
258 static PetscErrorCode CreateRestrictionFromPlex(Ceed ceed, DM dm, CeedInt P,
259     CeedInt height, DMLabel domainLabel, CeedInt value,
260     CeedElemRestriction *Erestrict) {
261 
262   PetscSection   section;
263   PetscInt       p, Nelem, Ndof, *erestrict, eoffset, nfields, dim,
264                  depth;
265   DMLabel        depthLabel;
266   IS             depthIS, iterIS;
267   Vec            Uloc;
268   const PetscInt *iterIndices;
269   PetscErrorCode ierr;
270 
271   PetscFunctionBeginUser;
272   ierr = DMGetDimension(dm, &dim); CHKERRQ(ierr);
273   dim -= height;
274   ierr = DMGetLocalSection(dm, &section); CHKERRQ(ierr);
275   ierr = PetscSectionGetNumFields(section, &nfields); CHKERRQ(ierr);
276   PetscInt ncomp[nfields], fieldoff[nfields+1];
277   fieldoff[0] = 0;
278   for (PetscInt f=0; f<nfields; f++) {
279     ierr = PetscSectionGetFieldComponents(section, f, &ncomp[f]); CHKERRQ(ierr);
280     fieldoff[f+1] = fieldoff[f] + ncomp[f];
281   }
282 
283   ierr = DMPlexGetDepth(dm, &depth); CHKERRQ(ierr);
284   ierr = DMPlexGetDepthLabel(dm, &depthLabel); CHKERRQ(ierr);
285   ierr = DMLabelGetStratumIS(depthLabel, depth - height, &depthIS); CHKERRQ(ierr);
286   if (domainLabel) {
287     IS domainIS;
288     ierr = DMLabelGetStratumIS(domainLabel, value, &domainIS); CHKERRQ(ierr);
289     ierr = ISIntersect(depthIS, domainIS, &iterIS); CHKERRQ(ierr);
290     ierr = ISDestroy(&domainIS); CHKERRQ(ierr);
291     ierr = ISDestroy(&depthIS); CHKERRQ(ierr);
292   } else {
293     iterIS = depthIS;
294   }
295   ierr = ISGetLocalSize(iterIS, &Nelem); CHKERRQ(ierr);
296   ierr = ISGetIndices(iterIS, &iterIndices); CHKERRQ(ierr);
297   ierr = PetscMalloc1(Nelem*PetscPowInt(P, dim), &erestrict); CHKERRQ(ierr);
298   for (p=0,eoffset=0; p<Nelem; p++) {
299     PetscInt c = iterIndices[p];
300     PetscInt numindices, *indices, nnodes;
301     ierr = DMPlexGetClosureIndices(dm, section, section, c, PETSC_TRUE,
302                                    &numindices, &indices, NULL, NULL);
303     CHKERRQ(ierr);
304     if (numindices % fieldoff[nfields]) SETERRQ1(PETSC_COMM_SELF,
305           PETSC_ERR_ARG_INCOMP, "Number of closure indices not compatible with Cell %D",
306           c);
307     nnodes = numindices / fieldoff[nfields];
308     for (PetscInt i=0; i<nnodes; i++) {
309       // Check that indices are blocked by node and thus can be coalesced as a single field with
310       // fieldoff[nfields] = sum(ncomp) components.
311       for (PetscInt f=0; f<nfields; f++) {
312         for (PetscInt j=0; j<ncomp[f]; j++) {
313           if (Involute(indices[fieldoff[f]*nnodes + i*ncomp[f] + j])
314               != Involute(indices[i*ncomp[0]]) + fieldoff[f] + j)
315             SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,
316                      "Cell %D closure indices not interlaced for node %D field %D component %D",
317                      c, i, f, j);
318         }
319       }
320       // Essential boundary conditions are encoded as -(loc+1), but we don't care so we decode.
321       PetscInt loc = Involute(indices[i*ncomp[0]]);
322       erestrict[eoffset++] = loc;
323     }
324     ierr = DMPlexRestoreClosureIndices(dm, section, section, c, PETSC_TRUE,
325                                        &numindices, &indices, NULL, NULL);
326     CHKERRQ(ierr);
327   }
328   if (eoffset != Nelem*PetscPowInt(P, dim))
329     SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_LIB,
330              "ElemRestriction of size (%D,%D) initialized %D nodes", Nelem,
331              PetscPowInt(P, dim),eoffset);
332   ierr = ISRestoreIndices(iterIS, &iterIndices); CHKERRQ(ierr);
333   ierr = ISDestroy(&iterIS); CHKERRQ(ierr);
334 
335   ierr = DMGetLocalVector(dm, &Uloc); CHKERRQ(ierr);
336   ierr = VecGetLocalSize(Uloc, &Ndof); CHKERRQ(ierr);
337   ierr = DMRestoreLocalVector(dm, &Uloc); CHKERRQ(ierr);
338   CeedElemRestrictionCreate(ceed, Nelem, PetscPowInt(P, dim), fieldoff[nfields],
339                             1, Ndof, CEED_MEM_HOST, CEED_COPY_VALUES, erestrict,
340                             Erestrict);
341   ierr = PetscFree(erestrict); CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 // Utility function to get Ceed Restriction for each domain
346 static PetscErrorCode GetRestrictionForDomain(Ceed ceed, DM dm, CeedInt ncompx, CeedInt dim,
347     CeedInt height, DMLabel domainLabel, PetscInt value, CeedInt P, CeedInt Q,
348     CeedInt qdatasize, CeedElemRestriction *restrictq,
349     CeedElemRestriction *restrictx, CeedElemRestriction *restrictqdi) {
350 
351   DM dmcoord;
352   CeedInt localNelem;
353   CeedInt Qdim = CeedIntPow(Q, dim);
354   PetscErrorCode ierr;
355 
356   PetscFunctionBeginUser;
357   ierr = DMGetCoordinateDM(dm, &dmcoord); CHKERRQ(ierr);
358   ierr = DMPlexSetClosurePermutationTensor(dmcoord, PETSC_DETERMINE, NULL);
359   CHKERRQ(ierr);
360   ierr = CreateRestrictionFromPlex(ceed, dm, P, height, domainLabel, value, restrictq);
361   CHKERRQ(ierr);
362   ierr = CreateRestrictionFromPlex(ceed, dmcoord, 2, height, domainLabel, value, restrictx);
363   CHKERRQ(ierr);
364   CeedElemRestrictionGetNumElements(*restrictq, &localNelem);
365   CeedElemRestrictionCreateStrided(ceed, localNelem, Qdim,
366                                    qdatasize, qdatasize*localNelem*Qdim,
367                                    CEED_STRIDES_BACKEND, restrictqdi);
368   PetscFunctionReturn(0);
369 }
370 
371 static int CreateVectorFromPetscVec(Ceed ceed, Vec p, CeedVector *v) {
372   PetscErrorCode ierr;
373   PetscInt m;
374 
375   PetscFunctionBeginUser;
376   ierr = VecGetLocalSize(p, &m); CHKERRQ(ierr);
377   ierr = CeedVectorCreate(ceed, m, v); CHKERRQ(ierr);
378   PetscFunctionReturn(0);
379 }
380 
381 static int VectorPlacePetscVec(CeedVector c, Vec p) {
382   PetscErrorCode ierr;
383   PetscInt mceed,mpetsc;
384   PetscScalar *a;
385 
386   PetscFunctionBeginUser;
387   ierr = CeedVectorGetLength(c, &mceed); CHKERRQ(ierr);
388   ierr = VecGetLocalSize(p, &mpetsc); CHKERRQ(ierr);
389   if (mceed != mpetsc) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,
390                                   "Cannot place PETSc Vec of length %D in CeedVector of length %D",
391                                   mpetsc, mceed);
392   ierr = VecGetArray(p, &a); CHKERRQ(ierr);
393   CeedVectorSetArray(c, CEED_MEM_HOST, CEED_USE_POINTER, a);
394   PetscFunctionReturn(0);
395 }
396 
397 static PetscErrorCode DMPlexInsertBoundaryValues_NS(DM dm,
398     PetscBool insertEssential, Vec Qloc, PetscReal time, Vec faceGeomFVM,
399     Vec cellGeomFVM, Vec gradFVM) {
400   PetscErrorCode ierr;
401   Vec Qbc;
402 
403   PetscFunctionBegin;
404   ierr = DMGetNamedLocalVector(dm, "Qbc", &Qbc); CHKERRQ(ierr);
405   ierr = VecAXPY(Qloc, 1., Qbc); CHKERRQ(ierr);
406   ierr = DMRestoreNamedLocalVector(dm, "Qbc", &Qbc); CHKERRQ(ierr);
407   PetscFunctionReturn(0);
408 }
409 
410 // This is the RHS of the ODE, given as u_t = G(t,u)
411 // This function takes in a state vector Q and writes into G
412 static PetscErrorCode RHS_NS(TS ts, PetscReal t, Vec Q, Vec G, void *userData) {
413   PetscErrorCode ierr;
414   User user = *(User *)userData;
415   PetscScalar *q, *g;
416   Vec Qloc, Gloc;
417 
418   // Global-to-local
419   PetscFunctionBeginUser;
420   ierr = DMGetLocalVector(user->dm, &Qloc); CHKERRQ(ierr);
421   ierr = DMGetLocalVector(user->dm, &Gloc); CHKERRQ(ierr);
422   ierr = VecZeroEntries(Qloc); CHKERRQ(ierr);
423   ierr = DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Qloc); CHKERRQ(ierr);
424   ierr = DMPlexInsertBoundaryValues(user->dm, PETSC_TRUE, Qloc, 0.0,
425                                     NULL, NULL, NULL); CHKERRQ(ierr);
426   ierr = VecZeroEntries(Gloc); CHKERRQ(ierr);
427 
428   // Ceed Vectors
429   ierr = VecGetArrayRead(Qloc, (const PetscScalar **)&q); CHKERRQ(ierr);
430   ierr = VecGetArray(Gloc, &g); CHKERRQ(ierr);
431   CeedVectorSetArray(user->qceed, CEED_MEM_HOST, CEED_USE_POINTER, q);
432   CeedVectorSetArray(user->gceed, CEED_MEM_HOST, CEED_USE_POINTER, g);
433 
434   // Apply CEED operator
435   CeedOperatorApply(user->op_rhs, user->qceed, user->gceed,
436                     CEED_REQUEST_IMMEDIATE);
437 
438   // Restore vectors
439   ierr = VecRestoreArrayRead(Qloc, (const PetscScalar **)&q); CHKERRQ(ierr);
440   ierr = VecRestoreArray(Gloc, &g); CHKERRQ(ierr);
441 
442   ierr = VecZeroEntries(G); CHKERRQ(ierr);
443   ierr = DMLocalToGlobal(user->dm, Gloc, ADD_VALUES, G); CHKERRQ(ierr);
444 
445   // Inverse of the lumped mass matrix
446   ierr = VecPointwiseMult(G, G, user->M); // M is Minv
447   CHKERRQ(ierr);
448 
449   ierr = DMRestoreLocalVector(user->dm, &Qloc); CHKERRQ(ierr);
450   ierr = DMRestoreLocalVector(user->dm, &Gloc); CHKERRQ(ierr);
451   PetscFunctionReturn(0);
452 }
453 
454 static PetscErrorCode IFunction_NS(TS ts, PetscReal t, Vec Q, Vec Qdot, Vec G,
455                                    void *userData) {
456   PetscErrorCode ierr;
457   User user = *(User *)userData;
458   const PetscScalar *q, *qdot;
459   PetscScalar *g;
460   Vec Qloc, Qdotloc, Gloc;
461 
462   // Global-to-local
463   PetscFunctionBeginUser;
464   ierr = DMGetLocalVector(user->dm, &Qloc); CHKERRQ(ierr);
465   ierr = DMGetLocalVector(user->dm, &Qdotloc); CHKERRQ(ierr);
466   ierr = DMGetLocalVector(user->dm, &Gloc); CHKERRQ(ierr);
467   ierr = VecZeroEntries(Qloc); CHKERRQ(ierr);
468   ierr = DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Qloc); CHKERRQ(ierr);
469   ierr = DMPlexInsertBoundaryValues(user->dm, PETSC_TRUE, Qloc, 0.0,
470                                     NULL, NULL, NULL); CHKERRQ(ierr);
471   ierr = VecZeroEntries(Qdotloc); CHKERRQ(ierr);
472   ierr = DMGlobalToLocal(user->dm, Qdot, INSERT_VALUES, Qdotloc); CHKERRQ(ierr);
473   ierr = VecZeroEntries(Gloc); CHKERRQ(ierr);
474 
475   // Ceed Vectors
476   ierr = VecGetArrayRead(Qloc, &q); CHKERRQ(ierr);
477   ierr = VecGetArrayRead(Qdotloc, &qdot); CHKERRQ(ierr);
478   ierr = VecGetArray(Gloc, &g); CHKERRQ(ierr);
479   CeedVectorSetArray(user->qceed, CEED_MEM_HOST, CEED_USE_POINTER,
480                      (PetscScalar *)q);
481   CeedVectorSetArray(user->qdotceed, CEED_MEM_HOST, CEED_USE_POINTER,
482                      (PetscScalar *)qdot);
483   CeedVectorSetArray(user->gceed, CEED_MEM_HOST, CEED_USE_POINTER, g);
484 
485   // Apply CEED operator
486   CeedOperatorApply(user->op_ifunction, user->qceed, user->gceed,
487                     CEED_REQUEST_IMMEDIATE);
488 
489   // Restore vectors
490   ierr = VecRestoreArrayRead(Qloc, &q); CHKERRQ(ierr);
491   ierr = VecRestoreArrayRead(Qdotloc, &qdot); CHKERRQ(ierr);
492   ierr = VecRestoreArray(Gloc, &g); CHKERRQ(ierr);
493 
494   ierr = VecZeroEntries(G); CHKERRQ(ierr);
495   ierr = DMLocalToGlobal(user->dm, Gloc, ADD_VALUES, G); CHKERRQ(ierr);
496 
497   ierr = DMRestoreLocalVector(user->dm, &Qloc); CHKERRQ(ierr);
498   ierr = DMRestoreLocalVector(user->dm, &Qdotloc); CHKERRQ(ierr);
499   ierr = DMRestoreLocalVector(user->dm, &Gloc); CHKERRQ(ierr);
500   PetscFunctionReturn(0);
501 }
502 
503 // User provided TS Monitor
504 static PetscErrorCode TSMonitor_NS(TS ts, PetscInt stepno, PetscReal time,
505                                    Vec Q, void *ctx) {
506   User user = ctx;
507   Vec Qloc;
508   char filepath[PETSC_MAX_PATH_LEN];
509   PetscViewer viewer;
510   PetscErrorCode ierr;
511 
512   // Set up output
513   PetscFunctionBeginUser;
514   // Print every 'outputfreq' steps
515   if (stepno % user->outputfreq != 0)
516     PetscFunctionReturn(0);
517   ierr = DMGetLocalVector(user->dm, &Qloc); CHKERRQ(ierr);
518   ierr = PetscObjectSetName((PetscObject)Qloc, "StateVec"); CHKERRQ(ierr);
519   ierr = VecZeroEntries(Qloc); CHKERRQ(ierr);
520   ierr = DMGlobalToLocal(user->dm, Q, INSERT_VALUES, Qloc); CHKERRQ(ierr);
521 
522   // Output
523   ierr = PetscSNPrintf(filepath, sizeof filepath, "%s/ns-%03D.vtu",
524                        user->outputfolder, stepno + user->contsteps);
525   CHKERRQ(ierr);
526   ierr = PetscViewerVTKOpen(PetscObjectComm((PetscObject)Q), filepath,
527                             FILE_MODE_WRITE, &viewer); CHKERRQ(ierr);
528   ierr = VecView(Qloc, viewer); CHKERRQ(ierr);
529   ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr);
530   if (user->dmviz) {
531     Vec Qrefined, Qrefined_loc;
532     char filepath_refined[PETSC_MAX_PATH_LEN];
533     PetscViewer viewer_refined;
534 
535     ierr = DMGetGlobalVector(user->dmviz, &Qrefined); CHKERRQ(ierr);
536     ierr = DMGetLocalVector(user->dmviz, &Qrefined_loc); CHKERRQ(ierr);
537     ierr = PetscObjectSetName((PetscObject)Qrefined_loc, "Refined");
538     CHKERRQ(ierr);
539     ierr = MatInterpolate(user->interpviz, Q, Qrefined); CHKERRQ(ierr);
540     ierr = VecZeroEntries(Qrefined_loc); CHKERRQ(ierr);
541     ierr = DMGlobalToLocal(user->dmviz, Qrefined, INSERT_VALUES, Qrefined_loc);
542     CHKERRQ(ierr);
543     ierr = PetscSNPrintf(filepath_refined, sizeof filepath_refined,
544                          "%s/nsrefined-%03D.vtu",
545                          user->outputfolder, stepno + user->contsteps);
546     CHKERRQ(ierr);
547     ierr = PetscViewerVTKOpen(PetscObjectComm((PetscObject)Qrefined),
548                               filepath_refined,
549                               FILE_MODE_WRITE, &viewer_refined); CHKERRQ(ierr);
550     ierr = VecView(Qrefined_loc, viewer_refined); CHKERRQ(ierr);
551     ierr = DMRestoreLocalVector(user->dmviz, &Qrefined_loc); CHKERRQ(ierr);
552     ierr = DMRestoreGlobalVector(user->dmviz, &Qrefined); CHKERRQ(ierr);
553     ierr = PetscViewerDestroy(&viewer_refined); CHKERRQ(ierr);
554   }
555   ierr = DMRestoreLocalVector(user->dm, &Qloc); CHKERRQ(ierr);
556 
557   // Save data in a binary file for continuation of simulations
558   ierr = PetscSNPrintf(filepath, sizeof filepath, "%s/ns-solution.bin",
559                        user->outputfolder); CHKERRQ(ierr);
560   ierr = PetscViewerBinaryOpen(user->comm, filepath, FILE_MODE_WRITE, &viewer);
561   CHKERRQ(ierr);
562   ierr = VecView(Q, viewer); CHKERRQ(ierr);
563   ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr);
564 
565   // Save time stamp
566   // Dimensionalize time back
567   time /= user->units->second;
568   ierr = PetscSNPrintf(filepath, sizeof filepath, "%s/ns-time.bin",
569                        user->outputfolder); CHKERRQ(ierr);
570   ierr = PetscViewerBinaryOpen(user->comm, filepath, FILE_MODE_WRITE, &viewer);
571   CHKERRQ(ierr);
572   #if PETSC_VERSION_GE(3,13,0)
573   ierr = PetscViewerBinaryWrite(viewer, &time, 1, PETSC_REAL);
574   #else
575   ierr = PetscViewerBinaryWrite(viewer, &time, 1, PETSC_REAL, true);
576   #endif
577   CHKERRQ(ierr);
578   ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr);
579 
580   PetscFunctionReturn(0);
581 }
582 
583 static PetscErrorCode ICs_FixMultiplicity(CeedOperator op_ics,
584     CeedVector xcorners, CeedVector q0ceed, DM dm, Vec Qloc, Vec Q,
585     CeedElemRestriction restrictq, SetupContext ctxSetup, CeedScalar time) {
586   PetscErrorCode ierr;
587   CeedVector multlvec;
588   Vec Multiplicity, MultiplicityLoc;
589 
590   ctxSetup->time = time;
591   ierr = VecZeroEntries(Qloc); CHKERRQ(ierr);
592   ierr = VectorPlacePetscVec(q0ceed, Qloc); CHKERRQ(ierr);
593   CeedOperatorApply(op_ics, xcorners, q0ceed, CEED_REQUEST_IMMEDIATE);
594   ierr = VecZeroEntries(Q); CHKERRQ(ierr);
595   ierr = DMLocalToGlobal(dm, Qloc, ADD_VALUES, Q); CHKERRQ(ierr);
596 
597   // Fix multiplicity for output of ICs
598   ierr = DMGetLocalVector(dm, &MultiplicityLoc); CHKERRQ(ierr);
599   CeedElemRestrictionCreateVector(restrictq, &multlvec, NULL);
600   ierr = VectorPlacePetscVec(multlvec, MultiplicityLoc); CHKERRQ(ierr);
601   CeedElemRestrictionGetMultiplicity(restrictq, multlvec);
602   CeedVectorDestroy(&multlvec);
603   ierr = DMGetGlobalVector(dm, &Multiplicity); CHKERRQ(ierr);
604   ierr = VecZeroEntries(Multiplicity); CHKERRQ(ierr);
605   ierr = DMLocalToGlobal(dm, MultiplicityLoc, ADD_VALUES, Multiplicity);
606   CHKERRQ(ierr);
607   ierr = VecPointwiseDivide(Q, Q, Multiplicity); CHKERRQ(ierr);
608   ierr = VecPointwiseDivide(Qloc, Qloc, MultiplicityLoc); CHKERRQ(ierr);
609   ierr = DMRestoreLocalVector(dm, &MultiplicityLoc); CHKERRQ(ierr);
610   ierr = DMRestoreGlobalVector(dm, &Multiplicity); CHKERRQ(ierr);
611 
612   PetscFunctionReturn(0);
613 }
614 
615 static PetscErrorCode ComputeLumpedMassMatrix(Ceed ceed, DM dm,
616     CeedElemRestriction restrictq, CeedBasis basisq,
617     CeedElemRestriction restrictqdi, CeedVector qdata, Vec M) {
618   PetscErrorCode ierr;
619   CeedQFunction qf_mass;
620   CeedOperator op_mass;
621   CeedVector mceed;
622   Vec Mloc;
623   CeedInt ncompq, qdatasize;
624 
625   PetscFunctionBeginUser;
626   CeedElemRestrictionGetNumComponents(restrictq, &ncompq);
627   CeedElemRestrictionGetNumComponents(restrictqdi, &qdatasize);
628   // Create the Q-function that defines the action of the mass operator
629   CeedQFunctionCreateInterior(ceed, 1, Mass, Mass_loc, &qf_mass);
630   CeedQFunctionAddInput(qf_mass, "q", ncompq, CEED_EVAL_INTERP);
631   CeedQFunctionAddInput(qf_mass, "qdata", qdatasize, CEED_EVAL_NONE);
632   CeedQFunctionAddOutput(qf_mass, "v", ncompq, CEED_EVAL_INTERP);
633 
634   // Create the mass operator
635   CeedOperatorCreate(ceed, qf_mass, NULL, NULL, &op_mass);
636   CeedOperatorSetField(op_mass, "q", restrictq, basisq, CEED_VECTOR_ACTIVE);
637   CeedOperatorSetField(op_mass, "qdata", restrictqdi,
638                        CEED_BASIS_COLLOCATED, qdata);
639   CeedOperatorSetField(op_mass, "v", restrictq, basisq, CEED_VECTOR_ACTIVE);
640 
641   ierr = DMGetLocalVector(dm, &Mloc); CHKERRQ(ierr);
642   ierr = VecZeroEntries(Mloc); CHKERRQ(ierr);
643   CeedElemRestrictionCreateVector(restrictq, &mceed, NULL);
644   ierr = VectorPlacePetscVec(mceed, Mloc); CHKERRQ(ierr);
645 
646   {
647     // Compute a lumped mass matrix
648     CeedVector onesvec;
649     CeedElemRestrictionCreateVector(restrictq, &onesvec, NULL);
650     CeedVectorSetValue(onesvec, 1.0);
651     CeedOperatorApply(op_mass, onesvec, mceed, CEED_REQUEST_IMMEDIATE);
652     CeedVectorDestroy(&onesvec);
653     CeedOperatorDestroy(&op_mass);
654     CeedVectorDestroy(&mceed);
655   }
656   CeedQFunctionDestroy(&qf_mass);
657 
658   ierr = VecZeroEntries(M); CHKERRQ(ierr);
659   ierr = DMLocalToGlobal(dm, Mloc, ADD_VALUES, M); CHKERRQ(ierr);
660   ierr = DMRestoreLocalVector(dm, &Mloc); CHKERRQ(ierr);
661 
662   // Invert diagonally lumped mass vector for RHS function
663   ierr = VecReciprocal(M); CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 static PetscErrorCode SetUpDM(DM dm, problemData *problem, PetscInt degree,
668                               SimpleBC bc, void *ctxSetup) {
669   PetscErrorCode ierr;
670 
671   PetscFunctionBeginUser;
672   {
673     // Configure the finite element space and boundary conditions
674     PetscFE fe;
675     PetscInt ncompq = 5;
676     ierr = PetscFECreateLagrange(PETSC_COMM_SELF, problem->dim, ncompq,
677                                  PETSC_FALSE, degree, PETSC_DECIDE,
678                                  &fe); CHKERRQ(ierr);
679     ierr = PetscObjectSetName((PetscObject)fe, "Q"); CHKERRQ(ierr);
680     ierr = DMAddField(dm,NULL,(PetscObject)fe); CHKERRQ(ierr);
681     ierr = DMCreateDS(dm); CHKERRQ(ierr);
682     {
683       PetscInt comps[1] = {1};
684       ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "slipx", "Face Sets", 0,
685                            1, comps, (void(*)(void))NULL, bc->nslip[0],
686                            bc->slips[0], ctxSetup); CHKERRQ(ierr);
687       comps[0] = 2;
688       ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "slipy", "Face Sets", 0,
689                            1, comps, (void(*)(void))NULL, bc->nslip[1],
690                            bc->slips[1], ctxSetup); CHKERRQ(ierr);
691       comps[0] = 3;
692       ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "slipz", "Face Sets", 0,
693                            1, comps, (void(*)(void))NULL, bc->nslip[2],
694                            bc->slips[2], ctxSetup); CHKERRQ(ierr);
695     }
696     if (bc->userbc == PETSC_TRUE) {
697       for (PetscInt c = 0; c < 3; c++) {
698         for (PetscInt s = 0; s < bc->nslip[c]; s++) {
699           for (PetscInt w = 0; w < bc->nwall; w++) {
700             if (bc->slips[c][s] == bc->walls[w])
701               SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG,
702                        "Boundary condition already set on face %D!\n", bc->walls[w]);
703 
704           }
705         }
706       }
707     }
708     // Wall boundary conditions are zero energy density and zero flux for
709     //   velocity in advection/advection2d, and zero velocity and zero flux
710     //   for mass density and energy density in density_current
711     {
712       if (problem->bc == Exact_Advection || problem->bc == Exact_Advection2d) {
713         PetscInt comps[1] = {4};
714         ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", "Face Sets", 0,
715                              1, comps, (void(*)(void))problem->bc,
716                              bc->nwall, bc->walls, ctxSetup); CHKERRQ(ierr);
717       } else if (problem->bc == Exact_DC) {
718         PetscInt comps[3] = {1, 2, 3};
719         ierr = DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", "Face Sets", 0,
720                              3, comps, (void(*)(void))problem->bc,
721                              bc->nwall, bc->walls, ctxSetup); CHKERRQ(ierr);
722       } else
723         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_NULL,
724                 "Undefined boundary conditions for this problem");
725     }
726     ierr = DMPlexSetClosurePermutationTensor(dm, PETSC_DETERMINE, NULL);
727     CHKERRQ(ierr);
728     ierr = PetscFEDestroy(&fe); CHKERRQ(ierr);
729   }
730   {
731     // Empty name for conserved field (because there is only one field)
732     PetscSection section;
733     ierr = DMGetLocalSection(dm, &section); CHKERRQ(ierr);
734     ierr = PetscSectionSetFieldName(section, 0, ""); CHKERRQ(ierr);
735     ierr = PetscSectionSetComponentName(section, 0, 0, "Density");
736     CHKERRQ(ierr);
737     ierr = PetscSectionSetComponentName(section, 0, 1, "MomentumX");
738     CHKERRQ(ierr);
739     ierr = PetscSectionSetComponentName(section, 0, 2, "MomentumY");
740     CHKERRQ(ierr);
741     ierr = PetscSectionSetComponentName(section, 0, 3, "MomentumZ");
742     CHKERRQ(ierr);
743     ierr = PetscSectionSetComponentName(section, 0, 4, "EnergyDensity");
744     CHKERRQ(ierr);
745   }
746   PetscFunctionReturn(0);
747 }
748 
749 int main(int argc, char **argv) {
750   PetscInt ierr;
751   MPI_Comm comm;
752   DM dm, dmcoord, dmviz;
753   Mat interpviz;
754   TS ts;
755   TSAdapt adapt;
756   User user;
757   Units units;
758   char ceedresource[4096] = "/cpu/self";
759   PetscInt localNelemVol, lnodes, gnodes, steps;
760   const PetscInt ncompq = 5;
761   PetscMPIInt rank;
762   PetscScalar ftime;
763   Vec Q, Qloc, Xloc;
764   Ceed ceed;
765   CeedInt numP, numQ;
766   CeedVector xcorners, qdata, q0ceed;
767   CeedBasis basisx, basisxc, basisq;
768   CeedElemRestriction restrictx, restrictq, restrictqdi;
769   CeedQFunction qf_setupVol, qf_ics, qf_rhsVol, qf_ifunctionVol;
770   CeedOperator op_setupVol, op_ics;
771   CeedScalar Rd;
772   CeedMemType memtyperequested;
773   PetscScalar WpermK, Pascal, JperkgK, mpersquareds, kgpercubicm,
774               kgpersquaredms, Joulepercubicm;
775   problemType problemChoice;
776   problemData *problem = NULL;
777   StabilizationType stab;
778   testType testChoice;
779   testData *test = NULL;
780   PetscBool implicit;
781   PetscInt    viz_refine = 0;
782   struct SimpleBC_ bc = {
783     .nslip = {2, 2, 2},
784     .slips = {{5, 6}, {3, 4}, {1, 2}}
785   };
786   double start, cpu_time_used;
787   // Check PETSc CUDA support
788   PetscBool petschavecuda, setmemtyperequest = PETSC_FALSE;
789   // *INDENT-OFF*
790   #ifdef PETSC_HAVE_CUDA
791   petschavecuda = PETSC_TRUE;
792   #else
793   petschavecuda = PETSC_FALSE;
794   #endif
795   // *INDENT-ON*
796 
797   // Create the libCEED contexts
798   PetscScalar meter      = 1e-2;     // 1 meter in scaled length units
799   PetscScalar second     = 1e-2;     // 1 second in scaled time units
800   PetscScalar kilogram   = 1e-6;     // 1 kilogram in scaled mass units
801   PetscScalar Kelvin     = 1;        // 1 Kelvin in scaled temperature units
802   CeedScalar theta0      = 300.;     // K
803   CeedScalar thetaC      = -15.;     // K
804   CeedScalar P0          = 1.e5;     // Pa
805   CeedScalar N           = 0.01;     // 1/s
806   CeedScalar cv          = 717.;     // J/(kg K)
807   CeedScalar cp          = 1004.;    // J/(kg K)
808   CeedScalar g           = 9.81;     // m/s^2
809   CeedScalar lambda      = -2./3.;   // -
810   CeedScalar mu          = 75.;      // Pa s, dynamic viscosity
811   // mu = 75 is not physical for air, but is good for numerical stability
812   CeedScalar k           = 0.02638;  // W/(m K)
813   CeedScalar CtauS       = 0.;       // dimensionless
814   CeedScalar strong_form = 0.;      // [0,1]
815   PetscScalar lx         = 8000.;    // m
816   PetscScalar ly         = 8000.;    // m
817   PetscScalar lz         = 4000.;    // m
818   CeedScalar rc          = 1000.;    // m (Radius of bubble)
819   PetscScalar resx       = 1000.;    // m (resolution in x)
820   PetscScalar resy       = 1000.;    // m (resolution in y)
821   PetscScalar resz       = 1000.;    // m (resolution in z)
822   PetscInt outputfreq    = 10;       // -
823   PetscInt contsteps     = 0;        // -
824   PetscInt degree        = 1;        // -
825   PetscInt qextra        = 2;        // -
826   PetscInt qextraSur     = 2;        // -
827   PetscReal center[3], dc_axis[3] = {0, 0, 0};
828 
829   ierr = PetscInitialize(&argc, &argv, NULL, help);
830   if (ierr) return ierr;
831 
832   // Allocate PETSc context
833   ierr = PetscCalloc1(1, &user); CHKERRQ(ierr);
834   ierr = PetscMalloc1(1, &units); CHKERRQ(ierr);
835 
836   // Parse command line options
837   comm = PETSC_COMM_WORLD;
838   ierr = PetscOptionsBegin(comm, NULL, "Navier-Stokes in PETSc with libCEED",
839                            NULL); CHKERRQ(ierr);
840   ierr = PetscOptionsString("-ceed", "CEED resource specifier",
841                             NULL, ceedresource, ceedresource,
842                             sizeof(ceedresource), NULL); CHKERRQ(ierr);
843   testChoice = TEST_NONE;
844   ierr = PetscOptionsEnum("-test", "Run tests", NULL,
845                           testTypes, (PetscEnum)testChoice,
846                           (PetscEnum *)&testChoice,
847                           NULL); CHKERRQ(ierr);
848   test = &testOptions[testChoice];
849   problemChoice = NS_DENSITY_CURRENT;
850   ierr = PetscOptionsEnum("-problem", "Problem to solve", NULL,
851                           problemTypes, (PetscEnum)problemChoice,
852                           (PetscEnum *)&problemChoice, NULL); CHKERRQ(ierr);
853   problem = &problemOptions[problemChoice];
854   ierr = PetscOptionsEnum("-stab", "Stabilization method", NULL,
855                           StabilizationTypes, (PetscEnum)(stab = STAB_NONE),
856                           (PetscEnum *)&stab, NULL); CHKERRQ(ierr);
857   ierr = PetscOptionsBool("-implicit", "Use implicit (IFunction) formulation",
858                           NULL, implicit=PETSC_FALSE, &implicit, NULL);
859   CHKERRQ(ierr);
860   if (!implicit && stab != STAB_NONE) {
861     ierr = PetscPrintf(comm, "Warning! Use -stab only with -implicit\n");
862     CHKERRQ(ierr);
863   }
864   {
865     PetscInt len;
866     PetscBool flg;
867     ierr = PetscOptionsIntArray("-bc_outflow",
868                               "Use outflow boundary conditions on this list of faces",
869                               NULL, bc.outflow,
870                               (len = sizeof(bc.outflow) / sizeof(bc.outflow[0]),
871                               &len), &flg); CHKERRQ(ierr);
872     if (flg) {
873       bc.noutflow = len;
874       // Using outflow boundaries disables automatic wall/slip boundaries (they must be set explicitly)
875       bc.nwall = 0;
876       bc.nslip[0] = bc.nslip[1] = bc.nslip[2] = 0;
877     }
878     ierr = PetscOptionsIntArray("-bc_wall",
879                                 "Use wall boundary conditions on this list of faces",
880                                 NULL, bc.walls,
881                                 (len = sizeof(bc.walls) / sizeof(bc.walls[0]),
882                                  &len), &flg); CHKERRQ(ierr);
883     if (flg) {
884       bc.nwall = len;
885       // Using a no-slip wall disables automatic slip walls (they must be set explicitly)
886       bc.nslip[0] = bc.nslip[1] = bc.nslip[2] = 0;
887     }
888     for (PetscInt j=0; j<3; j++) {
889       const char *flags[3] = {"-bc_slip_x", "-bc_slip_y", "-bc_slip_z"};
890       ierr = PetscOptionsIntArray(flags[j],
891                                   "Use slip boundary conditions on this list of faces",
892                                   NULL, bc.slips[j],
893                                   (len = sizeof(bc.slips[j]) / sizeof(bc.slips[j][0]),
894                                    &len), &flg);
895       CHKERRQ(ierr);
896       if (flg) {
897         bc.nslip[j] = len;
898         bc.userbc = PETSC_TRUE;
899       }
900     }
901   }
902   ierr = PetscOptionsInt("-viz_refine",
903                          "Regular refinement levels for visualization",
904                          NULL, viz_refine, &viz_refine, NULL);
905   CHKERRQ(ierr);
906   ierr = PetscOptionsScalar("-units_meter", "1 meter in scaled length units",
907                             NULL, meter, &meter, NULL); CHKERRQ(ierr);
908   meter = fabs(meter);
909   ierr = PetscOptionsScalar("-units_second","1 second in scaled time units",
910                             NULL, second, &second, NULL); CHKERRQ(ierr);
911   second = fabs(second);
912   ierr = PetscOptionsScalar("-units_kilogram","1 kilogram in scaled mass units",
913                             NULL, kilogram, &kilogram, NULL); CHKERRQ(ierr);
914   kilogram = fabs(kilogram);
915   ierr = PetscOptionsScalar("-units_Kelvin",
916                             "1 Kelvin in scaled temperature units",
917                             NULL, Kelvin, &Kelvin, NULL); CHKERRQ(ierr);
918   Kelvin = fabs(Kelvin);
919   ierr = PetscOptionsScalar("-theta0", "Reference potential temperature",
920                             NULL, theta0, &theta0, NULL); CHKERRQ(ierr);
921   ierr = PetscOptionsScalar("-thetaC", "Perturbation of potential temperature",
922                             NULL, thetaC, &thetaC, NULL); CHKERRQ(ierr);
923   ierr = PetscOptionsScalar("-P0", "Atmospheric pressure",
924                             NULL, P0, &P0, NULL); CHKERRQ(ierr);
925   ierr = PetscOptionsScalar("-N", "Brunt-Vaisala frequency",
926                             NULL, N, &N, NULL); CHKERRQ(ierr);
927   ierr = PetscOptionsScalar("-cv", "Heat capacity at constant volume",
928                             NULL, cv, &cv, NULL); CHKERRQ(ierr);
929   ierr = PetscOptionsScalar("-cp", "Heat capacity at constant pressure",
930                             NULL, cp, &cp, NULL); CHKERRQ(ierr);
931   ierr = PetscOptionsScalar("-g", "Gravitational acceleration",
932                             NULL, g, &g, NULL); CHKERRQ(ierr);
933   ierr = PetscOptionsScalar("-lambda",
934                             "Stokes hypothesis second viscosity coefficient",
935                             NULL, lambda, &lambda, NULL); CHKERRQ(ierr);
936   ierr = PetscOptionsScalar("-mu", "Shear dynamic viscosity coefficient",
937                             NULL, mu, &mu, NULL); CHKERRQ(ierr);
938   ierr = PetscOptionsScalar("-k", "Thermal conductivity",
939                             NULL, k, &k, NULL); CHKERRQ(ierr);
940   ierr = PetscOptionsScalar("-CtauS",
941                             "Scale coefficient for tau (nondimensional)",
942                             NULL, CtauS, &CtauS, NULL); CHKERRQ(ierr);
943   if (stab == STAB_NONE && CtauS != 0) {
944     ierr = PetscPrintf(comm,
945                        "Warning! Use -CtauS only with -stab su or -stab supg\n");
946     CHKERRQ(ierr);
947   }
948   ierr = PetscOptionsScalar("-strong_form",
949                             "Strong (1) or weak/integrated by parts (0) advection residual",
950                             NULL, strong_form, &strong_form, NULL);
951   CHKERRQ(ierr);
952   if (problemChoice == NS_DENSITY_CURRENT && (CtauS != 0 || strong_form != 0)) {
953     ierr = PetscPrintf(comm,
954                        "Warning! Problem density_current does not support -CtauS or -strong_form\n");
955     CHKERRQ(ierr);
956   }
957   ierr = PetscOptionsScalar("-lx", "Length scale in x direction",
958                             NULL, lx, &lx, NULL); CHKERRQ(ierr);
959   ierr = PetscOptionsScalar("-ly", "Length scale in y direction",
960                             NULL, ly, &ly, NULL); CHKERRQ(ierr);
961   ierr = PetscOptionsScalar("-lz", "Length scale in z direction",
962                             NULL, lz, &lz, NULL); CHKERRQ(ierr);
963   ierr = PetscOptionsScalar("-rc", "Characteristic radius of thermal bubble",
964                             NULL, rc, &rc, NULL); CHKERRQ(ierr);
965   ierr = PetscOptionsScalar("-resx","Target resolution in x",
966                             NULL, resx, &resx, NULL); CHKERRQ(ierr);
967   ierr = PetscOptionsScalar("-resy","Target resolution in y",
968                             NULL, resy, &resy, NULL); CHKERRQ(ierr);
969   ierr = PetscOptionsScalar("-resz","Target resolution in z",
970                             NULL, resz, &resz, NULL); CHKERRQ(ierr);
971   PetscInt n = problem->dim;
972   center[0] = 0.5 * lx;
973   center[1] = 0.5 * ly;
974   center[2] = 0.5 * lz;
975   ierr = PetscOptionsRealArray("-center", "Location of bubble center",
976                                NULL, center, &n, NULL); CHKERRQ(ierr);
977   n = problem->dim;
978   ierr = PetscOptionsRealArray("-dc_axis",
979                                "Axis of density current cylindrical anomaly, or {0,0,0} for spherically symmetric",
980                                NULL, dc_axis, &n, NULL); CHKERRQ(ierr);
981   {
982     PetscReal norm = PetscSqrtReal(PetscSqr(dc_axis[0]) +
983                                    PetscSqr(dc_axis[1]) + PetscSqr(dc_axis[2]));
984     if (norm > 0) {
985       for (int i=0; i<3; i++) dc_axis[i] /= norm;
986     }
987   }
988   ierr = PetscOptionsInt("-output_freq",
989                          "Frequency of output, in number of steps",
990                          NULL, outputfreq, &outputfreq, NULL); CHKERRQ(ierr);
991   ierr = PetscOptionsInt("-continue", "Continue from previous solution",
992                          NULL, contsteps, &contsteps, NULL); CHKERRQ(ierr);
993   ierr = PetscOptionsInt("-degree", "Polynomial degree of finite elements",
994                          NULL, degree, &degree, NULL); CHKERRQ(ierr);
995   ierr = PetscOptionsInt("-qextra", "Number of extra quadrature points",
996                          NULL, qextra, &qextra, NULL); CHKERRQ(ierr);
997   ierr = PetscStrncpy(user->outputfolder, ".", 2); CHKERRQ(ierr);
998   ierr = PetscOptionsString("-of", "Output folder",
999                             NULL, user->outputfolder, user->outputfolder,
1000                             sizeof(user->outputfolder), NULL); CHKERRQ(ierr);
1001   memtyperequested = petschavecuda ? CEED_MEM_DEVICE : CEED_MEM_HOST;
1002   ierr = PetscOptionsEnum("-memtype",
1003                           "CEED MemType requested", NULL,
1004                           memTypes, (PetscEnum)memtyperequested,
1005                           (PetscEnum *)&memtyperequested, &setmemtyperequest);
1006   CHKERRQ(ierr);
1007   ierr = PetscOptionsEnd(); CHKERRQ(ierr);
1008 
1009   // Define derived units
1010   Pascal = kilogram / (meter * PetscSqr(second));
1011   JperkgK =  PetscSqr(meter) / (PetscSqr(second) * Kelvin);
1012   mpersquareds = meter / PetscSqr(second);
1013   WpermK = kilogram * meter / (pow(second,3) * Kelvin);
1014   kgpercubicm = kilogram / pow(meter,3);
1015   kgpersquaredms = kilogram / (PetscSqr(meter) * second);
1016   Joulepercubicm = kilogram / (meter * PetscSqr(second));
1017 
1018   // Scale variables to desired units
1019   theta0 *= Kelvin;
1020   thetaC *= Kelvin;
1021   P0 *= Pascal;
1022   N *= (1./second);
1023   cv *= JperkgK;
1024   cp *= JperkgK;
1025   Rd = cp - cv;
1026   g *= mpersquareds;
1027   mu *= Pascal * second;
1028   k *= WpermK;
1029   lx = fabs(lx) * meter;
1030   ly = fabs(ly) * meter;
1031   lz = fabs(lz) * meter;
1032   rc = fabs(rc) * meter;
1033   resx = fabs(resx) * meter;
1034   resy = fabs(resy) * meter;
1035   resz = fabs(resz) * meter;
1036   for (int i=0; i<3; i++) center[i] *= meter;
1037 
1038   const CeedInt dim = problem->dim, ncompx = problem->dim,
1039                 qdatasizeVol = problem->qdatasizeVol;
1040   // Set up the libCEED context
1041   struct SetupContext_ ctxSetup = {
1042     .theta0 = theta0,
1043     .thetaC = thetaC,
1044     .P0 = P0,
1045     .N = N,
1046     .cv = cv,
1047     .cp = cp,
1048     .Rd = Rd,
1049     .g = g,
1050     .rc = rc,
1051     .lx = lx,
1052     .ly = ly,
1053     .lz = lz,
1054     .center[0] = center[0],
1055     .center[1] = center[1],
1056     .center[2] = center[2],
1057     .dc_axis[0] = dc_axis[0],
1058     .dc_axis[1] = dc_axis[1],
1059     .dc_axis[2] = dc_axis[2],
1060     .time = 0,
1061   };
1062 
1063   // Create the mesh
1064   {
1065     const PetscReal scale[3] = {lx, ly, lz};
1066     ierr = DMPlexCreateBoxMesh(comm, dim, PETSC_FALSE, NULL, NULL, scale,
1067                                NULL, PETSC_TRUE, &dm);
1068     CHKERRQ(ierr);
1069   }
1070 
1071   // Distribute the mesh over processes
1072   {
1073     DM               dmDist = NULL;
1074     PetscPartitioner part;
1075 
1076     ierr = DMPlexGetPartitioner(dm, &part); CHKERRQ(ierr);
1077     ierr = PetscPartitionerSetFromOptions(part); CHKERRQ(ierr);
1078     ierr = DMPlexDistribute(dm, 0, NULL, &dmDist); CHKERRQ(ierr);
1079     if (dmDist) {
1080       ierr = DMDestroy(&dm); CHKERRQ(ierr);
1081       dm  = dmDist;
1082     }
1083   }
1084   ierr = DMViewFromOptions(dm, NULL, "-dm_view"); CHKERRQ(ierr);
1085 
1086   // Setup DM
1087   ierr = DMLocalizeCoordinates(dm); CHKERRQ(ierr);
1088   ierr = DMSetFromOptions(dm); CHKERRQ(ierr);
1089   ierr = SetUpDM(dm, problem, degree, &bc, &ctxSetup); CHKERRQ(ierr);
1090 
1091   // Refine DM for high-order viz
1092   dmviz = NULL;
1093   interpviz = NULL;
1094   if (viz_refine) {
1095     DM dmhierarchy[viz_refine+1];
1096 
1097     ierr = DMPlexSetRefinementUniform(dm, PETSC_TRUE); CHKERRQ(ierr);
1098     dmhierarchy[0] = dm;
1099     for (PetscInt i = 0, d = degree; i < viz_refine; i++) {
1100       Mat interp_next;
1101 
1102       ierr = DMRefine(dmhierarchy[i], MPI_COMM_NULL, &dmhierarchy[i+1]);
1103       CHKERRQ(ierr);
1104       ierr = DMSetCoarseDM(dmhierarchy[i+1], dmhierarchy[i]); CHKERRQ(ierr);
1105       d = (d + 1) / 2;
1106       if (i + 1 == viz_refine) d = 1;
1107       ierr = SetUpDM(dmhierarchy[i+1], problem, d, &bc, &ctxSetup); CHKERRQ(ierr);
1108       ierr = DMCreateInterpolation(dmhierarchy[i], dmhierarchy[i+1],
1109                                    &interp_next, NULL); CHKERRQ(ierr);
1110       if (!i) interpviz = interp_next;
1111       else {
1112         Mat C;
1113         ierr = MatMatMult(interp_next, interpviz, MAT_INITIAL_MATRIX,
1114                           PETSC_DECIDE, &C); CHKERRQ(ierr);
1115         ierr = MatDestroy(&interp_next); CHKERRQ(ierr);
1116         ierr = MatDestroy(&interpviz); CHKERRQ(ierr);
1117         interpviz = C;
1118       }
1119     }
1120     for (PetscInt i=1; i<viz_refine; i++) {
1121       ierr = DMDestroy(&dmhierarchy[i]); CHKERRQ(ierr);
1122     }
1123     dmviz = dmhierarchy[viz_refine];
1124   }
1125   ierr = DMCreateGlobalVector(dm, &Q); CHKERRQ(ierr);
1126   ierr = DMGetLocalVector(dm, &Qloc); CHKERRQ(ierr);
1127   ierr = VecGetSize(Qloc, &lnodes); CHKERRQ(ierr);
1128   lnodes /= ncompq;
1129 
1130   // Initialize CEED
1131   CeedInit(ceedresource, &ceed);
1132   // Set memtype
1133   CeedMemType memtypebackend;
1134   CeedGetPreferredMemType(ceed, &memtypebackend);
1135   // Check memtype compatibility
1136   if (!setmemtyperequest)
1137     memtyperequested = memtypebackend;
1138   else if (!petschavecuda && memtyperequested == CEED_MEM_DEVICE)
1139     SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_SUP_SYS,
1140              "PETSc was not built with CUDA. "
1141              "Requested MemType CEED_MEM_DEVICE is not supported.", NULL);
1142 
1143   // Set number of 1D nodes and quadrature points
1144   numP = degree + 1;
1145   numQ = numP + qextra;
1146 
1147     // Print summary
1148   if (testChoice == TEST_NONE) {
1149     CeedInt gdofs, odofs;
1150     int comm_size;
1151     char box_faces_str[PETSC_MAX_PATH_LEN] = "NONE";
1152     ierr = VecGetSize(Q, &gdofs); CHKERRQ(ierr);
1153     ierr = VecGetLocalSize(Q, &odofs); CHKERRQ(ierr);
1154     gnodes = gdofs/ncompq;
1155     ierr = MPI_Comm_size(comm, &comm_size); CHKERRQ(ierr);
1156     ierr = PetscOptionsGetString(NULL, NULL, "-dm_plex_box_faces", box_faces_str,
1157                                  sizeof(box_faces_str), NULL); CHKERRQ(ierr);
1158     const char *usedresource;
1159     CeedGetResource(ceed, &usedresource);
1160 
1161     ierr = PetscPrintf(comm,
1162                        "\n-- Navier-Stokes solver - libCEED + PETSc --\n"
1163                        "  rank(s)                              : %d\n"
1164                        "  Problem:\n"
1165                        "    Problem Name                       : %s\n"
1166                        "    Stabilization                      : %s\n"
1167                        "  PETSc:\n"
1168                        "    Box Faces                          : %s\n"
1169                        "  libCEED:\n"
1170                        "    libCEED Backend                    : %s\n"
1171                        "    libCEED Backend MemType            : %s\n"
1172                        "    libCEED User Requested MemType     : %s\n"
1173                        "  Mesh:\n"
1174                        "    Number of 1D Basis Nodes (P)       : %d\n"
1175                        "    Number of 1D Quadrature Points (Q) : %d\n"
1176                        "    Global DoFs                        : %D\n"
1177                        "    Owned DoFs                         : %D\n"
1178                        "    DoFs per node                      : %D\n"
1179                        "    Global nodes                       : %D\n"
1180                        "    Owned nodes                        : %D\n",
1181                        comm_size, problemTypes[problemChoice],
1182                        StabilizationTypes[stab], box_faces_str, usedresource,
1183                        CeedMemTypes[memtypebackend],
1184                        (setmemtyperequest) ?
1185                        CeedMemTypes[memtyperequested] : "none",
1186                        numP, numQ, gdofs, odofs, ncompq, gnodes, lnodes);
1187     CHKERRQ(ierr);
1188   }
1189 
1190   // Set up global mass vector
1191   ierr = VecDuplicate(Q, &user->M); CHKERRQ(ierr);
1192 
1193   // Set up libCEED
1194   // CEED Bases
1195   CeedInit(ceedresource, &ceed);
1196   CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompq, numP, numQ, CEED_GAUSS,
1197                                   &basisq);
1198   CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompx, 2, numQ, CEED_GAUSS,
1199                                   &basisx);
1200   CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompx, 2, numP,
1201                                   CEED_GAUSS_LOBATTO, &basisxc);
1202   ierr = DMGetCoordinateDM(dm, &dmcoord); CHKERRQ(ierr);
1203   ierr = DMPlexSetClosurePermutationTensor(dmcoord, PETSC_DETERMINE, NULL);
1204   CHKERRQ(ierr);
1205 
1206   // CEED Restrictions
1207   ierr = GetRestrictionForDomain(ceed, dm, ncompx, dim, 0, 0, 0, numP, numQ,
1208                                  qdatasizeVol, &restrictq, &restrictx,
1209                                  &restrictqdi); CHKERRQ(ierr);
1210 
1211   ierr = DMGetCoordinatesLocal(dm, &Xloc); CHKERRQ(ierr);
1212   ierr = CreateVectorFromPetscVec(ceed, Xloc, &xcorners); CHKERRQ(ierr);
1213 
1214   // Create the CEED vectors that will be needed in setup
1215   CeedInt NqptsVol;
1216   CeedBasisGetNumQuadraturePoints(basisq, &NqptsVol);
1217   CeedElemRestrictionGetNumElements(restrictq, &localNelemVol);
1218   CeedVectorCreate(ceed, qdatasizeVol*localNelemVol*NqptsVol, &qdata);
1219   CeedElemRestrictionCreateVector(restrictq, &q0ceed, NULL);
1220 
1221   // Create the Q-function that builds the quadrature data for the NS operator
1222   CeedQFunctionCreateInterior(ceed, 1, problem->setupVol, problem->setupVol_loc,
1223                               &qf_setupVol);
1224   CeedQFunctionAddInput(qf_setupVol, "dx", ncompx*dim, CEED_EVAL_GRAD);
1225   CeedQFunctionAddInput(qf_setupVol, "weight", 1, CEED_EVAL_WEIGHT);
1226   CeedQFunctionAddOutput(qf_setupVol, "qdata", qdatasizeVol, CEED_EVAL_NONE);
1227 
1228   // Create the Q-function that sets the ICs of the operator
1229   CeedQFunctionCreateInterior(ceed, 1, problem->ics, problem->ics_loc, &qf_ics);
1230   CeedQFunctionAddInput(qf_ics, "x", ncompx, CEED_EVAL_INTERP);
1231   CeedQFunctionAddOutput(qf_ics, "q0", ncompq, CEED_EVAL_NONE);
1232 
1233   qf_rhsVol = NULL;
1234   if (problem->applyVol_rhs) { // Create the Q-function that defines the action of the RHS operator
1235     CeedQFunctionCreateInterior(ceed, 1, problem->applyVol_rhs,
1236                                 problem->applyVol_rhs_loc, &qf_rhsVol);
1237     CeedQFunctionAddInput(qf_rhsVol, "q", ncompq, CEED_EVAL_INTERP);
1238     CeedQFunctionAddInput(qf_rhsVol, "dq", ncompq*dim, CEED_EVAL_GRAD);
1239     CeedQFunctionAddInput(qf_rhsVol, "qdata", qdatasizeVol, CEED_EVAL_NONE);
1240     CeedQFunctionAddInput(qf_rhsVol, "x", ncompx, CEED_EVAL_INTERP);
1241     CeedQFunctionAddOutput(qf_rhsVol, "v", ncompq, CEED_EVAL_INTERP);
1242     CeedQFunctionAddOutput(qf_rhsVol, "dv", ncompq*dim, CEED_EVAL_GRAD);
1243   }
1244 
1245   qf_ifunctionVol = NULL;
1246   if (problem->applyVol_ifunction) { // Create the Q-function that defines the action of the IFunction
1247     CeedQFunctionCreateInterior(ceed, 1, problem->applyVol_ifunction,
1248                                 problem->applyVol_ifunction_loc, &qf_ifunctionVol);
1249     CeedQFunctionAddInput(qf_ifunctionVol, "q", ncompq, CEED_EVAL_INTERP);
1250     CeedQFunctionAddInput(qf_ifunctionVol, "dq", ncompq*dim, CEED_EVAL_GRAD);
1251     CeedQFunctionAddInput(qf_ifunctionVol, "qdot", ncompq, CEED_EVAL_INTERP);
1252     CeedQFunctionAddInput(qf_ifunctionVol, "qdata", qdatasizeVol, CEED_EVAL_NONE);
1253     CeedQFunctionAddInput(qf_ifunctionVol, "x", ncompx, CEED_EVAL_INTERP);
1254     CeedQFunctionAddOutput(qf_ifunctionVol, "v", ncompq, CEED_EVAL_INTERP);
1255     CeedQFunctionAddOutput(qf_ifunctionVol, "dv", ncompq*dim, CEED_EVAL_GRAD);
1256   }
1257 
1258   // Create the operator that builds the quadrature data for the NS operator
1259   CeedOperatorCreate(ceed, qf_setupVol, NULL, NULL, &op_setupVol);
1260   CeedOperatorSetField(op_setupVol, "dx", restrictx, basisx, CEED_VECTOR_ACTIVE);
1261   CeedOperatorSetField(op_setupVol, "weight", CEED_ELEMRESTRICTION_NONE,
1262                        basisx, CEED_VECTOR_NONE);
1263   CeedOperatorSetField(op_setupVol, "qdata", restrictqdi,
1264                        CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
1265 
1266   // Create the operator that sets the ICs
1267   CeedOperatorCreate(ceed, qf_ics, NULL, NULL, &op_ics);
1268   CeedOperatorSetField(op_ics, "x", restrictx, basisxc, CEED_VECTOR_ACTIVE);
1269   CeedOperatorSetField(op_ics, "q0", restrictq,
1270                        CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
1271 
1272   CeedElemRestrictionCreateVector(restrictq, &user->qceed, NULL);
1273   CeedElemRestrictionCreateVector(restrictq, &user->qdotceed, NULL);
1274   CeedElemRestrictionCreateVector(restrictq, &user->gceed, NULL);
1275 
1276   if (qf_rhsVol) { // Create the RHS physics operator
1277     CeedOperator op;
1278     CeedOperatorCreate(ceed, qf_rhsVol, NULL, NULL, &op);
1279     CeedOperatorSetField(op, "q", restrictq, basisq, CEED_VECTOR_ACTIVE);
1280     CeedOperatorSetField(op, "dq", restrictq, basisq, CEED_VECTOR_ACTIVE);
1281     CeedOperatorSetField(op, "qdata", restrictqdi,
1282                          CEED_BASIS_COLLOCATED, qdata);
1283     CeedOperatorSetField(op, "x", restrictx, basisx, xcorners);
1284     CeedOperatorSetField(op, "v", restrictq, basisq, CEED_VECTOR_ACTIVE);
1285     CeedOperatorSetField(op, "dv", restrictq, basisq, CEED_VECTOR_ACTIVE);
1286     user->op_rhs_vol = op;
1287   }
1288 
1289   if (qf_ifunctionVol) { // Create the IFunction operator
1290     CeedOperator op;
1291     CeedOperatorCreate(ceed, qf_ifunctionVol, NULL, NULL, &op);
1292     CeedOperatorSetField(op, "q", restrictq, basisq, CEED_VECTOR_ACTIVE);
1293     CeedOperatorSetField(op, "dq", restrictq, basisq, CEED_VECTOR_ACTIVE);
1294     CeedOperatorSetField(op, "qdot", restrictq, basisq, user->qdotceed);
1295     CeedOperatorSetField(op, "qdata", restrictqdi,
1296                          CEED_BASIS_COLLOCATED, qdata);
1297     CeedOperatorSetField(op, "x", restrictx, basisx, xcorners);
1298     CeedOperatorSetField(op, "v", restrictq, basisq, CEED_VECTOR_ACTIVE);
1299     CeedOperatorSetField(op, "dv", restrictq, basisq, CEED_VECTOR_ACTIVE);
1300     user->op_ifunction_vol = op;
1301   }
1302 
1303   //--------------------------------------------------------------------------------------//
1304   // Outflow Boundary Condition
1305   //--------------------------------------------------------------------------------------//
1306   // Set up CEED for the boundaries
1307   CeedInt numP_Sur, numQ_Sur;
1308   CeedInt height = 1;
1309   CeedInt dimSur = dim - height;
1310   numP_Sur = degree + 1;
1311   numQ_Sur = numP_Sur + qextraSur;
1312   const CeedInt qdatasizeSur = problem->qdatasizeSur;
1313   CeedBasis basisxSur, basisxcSur, basisqSur;
1314   CeedElemRestriction restrictxSur[6], restrictqSur[6], restrictqdiSur[6];
1315   CeedInt NqptsSur;
1316   PetscInt localNelemSur[6];
1317   CeedVector qdataSur[6];
1318   CeedQFunction qf_setupSur, qf_rhsSur, qf_ifunctionSur;
1319   CeedOperator op_setupSur[6];
1320   PetscInt numOutFlow = bc.noutflow;
1321   DMLabel domainLabel;
1322 
1323   // Get Label for the boundaries
1324   ierr = DMGetLabel(dm, "Face Sets", &domainLabel); CHKERRQ(ierr);
1325 
1326   // CEED bases for the boundaries
1327   CeedBasisCreateTensorH1Lagrange(ceed, dimSur, ncompq, numP_Sur, numQ_Sur, CEED_GAUSS,
1328                                   &basisqSur);
1329   CeedBasisCreateTensorH1Lagrange(ceed, dimSur, ncompx, 2, numQ_Sur, CEED_GAUSS,
1330                                   &basisxSur);
1331   CeedBasisCreateTensorH1Lagrange(ceed, dimSur, ncompx, 2, numP_Sur,
1332                                   CEED_GAUSS_LOBATTO, &basisxcSur);
1333   CeedBasisGetNumQuadraturePoints(basisqSur, &NqptsSur);
1334 
1335   // Create the Q-function that builds the quadrature data for the Surface operator
1336   CeedQFunctionCreateInterior(ceed, 1, problem->setupSur, problem->setupSur_loc,
1337                               &qf_setupSur);
1338   CeedQFunctionAddInput(qf_setupSur, "dx", ncompx*dimSur, CEED_EVAL_GRAD);
1339   CeedQFunctionAddInput(qf_setupSur, "weight", 1, CEED_EVAL_WEIGHT);
1340   CeedQFunctionAddOutput(qf_setupSur, "qdataSur", qdatasizeSur, CEED_EVAL_NONE);
1341 
1342   qf_rhsSur = NULL;
1343   if (problem->applySur_rhs) { // Create the Q-function that defines the action of the RHS operator on the Surface
1344     CeedQFunctionCreateInterior(ceed, 1, problem->applySur_rhs,
1345                                 problem->applySur_rhs_loc, &qf_rhsSur);
1346     CeedQFunctionAddInput(qf_rhsSur, "q", ncompq, CEED_EVAL_INTERP);
1347     CeedQFunctionAddInput(qf_rhsSur, "qdataSur", qdatasizeSur, CEED_EVAL_NONE);
1348     CeedQFunctionAddInput(qf_rhsSur, "x", ncompx, CEED_EVAL_INTERP);
1349     CeedQFunctionAddOutput(qf_rhsSur, "v", ncompq, CEED_EVAL_INTERP);
1350   }
1351   qf_ifunctionSur = NULL;
1352   if (problem->applySur_ifunction) { // Create the Q-function that defines the action of the IFunction
1353     CeedQFunctionCreateInterior(ceed, 1, problem->applySur_ifunction,
1354                                 problem->applySur_ifunction_loc, &qf_ifunctionSur);
1355     CeedQFunctionAddInput(qf_ifunctionSur, "q", ncompq, CEED_EVAL_INTERP);
1356     CeedQFunctionAddInput(qf_ifunctionSur, "qdataSur", qdatasizeSur, CEED_EVAL_NONE);
1357     CeedQFunctionAddInput(qf_ifunctionSur, "x", ncompx, CEED_EVAL_INTERP);
1358     CeedQFunctionAddOutput(qf_ifunctionSur, "v", ncompq, CEED_EVAL_INTERP);
1359   }
1360   // Create CEED Operator for each face
1361   for(CeedInt i=0; i<numOutFlow; i++){
1362     ierr = GetRestrictionForDomain(ceed, dm, ncompx, dimSur, height, domainLabel, bc.outflow[i], numP_Sur,
1363                                    numQ_Sur, qdatasizeSur, &restrictqSur[i], &restrictxSur[i],
1364                                    &restrictqdiSur[i]); CHKERRQ(ierr);
1365     // Create the CEED vectors that will be needed in setup
1366     CeedElemRestrictionGetNumElements(restrictqSur[i], &localNelemSur[i]);
1367     CeedVectorCreate(ceed, qdatasizeSur*localNelemSur[i]*NqptsSur, &qdataSur[i]);
1368 
1369     // Create the operator that builds the quadrature data for the NS operator
1370     CeedOperatorCreate(ceed, qf_setupSur, NULL, NULL, &op_setupSur[i]);
1371     CeedOperatorSetField(op_setupSur[i], "dx", restrictxSur[i], basisxSur, CEED_VECTOR_ACTIVE);
1372     CeedOperatorSetField(op_setupSur[i], "weight", CEED_ELEMRESTRICTION_NONE,
1373                          basisxSur, CEED_VECTOR_NONE);
1374     CeedOperatorSetField(op_setupSur[i], "qdataSur", restrictqdiSur[i],
1375                          CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE);
1376 
1377     if (qf_rhsSur) { // Create the RHS physics operator
1378       CeedOperator op;
1379       CeedOperatorCreate(ceed, qf_rhsSur, NULL, NULL, &op);
1380       CeedOperatorSetField(op, "q", restrictqSur[i], basisqSur, CEED_VECTOR_ACTIVE);
1381       CeedOperatorSetField(op, "qdataSur", restrictqdiSur[i],
1382                            CEED_BASIS_COLLOCATED, qdataSur[i]);
1383       CeedOperatorSetField(op, "x", restrictxSur[i], basisxSur, xcorners);
1384       CeedOperatorSetField(op, "v", restrictqSur[i], basisqSur, CEED_VECTOR_ACTIVE);
1385       user->op_rhs_sur[i] = op;
1386     }
1387 
1388     if (qf_ifunctionSur) { // Create the IFunction operator
1389       CeedOperator op;
1390       CeedOperatorCreate(ceed, qf_ifunctionSur, NULL, NULL, &op);
1391       CeedOperatorSetField(op, "q", restrictqSur[i], basisqSur, CEED_VECTOR_ACTIVE);
1392       CeedOperatorSetField(op, "qdataSur", restrictqdiSur[i],
1393                            CEED_BASIS_COLLOCATED, qdataSur[i]);
1394       CeedOperatorSetField(op, "x", restrictxSur[i], basisxSur, xcorners);
1395       CeedOperatorSetField(op, "v", restrictqSur[i], basisqSur, CEED_VECTOR_ACTIVE);
1396       user->op_ifunction_sur[i] = op;
1397     }
1398   }
1399   // Composite Operaters
1400   // IFunction
1401   if (user->op_ifunction_vol) {
1402     if (numOutFlow) {
1403       // Composite Operators for the IFunction
1404       CeedCompositeOperatorCreate(ceed, &user->op_ifunction);
1405       CeedCompositeOperatorAddSub(user->op_ifunction, user->op_ifunction_vol);
1406       for(CeedInt i=0; i<numOutFlow; i++){
1407         CeedCompositeOperatorAddSub(user->op_ifunction, user->op_ifunction_sur[i]);
1408       }
1409     } else {
1410       user->op_ifunction = user->op_ifunction_vol;
1411     }
1412   }
1413   // RHS
1414   if (user->op_rhs_vol) {
1415     if (numOutFlow) {
1416       // Composite Operators for the RHS
1417       CeedCompositeOperatorCreate(ceed, &user->op_rhs);
1418       CeedCompositeOperatorAddSub(user->op_rhs, user->op_rhs_vol);
1419       for(CeedInt i=0; i<numOutFlow; i++){
1420         CeedCompositeOperatorAddSub(user->op_rhs, user->op_rhs_sur[i]);
1421       }
1422     } else {
1423       user->op_rhs = user->op_rhs_vol;
1424     }
1425   }
1426   //--------------------------------------------------------------------------------------//
1427   CeedQFunctionSetContext(qf_ics, &ctxSetup, sizeof ctxSetup);
1428   CeedScalar ctxNS[8] = {lambda, mu, k, cv, cp, g, Rd};
1429   struct Advection2dContext_ ctxAdvection2d = {
1430     .CtauS = CtauS,
1431     .strong_form = strong_form,
1432     .stabilization = stab,
1433   };
1434   switch (problemChoice) {
1435   case NS_DENSITY_CURRENT:
1436     if (qf_rhsVol) CeedQFunctionSetContext(qf_rhsVol, &ctxNS, sizeof ctxNS);
1437     if (qf_ifunctionVol) CeedQFunctionSetContext(qf_ifunctionVol, &ctxNS,
1438           sizeof ctxNS);
1439     if (qf_rhsSur) CeedQFunctionSetContext(qf_rhsSur, &ctxNS, sizeof ctxNS);
1440     if (qf_ifunctionSur) CeedQFunctionSetContext(qf_ifunctionSur, &ctxNS,
1441           sizeof ctxNS);
1442     break;
1443   case NS_ADVECTION:
1444   case NS_ADVECTION2D:
1445     if (qf_rhsVol) CeedQFunctionSetContext(qf_rhsVol, &ctxAdvection2d,
1446                                           sizeof ctxAdvection2d);
1447     if (qf_ifunctionVol) CeedQFunctionSetContext(qf_ifunctionVol, &ctxAdvection2d,
1448           sizeof ctxAdvection2d);
1449     if (qf_rhsSur) CeedQFunctionSetContext(qf_rhsSur, &ctxAdvection2d,
1450                                           sizeof ctxAdvection2d);
1451     if (qf_ifunctionSur) CeedQFunctionSetContext(qf_ifunctionSur, &ctxAdvection2d,
1452           sizeof ctxAdvection2d);
1453   }
1454 
1455   // Set up PETSc context
1456   // Set up units structure
1457   units->meter = meter;
1458   units->kilogram = kilogram;
1459   units->second = second;
1460   units->Kelvin = Kelvin;
1461   units->Pascal = Pascal;
1462   units->JperkgK = JperkgK;
1463   units->mpersquareds = mpersquareds;
1464   units->WpermK = WpermK;
1465   units->kgpercubicm = kgpercubicm;
1466   units->kgpersquaredms = kgpersquaredms;
1467   units->Joulepercubicm = Joulepercubicm;
1468 
1469   // Set up user structure
1470   user->comm = comm;
1471   user->outputfreq = outputfreq;
1472   user->contsteps = contsteps;
1473   user->units = units;
1474   user->dm = dm;
1475   user->dmviz = dmviz;
1476   user->interpviz = interpviz;
1477   user->ceed = ceed;
1478 
1479   // Calculate qdata and ICs
1480   // Set up state global and local vectors
1481   ierr = VecZeroEntries(Q); CHKERRQ(ierr);
1482 
1483   ierr = VectorPlacePetscVec(q0ceed, Qloc); CHKERRQ(ierr);
1484 
1485   // Apply Setup Ceed Operators
1486   ierr = VectorPlacePetscVec(xcorners, Xloc); CHKERRQ(ierr);
1487   CeedOperatorApply(op_setupVol, xcorners, qdata, CEED_REQUEST_IMMEDIATE);
1488   ierr = ComputeLumpedMassMatrix(ceed, dm, restrictq, basisq, restrictqdi, qdata,
1489                                  user->M); CHKERRQ(ierr);
1490 
1491   ierr = ICs_FixMultiplicity(op_ics, xcorners, q0ceed, dm, Qloc, Q, restrictq,
1492                              &ctxSetup, 0.0); CHKERRQ(ierr);
1493   if (1) { // Record boundary values from initial condition and override DMPlexInsertBoundaryValues()
1494     // We use this for the main simulation DM because the reference DMPlexInsertBoundaryValues() is very slow.  If we
1495     // disable this, we should still get the same results due to the problem->bc function, but with potentially much
1496     // slower execution.
1497     Vec Qbc;
1498     ierr = DMGetNamedLocalVector(dm, "Qbc", &Qbc); CHKERRQ(ierr);
1499     ierr = VecCopy(Qloc, Qbc); CHKERRQ(ierr);
1500     ierr = VecZeroEntries(Qloc); CHKERRQ(ierr);
1501     ierr = DMGlobalToLocal(dm, Q, INSERT_VALUES, Qloc); CHKERRQ(ierr);
1502     ierr = VecAXPY(Qbc, -1., Qloc); CHKERRQ(ierr);
1503     ierr = DMRestoreNamedLocalVector(dm, "Qbc", &Qbc); CHKERRQ(ierr);
1504     ierr = PetscObjectComposeFunction((PetscObject)dm,
1505                                       "DMPlexInsertBoundaryValues_C", DMPlexInsertBoundaryValues_NS);
1506     CHKERRQ(ierr);
1507   }
1508 
1509   MPI_Comm_rank(comm, &rank);
1510   if (!rank) {ierr = PetscMkdir(user->outputfolder); CHKERRQ(ierr);}
1511   // Gather initial Q values
1512   // In case of continuation of simulation, set up initial values from binary file
1513   if (contsteps) { // continue from existent solution
1514     PetscViewer viewer;
1515     char filepath[PETSC_MAX_PATH_LEN];
1516     // Read input
1517     ierr = PetscSNPrintf(filepath, sizeof filepath, "%s/ns-solution.bin",
1518                          user->outputfolder);
1519     CHKERRQ(ierr);
1520     ierr = PetscViewerBinaryOpen(comm, filepath, FILE_MODE_READ, &viewer);
1521     CHKERRQ(ierr);
1522     ierr = VecLoad(Q, viewer); CHKERRQ(ierr);
1523     ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr);
1524   }
1525   ierr = DMRestoreLocalVector(dm, &Qloc); CHKERRQ(ierr);
1526 
1527 // Create and setup TS
1528   ierr = TSCreate(comm, &ts); CHKERRQ(ierr);
1529   ierr = TSSetDM(ts, dm); CHKERRQ(ierr);
1530   if (implicit) {
1531     ierr = TSSetType(ts, TSBDF); CHKERRQ(ierr);
1532     if (user->op_ifunction) {
1533       ierr = TSSetIFunction(ts, NULL, IFunction_NS, &user); CHKERRQ(ierr);
1534     } else {                    // Implicit integrators can fall back to using an RHSFunction
1535       ierr = TSSetRHSFunction(ts, NULL, RHS_NS, &user); CHKERRQ(ierr);
1536     }
1537   } else {
1538     if (!user->op_rhs) SETERRQ(comm,PETSC_ERR_ARG_NULL,
1539                                  "Problem does not provide RHSFunction");
1540     ierr = TSSetType(ts, TSRK); CHKERRQ(ierr);
1541     ierr = TSRKSetType(ts, TSRK5F); CHKERRQ(ierr);
1542     ierr = TSSetRHSFunction(ts, NULL, RHS_NS, &user); CHKERRQ(ierr);
1543   }
1544   ierr = TSSetMaxTime(ts, 500. * units->second); CHKERRQ(ierr);
1545   ierr = TSSetExactFinalTime(ts, TS_EXACTFINALTIME_STEPOVER); CHKERRQ(ierr);
1546   ierr = TSSetTimeStep(ts, 1.e-2 * units->second); CHKERRQ(ierr);
1547   if (testChoice != TEST_NONE) {ierr = TSSetMaxSteps(ts, 10); CHKERRQ(ierr);}
1548   ierr = TSGetAdapt(ts, &adapt); CHKERRQ(ierr);
1549   ierr = TSAdaptSetStepLimits(adapt,
1550                               1.e-12 * units->second,
1551                               1.e2 * units->second); CHKERRQ(ierr);
1552   ierr = TSSetFromOptions(ts); CHKERRQ(ierr);
1553   if (!contsteps) { // print initial condition
1554     if (testChoice == TEST_NONE) {
1555       ierr = TSMonitor_NS(ts, 0, 0., Q, user); CHKERRQ(ierr);
1556     }
1557   } else { // continue from time of last output
1558     PetscReal time;
1559     PetscInt count;
1560     PetscViewer viewer;
1561     char filepath[PETSC_MAX_PATH_LEN];
1562     ierr = PetscSNPrintf(filepath, sizeof filepath, "%s/ns-time.bin",
1563                          user->outputfolder); CHKERRQ(ierr);
1564     ierr = PetscViewerBinaryOpen(comm, filepath, FILE_MODE_READ, &viewer);
1565     CHKERRQ(ierr);
1566     ierr = PetscViewerBinaryRead(viewer, &time, 1, &count, PETSC_REAL);
1567     CHKERRQ(ierr);
1568     ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr);
1569     ierr = TSSetTime(ts, time * user->units->second); CHKERRQ(ierr);
1570   }
1571   if (testChoice == TEST_NONE) {
1572     ierr = TSMonitorSet(ts, TSMonitor_NS, user, NULL); CHKERRQ(ierr);
1573   }
1574 
1575   // Solve
1576   start = MPI_Wtime();
1577   ierr = PetscBarrier((PetscObject)ts); CHKERRQ(ierr);
1578   ierr = TSSolve(ts, Q); CHKERRQ(ierr);
1579   cpu_time_used = MPI_Wtime() - start;
1580   ierr = TSGetSolveTime(ts, &ftime); CHKERRQ(ierr);
1581   ierr = MPI_Allreduce(MPI_IN_PLACE, &cpu_time_used, 1, MPI_DOUBLE, MPI_MIN,
1582                        comm); CHKERRQ(ierr);
1583   if (testChoice == TEST_NONE) {
1584     ierr = PetscPrintf(PETSC_COMM_WORLD,
1585                        "Time taken for solution (sec): %g\n",
1586                        (double)cpu_time_used); CHKERRQ(ierr);
1587   }
1588 
1589   // Get error
1590   if (problem->non_zero_time && testChoice == TEST_NONE) {
1591     Vec Qexact, Qexactloc;
1592     PetscReal norm;
1593     ierr = DMCreateGlobalVector(dm, &Qexact); CHKERRQ(ierr);
1594     ierr = DMGetLocalVector(dm, &Qexactloc); CHKERRQ(ierr);
1595     ierr = VecGetSize(Qexactloc, &lnodes); CHKERRQ(ierr);
1596 
1597     ierr = ICs_FixMultiplicity(op_ics, xcorners, q0ceed, dm, Qexactloc, Qexact,
1598                                restrictq, &ctxSetup, ftime); CHKERRQ(ierr);
1599 
1600     ierr = VecAXPY(Q, -1.0, Qexact);  CHKERRQ(ierr);
1601     ierr = VecNorm(Q, NORM_MAX, &norm); CHKERRQ(ierr);
1602     CeedVectorDestroy(&q0ceed);
1603     ierr = PetscPrintf(PETSC_COMM_WORLD,
1604                        "Max Error: %g\n",
1605                        (double)norm); CHKERRQ(ierr);
1606     // Clean up vectors
1607     ierr = DMRestoreLocalVector(dm, &Qexactloc); CHKERRQ(ierr);
1608     ierr = VecDestroy(&Qexact); CHKERRQ(ierr);
1609   }
1610 
1611   // Output Statistics
1612   ierr = TSGetStepNumber(ts,&steps); CHKERRQ(ierr);
1613   if (testChoice == TEST_NONE) {
1614     ierr = PetscPrintf(PETSC_COMM_WORLD,
1615                        "Time integrator took %D time steps to reach final time %g\n",
1616                        steps, (double)ftime); CHKERRQ(ierr);
1617   }
1618   // Output numerical values from command line
1619   ierr = VecViewFromOptions(Q, NULL, "-vec_view"); CHKERRQ(ierr);
1620 
1621   // compare reference solution values with current run
1622   if (testChoice != TEST_NONE) {
1623     PetscViewer viewer;
1624     // Read reference file
1625     Vec Qref;
1626     PetscReal error, Qrefnorm;
1627     ierr = VecDuplicate(Q, &Qref); CHKERRQ(ierr);
1628     ierr = PetscViewerBinaryOpen(comm, test->filepath, FILE_MODE_READ, &viewer);
1629     CHKERRQ(ierr);
1630     ierr = VecLoad(Qref, viewer); CHKERRQ(ierr);
1631     ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr);
1632 
1633     // Compute error with respect to reference solution
1634     ierr = VecAXPY(Q, -1.0, Qref);  CHKERRQ(ierr);
1635     ierr = VecNorm(Qref, NORM_MAX, &Qrefnorm); CHKERRQ(ierr);
1636     ierr = VecScale(Q, 1./Qrefnorm); CHKERRQ(ierr);
1637     ierr = VecNorm(Q, NORM_MAX, &error); CHKERRQ(ierr);
1638     ierr = VecDestroy(&Qref); CHKERRQ(ierr);
1639     // Check error
1640     if (error > test->testtol) {
1641       ierr = PetscPrintf(PETSC_COMM_WORLD,
1642                          "Test failed with error norm %g\n",
1643                          (double)error); CHKERRQ(ierr);
1644     }
1645     ierr = PetscViewerDestroy(&viewer); CHKERRQ(ierr);
1646   }
1647 
1648   // Clean up libCEED
1649   CeedVectorDestroy(&qdata);
1650   CeedVectorDestroy(&user->qceed);
1651   CeedVectorDestroy(&user->qdotceed);
1652   CeedVectorDestroy(&user->gceed);
1653   CeedVectorDestroy(&xcorners);
1654   CeedBasisDestroy(&basisq);
1655   CeedBasisDestroy(&basisx);
1656   CeedBasisDestroy(&basisxc);
1657   CeedElemRestrictionDestroy(&restrictq);
1658   CeedElemRestrictionDestroy(&restrictx);
1659   CeedElemRestrictionDestroy(&restrictqdi);
1660   CeedQFunctionDestroy(&qf_setupVol);
1661   CeedQFunctionDestroy(&qf_ics);
1662   CeedQFunctionDestroy(&qf_rhsVol);
1663   CeedQFunctionDestroy(&qf_ifunctionVol);
1664   CeedOperatorDestroy(&op_setupVol);
1665   CeedOperatorDestroy(&op_ics);
1666   CeedOperatorDestroy(&user->op_rhs_vol);
1667   CeedOperatorDestroy(&user->op_ifunction_vol);
1668   CeedDestroy(&ceed);
1669   CeedBasisDestroy(&basisqSur);
1670   CeedBasisDestroy(&basisxSur);
1671   CeedBasisDestroy(&basisxcSur);
1672   CeedQFunctionDestroy(&qf_setupSur);
1673   CeedQFunctionDestroy(&qf_rhsSur);
1674   CeedQFunctionDestroy(&qf_ifunctionSur);
1675   for(CeedInt i=0; i<numOutFlow; i++){
1676     CeedVectorDestroy(&qdataSur[i]);
1677     CeedElemRestrictionDestroy(&restrictqSur[i]);
1678     CeedElemRestrictionDestroy(&restrictxSur[i]);
1679     CeedElemRestrictionDestroy(&restrictqdiSur[i]);
1680     CeedOperatorDestroy(&op_setupSur[i]);
1681     CeedOperatorDestroy(&user->op_rhs_sur[i]);
1682     CeedOperatorDestroy(&user->op_ifunction_sur[i]);
1683   }
1684   CeedOperatorDestroy(&user->op_rhs);
1685   CeedOperatorDestroy(&user->op_ifunction);
1686 
1687   // Clean up PETSc
1688   ierr = VecDestroy(&Q); CHKERRQ(ierr);
1689   ierr = VecDestroy(&user->M); CHKERRQ(ierr);
1690   ierr = MatDestroy(&interpviz); CHKERRQ(ierr);
1691   ierr = DMDestroy(&dmviz); CHKERRQ(ierr);
1692   ierr = TSDestroy(&ts); CHKERRQ(ierr);
1693   ierr = DMDestroy(&dm); CHKERRQ(ierr);
1694   ierr = PetscFree(units); CHKERRQ(ierr);
1695   ierr = PetscFree(user); CHKERRQ(ierr);
1696   return PetscFinalize();
1697 }
1698 
1699