static char help[] = "Transient nonlinear driven cavity in 2d.\n\
  \n\
The 2D driven cavity problem is solved in a velocity-vorticity formulation.\n\
The flow can be driven with the lid or with buoyancy or both:\n\
  -lidvelocity <lid>, where <lid> = dimensionless velocity of lid\n\
  -grashof <gr>, where <gr> = dimensionless temperature gradent\n\
  -prandtl <pr>, where <pr> = dimensionless thermal/momentum diffusity ratio\n\
  -contours : draw contour plots of solution\n\n";
/*
      See src/snes/tutorials/ex19.c for the steady-state version.

      There used to be a SNES example (src/snes/tutorials/ex27.c) that
      implemented this algorithm without using TS and was used for the numerical
      results in the paper

        Todd S. Coffey and C. T. Kelley and David E. Keyes, Pseudotransient
        Continuation and Differential-Algebraic Equations, 2003.

      That example was removed because it used obsolete interfaces, but the
      algorithms from the paper can be reproduced using this example.

      Note: The paper describes the algorithm as being linearly implicit but the
      numerical results were created using nonlinearly implicit Euler.  The
      algorithm as described (linearly implicit) is more efficient and is the
      default when using TSPSEUDO.  If you want to reproduce the numerical
      results from the paper, you'll have to change the SNES to converge the
      nonlinear solve (e.g., -snes_type newtonls).  The DAE versus ODE variants
      are controlled using the -parabolic option.

      Comment preserved from snes/tutorials/ex27.c, since removed:

        [H]owever Figure 3.1 was generated with a slightly different algorithm
        (see targets runex27 and runex27_p) than described in the paper.  In
        particular, the described algorithm is linearly implicit, advancing to
        the next step after one Newton step, so that the steady state residual
        is always used, but the figure was generated by converging each step to
        a relative tolerance of 1.e-3.  On the example problem, setting
        -snes_type ksponly has only minor impact on number of steps, but
        significantly reduces the required number of linear solves.

      See also https://lists.mcs.anl.gov/pipermail/petsc-dev/2010-March/002362.html
*/

/* ------------------------------------------------------------------------

    We thank David E. Keyes for contributing the driven cavity discretization
    within this example code.

    This problem is modeled by the partial differential equation system

        - Lap(U) - Grad_y(Omega) = 0
        - Lap(V) + Grad_x(Omega) = 0
        Omega_t - Lap(Omega) + Div([U*Omega,V*Omega]) - GR*Grad_x(T) = 0
        T_t - Lap(T) + PR*Div([U*T,V*T]) = 0

    in the unit square, which is uniformly discretized in each of x and
    y in this simple encoding.

    No-slip, rigid-wall Dirichlet conditions are used for [U,V].
    Dirichlet conditions are used for Omega, based on the definition of
    vorticity: Omega = - Grad_y(U) + Grad_x(V), where along each
    constant coordinate boundary, the tangential derivative is zero.
    Dirichlet conditions are used for T on the left and right walls,
    and insulation homogeneous Neumann conditions are used for T on
    the top and bottom walls.

    A finite difference approximation with the usual 5-point stencil
    is used to discretize the boundary value problem to obtain a
    nonlinear system of equations.  Upwinding is used for the divergence
    (convective) terms and central for the gradient (source) terms.

    The Jacobian can be either
      * formed via finite differencing using coloring (the default), or
      * applied matrix-free via the option -snes_mf
        (for larger grid problems this variant may not converge
        without a preconditioner due to ill-conditioning).

  ------------------------------------------------------------------------- */

/*
   Include "petscdmda.h" so that we can use distributed arrays (DMDAs).
   Include "petscts.h" so that we can use TS solvers.  Note that this
   file automatically includes:
     petscsys.h       - base PETSc routines   petscvec.h - vectors
     petscmat.h - matrices
     petscis.h     - index sets            petscksp.h - Krylov subspace methods
     petscviewer.h - viewers               petscpc.h  - preconditioners
     petscksp.h   - linear solvers         petscsnes.h - nonlinear solvers
*/
#include <petscts.h>
#include <petscdm.h>
#include <petscdmda.h>

/*
   User-defined routines and data structures
*/
typedef struct {
  PetscScalar u, v, omega, temp;
} Field;

PetscErrorCode FormIFunctionLocal(DMDALocalInfo *, PetscReal, Field **, Field **, Field **, void *);

typedef struct {
  PetscReal lidvelocity, prandtl, grashof; /* physical parameters */
  PetscBool parabolic;                     /* allow a transient term corresponding roughly to artificial compressibility */
  PetscReal cfl_initial;                   /* CFL for first time step */
} AppCtx;

PetscErrorCode FormInitialSolution(TS, Vec, AppCtx *);

int main(int argc, char **argv)
{
  AppCtx            user; /* user-defined work context */
  PetscInt          mx, my, steps;
  TS                ts;
  DM                da;
  Vec               X;
  PetscReal         ftime;
  TSConvergedReason reason;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &argv, NULL, help));
  PetscCall(TSCreate(PETSC_COMM_WORLD, &ts));
  PetscCall(DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_STAR, 4, 4, PETSC_DECIDE, PETSC_DECIDE, 4, 1, 0, 0, &da));
  PetscCall(DMSetFromOptions(da));
  PetscCall(DMSetUp(da));
  PetscCall(TSSetDM(ts, (DM)da));

  PetscCall(DMDAGetInfo(da, 0, &mx, &my, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE));
  /*
     Problem parameters (velocity of lid, prandtl, and grashof numbers)
  */
  user.lidvelocity = 1.0 / (mx * my);
  user.prandtl     = 1.0;
  user.grashof     = 1.0;
  user.parabolic   = PETSC_FALSE;
  user.cfl_initial = 50.;

  PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Driven cavity/natural convection options", "");
  PetscCall(PetscOptionsReal("-lidvelocity", "Lid velocity, related to Reynolds number", "", user.lidvelocity, &user.lidvelocity, NULL));
  PetscCall(PetscOptionsReal("-prandtl", "Ratio of viscous to thermal diffusivity", "", user.prandtl, &user.prandtl, NULL));
  PetscCall(PetscOptionsReal("-grashof", "Ratio of buoyant to viscous forces", "", user.grashof, &user.grashof, NULL));
  PetscCall(PetscOptionsBool("-parabolic", "Relax incompressibility to make the system parabolic instead of differential-algebraic", "", user.parabolic, &user.parabolic, NULL));
  PetscCall(PetscOptionsReal("-cfl_initial", "Advective CFL for the first time step", "", user.cfl_initial, &user.cfl_initial, NULL));
  PetscOptionsEnd();

  PetscCall(DMDASetFieldName(da, 0, "x-velocity"));
  PetscCall(DMDASetFieldName(da, 1, "y-velocity"));
  PetscCall(DMDASetFieldName(da, 2, "Omega"));
  PetscCall(DMDASetFieldName(da, 3, "temperature"));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Create user context, set problem data, create vector data structures.
     Also, compute the initial guess.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Create time integration context
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(DMSetApplicationContext(da, &user));
  PetscCall(DMDATSSetIFunctionLocal(da, INSERT_VALUES, (DMDATSIFunctionLocalFn *)FormIFunctionLocal, &user));
  PetscCall(TSSetMaxSteps(ts, 10000));
  PetscCall(TSSetMaxTime(ts, 1e12));
  PetscCall(TSSetExactFinalTime(ts, TS_EXACTFINALTIME_STEPOVER));
  PetscCall(TSSetTimeStep(ts, user.cfl_initial / (user.lidvelocity * mx)));
  PetscCall(TSSetFromOptions(ts));

  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%" PetscInt_FMT "x%" PetscInt_FMT " grid, lid velocity = %g, prandtl # = %g, grashof # = %g\n", mx, my, (double)user.lidvelocity, (double)user.prandtl, (double)user.grashof));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Solve the nonlinear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  PetscCall(DMCreateGlobalVector(da, &X));
  PetscCall(FormInitialSolution(ts, X, &user));

  PetscCall(TSSolve(ts, X));
  PetscCall(TSGetSolveTime(ts, &ftime));
  PetscCall(TSGetStepNumber(ts, &steps));
  PetscCall(TSGetConvergedReason(ts, &reason));

  PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%s at time %g after %" PetscInt_FMT " steps\n", TSConvergedReasons[reason], (double)ftime, steps));

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  PetscCall(VecDestroy(&X));
  PetscCall(DMDestroy(&da));
  PetscCall(TSDestroy(&ts));

  PetscCall(PetscFinalize());
  return 0;
}

/* ------------------------------------------------------------------- */

/*
   FormInitialSolution - Forms initial approximation.

   Input Parameters:
   user - user-defined application context
   X - vector

   Output Parameter:
   X - vector
 */
PetscErrorCode FormInitialSolution(TS ts, Vec X, AppCtx *user)
{
  DM        da;
  PetscInt  i, j, mx, xs, ys, xm, ym;
  PetscReal grashof, dx;
  Field   **x;

  PetscFunctionBeginUser;
  grashof = user->grashof;
  PetscCall(TSGetDM(ts, &da));
  PetscCall(DMDAGetInfo(da, 0, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
  dx = 1.0 / (mx - 1);

  /*
     Get local grid boundaries (for 2-dimensional DMDA):
       xs, ys   - starting grid indices (no ghost points)
       xm, ym   - widths of local grid (no ghost points)
  */
  PetscCall(DMDAGetCorners(da, &xs, &ys, NULL, &xm, &ym, NULL));

  /*
     Get a pointer to vector data.
       - For default PETSc vectors, VecGetArray() returns a pointer to
         the data array.  Otherwise, the routine is implementation dependent.
       - You MUST call VecRestoreArray() when you no longer need access to
         the array.
  */
  PetscCall(DMDAVecGetArray(da, X, &x));

  /*
     Compute initial guess over the locally owned part of the grid
     Initial condition is motionless fluid and equilibrium temperature
  */
  for (j = ys; j < ys + ym; j++) {
    for (i = xs; i < xs + xm; i++) {
      x[j][i].u     = 0.0;
      x[j][i].v     = 0.0;
      x[j][i].omega = 0.0;
      x[j][i].temp  = (grashof > 0) * i * dx;
    }
  }

  /*
     Restore vector
  */
  PetscCall(DMDAVecRestoreArray(da, X, &x));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode FormIFunctionLocal(DMDALocalInfo *info, PetscReal ptime, Field **x, Field **xdot, Field **f, void *ptr)
{
  AppCtx     *user = (AppCtx *)ptr;
  PetscInt    xints, xinte, yints, yinte, i, j;
  PetscReal   hx, hy, dhx, dhy, hxdhy, hydhx;
  PetscReal   grashof, prandtl, lid;
  PetscScalar u, udot, uxx, uyy, vx, vy, avx, avy, vxp, vxm, vyp, vym;

  PetscFunctionBeginUser;
  grashof = user->grashof;
  prandtl = user->prandtl;
  lid     = user->lidvelocity;

  /*
     Define mesh intervals ratios for uniform grid.

     Note: FD formulae below are normalized by multiplying through by
     local volume element (i.e. hx*hy) to obtain coefficients O(1) in two dimensions.

  */
  dhx   = (PetscReal)(info->mx - 1);
  dhy   = (PetscReal)(info->my - 1);
  hx    = 1.0 / dhx;
  hy    = 1.0 / dhy;
  hxdhy = hx * dhy;
  hydhx = hy * dhx;

  xints = info->xs;
  xinte = info->xs + info->xm;
  yints = info->ys;
  yinte = info->ys + info->ym;

  /* Test whether we are on the bottom edge of the global array */
  if (yints == 0) {
    j     = 0;
    yints = yints + 1;
    /* bottom edge */
    for (i = info->xs; i < info->xs + info->xm; i++) {
      f[j][i].u     = x[j][i].u;
      f[j][i].v     = x[j][i].v;
      f[j][i].omega = x[j][i].omega + (x[j + 1][i].u - x[j][i].u) * dhy;
      f[j][i].temp  = x[j][i].temp - x[j + 1][i].temp;
    }
  }

  /* Test whether we are on the top edge of the global array */
  if (yinte == info->my) {
    j     = info->my - 1;
    yinte = yinte - 1;
    /* top edge */
    for (i = info->xs; i < info->xs + info->xm; i++) {
      f[j][i].u     = x[j][i].u - lid;
      f[j][i].v     = x[j][i].v;
      f[j][i].omega = x[j][i].omega + (x[j][i].u - x[j - 1][i].u) * dhy;
      f[j][i].temp  = x[j][i].temp - x[j - 1][i].temp;
    }
  }

  /* Test whether we are on the left edge of the global array */
  if (xints == 0) {
    i     = 0;
    xints = xints + 1;
    /* left edge */
    for (j = info->ys; j < info->ys + info->ym; j++) {
      f[j][i].u     = x[j][i].u;
      f[j][i].v     = x[j][i].v;
      f[j][i].omega = x[j][i].omega - (x[j][i + 1].v - x[j][i].v) * dhx;
      f[j][i].temp  = x[j][i].temp;
    }
  }

  /* Test whether we are on the right edge of the global array */
  if (xinte == info->mx) {
    i     = info->mx - 1;
    xinte = xinte - 1;
    /* right edge */
    for (j = info->ys; j < info->ys + info->ym; j++) {
      f[j][i].u     = x[j][i].u;
      f[j][i].v     = x[j][i].v;
      f[j][i].omega = x[j][i].omega - (x[j][i].v - x[j][i - 1].v) * dhx;
      f[j][i].temp  = x[j][i].temp - (PetscReal)(grashof > 0);
    }
  }

  /* Compute over the interior points */
  for (j = yints; j < yinte; j++) {
    for (i = xints; i < xinte; i++) {
      /*
        convective coefficients for upwinding
      */
      vx  = x[j][i].u;
      avx = PetscAbsScalar(vx);
      vxp = .5 * (vx + avx);
      vxm = .5 * (vx - avx);
      vy  = x[j][i].v;
      avy = PetscAbsScalar(vy);
      vyp = .5 * (vy + avy);
      vym = .5 * (vy - avy);

      /* U velocity */
      u         = x[j][i].u;
      udot      = user->parabolic ? xdot[j][i].u : 0.;
      uxx       = (2.0 * u - x[j][i - 1].u - x[j][i + 1].u) * hydhx;
      uyy       = (2.0 * u - x[j - 1][i].u - x[j + 1][i].u) * hxdhy;
      f[j][i].u = udot + uxx + uyy - .5 * (x[j + 1][i].omega - x[j - 1][i].omega) * hx;

      /* V velocity */
      u         = x[j][i].v;
      udot      = user->parabolic ? xdot[j][i].v : 0.;
      uxx       = (2.0 * u - x[j][i - 1].v - x[j][i + 1].v) * hydhx;
      uyy       = (2.0 * u - x[j - 1][i].v - x[j + 1][i].v) * hxdhy;
      f[j][i].v = udot + uxx + uyy + .5 * (x[j][i + 1].omega - x[j][i - 1].omega) * hy;

      /* Omega */
      u   = x[j][i].omega;
      uxx = (2.0 * u - x[j][i - 1].omega - x[j][i + 1].omega) * hydhx;
      uyy = (2.0 * u - x[j - 1][i].omega - x[j + 1][i].omega) * hxdhy;
      f[j][i].omega = (xdot[j][i].omega + uxx + uyy + (vxp * (u - x[j][i - 1].omega) + vxm * (x[j][i + 1].omega - u)) * hy + (vyp * (u - x[j - 1][i].omega) + vym * (x[j + 1][i].omega - u)) * hx - .5 * grashof * (x[j][i + 1].temp - x[j][i - 1].temp) * hy);

      /* Temperature */
      u            = x[j][i].temp;
      uxx          = (2.0 * u - x[j][i - 1].temp - x[j][i + 1].temp) * hydhx;
      uyy          = (2.0 * u - x[j - 1][i].temp - x[j + 1][i].temp) * hxdhy;
      f[j][i].temp = (xdot[j][i].temp + uxx + uyy + prandtl * ((vxp * (u - x[j][i - 1].temp) + vxm * (x[j][i + 1].temp - u)) * hy + (vyp * (u - x[j - 1][i].temp) + vym * (x[j + 1][i].temp - u)) * hx));
    }
  }

  /*
     Flop count (multiply-adds are counted as 2 operations)
  */
  PetscCall(PetscLogFlops(84.0 * info->ym * info->xm));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*TEST

    test:
      args: -da_grid_x 20 -da_grid_y 20 -lidvelocity 100 -grashof 1e3 -ts_max_steps 100 -ts_rtol 1e-3 -ts_atol 1e-3 -ts_type rosw -ts_rosw_type ra3pw -ts_monitor -ts_monitor_solution_vtk 'foo-%03d.vts'
      requires: !complex !single

    test:
      suffix: 1_steps
      args: -da_grid_x 20 -da_grid_y 20 -lidvelocity 100 -grashof 1e3 -ts_run_steps 100 -ts_rtol 1e-3 -ts_atol 1e-3 -ts_type rosw -ts_rosw_type ra3pw -ts_monitor -ts_monitor_solution_vtk 'foo-%03d.vts'
      requires: !complex !single

    test:
      suffix: 2
      nsize: 4
      args: -da_grid_x 20 -da_grid_y 20 -lidvelocity 100 -grashof 1e3 -ts_max_steps 100 -ts_rtol 1e-3 -ts_atol 1e-3 -ts_type rosw -ts_rosw_type ra3pw -ts_monitor -ts_monitor_solution_vtk 'foo-%03d.vts' -ts_monitor_solution_vtk_interval -1
      requires: !complex !single

    test:
      suffix: 3
      nsize: 4
      args: -da_refine 2 -lidvelocity 100 -grashof 1e3 -ts_max_steps 10 -ts_rtol 1e-3 -ts_atol 1e-3 -pc_type none -ts_type beuler -ts_monitor -snes_monitor_short -snes_type aspin -da_overlap 4 -npc_sub_ksp_type preonly -npc_sub_pc_type lu
      requires: !complex !single

    test:
      suffix: 4
      nsize: 2
      args: -da_refine 1 -lidvelocity 100 -grashof 1e3 -ts_max_steps 10 -ts_rtol 1e-3 -ts_atol 1e-3
      requires: !complex !single

    test:
      suffix: asm
      nsize: 4
      args: -da_refine 1 -lidvelocity 100 -grashof 1e3 -ts_max_steps 10 -ts_rtol 1e-3 -ts_atol 1e-3
      requires: !complex !single

TEST*/