xref: /petsc/src/snes/tutorials/ex48.c (revision c4762a1b19cd2af06abeed90e8f9d34fb975dd94)
1*c4762a1bSJed Brown static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
2*c4762a1bSJed Brown \n\
3*c4762a1bSJed Brown Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
4*c4762a1bSJed Brown using multigrid.  The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
5*c4762a1bSJed Brown to p=4/3 in a p-Laplacian).  The focus is on ISMIP-HOM experiments which assume periodic\n\
6*c4762a1bSJed Brown boundary conditions in the x- and y-directions.\n\
7*c4762a1bSJed Brown \n\
8*c4762a1bSJed Brown Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
9*c4762a1bSJed Brown can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
10*c4762a1bSJed Brown \n\
11*c4762a1bSJed Brown A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
12*c4762a1bSJed Brown \n\n";
13*c4762a1bSJed Brown 
14*c4762a1bSJed Brown /*
15*c4762a1bSJed Brown The equations for horizontal velocity (u,v) are
16*c4762a1bSJed Brown 
17*c4762a1bSJed Brown   - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
18*c4762a1bSJed Brown   - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0
19*c4762a1bSJed Brown 
20*c4762a1bSJed Brown where
21*c4762a1bSJed Brown 
22*c4762a1bSJed Brown   eta = B/2 (epsilon + gamma)^((p-2)/2)
23*c4762a1bSJed Brown 
24*c4762a1bSJed Brown is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
25*c4762a1bSJed Brown written in terms of the second invariant
26*c4762a1bSJed Brown 
27*c4762a1bSJed Brown   gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2
28*c4762a1bSJed Brown 
29*c4762a1bSJed Brown The surface boundary conditions are the natural conditions.  The basal boundary conditions
30*c4762a1bSJed Brown are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.
31*c4762a1bSJed Brown 
32*c4762a1bSJed Brown In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).
33*c4762a1bSJed Brown 
34*c4762a1bSJed Brown The discretization is Q1 finite elements, managed by a DMDA.  The grid is never distorted in the
35*c4762a1bSJed Brown map (x,y) plane, but the bed and surface may be bumpy.  This is handled as usual in FEM, through
36*c4762a1bSJed Brown the Jacobian of the coordinate transformation from a reference element to the physical element.
37*c4762a1bSJed Brown 
38*c4762a1bSJed Brown Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
39*c4762a1bSJed Brown specially so that columns are never distributed, and are always contiguous in memory.
40*c4762a1bSJed Brown This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
41*c4762a1bSJed Brown and then indexing as vec[i][j][k].  The exotic coarse spaces require 2D DMDAs which are made to
42*c4762a1bSJed Brown use compatible domain decomposition relative to the 3D DMDAs.
43*c4762a1bSJed Brown 
44*c4762a1bSJed Brown There are two compile-time options:
45*c4762a1bSJed Brown 
46*c4762a1bSJed Brown   NO_SSE2:
47*c4762a1bSJed Brown     If the host supports SSE2, we use integration code that has been vectorized with SSE2
48*c4762a1bSJed Brown     intrinsics, unless this macro is defined.  The intrinsics speed up integration by about
49*c4762a1bSJed Brown     30% on my architecture (P8700, gcc-4.5 snapshot).
50*c4762a1bSJed Brown 
51*c4762a1bSJed Brown   COMPUTE_LOWER_TRIANGULAR:
52*c4762a1bSJed Brown     The element matrices we assemble are lower-triangular so it is not necessary to compute
53*c4762a1bSJed Brown     all entries explicitly.  If this macro is defined, the lower-triangular entries are
54*c4762a1bSJed Brown     computed explicitly.
55*c4762a1bSJed Brown 
56*c4762a1bSJed Brown */
57*c4762a1bSJed Brown 
58*c4762a1bSJed Brown #if defined(PETSC_APPLE_FRAMEWORK)
59*c4762a1bSJed Brown #import <PETSc/petscsnes.h>
60*c4762a1bSJed Brown #import <PETSc/petsc/private/dmdaimpl.h>     /* There is not yet a public interface to manipulate dm->ops */
61*c4762a1bSJed Brown #else
62*c4762a1bSJed Brown 
63*c4762a1bSJed Brown #include <petscsnes.h>
64*c4762a1bSJed Brown #include <petsc/private/dmdaimpl.h>     /* There is not yet a public interface to manipulate dm->ops */
65*c4762a1bSJed Brown #endif
66*c4762a1bSJed Brown #include <ctype.h>              /* toupper() */
67*c4762a1bSJed Brown 
68*c4762a1bSJed Brown #if defined(__cplusplus) || defined (PETSC_HAVE_WINDOWS_COMPILERS) || defined (__PGI)
69*c4762a1bSJed Brown /*  c++ cannot handle  [_restrict_] notation like C does */
70*c4762a1bSJed Brown #undef PETSC_RESTRICT
71*c4762a1bSJed Brown #define PETSC_RESTRICT
72*c4762a1bSJed Brown #endif
73*c4762a1bSJed Brown 
74*c4762a1bSJed Brown #if defined __SSE2__
75*c4762a1bSJed Brown #  include <emmintrin.h>
76*c4762a1bSJed Brown #endif
77*c4762a1bSJed Brown 
78*c4762a1bSJed Brown /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
79*c4762a1bSJed Brown #if !defined NO_SSE2                           \
80*c4762a1bSJed Brown      && !defined PETSC_USE_COMPLEX             \
81*c4762a1bSJed Brown      && !defined PETSC_USE_REAL_SINGLE         \
82*c4762a1bSJed Brown      && !defined PETSC_USE_REAL___FLOAT128     \
83*c4762a1bSJed Brown      && !defined PETSC_USE_REAL___FP16         \
84*c4762a1bSJed Brown      && defined __SSE2__
85*c4762a1bSJed Brown #define USE_SSE2_KERNELS 1
86*c4762a1bSJed Brown #else
87*c4762a1bSJed Brown #define USE_SSE2_KERNELS 0
88*c4762a1bSJed Brown #endif
89*c4762a1bSJed Brown 
90*c4762a1bSJed Brown static PetscClassId THI_CLASSID;
91*c4762a1bSJed Brown 
92*c4762a1bSJed Brown typedef enum {QUAD_GAUSS,QUAD_LOBATTO} QuadratureType;
93*c4762a1bSJed Brown static const char      *QuadratureTypes[] = {"gauss","lobatto","QuadratureType","QUAD_",0};
94*c4762a1bSJed Brown PETSC_UNUSED static const PetscReal HexQWeights[8]     = {1,1,1,1,1,1,1,1};
95*c4762a1bSJed Brown PETSC_UNUSED static const PetscReal HexQNodes[]        = {-0.57735026918962573, 0.57735026918962573};
96*c4762a1bSJed Brown #define G 0.57735026918962573
97*c4762a1bSJed Brown #define H (0.5*(1.+G))
98*c4762a1bSJed Brown #define L (0.5*(1.-G))
99*c4762a1bSJed Brown #define M (-0.5)
100*c4762a1bSJed Brown #define P (0.5)
101*c4762a1bSJed Brown /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
102*c4762a1bSJed Brown static const PetscReal HexQInterp_Lobatto[8][8] = {{H,0,0,0,L,0,0,0},
103*c4762a1bSJed Brown                                                    {0,H,0,0,0,L,0,0},
104*c4762a1bSJed Brown                                                    {0,0,H,0,0,0,L,0},
105*c4762a1bSJed Brown                                                    {0,0,0,H,0,0,0,L},
106*c4762a1bSJed Brown                                                    {L,0,0,0,H,0,0,0},
107*c4762a1bSJed Brown                                                    {0,L,0,0,0,H,0,0},
108*c4762a1bSJed Brown                                                    {0,0,L,0,0,0,H,0},
109*c4762a1bSJed Brown                                                    {0,0,0,L,0,0,0,H}};
110*c4762a1bSJed Brown static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
111*c4762a1bSJed Brown   {{M*H,M*H,M},{P*H,0,0}  ,{0,0,0}    ,{0,P*H,0}  ,{M*L,M*L,P},{P*L,0,0}  ,{0,0,0}    ,{0,P*L,0}  },
112*c4762a1bSJed Brown   {{M*H,0,0}  ,{P*H,M*H,M},{0,P*H,0}  ,{0,0,0}    ,{M*L,0,0}  ,{P*L,M*L,P},{0,P*L,0}  ,{0,0,0}    },
113*c4762a1bSJed Brown   {{0,0,0}    ,{0,M*H,0}  ,{P*H,P*H,M},{M*H,0,0}  ,{0,0,0}    ,{0,M*L,0}  ,{P*L,P*L,P},{M*L,0,0}  },
114*c4762a1bSJed Brown   {{0,M*H,0}  ,{0,0,0}    ,{P*H,0,0}  ,{M*H,P*H,M},{0,M*L,0}  ,{0,0,0}    ,{P*L,0,0}  ,{M*L,P*L,P}},
115*c4762a1bSJed Brown   {{M*L,M*L,M},{P*L,0,0}  ,{0,0,0}    ,{0,P*L,0}  ,{M*H,M*H,P},{P*H,0,0}  ,{0,0,0}    ,{0,P*H,0}  },
116*c4762a1bSJed Brown   {{M*L,0,0}  ,{P*L,M*L,M},{0,P*L,0}  ,{0,0,0}    ,{M*H,0,0}  ,{P*H,M*H,P},{0,P*H,0}  ,{0,0,0}    },
117*c4762a1bSJed Brown   {{0,0,0}    ,{0,M*L,0}  ,{P*L,P*L,M},{M*L,0,0}  ,{0,0,0}    ,{0,M*H,0}  ,{P*H,P*H,P},{M*H,0,0}  },
118*c4762a1bSJed Brown   {{0,M*L,0}  ,{0,0,0}    ,{P*L,0,0}  ,{M*L,P*L,M},{0,M*H,0}  ,{0,0,0}    ,{P*H,0,0}  ,{M*H,P*H,P}}};
119*c4762a1bSJed Brown /* Stanndard Gauss */
120*c4762a1bSJed Brown static const PetscReal HexQInterp_Gauss[8][8] = {{H*H*H,L*H*H,L*L*H,H*L*H, H*H*L,L*H*L,L*L*L,H*L*L},
121*c4762a1bSJed Brown                                                  {L*H*H,H*H*H,H*L*H,L*L*H, L*H*L,H*H*L,H*L*L,L*L*L},
122*c4762a1bSJed Brown                                                  {L*L*H,H*L*H,H*H*H,L*H*H, L*L*L,H*L*L,H*H*L,L*H*L},
123*c4762a1bSJed Brown                                                  {H*L*H,L*L*H,L*H*H,H*H*H, H*L*L,L*L*L,L*H*L,H*H*L},
124*c4762a1bSJed Brown                                                  {H*H*L,L*H*L,L*L*L,H*L*L, H*H*H,L*H*H,L*L*H,H*L*H},
125*c4762a1bSJed Brown                                                  {L*H*L,H*H*L,H*L*L,L*L*L, L*H*H,H*H*H,H*L*H,L*L*H},
126*c4762a1bSJed Brown                                                  {L*L*L,H*L*L,H*H*L,L*H*L, L*L*H,H*L*H,H*H*H,L*H*H},
127*c4762a1bSJed Brown                                                  {H*L*L,L*L*L,L*H*L,H*H*L, H*L*H,L*L*H,L*H*H,H*H*H}};
128*c4762a1bSJed Brown static const PetscReal HexQDeriv_Gauss[8][8][3] = {
129*c4762a1bSJed Brown   {{M*H*H,H*M*H,H*H*M},{P*H*H,L*M*H,L*H*M},{P*L*H,L*P*H,L*L*M},{M*L*H,H*P*H,H*L*M}, {M*H*L,H*M*L,H*H*P},{P*H*L,L*M*L,L*H*P},{P*L*L,L*P*L,L*L*P},{M*L*L,H*P*L,H*L*P}},
130*c4762a1bSJed Brown   {{M*H*H,L*M*H,L*H*M},{P*H*H,H*M*H,H*H*M},{P*L*H,H*P*H,H*L*M},{M*L*H,L*P*H,L*L*M}, {M*H*L,L*M*L,L*H*P},{P*H*L,H*M*L,H*H*P},{P*L*L,H*P*L,H*L*P},{M*L*L,L*P*L,L*L*P}},
131*c4762a1bSJed Brown   {{M*L*H,L*M*H,L*L*M},{P*L*H,H*M*H,H*L*M},{P*H*H,H*P*H,H*H*M},{M*H*H,L*P*H,L*H*M}, {M*L*L,L*M*L,L*L*P},{P*L*L,H*M*L,H*L*P},{P*H*L,H*P*L,H*H*P},{M*H*L,L*P*L,L*H*P}},
132*c4762a1bSJed Brown   {{M*L*H,H*M*H,H*L*M},{P*L*H,L*M*H,L*L*M},{P*H*H,L*P*H,L*H*M},{M*H*H,H*P*H,H*H*M}, {M*L*L,H*M*L,H*L*P},{P*L*L,L*M*L,L*L*P},{P*H*L,L*P*L,L*H*P},{M*H*L,H*P*L,H*H*P}},
133*c4762a1bSJed Brown   {{M*H*L,H*M*L,H*H*M},{P*H*L,L*M*L,L*H*M},{P*L*L,L*P*L,L*L*M},{M*L*L,H*P*L,H*L*M}, {M*H*H,H*M*H,H*H*P},{P*H*H,L*M*H,L*H*P},{P*L*H,L*P*H,L*L*P},{M*L*H,H*P*H,H*L*P}},
134*c4762a1bSJed Brown   {{M*H*L,L*M*L,L*H*M},{P*H*L,H*M*L,H*H*M},{P*L*L,H*P*L,H*L*M},{M*L*L,L*P*L,L*L*M}, {M*H*H,L*M*H,L*H*P},{P*H*H,H*M*H,H*H*P},{P*L*H,H*P*H,H*L*P},{M*L*H,L*P*H,L*L*P}},
135*c4762a1bSJed Brown   {{M*L*L,L*M*L,L*L*M},{P*L*L,H*M*L,H*L*M},{P*H*L,H*P*L,H*H*M},{M*H*L,L*P*L,L*H*M}, {M*L*H,L*M*H,L*L*P},{P*L*H,H*M*H,H*L*P},{P*H*H,H*P*H,H*H*P},{M*H*H,L*P*H,L*H*P}},
136*c4762a1bSJed Brown   {{M*L*L,H*M*L,H*L*M},{P*L*L,L*M*L,L*L*M},{P*H*L,L*P*L,L*H*M},{M*H*L,H*P*L,H*H*M}, {M*L*H,H*M*H,H*L*P},{P*L*H,L*M*H,L*L*P},{P*H*H,L*P*H,L*H*P},{M*H*H,H*P*H,H*H*P}}};
137*c4762a1bSJed Brown static const PetscReal (*HexQInterp)[8],(*HexQDeriv)[8][3];
138*c4762a1bSJed Brown /* Standard 2x2 Gauss quadrature for the bottom layer. */
139*c4762a1bSJed Brown static const PetscReal QuadQInterp[4][4] = {{H*H,L*H,L*L,H*L},
140*c4762a1bSJed Brown                                             {L*H,H*H,H*L,L*L},
141*c4762a1bSJed Brown                                             {L*L,H*L,H*H,L*H},
142*c4762a1bSJed Brown                                             {H*L,L*L,L*H,H*H}};
143*c4762a1bSJed Brown static const PetscReal QuadQDeriv[4][4][2] = {
144*c4762a1bSJed Brown   {{M*H,M*H},{P*H,M*L},{P*L,P*L},{M*L,P*H}},
145*c4762a1bSJed Brown   {{M*H,M*L},{P*H,M*H},{P*L,P*H},{M*L,P*L}},
146*c4762a1bSJed Brown   {{M*L,M*L},{P*L,M*H},{P*H,P*H},{M*H,P*L}},
147*c4762a1bSJed Brown   {{M*L,M*H},{P*L,M*L},{P*H,P*L},{M*H,P*H}}};
148*c4762a1bSJed Brown #undef G
149*c4762a1bSJed Brown #undef H
150*c4762a1bSJed Brown #undef L
151*c4762a1bSJed Brown #undef M
152*c4762a1bSJed Brown #undef P
153*c4762a1bSJed Brown 
154*c4762a1bSJed Brown #define HexExtract(x,i,j,k,n) do {              \
155*c4762a1bSJed Brown     (n)[0] = (x)[i][j][k];                      \
156*c4762a1bSJed Brown     (n)[1] = (x)[i+1][j][k];                    \
157*c4762a1bSJed Brown     (n)[2] = (x)[i+1][j+1][k];                  \
158*c4762a1bSJed Brown     (n)[3] = (x)[i][j+1][k];                    \
159*c4762a1bSJed Brown     (n)[4] = (x)[i][j][k+1];                    \
160*c4762a1bSJed Brown     (n)[5] = (x)[i+1][j][k+1];                  \
161*c4762a1bSJed Brown     (n)[6] = (x)[i+1][j+1][k+1];                \
162*c4762a1bSJed Brown     (n)[7] = (x)[i][j+1][k+1];                  \
163*c4762a1bSJed Brown   } while (0)
164*c4762a1bSJed Brown 
165*c4762a1bSJed Brown #define HexExtractRef(x,i,j,k,n) do {           \
166*c4762a1bSJed Brown     (n)[0] = &(x)[i][j][k];                     \
167*c4762a1bSJed Brown     (n)[1] = &(x)[i+1][j][k];                   \
168*c4762a1bSJed Brown     (n)[2] = &(x)[i+1][j+1][k];                 \
169*c4762a1bSJed Brown     (n)[3] = &(x)[i][j+1][k];                   \
170*c4762a1bSJed Brown     (n)[4] = &(x)[i][j][k+1];                   \
171*c4762a1bSJed Brown     (n)[5] = &(x)[i+1][j][k+1];                 \
172*c4762a1bSJed Brown     (n)[6] = &(x)[i+1][j+1][k+1];               \
173*c4762a1bSJed Brown     (n)[7] = &(x)[i][j+1][k+1];                 \
174*c4762a1bSJed Brown   } while (0)
175*c4762a1bSJed Brown 
176*c4762a1bSJed Brown #define QuadExtract(x,i,j,n) do {               \
177*c4762a1bSJed Brown     (n)[0] = (x)[i][j];                         \
178*c4762a1bSJed Brown     (n)[1] = (x)[i+1][j];                       \
179*c4762a1bSJed Brown     (n)[2] = (x)[i+1][j+1];                     \
180*c4762a1bSJed Brown     (n)[3] = (x)[i][j+1];                       \
181*c4762a1bSJed Brown   } while (0)
182*c4762a1bSJed Brown 
183*c4762a1bSJed Brown static void HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[])
184*c4762a1bSJed Brown {
185*c4762a1bSJed Brown   PetscInt i;
186*c4762a1bSJed Brown   dz[0] = dz[1] = dz[2] = 0;
187*c4762a1bSJed Brown   for (i=0; i<8; i++) {
188*c4762a1bSJed Brown     dz[0] += dphi[i][0] * zn[i];
189*c4762a1bSJed Brown     dz[1] += dphi[i][1] * zn[i];
190*c4762a1bSJed Brown     dz[2] += dphi[i][2] * zn[i];
191*c4762a1bSJed Brown   }
192*c4762a1bSJed Brown }
193*c4762a1bSJed Brown 
194*c4762a1bSJed Brown static void HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[PETSC_RESTRICT],PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscReal *PETSC_RESTRICT jw)
195*c4762a1bSJed Brown {
196*c4762a1bSJed Brown   const PetscReal jac[3][3]  = {{hx/2,0,0}, {0,hy/2,0}, {dz[0],dz[1],dz[2]}};
197*c4762a1bSJed Brown   const PetscReal ijac[3][3] = {{1/jac[0][0],0,0}, {0,1/jac[1][1],0}, {-jac[2][0]/(jac[0][0]*jac[2][2]),-jac[2][1]/(jac[1][1]*jac[2][2]),1/jac[2][2]}};
198*c4762a1bSJed Brown   const PetscReal jdet       = jac[0][0]*jac[1][1]*jac[2][2];
199*c4762a1bSJed Brown   PetscInt        i;
200*c4762a1bSJed Brown 
201*c4762a1bSJed Brown   for (i=0; i<8; i++) {
202*c4762a1bSJed Brown     const PetscReal *dphir = HexQDeriv[q][i];
203*c4762a1bSJed Brown     phi[i]     = HexQInterp[q][i];
204*c4762a1bSJed Brown     dphi[i][0] = dphir[0]*ijac[0][0] + dphir[1]*ijac[1][0] + dphir[2]*ijac[2][0];
205*c4762a1bSJed Brown     dphi[i][1] = dphir[0]*ijac[0][1] + dphir[1]*ijac[1][1] + dphir[2]*ijac[2][1];
206*c4762a1bSJed Brown     dphi[i][2] = dphir[0]*ijac[0][2] + dphir[1]*ijac[1][2] + dphir[2]*ijac[2][2];
207*c4762a1bSJed Brown   }
208*c4762a1bSJed Brown   *jw = 1.0 * jdet;
209*c4762a1bSJed Brown }
210*c4762a1bSJed Brown 
211*c4762a1bSJed Brown typedef struct _p_THI   *THI;
212*c4762a1bSJed Brown typedef struct _n_Units *Units;
213*c4762a1bSJed Brown 
214*c4762a1bSJed Brown typedef struct {
215*c4762a1bSJed Brown   PetscScalar u,v;
216*c4762a1bSJed Brown } Node;
217*c4762a1bSJed Brown 
218*c4762a1bSJed Brown typedef struct {
219*c4762a1bSJed Brown   PetscScalar b;                /* bed */
220*c4762a1bSJed Brown   PetscScalar h;                /* thickness */
221*c4762a1bSJed Brown   PetscScalar beta2;            /* friction */
222*c4762a1bSJed Brown } PrmNode;
223*c4762a1bSJed Brown 
224*c4762a1bSJed Brown typedef struct {
225*c4762a1bSJed Brown   PetscReal min,max,cmin,cmax;
226*c4762a1bSJed Brown } PRange;
227*c4762a1bSJed Brown 
228*c4762a1bSJed Brown typedef enum {THIASSEMBLY_TRIDIAGONAL,THIASSEMBLY_FULL} THIAssemblyMode;
229*c4762a1bSJed Brown 
230*c4762a1bSJed Brown struct _p_THI {
231*c4762a1bSJed Brown   PETSCHEADER(int);
232*c4762a1bSJed Brown   void      (*initialize)(THI,PetscReal x,PetscReal y,PrmNode *p);
233*c4762a1bSJed Brown   PetscInt  zlevels;
234*c4762a1bSJed Brown   PetscReal Lx,Ly,Lz;           /* Model domain */
235*c4762a1bSJed Brown   PetscReal alpha;              /* Bed angle */
236*c4762a1bSJed Brown   Units     units;
237*c4762a1bSJed Brown   PetscReal dirichlet_scale;
238*c4762a1bSJed Brown   PetscReal ssa_friction_scale;
239*c4762a1bSJed Brown   PRange    eta;
240*c4762a1bSJed Brown   PRange    beta2;
241*c4762a1bSJed Brown   struct {
242*c4762a1bSJed Brown     PetscReal Bd2,eps,exponent;
243*c4762a1bSJed Brown   } viscosity;
244*c4762a1bSJed Brown   struct {
245*c4762a1bSJed Brown     PetscReal irefgam,eps2,exponent,refvel,epsvel;
246*c4762a1bSJed Brown   } friction;
247*c4762a1bSJed Brown   PetscReal rhog;
248*c4762a1bSJed Brown   PetscBool no_slip;
249*c4762a1bSJed Brown   PetscBool tridiagonal;
250*c4762a1bSJed Brown   PetscBool coarse2d;
251*c4762a1bSJed Brown   PetscBool verbose;
252*c4762a1bSJed Brown   MatType   mattype;
253*c4762a1bSJed Brown };
254*c4762a1bSJed Brown 
255*c4762a1bSJed Brown struct _n_Units {
256*c4762a1bSJed Brown   /* fundamental */
257*c4762a1bSJed Brown   PetscReal meter;
258*c4762a1bSJed Brown   PetscReal kilogram;
259*c4762a1bSJed Brown   PetscReal second;
260*c4762a1bSJed Brown   /* derived */
261*c4762a1bSJed Brown   PetscReal Pascal;
262*c4762a1bSJed Brown   PetscReal year;
263*c4762a1bSJed Brown };
264*c4762a1bSJed Brown 
265*c4762a1bSJed Brown static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo*,Node***,Mat,Mat,THI);
266*c4762a1bSJed Brown static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo*,Node***,Mat,Mat,THI);
267*c4762a1bSJed Brown static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo*,Node**,Mat,Mat,THI);
268*c4762a1bSJed Brown 
269*c4762a1bSJed Brown static void PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[])
270*c4762a1bSJed Brown {
271*c4762a1bSJed Brown   const PetscScalar zm1    = zm-1,
272*c4762a1bSJed Brown                     znl[8] = {pn[0].b + pn[0].h*(PetscScalar)k/zm1,
273*c4762a1bSJed Brown                               pn[1].b + pn[1].h*(PetscScalar)k/zm1,
274*c4762a1bSJed Brown                               pn[2].b + pn[2].h*(PetscScalar)k/zm1,
275*c4762a1bSJed Brown                               pn[3].b + pn[3].h*(PetscScalar)k/zm1,
276*c4762a1bSJed Brown                               pn[0].b + pn[0].h*(PetscScalar)(k+1)/zm1,
277*c4762a1bSJed Brown                               pn[1].b + pn[1].h*(PetscScalar)(k+1)/zm1,
278*c4762a1bSJed Brown                               pn[2].b + pn[2].h*(PetscScalar)(k+1)/zm1,
279*c4762a1bSJed Brown                               pn[3].b + pn[3].h*(PetscScalar)(k+1)/zm1};
280*c4762a1bSJed Brown   PetscInt i;
281*c4762a1bSJed Brown   for (i=0; i<8; i++) zn[i] = PetscRealPart(znl[i]);
282*c4762a1bSJed Brown }
283*c4762a1bSJed Brown 
284*c4762a1bSJed Brown /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
285*c4762a1bSJed Brown static void THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode *p)
286*c4762a1bSJed Brown {
287*c4762a1bSJed Brown   Units     units = thi->units;
288*c4762a1bSJed Brown   PetscReal s     = -x*PetscSinReal(thi->alpha);
289*c4762a1bSJed Brown 
290*c4762a1bSJed Brown   p->b     = s - 1000*units->meter + 500*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx) * PetscSinReal(y*2*PETSC_PI/thi->Ly);
291*c4762a1bSJed Brown   p->h     = s - p->b;
292*c4762a1bSJed Brown   p->beta2 = 1e30;
293*c4762a1bSJed Brown }
294*c4762a1bSJed Brown 
295*c4762a1bSJed Brown static void THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode *p)
296*c4762a1bSJed Brown {
297*c4762a1bSJed Brown   Units     units = thi->units;
298*c4762a1bSJed Brown   PetscReal s     = -x*PetscSinReal(thi->alpha);
299*c4762a1bSJed Brown 
300*c4762a1bSJed Brown   p->b = s - 1000*units->meter;
301*c4762a1bSJed Brown   p->h = s - p->b;
302*c4762a1bSJed Brown   /* tau_b = beta2 v   is a stress (Pa) */
303*c4762a1bSJed Brown   p->beta2 = 1000 * (1 + PetscSinReal(x*2*PETSC_PI/thi->Lx)*PetscSinReal(y*2*PETSC_PI/thi->Ly)) * units->Pascal * units->year / units->meter;
304*c4762a1bSJed Brown }
305*c4762a1bSJed Brown 
306*c4762a1bSJed Brown /* These are just toys */
307*c4762a1bSJed Brown 
308*c4762a1bSJed Brown /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
309*c4762a1bSJed Brown static void THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
310*c4762a1bSJed Brown {
311*c4762a1bSJed Brown   Units     units = thi->units;
312*c4762a1bSJed Brown   PetscReal x     = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
313*c4762a1bSJed Brown   PetscReal r     = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
314*c4762a1bSJed Brown   p->b     = s - 1000*units->meter + 500*units->meter*PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
315*c4762a1bSJed Brown   p->h     = s - p->b;
316*c4762a1bSJed Brown   p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter;
317*c4762a1bSJed Brown }
318*c4762a1bSJed Brown 
319*c4762a1bSJed Brown /* Like Z, but with 200 meter cliffs */
320*c4762a1bSJed Brown static void THIInitialize_HOM_Y(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
321*c4762a1bSJed Brown {
322*c4762a1bSJed Brown   Units     units = thi->units;
323*c4762a1bSJed Brown   PetscReal x     = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
324*c4762a1bSJed Brown   PetscReal r     = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
325*c4762a1bSJed Brown 
326*c4762a1bSJed Brown   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
327*c4762a1bSJed Brown   if (PetscRealPart(p->b) > -700*units->meter) p->b += 200*units->meter;
328*c4762a1bSJed Brown   p->h     = s - p->b;
329*c4762a1bSJed Brown   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter;
330*c4762a1bSJed Brown }
331*c4762a1bSJed Brown 
332*c4762a1bSJed Brown /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
333*c4762a1bSJed Brown static void THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
334*c4762a1bSJed Brown {
335*c4762a1bSJed Brown   Units     units = thi->units;
336*c4762a1bSJed Brown   PetscReal x     = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
337*c4762a1bSJed Brown   PetscReal r     = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
338*c4762a1bSJed Brown 
339*c4762a1bSJed Brown   p->b     = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
340*c4762a1bSJed Brown   p->h     = s - p->b;
341*c4762a1bSJed Brown   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter;
342*c4762a1bSJed Brown }
343*c4762a1bSJed Brown 
344*c4762a1bSJed Brown static void THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal *beta2,PetscReal *dbeta2)
345*c4762a1bSJed Brown {
346*c4762a1bSJed Brown   if (thi->friction.irefgam == 0) {
347*c4762a1bSJed Brown     Units units = thi->units;
348*c4762a1bSJed Brown     thi->friction.irefgam = 1./(0.5*PetscSqr(thi->friction.refvel * units->meter / units->year));
349*c4762a1bSJed Brown     thi->friction.eps2    = 0.5*PetscSqr(thi->friction.epsvel * units->meter / units->year) * thi->friction.irefgam;
350*c4762a1bSJed Brown   }
351*c4762a1bSJed Brown   if (thi->friction.exponent == 0) {
352*c4762a1bSJed Brown     *beta2  = rbeta2;
353*c4762a1bSJed Brown     *dbeta2 = 0;
354*c4762a1bSJed Brown   } else {
355*c4762a1bSJed Brown     *beta2  = rbeta2 * PetscPowReal(thi->friction.eps2 + gam*thi->friction.irefgam,thi->friction.exponent);
356*c4762a1bSJed Brown     *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam*thi->friction.irefgam) * thi->friction.irefgam;
357*c4762a1bSJed Brown   }
358*c4762a1bSJed Brown }
359*c4762a1bSJed Brown 
360*c4762a1bSJed Brown static void THIViscosity(THI thi,PetscReal gam,PetscReal *eta,PetscReal *deta)
361*c4762a1bSJed Brown {
362*c4762a1bSJed Brown   PetscReal Bd2,eps,exponent;
363*c4762a1bSJed Brown   if (thi->viscosity.Bd2 == 0) {
364*c4762a1bSJed Brown     Units units = thi->units;
365*c4762a1bSJed Brown     const PetscReal
366*c4762a1bSJed Brown       n = 3.,                                           /* Glen exponent */
367*c4762a1bSJed Brown       p = 1. + 1./n,                                    /* for Stokes */
368*c4762a1bSJed Brown       A = 1.e-16 * PetscPowReal(units->Pascal,-n) / units->year, /* softness parameter (Pa^{-n}/s) */
369*c4762a1bSJed Brown       B = PetscPowReal(A,-1./n);                                 /* hardness parameter */
370*c4762a1bSJed Brown     thi->viscosity.Bd2      = B/2;
371*c4762a1bSJed Brown     thi->viscosity.exponent = (p-2)/2;
372*c4762a1bSJed Brown     thi->viscosity.eps      = 0.5*PetscSqr(1e-5 / units->year);
373*c4762a1bSJed Brown   }
374*c4762a1bSJed Brown   Bd2      = thi->viscosity.Bd2;
375*c4762a1bSJed Brown   exponent = thi->viscosity.exponent;
376*c4762a1bSJed Brown   eps      = thi->viscosity.eps;
377*c4762a1bSJed Brown   *eta     = Bd2 * PetscPowReal(eps + gam,exponent);
378*c4762a1bSJed Brown   *deta    = exponent * (*eta) / (eps + gam);
379*c4762a1bSJed Brown }
380*c4762a1bSJed Brown 
381*c4762a1bSJed Brown static void RangeUpdate(PetscReal *min,PetscReal *max,PetscReal x)
382*c4762a1bSJed Brown {
383*c4762a1bSJed Brown   if (x < *min) *min = x;
384*c4762a1bSJed Brown   if (x > *max) *max = x;
385*c4762a1bSJed Brown }
386*c4762a1bSJed Brown 
387*c4762a1bSJed Brown static void PRangeClear(PRange *p)
388*c4762a1bSJed Brown {
389*c4762a1bSJed Brown   p->cmin = p->min = 1e100;
390*c4762a1bSJed Brown   p->cmax = p->max = -1e100;
391*c4762a1bSJed Brown }
392*c4762a1bSJed Brown 
393*c4762a1bSJed Brown static PetscErrorCode PRangeMinMax(PRange *p,PetscReal min,PetscReal max)
394*c4762a1bSJed Brown {
395*c4762a1bSJed Brown 
396*c4762a1bSJed Brown   PetscFunctionBeginUser;
397*c4762a1bSJed Brown   p->cmin = min;
398*c4762a1bSJed Brown   p->cmax = max;
399*c4762a1bSJed Brown   if (min < p->min) p->min = min;
400*c4762a1bSJed Brown   if (max > p->max) p->max = max;
401*c4762a1bSJed Brown   PetscFunctionReturn(0);
402*c4762a1bSJed Brown }
403*c4762a1bSJed Brown 
404*c4762a1bSJed Brown static PetscErrorCode THIDestroy(THI *thi)
405*c4762a1bSJed Brown {
406*c4762a1bSJed Brown   PetscErrorCode ierr;
407*c4762a1bSJed Brown 
408*c4762a1bSJed Brown   PetscFunctionBeginUser;
409*c4762a1bSJed Brown   if (!*thi) PetscFunctionReturn(0);
410*c4762a1bSJed Brown   if (--((PetscObject)(*thi))->refct > 0) {*thi = 0; PetscFunctionReturn(0);}
411*c4762a1bSJed Brown   ierr = PetscFree((*thi)->units);CHKERRQ(ierr);
412*c4762a1bSJed Brown   ierr = PetscFree((*thi)->mattype);CHKERRQ(ierr);
413*c4762a1bSJed Brown   ierr = PetscHeaderDestroy(thi);CHKERRQ(ierr);
414*c4762a1bSJed Brown   PetscFunctionReturn(0);
415*c4762a1bSJed Brown }
416*c4762a1bSJed Brown 
417*c4762a1bSJed Brown static PetscErrorCode THICreate(MPI_Comm comm,THI *inthi)
418*c4762a1bSJed Brown {
419*c4762a1bSJed Brown   static PetscBool registered = PETSC_FALSE;
420*c4762a1bSJed Brown   THI              thi;
421*c4762a1bSJed Brown   Units            units;
422*c4762a1bSJed Brown   PetscErrorCode   ierr;
423*c4762a1bSJed Brown 
424*c4762a1bSJed Brown   PetscFunctionBeginUser;
425*c4762a1bSJed Brown   *inthi = 0;
426*c4762a1bSJed Brown   if (!registered) {
427*c4762a1bSJed Brown     ierr       = PetscClassIdRegister("Toy Hydrostatic Ice",&THI_CLASSID);CHKERRQ(ierr);
428*c4762a1bSJed Brown     registered = PETSC_TRUE;
429*c4762a1bSJed Brown   }
430*c4762a1bSJed Brown   ierr = PetscHeaderCreate(thi,THI_CLASSID,"THI","Toy Hydrostatic Ice","",comm,THIDestroy,0);CHKERRQ(ierr);
431*c4762a1bSJed Brown 
432*c4762a1bSJed Brown   ierr            = PetscNew(&thi->units);CHKERRQ(ierr);
433*c4762a1bSJed Brown   units           = thi->units;
434*c4762a1bSJed Brown   units->meter    = 1e-2;
435*c4762a1bSJed Brown   units->second   = 1e-7;
436*c4762a1bSJed Brown   units->kilogram = 1e-12;
437*c4762a1bSJed Brown 
438*c4762a1bSJed Brown   ierr = PetscOptionsBegin(comm,NULL,"Scaled units options","");CHKERRQ(ierr);
439*c4762a1bSJed Brown   {
440*c4762a1bSJed Brown     ierr = PetscOptionsReal("-units_meter","1 meter in scaled length units","",units->meter,&units->meter,NULL);CHKERRQ(ierr);
441*c4762a1bSJed Brown     ierr = PetscOptionsReal("-units_second","1 second in scaled time units","",units->second,&units->second,NULL);CHKERRQ(ierr);
442*c4762a1bSJed Brown     ierr = PetscOptionsReal("-units_kilogram","1 kilogram in scaled mass units","",units->kilogram,&units->kilogram,NULL);CHKERRQ(ierr);
443*c4762a1bSJed Brown   }
444*c4762a1bSJed Brown   ierr          = PetscOptionsEnd();CHKERRQ(ierr);
445*c4762a1bSJed Brown   units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
446*c4762a1bSJed Brown   units->year   = 31556926. * units->second; /* seconds per year */
447*c4762a1bSJed Brown 
448*c4762a1bSJed Brown   thi->Lx              = 10.e3;
449*c4762a1bSJed Brown   thi->Ly              = 10.e3;
450*c4762a1bSJed Brown   thi->Lz              = 1000;
451*c4762a1bSJed Brown   thi->dirichlet_scale = 1;
452*c4762a1bSJed Brown   thi->verbose         = PETSC_FALSE;
453*c4762a1bSJed Brown 
454*c4762a1bSJed Brown   ierr = PetscOptionsBegin(comm,NULL,"Toy Hydrostatic Ice options","");CHKERRQ(ierr);
455*c4762a1bSJed Brown   {
456*c4762a1bSJed Brown     QuadratureType quad       = QUAD_GAUSS;
457*c4762a1bSJed Brown     char           homexp[]   = "A";
458*c4762a1bSJed Brown     char           mtype[256] = MATSBAIJ;
459*c4762a1bSJed Brown     PetscReal      L,m = 1.0;
460*c4762a1bSJed Brown     PetscBool      flg;
461*c4762a1bSJed Brown     L    = thi->Lx;
462*c4762a1bSJed Brown     ierr = PetscOptionsReal("-thi_L","Domain size (m)","",L,&L,&flg);CHKERRQ(ierr);
463*c4762a1bSJed Brown     if (flg) thi->Lx = thi->Ly = L;
464*c4762a1bSJed Brown     ierr = PetscOptionsReal("-thi_Lx","X Domain size (m)","",thi->Lx,&thi->Lx,NULL);CHKERRQ(ierr);
465*c4762a1bSJed Brown     ierr = PetscOptionsReal("-thi_Ly","Y Domain size (m)","",thi->Ly,&thi->Ly,NULL);CHKERRQ(ierr);
466*c4762a1bSJed Brown     ierr = PetscOptionsReal("-thi_Lz","Z Domain size (m)","",thi->Lz,&thi->Lz,NULL);CHKERRQ(ierr);
467*c4762a1bSJed Brown     ierr = PetscOptionsString("-thi_hom","ISMIP-HOM experiment (A or C)","",homexp,homexp,sizeof(homexp),NULL);CHKERRQ(ierr);
468*c4762a1bSJed Brown     switch (homexp[0] = toupper(homexp[0])) {
469*c4762a1bSJed Brown     case 'A':
470*c4762a1bSJed Brown       thi->initialize = THIInitialize_HOM_A;
471*c4762a1bSJed Brown       thi->no_slip    = PETSC_TRUE;
472*c4762a1bSJed Brown       thi->alpha      = 0.5;
473*c4762a1bSJed Brown       break;
474*c4762a1bSJed Brown     case 'C':
475*c4762a1bSJed Brown       thi->initialize = THIInitialize_HOM_C;
476*c4762a1bSJed Brown       thi->no_slip    = PETSC_FALSE;
477*c4762a1bSJed Brown       thi->alpha      = 0.1;
478*c4762a1bSJed Brown       break;
479*c4762a1bSJed Brown     case 'X':
480*c4762a1bSJed Brown       thi->initialize = THIInitialize_HOM_X;
481*c4762a1bSJed Brown       thi->no_slip    = PETSC_FALSE;
482*c4762a1bSJed Brown       thi->alpha      = 0.3;
483*c4762a1bSJed Brown       break;
484*c4762a1bSJed Brown     case 'Y':
485*c4762a1bSJed Brown       thi->initialize = THIInitialize_HOM_Y;
486*c4762a1bSJed Brown       thi->no_slip    = PETSC_FALSE;
487*c4762a1bSJed Brown       thi->alpha      = 0.5;
488*c4762a1bSJed Brown       break;
489*c4762a1bSJed Brown     case 'Z':
490*c4762a1bSJed Brown       thi->initialize = THIInitialize_HOM_Z;
491*c4762a1bSJed Brown       thi->no_slip    = PETSC_FALSE;
492*c4762a1bSJed Brown       thi->alpha      = 0.5;
493*c4762a1bSJed Brown       break;
494*c4762a1bSJed Brown     default:
495*c4762a1bSJed Brown       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"HOM experiment '%c' not implemented",homexp[0]);
496*c4762a1bSJed Brown     }
497*c4762a1bSJed Brown     ierr = PetscOptionsEnum("-thi_quadrature","Quadrature to use for 3D elements","",QuadratureTypes,(PetscEnum)quad,(PetscEnum*)&quad,NULL);CHKERRQ(ierr);
498*c4762a1bSJed Brown     switch (quad) {
499*c4762a1bSJed Brown     case QUAD_GAUSS:
500*c4762a1bSJed Brown       HexQInterp = HexQInterp_Gauss;
501*c4762a1bSJed Brown       HexQDeriv  = HexQDeriv_Gauss;
502*c4762a1bSJed Brown       break;
503*c4762a1bSJed Brown     case QUAD_LOBATTO:
504*c4762a1bSJed Brown       HexQInterp = HexQInterp_Lobatto;
505*c4762a1bSJed Brown       HexQDeriv  = HexQDeriv_Lobatto;
506*c4762a1bSJed Brown       break;
507*c4762a1bSJed Brown     }
508*c4762a1bSJed Brown     ierr = PetscOptionsReal("-thi_alpha","Bed angle (degrees)","",thi->alpha,&thi->alpha,NULL);CHKERRQ(ierr);
509*c4762a1bSJed Brown 
510*c4762a1bSJed Brown     thi->friction.refvel = 100.;
511*c4762a1bSJed Brown     thi->friction.epsvel = 1.;
512*c4762a1bSJed Brown 
513*c4762a1bSJed Brown     ierr = PetscOptionsReal("-thi_friction_refvel","Reference velocity for sliding","",thi->friction.refvel,&thi->friction.refvel,NULL);CHKERRQ(ierr);
514*c4762a1bSJed Brown     ierr = PetscOptionsReal("-thi_friction_epsvel","Regularization velocity for sliding","",thi->friction.epsvel,&thi->friction.epsvel,NULL);CHKERRQ(ierr);
515*c4762a1bSJed Brown     ierr = PetscOptionsReal("-thi_friction_m","Friction exponent, 0=Coulomb, 1=Navier","",m,&m,NULL);CHKERRQ(ierr);
516*c4762a1bSJed Brown 
517*c4762a1bSJed Brown     thi->friction.exponent = (m-1)/2;
518*c4762a1bSJed Brown 
519*c4762a1bSJed Brown     ierr = PetscOptionsReal("-thi_dirichlet_scale","Scale Dirichlet boundary conditions by this factor","",thi->dirichlet_scale,&thi->dirichlet_scale,NULL);CHKERRQ(ierr);
520*c4762a1bSJed Brown     ierr = PetscOptionsReal("-thi_ssa_friction_scale","Scale slip boundary conditions by this factor in SSA (2D) assembly","",thi->ssa_friction_scale,&thi->ssa_friction_scale,NULL);CHKERRQ(ierr);
521*c4762a1bSJed Brown     ierr = PetscOptionsBool("-thi_coarse2d","Use a 2D coarse space corresponding to SSA","",thi->coarse2d,&thi->coarse2d,NULL);CHKERRQ(ierr);
522*c4762a1bSJed Brown     ierr = PetscOptionsBool("-thi_tridiagonal","Assemble a tridiagonal system (column coupling only) on the finest level","",thi->tridiagonal,&thi->tridiagonal,NULL);CHKERRQ(ierr);
523*c4762a1bSJed Brown     ierr = PetscOptionsFList("-thi_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)mtype,sizeof(mtype),NULL);CHKERRQ(ierr);
524*c4762a1bSJed Brown     ierr = PetscStrallocpy(mtype,(char**)&thi->mattype);CHKERRQ(ierr);
525*c4762a1bSJed Brown     ierr = PetscOptionsBool("-thi_verbose","Enable verbose output (like matrix sizes and statistics)","",thi->verbose,&thi->verbose,NULL);CHKERRQ(ierr);
526*c4762a1bSJed Brown   }
527*c4762a1bSJed Brown   ierr = PetscOptionsEnd();CHKERRQ(ierr);
528*c4762a1bSJed Brown 
529*c4762a1bSJed Brown   /* dimensionalize */
530*c4762a1bSJed Brown   thi->Lx    *= units->meter;
531*c4762a1bSJed Brown   thi->Ly    *= units->meter;
532*c4762a1bSJed Brown   thi->Lz    *= units->meter;
533*c4762a1bSJed Brown   thi->alpha *= PETSC_PI / 180;
534*c4762a1bSJed Brown 
535*c4762a1bSJed Brown   PRangeClear(&thi->eta);
536*c4762a1bSJed Brown   PRangeClear(&thi->beta2);
537*c4762a1bSJed Brown 
538*c4762a1bSJed Brown   {
539*c4762a1bSJed Brown     PetscReal u       = 1000*units->meter/(3e7*units->second),
540*c4762a1bSJed Brown               gradu   = u / (100*units->meter),eta,deta,
541*c4762a1bSJed Brown               rho     = 910 * units->kilogram/PetscPowReal(units->meter,3),
542*c4762a1bSJed Brown               grav    = 9.81 * units->meter/PetscSqr(units->second),
543*c4762a1bSJed Brown               driving = rho * grav * PetscSinReal(thi->alpha) * 1000*units->meter;
544*c4762a1bSJed Brown     THIViscosity(thi,0.5*gradu*gradu,&eta,&deta);
545*c4762a1bSJed Brown     thi->rhog = rho * grav;
546*c4762a1bSJed Brown     if (thi->verbose) {
547*c4762a1bSJed Brown       ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Units: meter %8.2g  second %8.2g  kg %8.2g  Pa %8.2g\n",(double)units->meter,(double)units->second,(double)units->kilogram,(double)units->Pascal);CHKERRQ(ierr);
548*c4762a1bSJed Brown       ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n",(double)thi->Lx,(double)thi->Ly,(double)thi->Lz,(double)(rho*grav*1e3*units->meter),(double)driving);CHKERRQ(ierr);
549*c4762a1bSJed Brown       ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)u,(double)gradu,(double)eta,(double)(2*eta*gradu),(double)(2*eta*gradu/driving));CHKERRQ(ierr);
550*c4762a1bSJed Brown       THIViscosity(thi,0.5*PetscSqr(1e-3*gradu),&eta,&deta);
551*c4762a1bSJed Brown       ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Small velocity 1m/a  %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)(1e-3*u),(double)(1e-3*gradu),(double)eta,(double)(2*eta*1e-3*gradu),(double)(2*eta*1e-3*gradu/driving));CHKERRQ(ierr);
552*c4762a1bSJed Brown     }
553*c4762a1bSJed Brown   }
554*c4762a1bSJed Brown 
555*c4762a1bSJed Brown   *inthi = thi;
556*c4762a1bSJed Brown   PetscFunctionReturn(0);
557*c4762a1bSJed Brown }
558*c4762a1bSJed Brown 
559*c4762a1bSJed Brown static PetscErrorCode THIInitializePrm(THI thi,DM da2prm,Vec prm)
560*c4762a1bSJed Brown {
561*c4762a1bSJed Brown   PrmNode        **p;
562*c4762a1bSJed Brown   PetscInt       i,j,xs,xm,ys,ym,mx,my;
563*c4762a1bSJed Brown   PetscErrorCode ierr;
564*c4762a1bSJed Brown 
565*c4762a1bSJed Brown   PetscFunctionBeginUser;
566*c4762a1bSJed Brown   ierr = DMDAGetGhostCorners(da2prm,&ys,&xs,0,&ym,&xm,0);CHKERRQ(ierr);
567*c4762a1bSJed Brown   ierr = DMDAGetInfo(da2prm,0, &my,&mx,0, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
568*c4762a1bSJed Brown   ierr = DMDAVecGetArray(da2prm,prm,&p);CHKERRQ(ierr);
569*c4762a1bSJed Brown   for (i=xs; i<xs+xm; i++) {
570*c4762a1bSJed Brown     for (j=ys; j<ys+ym; j++) {
571*c4762a1bSJed Brown       PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my;
572*c4762a1bSJed Brown       thi->initialize(thi,xx,yy,&p[i][j]);
573*c4762a1bSJed Brown     }
574*c4762a1bSJed Brown   }
575*c4762a1bSJed Brown   ierr = DMDAVecRestoreArray(da2prm,prm,&p);CHKERRQ(ierr);
576*c4762a1bSJed Brown   PetscFunctionReturn(0);
577*c4762a1bSJed Brown }
578*c4762a1bSJed Brown 
579*c4762a1bSJed Brown static PetscErrorCode THISetUpDM(THI thi,DM dm)
580*c4762a1bSJed Brown {
581*c4762a1bSJed Brown   PetscErrorCode  ierr;
582*c4762a1bSJed Brown   PetscInt        refinelevel,coarsenlevel,level,dim,Mx,My,Mz,mx,my,s;
583*c4762a1bSJed Brown   DMDAStencilType st;
584*c4762a1bSJed Brown   DM              da2prm;
585*c4762a1bSJed Brown   Vec             X;
586*c4762a1bSJed Brown 
587*c4762a1bSJed Brown   PetscFunctionBeginUser;
588*c4762a1bSJed Brown   ierr = DMDAGetInfo(dm,&dim, &Mz,&My,&Mx, 0,&my,&mx, 0,&s,0,0,0,&st);CHKERRQ(ierr);
589*c4762a1bSJed Brown   if (dim == 2) {
590*c4762a1bSJed Brown     ierr = DMDAGetInfo(dm,&dim, &My,&Mx,0, &my,&mx,0, 0,&s,0,0,0,&st);CHKERRQ(ierr);
591*c4762a1bSJed Brown   }
592*c4762a1bSJed Brown   ierr  = DMGetRefineLevel(dm,&refinelevel);CHKERRQ(ierr);
593*c4762a1bSJed Brown   ierr  = DMGetCoarsenLevel(dm,&coarsenlevel);CHKERRQ(ierr);
594*c4762a1bSJed Brown   level = refinelevel - coarsenlevel;
595*c4762a1bSJed Brown   ierr  = DMDACreate2d(PetscObjectComm((PetscObject)thi),DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,My,Mx,my,mx,sizeof(PrmNode)/sizeof(PetscScalar),s,0,0,&da2prm);CHKERRQ(ierr);
596*c4762a1bSJed Brown   ierr  = DMSetUp(da2prm);CHKERRQ(ierr);
597*c4762a1bSJed Brown   ierr  = DMCreateLocalVector(da2prm,&X);CHKERRQ(ierr);
598*c4762a1bSJed Brown   {
599*c4762a1bSJed Brown     PetscReal Lx = thi->Lx / thi->units->meter,Ly = thi->Ly / thi->units->meter,Lz = thi->Lz / thi->units->meter;
600*c4762a1bSJed Brown     if (dim == 2) {
601*c4762a1bSJed Brown       ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g, num elements %D x %D (%D), size (m) %g x %g\n",level,(double)Lx,(double)Ly,Mx,My,Mx*My,(double)(Lx/Mx),(double)(Ly/My));CHKERRQ(ierr);
602*c4762a1bSJed Brown     } else {
603*c4762a1bSJed Brown       ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g x %8.2g, num elements %D x %D x %D (%D), size (m) %g x %g x %g\n",level,(double)Lx,(double)Ly,(double)Lz,Mx,My,Mz,Mx*My*Mz,(double)(Lx/Mx),(double)(Ly/My),(double)(1000./(Mz-1)));CHKERRQ(ierr);
604*c4762a1bSJed Brown     }
605*c4762a1bSJed Brown   }
606*c4762a1bSJed Brown   ierr = THIInitializePrm(thi,da2prm,X);CHKERRQ(ierr);
607*c4762a1bSJed Brown   if (thi->tridiagonal) {       /* Reset coarse Jacobian evaluation */
608*c4762a1bSJed Brown     ierr = DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi);CHKERRQ(ierr);
609*c4762a1bSJed Brown   }
610*c4762a1bSJed Brown   if (thi->coarse2d) {
611*c4762a1bSJed Brown     ierr = DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_2D,thi);CHKERRQ(ierr);
612*c4762a1bSJed Brown   }
613*c4762a1bSJed Brown   ierr = PetscObjectCompose((PetscObject)dm,"DMDA2Prm",(PetscObject)da2prm);CHKERRQ(ierr);
614*c4762a1bSJed Brown   ierr = PetscObjectCompose((PetscObject)dm,"DMDA2Prm_Vec",(PetscObject)X);CHKERRQ(ierr);
615*c4762a1bSJed Brown   ierr = DMDestroy(&da2prm);CHKERRQ(ierr);
616*c4762a1bSJed Brown   ierr = VecDestroy(&X);CHKERRQ(ierr);
617*c4762a1bSJed Brown   PetscFunctionReturn(0);
618*c4762a1bSJed Brown }
619*c4762a1bSJed Brown 
620*c4762a1bSJed Brown static PetscErrorCode DMCoarsenHook_THI(DM dmf,DM dmc,void *ctx)
621*c4762a1bSJed Brown {
622*c4762a1bSJed Brown   THI            thi = (THI)ctx;
623*c4762a1bSJed Brown   PetscErrorCode ierr;
624*c4762a1bSJed Brown   PetscInt       rlevel,clevel;
625*c4762a1bSJed Brown 
626*c4762a1bSJed Brown   PetscFunctionBeginUser;
627*c4762a1bSJed Brown   ierr = THISetUpDM(thi,dmc);CHKERRQ(ierr);
628*c4762a1bSJed Brown   ierr = DMGetRefineLevel(dmc,&rlevel);CHKERRQ(ierr);
629*c4762a1bSJed Brown   ierr = DMGetCoarsenLevel(dmc,&clevel);CHKERRQ(ierr);
630*c4762a1bSJed Brown   if (rlevel-clevel == 0) {ierr = DMSetMatType(dmc,MATAIJ);CHKERRQ(ierr);}
631*c4762a1bSJed Brown   ierr = DMCoarsenHookAdd(dmc,DMCoarsenHook_THI,NULL,thi);CHKERRQ(ierr);
632*c4762a1bSJed Brown   PetscFunctionReturn(0);
633*c4762a1bSJed Brown }
634*c4762a1bSJed Brown 
635*c4762a1bSJed Brown static PetscErrorCode DMRefineHook_THI(DM dmc,DM dmf,void *ctx)
636*c4762a1bSJed Brown {
637*c4762a1bSJed Brown   THI            thi = (THI)ctx;
638*c4762a1bSJed Brown   PetscErrorCode ierr;
639*c4762a1bSJed Brown 
640*c4762a1bSJed Brown   PetscFunctionBeginUser;
641*c4762a1bSJed Brown   ierr = THISetUpDM(thi,dmf);CHKERRQ(ierr);
642*c4762a1bSJed Brown   ierr = DMSetMatType(dmf,thi->mattype);CHKERRQ(ierr);
643*c4762a1bSJed Brown   ierr = DMRefineHookAdd(dmf,DMRefineHook_THI,NULL,thi);CHKERRQ(ierr);
644*c4762a1bSJed Brown   /* With grid sequencing, a formerly-refined DM will later be coarsened by PCSetUp_MG */
645*c4762a1bSJed Brown   ierr = DMCoarsenHookAdd(dmf,DMCoarsenHook_THI,NULL,thi);CHKERRQ(ierr);
646*c4762a1bSJed Brown   PetscFunctionReturn(0);
647*c4762a1bSJed Brown }
648*c4762a1bSJed Brown 
649*c4762a1bSJed Brown static PetscErrorCode THIDAGetPrm(DM da,PrmNode ***prm)
650*c4762a1bSJed Brown {
651*c4762a1bSJed Brown   PetscErrorCode ierr;
652*c4762a1bSJed Brown   DM             da2prm;
653*c4762a1bSJed Brown   Vec            X;
654*c4762a1bSJed Brown 
655*c4762a1bSJed Brown   PetscFunctionBeginUser;
656*c4762a1bSJed Brown   ierr = PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm);CHKERRQ(ierr);
657*c4762a1bSJed Brown   if (!da2prm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA");
658*c4762a1bSJed Brown   ierr = PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X);CHKERRQ(ierr);
659*c4762a1bSJed Brown   if (!X) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA");
660*c4762a1bSJed Brown   ierr = DMDAVecGetArray(da2prm,X,prm);CHKERRQ(ierr);
661*c4762a1bSJed Brown   PetscFunctionReturn(0);
662*c4762a1bSJed Brown }
663*c4762a1bSJed Brown 
664*c4762a1bSJed Brown static PetscErrorCode THIDARestorePrm(DM da,PrmNode ***prm)
665*c4762a1bSJed Brown {
666*c4762a1bSJed Brown   PetscErrorCode ierr;
667*c4762a1bSJed Brown   DM             da2prm;
668*c4762a1bSJed Brown   Vec            X;
669*c4762a1bSJed Brown 
670*c4762a1bSJed Brown   PetscFunctionBeginUser;
671*c4762a1bSJed Brown   ierr = PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm);CHKERRQ(ierr);
672*c4762a1bSJed Brown   if (!da2prm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA");
673*c4762a1bSJed Brown   ierr = PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X);CHKERRQ(ierr);
674*c4762a1bSJed Brown   if (!X) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA");
675*c4762a1bSJed Brown   ierr = DMDAVecRestoreArray(da2prm,X,prm);CHKERRQ(ierr);
676*c4762a1bSJed Brown   PetscFunctionReturn(0);
677*c4762a1bSJed Brown }
678*c4762a1bSJed Brown 
679*c4762a1bSJed Brown static PetscErrorCode THIInitial(SNES snes,Vec X,void *ctx)
680*c4762a1bSJed Brown {
681*c4762a1bSJed Brown   THI            thi;
682*c4762a1bSJed Brown   PetscInt       i,j,k,xs,xm,ys,ym,zs,zm,mx,my;
683*c4762a1bSJed Brown   PetscReal      hx,hy;
684*c4762a1bSJed Brown   PrmNode        **prm;
685*c4762a1bSJed Brown   Node           ***x;
686*c4762a1bSJed Brown   PetscErrorCode ierr;
687*c4762a1bSJed Brown   DM             da;
688*c4762a1bSJed Brown 
689*c4762a1bSJed Brown   PetscFunctionBeginUser;
690*c4762a1bSJed Brown   ierr = SNESGetDM(snes,&da);CHKERRQ(ierr);
691*c4762a1bSJed Brown   ierr = DMGetApplicationContext(da,&thi);CHKERRQ(ierr);
692*c4762a1bSJed Brown   ierr = DMDAGetInfo(da,0, 0,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
693*c4762a1bSJed Brown   ierr = DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm);CHKERRQ(ierr);
694*c4762a1bSJed Brown   ierr = DMDAVecGetArray(da,X,&x);CHKERRQ(ierr);
695*c4762a1bSJed Brown   ierr = THIDAGetPrm(da,&prm);CHKERRQ(ierr);
696*c4762a1bSJed Brown   hx   = thi->Lx / mx;
697*c4762a1bSJed Brown   hy   = thi->Ly / my;
698*c4762a1bSJed Brown   for (i=xs; i<xs+xm; i++) {
699*c4762a1bSJed Brown     for (j=ys; j<ys+ym; j++) {
700*c4762a1bSJed Brown       for (k=zs; k<zs+zm; k++) {
701*c4762a1bSJed Brown         const PetscScalar zm1      = zm-1,
702*c4762a1bSJed Brown                           drivingx = thi->rhog * (prm[i+1][j].b+prm[i+1][j].h - prm[i-1][j].b-prm[i-1][j].h) / (2*hx),
703*c4762a1bSJed Brown                           drivingy = thi->rhog * (prm[i][j+1].b+prm[i][j+1].h - prm[i][j-1].b-prm[i][j-1].h) / (2*hy);
704*c4762a1bSJed Brown         x[i][j][k].u = 0. * drivingx * prm[i][j].h*(PetscScalar)k/zm1;
705*c4762a1bSJed Brown         x[i][j][k].v = 0. * drivingy * prm[i][j].h*(PetscScalar)k/zm1;
706*c4762a1bSJed Brown       }
707*c4762a1bSJed Brown     }
708*c4762a1bSJed Brown   }
709*c4762a1bSJed Brown   ierr = DMDAVecRestoreArray(da,X,&x);CHKERRQ(ierr);
710*c4762a1bSJed Brown   ierr = THIDARestorePrm(da,&prm);CHKERRQ(ierr);
711*c4762a1bSJed Brown   PetscFunctionReturn(0);
712*c4762a1bSJed Brown }
713*c4762a1bSJed Brown 
714*c4762a1bSJed Brown static void PointwiseNonlinearity(THI thi,const Node n[PETSC_RESTRICT],const PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscScalar *PETSC_RESTRICT u,PetscScalar *PETSC_RESTRICT v,PetscScalar du[PETSC_RESTRICT],PetscScalar dv[PETSC_RESTRICT],PetscReal *eta,PetscReal *deta)
715*c4762a1bSJed Brown {
716*c4762a1bSJed Brown   PetscInt    l,ll;
717*c4762a1bSJed Brown   PetscScalar gam;
718*c4762a1bSJed Brown 
719*c4762a1bSJed Brown   du[0] = du[1] = du[2] = 0;
720*c4762a1bSJed Brown   dv[0] = dv[1] = dv[2] = 0;
721*c4762a1bSJed Brown   *u    = 0;
722*c4762a1bSJed Brown   *v    = 0;
723*c4762a1bSJed Brown   for (l=0; l<8; l++) {
724*c4762a1bSJed Brown     *u += phi[l] * n[l].u;
725*c4762a1bSJed Brown     *v += phi[l] * n[l].v;
726*c4762a1bSJed Brown     for (ll=0; ll<3; ll++) {
727*c4762a1bSJed Brown       du[ll] += dphi[l][ll] * n[l].u;
728*c4762a1bSJed Brown       dv[ll] += dphi[l][ll] * n[l].v;
729*c4762a1bSJed Brown     }
730*c4762a1bSJed Brown   }
731*c4762a1bSJed Brown   gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]) + 0.25*PetscSqr(du[2]) + 0.25*PetscSqr(dv[2]);
732*c4762a1bSJed Brown   THIViscosity(thi,PetscRealPart(gam),eta,deta);
733*c4762a1bSJed Brown }
734*c4762a1bSJed Brown 
735*c4762a1bSJed Brown static void PointwiseNonlinearity2D(THI thi,Node n[],PetscReal phi[],PetscReal dphi[4][2],PetscScalar *u,PetscScalar *v,PetscScalar du[],PetscScalar dv[],PetscReal *eta,PetscReal *deta)
736*c4762a1bSJed Brown {
737*c4762a1bSJed Brown   PetscInt    l,ll;
738*c4762a1bSJed Brown   PetscScalar gam;
739*c4762a1bSJed Brown 
740*c4762a1bSJed Brown   du[0] = du[1] = 0;
741*c4762a1bSJed Brown   dv[0] = dv[1] = 0;
742*c4762a1bSJed Brown   *u    = 0;
743*c4762a1bSJed Brown   *v    = 0;
744*c4762a1bSJed Brown   for (l=0; l<4; l++) {
745*c4762a1bSJed Brown     *u += phi[l] * n[l].u;
746*c4762a1bSJed Brown     *v += phi[l] * n[l].v;
747*c4762a1bSJed Brown     for (ll=0; ll<2; ll++) {
748*c4762a1bSJed Brown       du[ll] += dphi[l][ll] * n[l].u;
749*c4762a1bSJed Brown       dv[ll] += dphi[l][ll] * n[l].v;
750*c4762a1bSJed Brown     }
751*c4762a1bSJed Brown   }
752*c4762a1bSJed Brown   gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]);
753*c4762a1bSJed Brown   THIViscosity(thi,PetscRealPart(gam),eta,deta);
754*c4762a1bSJed Brown }
755*c4762a1bSJed Brown 
756*c4762a1bSJed Brown static PetscErrorCode THIFunctionLocal(DMDALocalInfo *info,Node ***x,Node ***f,THI thi)
757*c4762a1bSJed Brown {
758*c4762a1bSJed Brown   PetscInt       xs,ys,xm,ym,zm,i,j,k,q,l;
759*c4762a1bSJed Brown   PetscReal      hx,hy,etamin,etamax,beta2min,beta2max;
760*c4762a1bSJed Brown   PrmNode        **prm;
761*c4762a1bSJed Brown   PetscErrorCode ierr;
762*c4762a1bSJed Brown 
763*c4762a1bSJed Brown   PetscFunctionBeginUser;
764*c4762a1bSJed Brown   xs = info->zs;
765*c4762a1bSJed Brown   ys = info->ys;
766*c4762a1bSJed Brown   xm = info->zm;
767*c4762a1bSJed Brown   ym = info->ym;
768*c4762a1bSJed Brown   zm = info->xm;
769*c4762a1bSJed Brown   hx = thi->Lx / info->mz;
770*c4762a1bSJed Brown   hy = thi->Ly / info->my;
771*c4762a1bSJed Brown 
772*c4762a1bSJed Brown   etamin   = 1e100;
773*c4762a1bSJed Brown   etamax   = 0;
774*c4762a1bSJed Brown   beta2min = 1e100;
775*c4762a1bSJed Brown   beta2max = 0;
776*c4762a1bSJed Brown 
777*c4762a1bSJed Brown   ierr = THIDAGetPrm(info->da,&prm);CHKERRQ(ierr);
778*c4762a1bSJed Brown 
779*c4762a1bSJed Brown   for (i=xs; i<xs+xm; i++) {
780*c4762a1bSJed Brown     for (j=ys; j<ys+ym; j++) {
781*c4762a1bSJed Brown       PrmNode pn[4];
782*c4762a1bSJed Brown       QuadExtract(prm,i,j,pn);
783*c4762a1bSJed Brown       for (k=0; k<zm-1; k++) {
784*c4762a1bSJed Brown         PetscInt  ls = 0;
785*c4762a1bSJed Brown         Node      n[8],*fn[8];
786*c4762a1bSJed Brown         PetscReal zn[8],etabase = 0;
787*c4762a1bSJed Brown         PrmHexGetZ(pn,k,zm,zn);
788*c4762a1bSJed Brown         HexExtract(x,i,j,k,n);
789*c4762a1bSJed Brown         HexExtractRef(f,i,j,k,fn);
790*c4762a1bSJed Brown         if (thi->no_slip && k == 0) {
791*c4762a1bSJed Brown           for (l=0; l<4; l++) n[l].u = n[l].v = 0;
792*c4762a1bSJed Brown           /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
793*c4762a1bSJed Brown           ls = 4;
794*c4762a1bSJed Brown         }
795*c4762a1bSJed Brown         for (q=0; q<8; q++) {
796*c4762a1bSJed Brown           PetscReal   dz[3],phi[8],dphi[8][3],jw,eta,deta;
797*c4762a1bSJed Brown           PetscScalar du[3],dv[3],u,v;
798*c4762a1bSJed Brown           HexGrad(HexQDeriv[q],zn,dz);
799*c4762a1bSJed Brown           HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
800*c4762a1bSJed Brown           PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
801*c4762a1bSJed Brown           jw /= thi->rhog;      /* scales residuals to be O(1) */
802*c4762a1bSJed Brown           if (q == 0) etabase = eta;
803*c4762a1bSJed Brown           RangeUpdate(&etamin,&etamax,eta);
804*c4762a1bSJed Brown           for (l=ls; l<8; l++) { /* test functions */
805*c4762a1bSJed Brown             const PetscReal ds[2] = {-PetscSinReal(thi->alpha),0};
806*c4762a1bSJed Brown             const PetscReal pp    = phi[l],*dp = dphi[l];
807*c4762a1bSJed Brown             fn[l]->u += dp[0]*jw*eta*(4.*du[0]+2.*dv[1]) + dp[1]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*du[2] + pp*jw*thi->rhog*ds[0];
808*c4762a1bSJed Brown             fn[l]->v += dp[1]*jw*eta*(2.*du[0]+4.*dv[1]) + dp[0]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*dv[2] + pp*jw*thi->rhog*ds[1];
809*c4762a1bSJed Brown           }
810*c4762a1bSJed Brown         }
811*c4762a1bSJed Brown         if (k == 0) { /* we are on a bottom face */
812*c4762a1bSJed Brown           if (thi->no_slip) {
813*c4762a1bSJed Brown             /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
814*c4762a1bSJed Brown             * conditions.  After shenanigans above, etabase contains the effective viscosity at the closest quadrature
815*c4762a1bSJed Brown             * point to the bed.  We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
816*c4762a1bSJed Brown             * diagonal entry corresponding to the adjacent node.  The fundamental scaling of the viscous part is in
817*c4762a1bSJed Brown             * diagu, diagv below.  This scaling is easy to recognize by considering the finite difference operator after
818*c4762a1bSJed Brown             * scaling by element size.  The no-slip Dirichlet condition is scaled by this factor, and also in the
819*c4762a1bSJed Brown             * assembled matrix (see the similar block in THIJacobianLocal).
820*c4762a1bSJed Brown             *
821*c4762a1bSJed Brown             * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
822*c4762a1bSJed Brown             * (nonlinearly in general) on the neighboring interior nodes through the local viscosity.  This will make
823*c4762a1bSJed Brown             * a matrix-free Jacobian have extra entries in the corresponding row.  We assemble only the diagonal part,
824*c4762a1bSJed Brown             * so the solution will exactly satisfy the boundary condition after the first linear iteration.
825*c4762a1bSJed Brown             */
826*c4762a1bSJed Brown             const PetscReal   hz    = PetscRealPart(pn[0].h)/(zm-1.);
827*c4762a1bSJed Brown             const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
828*c4762a1bSJed Brown             fn[0]->u = thi->dirichlet_scale*diagu*x[i][j][k].u;
829*c4762a1bSJed Brown             fn[0]->v = thi->dirichlet_scale*diagv*x[i][j][k].v;
830*c4762a1bSJed Brown           } else {              /* Integrate over bottom face to apply boundary condition */
831*c4762a1bSJed Brown             for (q=0; q<4; q++) {
832*c4762a1bSJed Brown               const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q];
833*c4762a1bSJed Brown               PetscScalar     u  =0,v=0,rbeta2=0;
834*c4762a1bSJed Brown               PetscReal       beta2,dbeta2;
835*c4762a1bSJed Brown               for (l=0; l<4; l++) {
836*c4762a1bSJed Brown                 u      += phi[l]*n[l].u;
837*c4762a1bSJed Brown                 v      += phi[l]*n[l].v;
838*c4762a1bSJed Brown                 rbeta2 += phi[l]*pn[l].beta2;
839*c4762a1bSJed Brown               }
840*c4762a1bSJed Brown               THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
841*c4762a1bSJed Brown               RangeUpdate(&beta2min,&beta2max,beta2);
842*c4762a1bSJed Brown               for (l=0; l<4; l++) {
843*c4762a1bSJed Brown                 const PetscReal pp = phi[l];
844*c4762a1bSJed Brown                 fn[ls+l]->u += pp*jw*beta2*u;
845*c4762a1bSJed Brown                 fn[ls+l]->v += pp*jw*beta2*v;
846*c4762a1bSJed Brown               }
847*c4762a1bSJed Brown             }
848*c4762a1bSJed Brown           }
849*c4762a1bSJed Brown         }
850*c4762a1bSJed Brown       }
851*c4762a1bSJed Brown     }
852*c4762a1bSJed Brown   }
853*c4762a1bSJed Brown 
854*c4762a1bSJed Brown   ierr = THIDARestorePrm(info->da,&prm);CHKERRQ(ierr);
855*c4762a1bSJed Brown 
856*c4762a1bSJed Brown   ierr = PRangeMinMax(&thi->eta,etamin,etamax);CHKERRQ(ierr);
857*c4762a1bSJed Brown   ierr = PRangeMinMax(&thi->beta2,beta2min,beta2max);CHKERRQ(ierr);
858*c4762a1bSJed Brown   PetscFunctionReturn(0);
859*c4762a1bSJed Brown }
860*c4762a1bSJed Brown 
861*c4762a1bSJed Brown static PetscErrorCode THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer)
862*c4762a1bSJed Brown {
863*c4762a1bSJed Brown   PetscErrorCode ierr;
864*c4762a1bSJed Brown   PetscReal      nrm;
865*c4762a1bSJed Brown   PetscInt       m;
866*c4762a1bSJed Brown   PetscMPIInt    rank;
867*c4762a1bSJed Brown 
868*c4762a1bSJed Brown   PetscFunctionBeginUser;
869*c4762a1bSJed Brown   ierr = MatNorm(B,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
870*c4762a1bSJed Brown   ierr = MatGetSize(B,&m,0);CHKERRQ(ierr);
871*c4762a1bSJed Brown   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&rank);CHKERRQ(ierr);
872*c4762a1bSJed Brown   if (!rank) {
873*c4762a1bSJed Brown     PetscScalar val0,val2;
874*c4762a1bSJed Brown     ierr = MatGetValue(B,0,0,&val0);CHKERRQ(ierr);
875*c4762a1bSJed Brown     ierr = MatGetValue(B,2,2,&val2);CHKERRQ(ierr);
876*c4762a1bSJed Brown     ierr = PetscViewerASCIIPrintf(viewer,"Matrix dim %D norm %8.2e (0,0) %8.2e  (2,2) %8.2e %8.2e <= eta <= %8.2e %8.2e <= beta2 <= %8.2e\n",m,(double)nrm,(double)PetscRealPart(val0),(double)PetscRealPart(val2),(double)thi->eta.cmin,(double)thi->eta.cmax,(double)thi->beta2.cmin,(double)thi->beta2.cmax);CHKERRQ(ierr);
877*c4762a1bSJed Brown   }
878*c4762a1bSJed Brown   PetscFunctionReturn(0);
879*c4762a1bSJed Brown }
880*c4762a1bSJed Brown 
881*c4762a1bSJed Brown static PetscErrorCode THISurfaceStatistics(DM da,Vec X,PetscReal *min,PetscReal *max,PetscReal *mean)
882*c4762a1bSJed Brown {
883*c4762a1bSJed Brown   PetscErrorCode ierr;
884*c4762a1bSJed Brown   Node           ***x;
885*c4762a1bSJed Brown   PetscInt       i,j,xs,ys,zs,xm,ym,zm,mx,my,mz;
886*c4762a1bSJed Brown   PetscReal      umin = 1e100,umax=-1e100;
887*c4762a1bSJed Brown   PetscScalar    usum = 0.0,gusum;
888*c4762a1bSJed Brown 
889*c4762a1bSJed Brown   PetscFunctionBeginUser;
890*c4762a1bSJed Brown   *min = *max = *mean = 0;
891*c4762a1bSJed Brown   ierr = DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
892*c4762a1bSJed Brown   ierr = DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm);CHKERRQ(ierr);
893*c4762a1bSJed Brown   if (zs != 0 || zm != mz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected decomposition");
894*c4762a1bSJed Brown   ierr = DMDAVecGetArray(da,X,&x);CHKERRQ(ierr);
895*c4762a1bSJed Brown   for (i=xs; i<xs+xm; i++) {
896*c4762a1bSJed Brown     for (j=ys; j<ys+ym; j++) {
897*c4762a1bSJed Brown       PetscReal u = PetscRealPart(x[i][j][zm-1].u);
898*c4762a1bSJed Brown       RangeUpdate(&umin,&umax,u);
899*c4762a1bSJed Brown       usum += u;
900*c4762a1bSJed Brown     }
901*c4762a1bSJed Brown   }
902*c4762a1bSJed Brown   ierr  = DMDAVecRestoreArray(da,X,&x);CHKERRQ(ierr);
903*c4762a1bSJed Brown   ierr  = MPI_Allreduce(&umin,min,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
904*c4762a1bSJed Brown   ierr  = MPI_Allreduce(&umax,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
905*c4762a1bSJed Brown   ierr  = MPI_Allreduce(&usum,&gusum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)da));CHKERRQ(ierr);
906*c4762a1bSJed Brown   *mean = PetscRealPart(gusum) / (mx*my);
907*c4762a1bSJed Brown   PetscFunctionReturn(0);
908*c4762a1bSJed Brown }
909*c4762a1bSJed Brown 
910*c4762a1bSJed Brown static PetscErrorCode THISolveStatistics(THI thi,SNES snes,PetscInt coarsened,const char name[])
911*c4762a1bSJed Brown {
912*c4762a1bSJed Brown   MPI_Comm       comm;
913*c4762a1bSJed Brown   Vec            X;
914*c4762a1bSJed Brown   DM             dm;
915*c4762a1bSJed Brown   PetscErrorCode ierr;
916*c4762a1bSJed Brown 
917*c4762a1bSJed Brown   PetscFunctionBeginUser;
918*c4762a1bSJed Brown   ierr = PetscObjectGetComm((PetscObject)thi,&comm);CHKERRQ(ierr);
919*c4762a1bSJed Brown   ierr = SNESGetSolution(snes,&X);CHKERRQ(ierr);
920*c4762a1bSJed Brown   ierr = SNESGetDM(snes,&dm);CHKERRQ(ierr);
921*c4762a1bSJed Brown   ierr = PetscPrintf(comm,"Solution statistics after solve: %s\n",name);CHKERRQ(ierr);
922*c4762a1bSJed Brown   {
923*c4762a1bSJed Brown     PetscInt            its,lits;
924*c4762a1bSJed Brown     SNESConvergedReason reason;
925*c4762a1bSJed Brown     ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr);
926*c4762a1bSJed Brown     ierr = SNESGetConvergedReason(snes,&reason);CHKERRQ(ierr);
927*c4762a1bSJed Brown     ierr = SNESGetLinearSolveIterations(snes,&lits);CHKERRQ(ierr);
928*c4762a1bSJed Brown     ierr = PetscPrintf(comm,"%s: Number of SNES iterations = %D, total linear iterations = %D\n",SNESConvergedReasons[reason],its,lits);CHKERRQ(ierr);
929*c4762a1bSJed Brown   }
930*c4762a1bSJed Brown   {
931*c4762a1bSJed Brown     PetscReal         nrm2,tmin[3]={1e100,1e100,1e100},tmax[3]={-1e100,-1e100,-1e100},min[3],max[3];
932*c4762a1bSJed Brown     PetscInt          i,j,m;
933*c4762a1bSJed Brown     const PetscScalar *x;
934*c4762a1bSJed Brown     ierr = VecNorm(X,NORM_2,&nrm2);CHKERRQ(ierr);
935*c4762a1bSJed Brown     ierr = VecGetLocalSize(X,&m);CHKERRQ(ierr);
936*c4762a1bSJed Brown     ierr = VecGetArrayRead(X,&x);CHKERRQ(ierr);
937*c4762a1bSJed Brown     for (i=0; i<m; i+=2) {
938*c4762a1bSJed Brown       PetscReal u = PetscRealPart(x[i]),v = PetscRealPart(x[i+1]),c = PetscSqrtReal(u*u+v*v);
939*c4762a1bSJed Brown       tmin[0] = PetscMin(u,tmin[0]);
940*c4762a1bSJed Brown       tmin[1] = PetscMin(v,tmin[1]);
941*c4762a1bSJed Brown       tmin[2] = PetscMin(c,tmin[2]);
942*c4762a1bSJed Brown       tmax[0] = PetscMax(u,tmax[0]);
943*c4762a1bSJed Brown       tmax[1] = PetscMax(v,tmax[1]);
944*c4762a1bSJed Brown       tmax[2] = PetscMax(c,tmax[2]);
945*c4762a1bSJed Brown     }
946*c4762a1bSJed Brown     ierr = VecRestoreArrayRead(X,&x);CHKERRQ(ierr);
947*c4762a1bSJed Brown     ierr = MPI_Allreduce(tmin,min,3,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)thi));CHKERRQ(ierr);
948*c4762a1bSJed Brown     ierr = MPI_Allreduce(tmax,max,3,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)thi));CHKERRQ(ierr);
949*c4762a1bSJed Brown     /* Dimensionalize to meters/year */
950*c4762a1bSJed Brown     nrm2 *= thi->units->year / thi->units->meter;
951*c4762a1bSJed Brown     for (j=0; j<3; j++) {
952*c4762a1bSJed Brown       min[j] *= thi->units->year / thi->units->meter;
953*c4762a1bSJed Brown       max[j] *= thi->units->year / thi->units->meter;
954*c4762a1bSJed Brown     }
955*c4762a1bSJed Brown     if (min[0] == 0.0) min[0] = 0.0;
956*c4762a1bSJed Brown     ierr = PetscPrintf(comm,"|X|_2 %g   %g <= u <=  %g   %g <= v <=  %g   %g <= c <=  %g \n",(double)nrm2,(double)min[0],(double)max[0],(double)min[1],(double)max[1],(double)min[2],(double)max[2]);CHKERRQ(ierr);
957*c4762a1bSJed Brown     {
958*c4762a1bSJed Brown       PetscReal umin,umax,umean;
959*c4762a1bSJed Brown       ierr   = THISurfaceStatistics(dm,X,&umin,&umax,&umean);CHKERRQ(ierr);
960*c4762a1bSJed Brown       umin  *= thi->units->year / thi->units->meter;
961*c4762a1bSJed Brown       umax  *= thi->units->year / thi->units->meter;
962*c4762a1bSJed Brown       umean *= thi->units->year / thi->units->meter;
963*c4762a1bSJed Brown       ierr   = PetscPrintf(comm,"Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n",(double)umin,(double)umax,(double)umean);CHKERRQ(ierr);
964*c4762a1bSJed Brown     }
965*c4762a1bSJed Brown     /* These values stay nondimensional */
966*c4762a1bSJed Brown     ierr = PetscPrintf(comm,"Global eta range   %g to %g converged range %g to %g\n",(double)thi->eta.min,(double)thi->eta.max,(double)thi->eta.cmin,(double)thi->eta.cmax);CHKERRQ(ierr);
967*c4762a1bSJed Brown     ierr = PetscPrintf(comm,"Global beta2 range %g to %g converged range %g to %g\n",(double)thi->beta2.min,(double)thi->beta2.max,(double)thi->beta2.cmin,(double)thi->beta2.cmax);CHKERRQ(ierr);
968*c4762a1bSJed Brown   }
969*c4762a1bSJed Brown   PetscFunctionReturn(0);
970*c4762a1bSJed Brown }
971*c4762a1bSJed Brown 
972*c4762a1bSJed Brown static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info,Node **x,Mat J,Mat B,THI thi)
973*c4762a1bSJed Brown {
974*c4762a1bSJed Brown   PetscInt       xs,ys,xm,ym,i,j,q,l,ll;
975*c4762a1bSJed Brown   PetscReal      hx,hy;
976*c4762a1bSJed Brown   PrmNode        **prm;
977*c4762a1bSJed Brown   PetscErrorCode ierr;
978*c4762a1bSJed Brown 
979*c4762a1bSJed Brown   PetscFunctionBeginUser;
980*c4762a1bSJed Brown   xs = info->ys;
981*c4762a1bSJed Brown   ys = info->xs;
982*c4762a1bSJed Brown   xm = info->ym;
983*c4762a1bSJed Brown   ym = info->xm;
984*c4762a1bSJed Brown   hx = thi->Lx / info->my;
985*c4762a1bSJed Brown   hy = thi->Ly / info->mx;
986*c4762a1bSJed Brown 
987*c4762a1bSJed Brown   ierr = MatZeroEntries(B);CHKERRQ(ierr);
988*c4762a1bSJed Brown   ierr = THIDAGetPrm(info->da,&prm);CHKERRQ(ierr);
989*c4762a1bSJed Brown 
990*c4762a1bSJed Brown   for (i=xs; i<xs+xm; i++) {
991*c4762a1bSJed Brown     for (j=ys; j<ys+ym; j++) {
992*c4762a1bSJed Brown       Node        n[4];
993*c4762a1bSJed Brown       PrmNode     pn[4];
994*c4762a1bSJed Brown       PetscScalar Ke[4*2][4*2];
995*c4762a1bSJed Brown       QuadExtract(prm,i,j,pn);
996*c4762a1bSJed Brown       QuadExtract(x,i,j,n);
997*c4762a1bSJed Brown       ierr = PetscMemzero(Ke,sizeof(Ke));CHKERRQ(ierr);
998*c4762a1bSJed Brown       for (q=0; q<4; q++) {
999*c4762a1bSJed Brown         PetscReal   phi[4],dphi[4][2],jw,eta,deta,beta2,dbeta2;
1000*c4762a1bSJed Brown         PetscScalar u,v,du[2],dv[2],h = 0,rbeta2 = 0;
1001*c4762a1bSJed Brown         for (l=0; l<4; l++) {
1002*c4762a1bSJed Brown           phi[l]     = QuadQInterp[q][l];
1003*c4762a1bSJed Brown           dphi[l][0] = QuadQDeriv[q][l][0]*2./hx;
1004*c4762a1bSJed Brown           dphi[l][1] = QuadQDeriv[q][l][1]*2./hy;
1005*c4762a1bSJed Brown           h         += phi[l] * pn[l].h;
1006*c4762a1bSJed Brown           rbeta2    += phi[l] * pn[l].beta2;
1007*c4762a1bSJed Brown         }
1008*c4762a1bSJed Brown         jw = 0.25*hx*hy / thi->rhog; /* rhog is only scaling */
1009*c4762a1bSJed Brown         PointwiseNonlinearity2D(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
1010*c4762a1bSJed Brown         THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
1011*c4762a1bSJed Brown         for (l=0; l<4; l++) {
1012*c4762a1bSJed Brown           const PetscReal pp = phi[l],*dp = dphi[l];
1013*c4762a1bSJed Brown           for (ll=0; ll<4; ll++) {
1014*c4762a1bSJed Brown             const PetscReal ppl = phi[ll],*dpl = dphi[ll];
1015*c4762a1bSJed Brown             PetscScalar     dgdu,dgdv;
1016*c4762a1bSJed Brown             dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1];
1017*c4762a1bSJed Brown             dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0];
1018*c4762a1bSJed Brown             /* Picard part */
1019*c4762a1bSJed Brown             Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale;
1020*c4762a1bSJed Brown             Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
1021*c4762a1bSJed Brown             Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
1022*c4762a1bSJed Brown             Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale;
1023*c4762a1bSJed Brown             /* extra Newton terms */
1024*c4762a1bSJed Brown             Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*u*ppl*thi->ssa_friction_scale;
1025*c4762a1bSJed Brown             Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*v*ppl*thi->ssa_friction_scale;
1026*c4762a1bSJed Brown             Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*u*ppl*thi->ssa_friction_scale;
1027*c4762a1bSJed Brown             Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*v*ppl*thi->ssa_friction_scale;
1028*c4762a1bSJed Brown           }
1029*c4762a1bSJed Brown         }
1030*c4762a1bSJed Brown       }
1031*c4762a1bSJed Brown       {
1032*c4762a1bSJed Brown         const MatStencil rc[4] = {{0,i,j,0},{0,i+1,j,0},{0,i+1,j+1,0},{0,i,j+1,0}};
1033*c4762a1bSJed Brown         ierr = MatSetValuesBlockedStencil(B,4,rc,4,rc,&Ke[0][0],ADD_VALUES);CHKERRQ(ierr);
1034*c4762a1bSJed Brown       }
1035*c4762a1bSJed Brown     }
1036*c4762a1bSJed Brown   }
1037*c4762a1bSJed Brown   ierr = THIDARestorePrm(info->da,&prm);CHKERRQ(ierr);
1038*c4762a1bSJed Brown 
1039*c4762a1bSJed Brown   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1040*c4762a1bSJed Brown   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1041*c4762a1bSJed Brown   ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1042*c4762a1bSJed Brown   if (thi->verbose) {ierr = THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
1043*c4762a1bSJed Brown   PetscFunctionReturn(0);
1044*c4762a1bSJed Brown }
1045*c4762a1bSJed Brown 
1046*c4762a1bSJed Brown static PetscErrorCode THIJacobianLocal_3D(DMDALocalInfo *info,Node ***x,Mat B,THI thi,THIAssemblyMode amode)
1047*c4762a1bSJed Brown {
1048*c4762a1bSJed Brown   PetscInt       xs,ys,xm,ym,zm,i,j,k,q,l,ll;
1049*c4762a1bSJed Brown   PetscReal      hx,hy;
1050*c4762a1bSJed Brown   PrmNode        **prm;
1051*c4762a1bSJed Brown   PetscErrorCode ierr;
1052*c4762a1bSJed Brown 
1053*c4762a1bSJed Brown   PetscFunctionBeginUser;
1054*c4762a1bSJed Brown   xs = info->zs;
1055*c4762a1bSJed Brown   ys = info->ys;
1056*c4762a1bSJed Brown   xm = info->zm;
1057*c4762a1bSJed Brown   ym = info->ym;
1058*c4762a1bSJed Brown   zm = info->xm;
1059*c4762a1bSJed Brown   hx = thi->Lx / info->mz;
1060*c4762a1bSJed Brown   hy = thi->Ly / info->my;
1061*c4762a1bSJed Brown 
1062*c4762a1bSJed Brown   ierr = MatZeroEntries(B);CHKERRQ(ierr);
1063*c4762a1bSJed Brown   ierr = MatSetOption(B,MAT_SUBSET_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
1064*c4762a1bSJed Brown   ierr = THIDAGetPrm(info->da,&prm);CHKERRQ(ierr);
1065*c4762a1bSJed Brown 
1066*c4762a1bSJed Brown   for (i=xs; i<xs+xm; i++) {
1067*c4762a1bSJed Brown     for (j=ys; j<ys+ym; j++) {
1068*c4762a1bSJed Brown       PrmNode pn[4];
1069*c4762a1bSJed Brown       QuadExtract(prm,i,j,pn);
1070*c4762a1bSJed Brown       for (k=0; k<zm-1; k++) {
1071*c4762a1bSJed Brown         Node        n[8];
1072*c4762a1bSJed Brown         PetscReal   zn[8],etabase = 0;
1073*c4762a1bSJed Brown         PetscScalar Ke[8*2][8*2];
1074*c4762a1bSJed Brown         PetscInt    ls = 0;
1075*c4762a1bSJed Brown 
1076*c4762a1bSJed Brown         PrmHexGetZ(pn,k,zm,zn);
1077*c4762a1bSJed Brown         HexExtract(x,i,j,k,n);
1078*c4762a1bSJed Brown         ierr = PetscMemzero(Ke,sizeof(Ke));CHKERRQ(ierr);
1079*c4762a1bSJed Brown         if (thi->no_slip && k == 0) {
1080*c4762a1bSJed Brown           for (l=0; l<4; l++) n[l].u = n[l].v = 0;
1081*c4762a1bSJed Brown           ls = 4;
1082*c4762a1bSJed Brown         }
1083*c4762a1bSJed Brown         for (q=0; q<8; q++) {
1084*c4762a1bSJed Brown           PetscReal   dz[3],phi[8],dphi[8][3],jw,eta,deta;
1085*c4762a1bSJed Brown           PetscScalar du[3],dv[3],u,v;
1086*c4762a1bSJed Brown           HexGrad(HexQDeriv[q],zn,dz);
1087*c4762a1bSJed Brown           HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
1088*c4762a1bSJed Brown           PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
1089*c4762a1bSJed Brown           jw /= thi->rhog;      /* residuals are scaled by this factor */
1090*c4762a1bSJed Brown           if (q == 0) etabase = eta;
1091*c4762a1bSJed Brown           for (l=ls; l<8; l++) { /* test functions */
1092*c4762a1bSJed Brown             const PetscReal *PETSC_RESTRICT dp = dphi[l];
1093*c4762a1bSJed Brown #if USE_SSE2_KERNELS
1094*c4762a1bSJed Brown             /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */
1095*c4762a1bSJed Brown             __m128d
1096*c4762a1bSJed Brown               p4         = _mm_set1_pd(4),p2 = _mm_set1_pd(2),p05 = _mm_set1_pd(0.5),
1097*c4762a1bSJed Brown               p42        = _mm_setr_pd(4,2),p24 = _mm_shuffle_pd(p42,p42,_MM_SHUFFLE2(0,1)),
1098*c4762a1bSJed Brown               du0        = _mm_set1_pd(du[0]),du1 = _mm_set1_pd(du[1]),du2 = _mm_set1_pd(du[2]),
1099*c4762a1bSJed Brown               dv0        = _mm_set1_pd(dv[0]),dv1 = _mm_set1_pd(dv[1]),dv2 = _mm_set1_pd(dv[2]),
1100*c4762a1bSJed Brown               jweta      = _mm_set1_pd(jw*eta),jwdeta = _mm_set1_pd(jw*deta),
1101*c4762a1bSJed Brown               dp0        = _mm_set1_pd(dp[0]),dp1 = _mm_set1_pd(dp[1]),dp2 = _mm_set1_pd(dp[2]),
1102*c4762a1bSJed Brown               dp0jweta   = _mm_mul_pd(dp0,jweta),dp1jweta = _mm_mul_pd(dp1,jweta),dp2jweta = _mm_mul_pd(dp2,jweta),
1103*c4762a1bSJed Brown               p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4,du0),_mm_mul_pd(p2,dv1)), /* 4 du0 + 2 dv1 */
1104*c4762a1bSJed Brown               p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4,dv1),_mm_mul_pd(p2,du0)), /* 4 dv1 + 2 du0 */
1105*c4762a1bSJed Brown               pdu2dv2    = _mm_unpacklo_pd(du2,dv2),                          /* [du2, dv2] */
1106*c4762a1bSJed Brown               du1pdv0    = _mm_add_pd(du1,dv0),                               /* du1 + dv0 */
1107*c4762a1bSJed Brown               t1         = _mm_mul_pd(dp0,p4du0p2dv1),                        /* dp0 (4 du0 + 2 dv1) */
1108*c4762a1bSJed Brown               t2         = _mm_mul_pd(dp1,p4dv1p2du0);                        /* dp1 (4 dv1 + 2 du0) */
1109*c4762a1bSJed Brown 
1110*c4762a1bSJed Brown #endif
1111*c4762a1bSJed Brown #if defined COMPUTE_LOWER_TRIANGULAR  /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */
1112*c4762a1bSJed Brown             for (ll=ls; ll<8; ll++) { /* trial functions */
1113*c4762a1bSJed Brown #else
1114*c4762a1bSJed Brown             for (ll=l; ll<8; ll++) {
1115*c4762a1bSJed Brown #endif
1116*c4762a1bSJed Brown               const PetscReal *PETSC_RESTRICT dpl = dphi[ll];
1117*c4762a1bSJed Brown               if (amode == THIASSEMBLY_TRIDIAGONAL && (l-ll)%4) continue; /* these entries would not be inserted */
1118*c4762a1bSJed Brown #if !USE_SSE2_KERNELS
1119*c4762a1bSJed Brown               /* The analytic Jacobian in nice, easy-to-read form */
1120*c4762a1bSJed Brown               {
1121*c4762a1bSJed Brown                 PetscScalar dgdu,dgdv;
1122*c4762a1bSJed Brown                 dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1] + 0.5*du[2]*dpl[2];
1123*c4762a1bSJed Brown                 dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0] + 0.5*dv[2]*dpl[2];
1124*c4762a1bSJed Brown                 /* Picard part */
1125*c4762a1bSJed Brown                 Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + dp[2]*jw*eta*dpl[2];
1126*c4762a1bSJed Brown                 Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
1127*c4762a1bSJed Brown                 Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
1128*c4762a1bSJed Brown                 Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + dp[2]*jw*eta*dpl[2];
1129*c4762a1bSJed Brown                 /* extra Newton terms */
1130*c4762a1bSJed Brown                 Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*du[2];
1131*c4762a1bSJed Brown                 Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*du[2];
1132*c4762a1bSJed Brown                 Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*dv[2];
1133*c4762a1bSJed Brown                 Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*dv[2];
1134*c4762a1bSJed Brown               }
1135*c4762a1bSJed Brown #else
1136*c4762a1bSJed Brown               /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed
1137*c4762a1bSJed Brown               * benefit.  On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost
1138*c4762a1bSJed Brown               * by 25 to 30 percent. */
1139*c4762a1bSJed Brown               {
1140*c4762a1bSJed Brown                 __m128d
1141*c4762a1bSJed Brown                   keu   = _mm_loadu_pd(&Ke[l*2+0][ll*2+0]),
1142*c4762a1bSJed Brown                   kev   = _mm_loadu_pd(&Ke[l*2+1][ll*2+0]),
1143*c4762a1bSJed Brown                   dpl01 = _mm_loadu_pd(&dpl[0]),dpl10 = _mm_shuffle_pd(dpl01,dpl01,_MM_SHUFFLE2(0,1)),dpl2 = _mm_set_sd(dpl[2]),
1144*c4762a1bSJed Brown                   t0,t3,pdgduv;
1145*c4762a1bSJed Brown                 keu = _mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta,p42),dpl01),
1146*c4762a1bSJed Brown                                                 _mm_add_pd(_mm_mul_pd(dp1jweta,dpl10),
1147*c4762a1bSJed Brown                                                            _mm_mul_pd(dp2jweta,dpl2))));
1148*c4762a1bSJed Brown                 kev = _mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta,p24),dpl01),
1149*c4762a1bSJed Brown                                                 _mm_add_pd(_mm_mul_pd(dp0jweta,dpl10),
1150*c4762a1bSJed Brown                                                            _mm_mul_pd(dp2jweta,_mm_shuffle_pd(dpl2,dpl2,_MM_SHUFFLE2(0,1))))));
1151*c4762a1bSJed Brown                 pdgduv = _mm_mul_pd(p05,_mm_add_pd(_mm_add_pd(_mm_mul_pd(p42,_mm_mul_pd(du0,dpl01)),
1152*c4762a1bSJed Brown                                                               _mm_mul_pd(p24,_mm_mul_pd(dv1,dpl01))),
1153*c4762a1bSJed Brown                                                    _mm_add_pd(_mm_mul_pd(du1pdv0,dpl10),
1154*c4762a1bSJed Brown                                                               _mm_mul_pd(pdu2dv2,_mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */
1155*c4762a1bSJed Brown                 t0 = _mm_mul_pd(jwdeta,pdgduv);  /* jw deta [dgdu, dgdv] */
1156*c4762a1bSJed Brown                 t3 = _mm_mul_pd(t0,du1pdv0);     /* t0 (du1 + dv0) */
1157*c4762a1bSJed Brown                 _mm_storeu_pd(&Ke[l*2+0][ll*2+0],_mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(t1,t0),
1158*c4762a1bSJed Brown                                                                            _mm_add_pd(_mm_mul_pd(dp1,t3),
1159*c4762a1bSJed Brown                                                                                       _mm_mul_pd(t0,_mm_mul_pd(dp2,du2))))));
1160*c4762a1bSJed Brown                 _mm_storeu_pd(&Ke[l*2+1][ll*2+0],_mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(t2,t0),
1161*c4762a1bSJed Brown                                                                            _mm_add_pd(_mm_mul_pd(dp0,t3),
1162*c4762a1bSJed Brown                                                                                       _mm_mul_pd(t0,_mm_mul_pd(dp2,dv2))))));
1163*c4762a1bSJed Brown               }
1164*c4762a1bSJed Brown #endif
1165*c4762a1bSJed Brown             }
1166*c4762a1bSJed Brown           }
1167*c4762a1bSJed Brown         }
1168*c4762a1bSJed Brown         if (k == 0) { /* on a bottom face */
1169*c4762a1bSJed Brown           if (thi->no_slip) {
1170*c4762a1bSJed Brown             const PetscReal   hz    = PetscRealPart(pn[0].h)/(zm-1);
1171*c4762a1bSJed Brown             const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
1172*c4762a1bSJed Brown             Ke[0][0] = thi->dirichlet_scale*diagu;
1173*c4762a1bSJed Brown             Ke[1][1] = thi->dirichlet_scale*diagv;
1174*c4762a1bSJed Brown           } else {
1175*c4762a1bSJed Brown             for (q=0; q<4; q++) {
1176*c4762a1bSJed Brown               const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q];
1177*c4762a1bSJed Brown               PetscScalar     u  =0,v=0,rbeta2=0;
1178*c4762a1bSJed Brown               PetscReal       beta2,dbeta2;
1179*c4762a1bSJed Brown               for (l=0; l<4; l++) {
1180*c4762a1bSJed Brown                 u      += phi[l]*n[l].u;
1181*c4762a1bSJed Brown                 v      += phi[l]*n[l].v;
1182*c4762a1bSJed Brown                 rbeta2 += phi[l]*pn[l].beta2;
1183*c4762a1bSJed Brown               }
1184*c4762a1bSJed Brown               THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
1185*c4762a1bSJed Brown               for (l=0; l<4; l++) {
1186*c4762a1bSJed Brown                 const PetscReal pp = phi[l];
1187*c4762a1bSJed Brown                 for (ll=0; ll<4; ll++) {
1188*c4762a1bSJed Brown                   const PetscReal ppl = phi[ll];
1189*c4762a1bSJed Brown                   Ke[l*2+0][ll*2+0] += pp*jw*beta2*ppl + pp*jw*dbeta2*u*u*ppl;
1190*c4762a1bSJed Brown                   Ke[l*2+0][ll*2+1] +=                   pp*jw*dbeta2*u*v*ppl;
1191*c4762a1bSJed Brown                   Ke[l*2+1][ll*2+0] +=                   pp*jw*dbeta2*v*u*ppl;
1192*c4762a1bSJed Brown                   Ke[l*2+1][ll*2+1] += pp*jw*beta2*ppl + pp*jw*dbeta2*v*v*ppl;
1193*c4762a1bSJed Brown                 }
1194*c4762a1bSJed Brown               }
1195*c4762a1bSJed Brown             }
1196*c4762a1bSJed Brown           }
1197*c4762a1bSJed Brown         }
1198*c4762a1bSJed Brown         {
1199*c4762a1bSJed Brown           const MatStencil rc[8] = {{i,j,k,0},{i+1,j,k,0},{i+1,j+1,k,0},{i,j+1,k,0},{i,j,k+1,0},{i+1,j,k+1,0},{i+1,j+1,k+1,0},{i,j+1,k+1,0}};
1200*c4762a1bSJed Brown           if (amode == THIASSEMBLY_TRIDIAGONAL) {
1201*c4762a1bSJed Brown             for (l=0; l<4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */
1202*c4762a1bSJed Brown               const PetscInt   l4     = l+4;
1203*c4762a1bSJed Brown               const MatStencil rcl[2] = {{rc[l].k,rc[l].j,rc[l].i,0},{rc[l4].k,rc[l4].j,rc[l4].i,0}};
1204*c4762a1bSJed Brown #if defined COMPUTE_LOWER_TRIANGULAR
1205*c4762a1bSJed Brown               const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]},
1206*c4762a1bSJed Brown                                              {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]},
1207*c4762a1bSJed Brown                                              {Ke[2*l4+0][2*l+0],Ke[2*l4+0][2*l+1],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]},
1208*c4762a1bSJed Brown                                              {Ke[2*l4+1][2*l+0],Ke[2*l4+1][2*l+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}};
1209*c4762a1bSJed Brown #else
1210*c4762a1bSJed Brown               /* Same as above except for the lower-left block */
1211*c4762a1bSJed Brown               const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]},
1212*c4762a1bSJed Brown                                              {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]},
1213*c4762a1bSJed Brown                                              {Ke[2*l+0][2*l4+0],Ke[2*l+1][2*l4+0],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]},
1214*c4762a1bSJed Brown                                              {Ke[2*l+0][2*l4+1],Ke[2*l+1][2*l4+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}};
1215*c4762a1bSJed Brown #endif
1216*c4762a1bSJed Brown               ierr = MatSetValuesBlockedStencil(B,2,rcl,2,rcl,&Kel[0][0],ADD_VALUES);CHKERRQ(ierr);
1217*c4762a1bSJed Brown             }
1218*c4762a1bSJed Brown           } else {
1219*c4762a1bSJed Brown #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1220*c4762a1bSJed Brown             for (l=0; l<8; l++) {
1221*c4762a1bSJed Brown               for (ll=l+1; ll<8; ll++) {
1222*c4762a1bSJed Brown                 Ke[ll*2+0][l*2+0] = Ke[l*2+0][ll*2+0];
1223*c4762a1bSJed Brown                 Ke[ll*2+1][l*2+0] = Ke[l*2+0][ll*2+1];
1224*c4762a1bSJed Brown                 Ke[ll*2+0][l*2+1] = Ke[l*2+1][ll*2+0];
1225*c4762a1bSJed Brown                 Ke[ll*2+1][l*2+1] = Ke[l*2+1][ll*2+1];
1226*c4762a1bSJed Brown               }
1227*c4762a1bSJed Brown             }
1228*c4762a1bSJed Brown #endif
1229*c4762a1bSJed Brown             ierr = MatSetValuesBlockedStencil(B,8,rc,8,rc,&Ke[0][0],ADD_VALUES);CHKERRQ(ierr);
1230*c4762a1bSJed Brown           }
1231*c4762a1bSJed Brown         }
1232*c4762a1bSJed Brown       }
1233*c4762a1bSJed Brown     }
1234*c4762a1bSJed Brown   }
1235*c4762a1bSJed Brown   ierr = THIDARestorePrm(info->da,&prm);CHKERRQ(ierr);
1236*c4762a1bSJed Brown 
1237*c4762a1bSJed Brown   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1238*c4762a1bSJed Brown   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1239*c4762a1bSJed Brown   ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1240*c4762a1bSJed Brown   if (thi->verbose) {ierr = THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
1241*c4762a1bSJed Brown   PetscFunctionReturn(0);
1242*c4762a1bSJed Brown }
1243*c4762a1bSJed Brown 
1244*c4762a1bSJed Brown static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi)
1245*c4762a1bSJed Brown {
1246*c4762a1bSJed Brown   PetscErrorCode ierr;
1247*c4762a1bSJed Brown 
1248*c4762a1bSJed Brown   PetscFunctionBeginUser;
1249*c4762a1bSJed Brown   ierr  = THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_FULL);CHKERRQ(ierr);
1250*c4762a1bSJed Brown   PetscFunctionReturn(0);
1251*c4762a1bSJed Brown }
1252*c4762a1bSJed Brown 
1253*c4762a1bSJed Brown static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi)
1254*c4762a1bSJed Brown {
1255*c4762a1bSJed Brown   PetscErrorCode ierr;
1256*c4762a1bSJed Brown 
1257*c4762a1bSJed Brown   PetscFunctionBeginUser;
1258*c4762a1bSJed Brown   ierr = THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_TRIDIAGONAL);CHKERRQ(ierr);
1259*c4762a1bSJed Brown   PetscFunctionReturn(0);
1260*c4762a1bSJed Brown }
1261*c4762a1bSJed Brown 
1262*c4762a1bSJed Brown static PetscErrorCode DMRefineHierarchy_THI(DM dac0,PetscInt nlevels,DM hierarchy[])
1263*c4762a1bSJed Brown {
1264*c4762a1bSJed Brown   PetscErrorCode  ierr;
1265*c4762a1bSJed Brown   THI             thi;
1266*c4762a1bSJed Brown   PetscInt        dim,M,N,m,n,s,dof;
1267*c4762a1bSJed Brown   DM              dac,daf;
1268*c4762a1bSJed Brown   DMDAStencilType st;
1269*c4762a1bSJed Brown   DM_DA           *ddf,*ddc;
1270*c4762a1bSJed Brown 
1271*c4762a1bSJed Brown   PetscFunctionBeginUser;
1272*c4762a1bSJed Brown   ierr = PetscObjectQuery((PetscObject)dac0,"THI",(PetscObject*)&thi);CHKERRQ(ierr);
1273*c4762a1bSJed Brown   if (!thi) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot refine this DMDA, missing composed THI instance");
1274*c4762a1bSJed Brown   if (nlevels > 1) {
1275*c4762a1bSJed Brown     ierr = DMRefineHierarchy(dac0,nlevels-1,hierarchy);CHKERRQ(ierr);
1276*c4762a1bSJed Brown     dac  = hierarchy[nlevels-2];
1277*c4762a1bSJed Brown   } else {
1278*c4762a1bSJed Brown     dac = dac0;
1279*c4762a1bSJed Brown   }
1280*c4762a1bSJed Brown   ierr = DMDAGetInfo(dac,&dim, &N,&M,0, &n,&m,0, &dof,&s,0,0,0,&st);CHKERRQ(ierr);
1281*c4762a1bSJed Brown   if (dim != 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"This function can only refine 2D DMDAs");
1282*c4762a1bSJed Brown 
1283*c4762a1bSJed Brown   /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */
1284*c4762a1bSJed Brown   ierr = DMDACreate3d(PetscObjectComm((PetscObject)dac),DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,thi->zlevels,N,M,1,n,m,dof,s,NULL,NULL,NULL,&daf);CHKERRQ(ierr);
1285*c4762a1bSJed Brown   ierr = DMSetUp(daf);CHKERRQ(ierr);
1286*c4762a1bSJed Brown 
1287*c4762a1bSJed Brown   daf->ops->creatematrix        = dac->ops->creatematrix;
1288*c4762a1bSJed Brown   daf->ops->createinterpolation = dac->ops->createinterpolation;
1289*c4762a1bSJed Brown   daf->ops->getcoloring         = dac->ops->getcoloring;
1290*c4762a1bSJed Brown   ddf                           = (DM_DA*)daf->data;
1291*c4762a1bSJed Brown   ddc                           = (DM_DA*)dac->data;
1292*c4762a1bSJed Brown   ddf->interptype               = ddc->interptype;
1293*c4762a1bSJed Brown 
1294*c4762a1bSJed Brown   ierr = DMDASetFieldName(daf,0,"x-velocity");CHKERRQ(ierr);
1295*c4762a1bSJed Brown   ierr = DMDASetFieldName(daf,1,"y-velocity");CHKERRQ(ierr);
1296*c4762a1bSJed Brown 
1297*c4762a1bSJed Brown   hierarchy[nlevels-1] = daf;
1298*c4762a1bSJed Brown   PetscFunctionReturn(0);
1299*c4762a1bSJed Brown }
1300*c4762a1bSJed Brown 
1301*c4762a1bSJed Brown static PetscErrorCode DMCreateInterpolation_DA_THI(DM dac,DM daf,Mat *A,Vec *scale)
1302*c4762a1bSJed Brown {
1303*c4762a1bSJed Brown   PetscErrorCode ierr;
1304*c4762a1bSJed Brown   PetscInt       dim;
1305*c4762a1bSJed Brown 
1306*c4762a1bSJed Brown   PetscFunctionBeginUser;
1307*c4762a1bSJed Brown   PetscValidHeaderSpecific(dac,DM_CLASSID,1);
1308*c4762a1bSJed Brown   PetscValidHeaderSpecific(daf,DM_CLASSID,2);
1309*c4762a1bSJed Brown   PetscValidPointer(A,3);
1310*c4762a1bSJed Brown   if (scale) PetscValidPointer(scale,4);
1311*c4762a1bSJed Brown   ierr = DMDAGetInfo(daf,&dim,0,0,0,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr);
1312*c4762a1bSJed Brown   if (dim  == 2) {
1313*c4762a1bSJed Brown     /* We are in the 2D problem and use normal DMDA interpolation */
1314*c4762a1bSJed Brown     ierr = DMCreateInterpolation(dac,daf,A,scale);CHKERRQ(ierr);
1315*c4762a1bSJed Brown   } else {
1316*c4762a1bSJed Brown     PetscInt i,j,k,xs,ys,zs,xm,ym,zm,mx,my,mz,rstart,cstart;
1317*c4762a1bSJed Brown     Mat      B;
1318*c4762a1bSJed Brown 
1319*c4762a1bSJed Brown     ierr = DMDAGetInfo(daf,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
1320*c4762a1bSJed Brown     ierr = DMDAGetCorners(daf,&zs,&ys,&xs,&zm,&ym,&xm);CHKERRQ(ierr);
1321*c4762a1bSJed Brown     if (zs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"unexpected");
1322*c4762a1bSJed Brown     ierr = MatCreate(PetscObjectComm((PetscObject)daf),&B);CHKERRQ(ierr);
1323*c4762a1bSJed Brown     ierr = MatSetSizes(B,xm*ym*zm,xm*ym,mx*my*mz,mx*my);CHKERRQ(ierr);
1324*c4762a1bSJed Brown 
1325*c4762a1bSJed Brown     ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr);
1326*c4762a1bSJed Brown     ierr = MatSeqAIJSetPreallocation(B,1,NULL);CHKERRQ(ierr);
1327*c4762a1bSJed Brown     ierr = MatMPIAIJSetPreallocation(B,1,NULL,0,NULL);CHKERRQ(ierr);
1328*c4762a1bSJed Brown     ierr = MatGetOwnershipRange(B,&rstart,NULL);CHKERRQ(ierr);
1329*c4762a1bSJed Brown     ierr = MatGetOwnershipRangeColumn(B,&cstart,NULL);CHKERRQ(ierr);
1330*c4762a1bSJed Brown     for (i=xs; i<xs+xm; i++) {
1331*c4762a1bSJed Brown       for (j=ys; j<ys+ym; j++) {
1332*c4762a1bSJed Brown         for (k=zs; k<zs+zm; k++) {
1333*c4762a1bSJed Brown           PetscInt    i2  = i*ym+j,i3 = i2*zm+k;
1334*c4762a1bSJed Brown           PetscScalar val = ((k == 0 || k == mz-1) ? 0.5 : 1.) / (mz-1.); /* Integration using trapezoid rule */
1335*c4762a1bSJed Brown           ierr = MatSetValue(B,cstart+i3,rstart+i2,val,INSERT_VALUES);CHKERRQ(ierr);
1336*c4762a1bSJed Brown         }
1337*c4762a1bSJed Brown       }
1338*c4762a1bSJed Brown     }
1339*c4762a1bSJed Brown     ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1340*c4762a1bSJed Brown     ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1341*c4762a1bSJed Brown     ierr = MatCreateMAIJ(B,sizeof(Node)/sizeof(PetscScalar),A);CHKERRQ(ierr);
1342*c4762a1bSJed Brown     ierr = MatDestroy(&B);CHKERRQ(ierr);
1343*c4762a1bSJed Brown   }
1344*c4762a1bSJed Brown   PetscFunctionReturn(0);
1345*c4762a1bSJed Brown }
1346*c4762a1bSJed Brown 
1347*c4762a1bSJed Brown static PetscErrorCode DMCreateMatrix_THI_Tridiagonal(DM da,Mat *J)
1348*c4762a1bSJed Brown {
1349*c4762a1bSJed Brown   PetscErrorCode         ierr;
1350*c4762a1bSJed Brown   Mat                    A;
1351*c4762a1bSJed Brown   PetscInt               xm,ym,zm,dim,dof = 2,starts[3],dims[3];
1352*c4762a1bSJed Brown   ISLocalToGlobalMapping ltog;
1353*c4762a1bSJed Brown 
1354*c4762a1bSJed Brown   PetscFunctionBeginUser;
1355*c4762a1bSJed Brown   ierr = DMDAGetInfo(da,&dim, 0,0,0, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
1356*c4762a1bSJed Brown   if (dim != 3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Expected DMDA to be 3D");
1357*c4762a1bSJed Brown   ierr = DMDAGetCorners(da,0,0,0,&zm,&ym,&xm);CHKERRQ(ierr);
1358*c4762a1bSJed Brown   ierr = DMGetLocalToGlobalMapping(da,&ltog);CHKERRQ(ierr);
1359*c4762a1bSJed Brown   ierr = MatCreate(PetscObjectComm((PetscObject)da),&A);CHKERRQ(ierr);
1360*c4762a1bSJed Brown   ierr = MatSetSizes(A,dof*xm*ym*zm,dof*xm*ym*zm,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
1361*c4762a1bSJed Brown   ierr = MatSetType(A,da->mattype);CHKERRQ(ierr);
1362*c4762a1bSJed Brown   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
1363*c4762a1bSJed Brown   ierr = MatSeqAIJSetPreallocation(A,3*2,NULL);CHKERRQ(ierr);
1364*c4762a1bSJed Brown   ierr = MatMPIAIJSetPreallocation(A,3*2,NULL,0,NULL);CHKERRQ(ierr);
1365*c4762a1bSJed Brown   ierr = MatSeqBAIJSetPreallocation(A,2,3,NULL);CHKERRQ(ierr);
1366*c4762a1bSJed Brown   ierr = MatMPIBAIJSetPreallocation(A,2,3,NULL,0,NULL);CHKERRQ(ierr);
1367*c4762a1bSJed Brown   ierr = MatSeqSBAIJSetPreallocation(A,2,2,NULL);CHKERRQ(ierr);
1368*c4762a1bSJed Brown   ierr = MatMPISBAIJSetPreallocation(A,2,2,NULL,0,NULL);CHKERRQ(ierr);
1369*c4762a1bSJed Brown   ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr);
1370*c4762a1bSJed Brown   ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr);
1371*c4762a1bSJed Brown   ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr);
1372*c4762a1bSJed Brown   *J   = A;
1373*c4762a1bSJed Brown   PetscFunctionReturn(0);
1374*c4762a1bSJed Brown }
1375*c4762a1bSJed Brown 
1376*c4762a1bSJed Brown static PetscErrorCode THIDAVecView_VTK_XML(THI thi,DM da,Vec X,const char filename[])
1377*c4762a1bSJed Brown {
1378*c4762a1bSJed Brown   const PetscInt    dof   = 2;
1379*c4762a1bSJed Brown   Units             units = thi->units;
1380*c4762a1bSJed Brown   MPI_Comm          comm;
1381*c4762a1bSJed Brown   PetscErrorCode    ierr;
1382*c4762a1bSJed Brown   PetscViewer       viewer;
1383*c4762a1bSJed Brown   PetscMPIInt       rank,size,tag,nn,nmax;
1384*c4762a1bSJed Brown   PetscInt          mx,my,mz,r,range[6];
1385*c4762a1bSJed Brown   const PetscScalar *x;
1386*c4762a1bSJed Brown 
1387*c4762a1bSJed Brown   PetscFunctionBeginUser;
1388*c4762a1bSJed Brown   ierr = PetscObjectGetComm((PetscObject)thi,&comm);CHKERRQ(ierr);
1389*c4762a1bSJed Brown   ierr = DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
1390*c4762a1bSJed Brown   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1391*c4762a1bSJed Brown   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1392*c4762a1bSJed Brown   ierr = PetscViewerASCIIOpen(comm,filename,&viewer);CHKERRQ(ierr);
1393*c4762a1bSJed Brown   ierr = PetscViewerASCIIPrintf(viewer,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n");CHKERRQ(ierr);
1394*c4762a1bSJed Brown   ierr = PetscViewerASCIIPrintf(viewer,"  <StructuredGrid WholeExtent=\"%d %D %d %D %d %D\">\n",0,mz-1,0,my-1,0,mx-1);CHKERRQ(ierr);
1395*c4762a1bSJed Brown 
1396*c4762a1bSJed Brown   ierr = DMDAGetCorners(da,range,range+1,range+2,range+3,range+4,range+5);CHKERRQ(ierr);
1397*c4762a1bSJed Brown   ierr = PetscMPIIntCast(range[3]*range[4]*range[5]*dof,&nn);CHKERRQ(ierr);
1398*c4762a1bSJed Brown   ierr = MPI_Reduce(&nn,&nmax,1,MPI_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1399*c4762a1bSJed Brown   tag  = ((PetscObject) viewer)->tag;
1400*c4762a1bSJed Brown   ierr = VecGetArrayRead(X,&x);CHKERRQ(ierr);
1401*c4762a1bSJed Brown   if (!rank) {
1402*c4762a1bSJed Brown     PetscScalar *array;
1403*c4762a1bSJed Brown     ierr = PetscMalloc1(nmax,&array);CHKERRQ(ierr);
1404*c4762a1bSJed Brown     for (r=0; r<size; r++) {
1405*c4762a1bSJed Brown       PetscInt          i,j,k,xs,xm,ys,ym,zs,zm;
1406*c4762a1bSJed Brown       const PetscScalar *ptr;
1407*c4762a1bSJed Brown       MPI_Status        status;
1408*c4762a1bSJed Brown       if (r) {
1409*c4762a1bSJed Brown         ierr = MPI_Recv(range,6,MPIU_INT,r,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
1410*c4762a1bSJed Brown       }
1411*c4762a1bSJed Brown       zs = range[0];ys = range[1];xs = range[2];zm = range[3];ym = range[4];xm = range[5];
1412*c4762a1bSJed Brown       if (xm*ym*zm*dof > nmax) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen");
1413*c4762a1bSJed Brown       if (r) {
1414*c4762a1bSJed Brown         ierr = MPI_Recv(array,nmax,MPIU_SCALAR,r,tag,comm,&status);CHKERRQ(ierr);
1415*c4762a1bSJed Brown         ierr = MPI_Get_count(&status,MPIU_SCALAR,&nn);CHKERRQ(ierr);
1416*c4762a1bSJed Brown         if (nn != xm*ym*zm*dof) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen");
1417*c4762a1bSJed Brown         ptr = array;
1418*c4762a1bSJed Brown       } else ptr = x;
1419*c4762a1bSJed Brown       ierr = PetscViewerASCIIPrintf(viewer,"    <Piece Extent=\"%D %D %D %D %D %D\">\n",zs,zs+zm-1,ys,ys+ym-1,xs,xs+xm-1);CHKERRQ(ierr);
1420*c4762a1bSJed Brown 
1421*c4762a1bSJed Brown       ierr = PetscViewerASCIIPrintf(viewer,"      <Points>\n");CHKERRQ(ierr);
1422*c4762a1bSJed Brown       ierr = PetscViewerASCIIPrintf(viewer,"        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n");CHKERRQ(ierr);
1423*c4762a1bSJed Brown       for (i=xs; i<xs+xm; i++) {
1424*c4762a1bSJed Brown         for (j=ys; j<ys+ym; j++) {
1425*c4762a1bSJed Brown           for (k=zs; k<zs+zm; k++) {
1426*c4762a1bSJed Brown             PrmNode   p;
1427*c4762a1bSJed Brown             PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my,zz;
1428*c4762a1bSJed Brown             thi->initialize(thi,xx,yy,&p);
1429*c4762a1bSJed Brown             zz   = PetscRealPart(p.b) + PetscRealPart(p.h)*k/(mz-1);
1430*c4762a1bSJed Brown             ierr = PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)xx,(double)yy,(double)zz);CHKERRQ(ierr);
1431*c4762a1bSJed Brown           }
1432*c4762a1bSJed Brown         }
1433*c4762a1bSJed Brown       }
1434*c4762a1bSJed Brown       ierr = PetscViewerASCIIPrintf(viewer,"        </DataArray>\n");CHKERRQ(ierr);
1435*c4762a1bSJed Brown       ierr = PetscViewerASCIIPrintf(viewer,"      </Points>\n");CHKERRQ(ierr);
1436*c4762a1bSJed Brown 
1437*c4762a1bSJed Brown       ierr = PetscViewerASCIIPrintf(viewer,"      <PointData>\n");CHKERRQ(ierr);
1438*c4762a1bSJed Brown       ierr = PetscViewerASCIIPrintf(viewer,"        <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n");CHKERRQ(ierr);
1439*c4762a1bSJed Brown       for (i=0; i<nn; i+=dof) {
1440*c4762a1bSJed Brown         ierr = PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)(PetscRealPart(ptr[i])*units->year/units->meter),(double)(PetscRealPart(ptr[i+1])*units->year/units->meter),0.0);CHKERRQ(ierr);
1441*c4762a1bSJed Brown       }
1442*c4762a1bSJed Brown       ierr = PetscViewerASCIIPrintf(viewer,"        </DataArray>\n");CHKERRQ(ierr);
1443*c4762a1bSJed Brown 
1444*c4762a1bSJed Brown       ierr = PetscViewerASCIIPrintf(viewer,"        <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n");CHKERRQ(ierr);
1445*c4762a1bSJed Brown       for (i=0; i<nn; i+=dof) {
1446*c4762a1bSJed Brown         ierr = PetscViewerASCIIPrintf(viewer,"%D\n",r);CHKERRQ(ierr);
1447*c4762a1bSJed Brown       }
1448*c4762a1bSJed Brown       ierr = PetscViewerASCIIPrintf(viewer,"        </DataArray>\n");CHKERRQ(ierr);
1449*c4762a1bSJed Brown       ierr = PetscViewerASCIIPrintf(viewer,"      </PointData>\n");CHKERRQ(ierr);
1450*c4762a1bSJed Brown 
1451*c4762a1bSJed Brown       ierr = PetscViewerASCIIPrintf(viewer,"    </Piece>\n");CHKERRQ(ierr);
1452*c4762a1bSJed Brown     }
1453*c4762a1bSJed Brown     ierr = PetscFree(array);CHKERRQ(ierr);
1454*c4762a1bSJed Brown   } else {
1455*c4762a1bSJed Brown     ierr = MPI_Send(range,6,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
1456*c4762a1bSJed Brown     ierr = MPI_Send((PetscScalar*)x,nn,MPIU_SCALAR,0,tag,comm);CHKERRQ(ierr);
1457*c4762a1bSJed Brown   }
1458*c4762a1bSJed Brown   ierr = VecRestoreArrayRead(X,&x);CHKERRQ(ierr);
1459*c4762a1bSJed Brown   ierr = PetscViewerASCIIPrintf(viewer,"  </StructuredGrid>\n");CHKERRQ(ierr);
1460*c4762a1bSJed Brown   ierr = PetscViewerASCIIPrintf(viewer,"</VTKFile>\n");CHKERRQ(ierr);
1461*c4762a1bSJed Brown   ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1462*c4762a1bSJed Brown   PetscFunctionReturn(0);
1463*c4762a1bSJed Brown }
1464*c4762a1bSJed Brown 
1465*c4762a1bSJed Brown int main(int argc,char *argv[])
1466*c4762a1bSJed Brown {
1467*c4762a1bSJed Brown   MPI_Comm       comm;
1468*c4762a1bSJed Brown   THI            thi;
1469*c4762a1bSJed Brown   PetscErrorCode ierr;
1470*c4762a1bSJed Brown   DM             da;
1471*c4762a1bSJed Brown   SNES           snes;
1472*c4762a1bSJed Brown 
1473*c4762a1bSJed Brown   ierr = PetscInitialize(&argc,&argv,0,help);if (ierr) return ierr;
1474*c4762a1bSJed Brown   comm = PETSC_COMM_WORLD;
1475*c4762a1bSJed Brown 
1476*c4762a1bSJed Brown   ierr = THICreate(comm,&thi);CHKERRQ(ierr);
1477*c4762a1bSJed Brown   {
1478*c4762a1bSJed Brown     PetscInt M = 3,N = 3,P = 2;
1479*c4762a1bSJed Brown     ierr = PetscOptionsBegin(comm,NULL,"Grid resolution options","");CHKERRQ(ierr);
1480*c4762a1bSJed Brown     {
1481*c4762a1bSJed Brown       ierr = PetscOptionsInt("-M","Number of elements in x-direction on coarse level","",M,&M,NULL);CHKERRQ(ierr);
1482*c4762a1bSJed Brown       N    = M;
1483*c4762a1bSJed Brown       ierr = PetscOptionsInt("-N","Number of elements in y-direction on coarse level (if different from M)","",N,&N,NULL);CHKERRQ(ierr);
1484*c4762a1bSJed Brown       if (thi->coarse2d) {
1485*c4762a1bSJed Brown         ierr = PetscOptionsInt("-zlevels","Number of elements in z-direction on fine level","",thi->zlevels,&thi->zlevels,NULL);CHKERRQ(ierr);
1486*c4762a1bSJed Brown       } else {
1487*c4762a1bSJed Brown         ierr = PetscOptionsInt("-P","Number of elements in z-direction on coarse level","",P,&P,NULL);CHKERRQ(ierr);
1488*c4762a1bSJed Brown       }
1489*c4762a1bSJed Brown     }
1490*c4762a1bSJed Brown     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1491*c4762a1bSJed Brown     if (thi->coarse2d) {
1492*c4762a1bSJed Brown       ierr = DMDACreate2d(comm,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_BOX,N,M,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,&da);CHKERRQ(ierr);
1493*c4762a1bSJed Brown       ierr = DMSetFromOptions(da);CHKERRQ(ierr);
1494*c4762a1bSJed Brown       ierr = DMSetUp(da);CHKERRQ(ierr);
1495*c4762a1bSJed Brown       da->ops->refinehierarchy     = DMRefineHierarchy_THI;
1496*c4762a1bSJed Brown       da->ops->createinterpolation = DMCreateInterpolation_DA_THI;
1497*c4762a1bSJed Brown 
1498*c4762a1bSJed Brown       ierr = PetscObjectCompose((PetscObject)da,"THI",(PetscObject)thi);CHKERRQ(ierr);
1499*c4762a1bSJed Brown     } else {
1500*c4762a1bSJed Brown       ierr = DMDACreate3d(comm,DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX,P,N,M,1,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,0,&da);CHKERRQ(ierr);
1501*c4762a1bSJed Brown       ierr = DMSetFromOptions(da);CHKERRQ(ierr);
1502*c4762a1bSJed Brown       ierr = DMSetUp(da);CHKERRQ(ierr);
1503*c4762a1bSJed Brown     }
1504*c4762a1bSJed Brown     ierr = DMDASetFieldName(da,0,"x-velocity");CHKERRQ(ierr);
1505*c4762a1bSJed Brown     ierr = DMDASetFieldName(da,1,"y-velocity");CHKERRQ(ierr);
1506*c4762a1bSJed Brown   }
1507*c4762a1bSJed Brown   ierr = THISetUpDM(thi,da);CHKERRQ(ierr);
1508*c4762a1bSJed Brown   if (thi->tridiagonal) da->ops->creatematrix = DMCreateMatrix_THI_Tridiagonal;
1509*c4762a1bSJed Brown 
1510*c4762a1bSJed Brown   {                             /* Set the fine level matrix type if -da_refine */
1511*c4762a1bSJed Brown     PetscInt rlevel,clevel;
1512*c4762a1bSJed Brown     ierr = DMGetRefineLevel(da,&rlevel);CHKERRQ(ierr);
1513*c4762a1bSJed Brown     ierr = DMGetCoarsenLevel(da,&clevel);CHKERRQ(ierr);
1514*c4762a1bSJed Brown     if (rlevel - clevel > 0) {ierr = DMSetMatType(da,thi->mattype);CHKERRQ(ierr);}
1515*c4762a1bSJed Brown   }
1516*c4762a1bSJed Brown 
1517*c4762a1bSJed Brown   ierr = DMDASNESSetFunctionLocal(da,ADD_VALUES,(DMDASNESFunction)THIFunctionLocal,thi);CHKERRQ(ierr);
1518*c4762a1bSJed Brown   if (thi->tridiagonal) {
1519*c4762a1bSJed Brown     ierr = DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Tridiagonal,thi);CHKERRQ(ierr);
1520*c4762a1bSJed Brown   } else {
1521*c4762a1bSJed Brown     ierr = DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi);CHKERRQ(ierr);
1522*c4762a1bSJed Brown   }
1523*c4762a1bSJed Brown   ierr = DMCoarsenHookAdd(da,DMCoarsenHook_THI,NULL,thi);CHKERRQ(ierr);
1524*c4762a1bSJed Brown   ierr = DMRefineHookAdd(da,DMRefineHook_THI,NULL,thi);CHKERRQ(ierr);
1525*c4762a1bSJed Brown 
1526*c4762a1bSJed Brown   ierr = DMSetApplicationContext(da,thi);CHKERRQ(ierr);
1527*c4762a1bSJed Brown 
1528*c4762a1bSJed Brown   ierr = SNESCreate(comm,&snes);CHKERRQ(ierr);
1529*c4762a1bSJed Brown   ierr = SNESSetDM(snes,da);CHKERRQ(ierr);
1530*c4762a1bSJed Brown   ierr = DMDestroy(&da);CHKERRQ(ierr);
1531*c4762a1bSJed Brown   ierr = SNESSetComputeInitialGuess(snes,THIInitial,NULL);CHKERRQ(ierr);
1532*c4762a1bSJed Brown   ierr = SNESSetFromOptions(snes);CHKERRQ(ierr);
1533*c4762a1bSJed Brown 
1534*c4762a1bSJed Brown   ierr = SNESSolve(snes,NULL,NULL);CHKERRQ(ierr);
1535*c4762a1bSJed Brown 
1536*c4762a1bSJed Brown   ierr = THISolveStatistics(thi,snes,0,"Full");CHKERRQ(ierr);
1537*c4762a1bSJed Brown 
1538*c4762a1bSJed Brown   {
1539*c4762a1bSJed Brown     PetscBool flg;
1540*c4762a1bSJed Brown     char      filename[PETSC_MAX_PATH_LEN] = "";
1541*c4762a1bSJed Brown     ierr = PetscOptionsGetString(NULL,NULL,"-o",filename,sizeof(filename),&flg);CHKERRQ(ierr);
1542*c4762a1bSJed Brown     if (flg) {
1543*c4762a1bSJed Brown       Vec X;
1544*c4762a1bSJed Brown       DM  dm;
1545*c4762a1bSJed Brown       ierr = SNESGetSolution(snes,&X);CHKERRQ(ierr);
1546*c4762a1bSJed Brown       ierr = SNESGetDM(snes,&dm);CHKERRQ(ierr);
1547*c4762a1bSJed Brown       ierr = THIDAVecView_VTK_XML(thi,dm,X,filename);CHKERRQ(ierr);
1548*c4762a1bSJed Brown     }
1549*c4762a1bSJed Brown   }
1550*c4762a1bSJed Brown 
1551*c4762a1bSJed Brown   ierr = DMDestroy(&da);CHKERRQ(ierr);
1552*c4762a1bSJed Brown   ierr = SNESDestroy(&snes);CHKERRQ(ierr);
1553*c4762a1bSJed Brown   ierr = THIDestroy(&thi);CHKERRQ(ierr);
1554*c4762a1bSJed Brown   ierr = PetscFinalize();
1555*c4762a1bSJed Brown   return ierr;
1556*c4762a1bSJed Brown }
1557*c4762a1bSJed Brown 
1558*c4762a1bSJed Brown 
1559*c4762a1bSJed Brown /*TEST
1560*c4762a1bSJed Brown 
1561*c4762a1bSJed Brown    build:
1562*c4762a1bSJed Brown       requires: c99 !single
1563*c4762a1bSJed Brown 
1564*c4762a1bSJed Brown    test:
1565*c4762a1bSJed Brown       args: -M 6 -P 4 -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type icc
1566*c4762a1bSJed Brown 
1567*c4762a1bSJed Brown    test:
1568*c4762a1bSJed Brown       suffix: 2
1569*c4762a1bSJed Brown       nsize: 2
1570*c4762a1bSJed Brown       args: -M 6 -P 4 -thi_hom z -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 6 -mg_levels_0_pc_type redundant -snes_grid_sequence 1 -mat_partitioning_type current -ksp_atol -1
1571*c4762a1bSJed Brown 
1572*c4762a1bSJed Brown    test:
1573*c4762a1bSJed Brown       suffix: 3
1574*c4762a1bSJed Brown       nsize: 3
1575*c4762a1bSJed Brown       args: -M 7 -P 4 -thi_hom z -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type baij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_pc_asm_type restrict -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 9 -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mat_partitioning_type current
1576*c4762a1bSJed Brown 
1577*c4762a1bSJed Brown    test:
1578*c4762a1bSJed Brown       suffix: 4
1579*c4762a1bSJed Brown       nsize: 6
1580*c4762a1bSJed Brown       args: -M 4 -P 2 -da_refine_hierarchy_x 1,1,3 -da_refine_hierarchy_y 2,2,1 -da_refine_hierarchy_z 2,2,1 -snes_grid_sequence 3 -ksp_converged_reason -ksp_type fgmres -ksp_rtol 1e-2 -pc_type mg -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type bjacobi -mg_levels_1_sub_pc_type cholesky -pc_mg_type multiplicative -snes_converged_reason -snes_stol 1e-12 -thi_L 80e3 -thi_alpha 0.05 -thi_friction_m 1 -thi_hom x -snes_view -mg_levels_0_pc_type redundant -mg_levels_0_ksp_type preonly -ksp_atol -1
1581*c4762a1bSJed Brown 
1582*c4762a1bSJed Brown    test:
1583*c4762a1bSJed Brown       suffix: 5
1584*c4762a1bSJed Brown       nsize: 6
1585*c4762a1bSJed Brown       args: -M 12 -P 5 -snes_monitor_short -ksp_converged_reason -pc_type asm -pc_asm_type restrict -dm_mat_type {{aij baij sbaij}}
1586*c4762a1bSJed Brown 
1587*c4762a1bSJed Brown TEST*/
1588