xref: /petsc/src/ts/tutorials/ex14.c (revision 6a98f8dc3f2c9149905a87dc2e9d0fedaf64e09a)
1 static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
2 \n\
3 Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
4 using multigrid.  The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
5 to p=4/3 in a p-Laplacian).  The focus is on ISMIP-HOM experiments which assume periodic\n\
6 boundary conditions in the x- and y-directions.\n\
7 \n\
8 Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
9 can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
10 \n\
11 A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
12 \n\n";
13 
14 /*
15 The equations for horizontal velocity (u,v) are
16 
17   - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
18   - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0
19 
20 where
21 
22   eta = B/2 (epsilon + gamma)^((p-2)/2)
23 
24 is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
25 written in terms of the second invariant
26 
27   gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2
28 
29 The surface boundary conditions are the natural conditions.  The basal boundary conditions
30 are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.
31 
32 In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).
33 
34 The discretization is Q1 finite elements, managed by a DMDA.  The grid is never distorted in the
35 map (x,y) plane, but the bed and surface may be bumpy.  This is handled as usual in FEM, through
36 the Jacobian of the coordinate transformation from a reference element to the physical element.
37 
38 Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
39 specially so that columns are never distributed, and are always contiguous in memory.
40 This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
41 and then indexing as vec[i][j][k].  The exotic coarse spaces require 2D DMDAs which are made to
42 use compatible domain decomposition relative to the 3D DMDAs.
43 
44 */
45 
46 #include <petscts.h>
47 #include <petscdm.h>
48 #include <petscdmda.h>
49 #include <petscdmcomposite.h>
50 #include <ctype.h>              /* toupper() */
51 #include <petsc/private/petscimpl.h>
52 
53 #if defined __SSE2__
54 #  include <emmintrin.h>
55 #endif
56 
57 /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
58 #define USE_SSE2_KERNELS (!defined NO_SSE2                              \
59                           && !defined PETSC_USE_COMPLEX                 \
60                           && !defined PETSC_USE_REAL_SINGLE           \
61                           && defined __SSE2__)
62 
63 #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L
64 #  if defined __cplusplus       /* C++ restrict is nonstandard and compilers have inconsistent rules about where it can be used */
65 #    define restrict
66 #  else
67 #    define restrict PETSC_RESTRICT
68 #  endif
69 #endif
70 
71 static PetscClassId THI_CLASSID;
72 
73 typedef enum {QUAD_GAUSS,QUAD_LOBATTO} QuadratureType;
74 static const char *QuadratureTypes[] = {"gauss","lobatto","QuadratureType","QUAD_",0};
75 static const PetscReal HexQWeights[8] = {1,1,1,1,1,1,1,1};
76 static const PetscReal HexQNodes[]    = {-0.57735026918962573, 0.57735026918962573};
77 #define G 0.57735026918962573
78 #define H (0.5*(1.+G))
79 #define L (0.5*(1.-G))
80 #define M (-0.5)
81 #define P (0.5)
82 /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
83 static const PetscReal HexQInterp_Lobatto[8][8] = {{H,0,0,0,L,0,0,0},
84                                                    {0,H,0,0,0,L,0,0},
85                                                    {0,0,H,0,0,0,L,0},
86                                                    {0,0,0,H,0,0,0,L},
87                                                    {L,0,0,0,H,0,0,0},
88                                                    {0,L,0,0,0,H,0,0},
89                                                    {0,0,L,0,0,0,H,0},
90                                                    {0,0,0,L,0,0,0,H}};
91 static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
92   {{M*H,M*H,M},{P*H,0,0}  ,{0,0,0}    ,{0,P*H,0}  ,{M*L,M*L,P},{P*L,0,0}  ,{0,0,0}    ,{0,P*L,0}  },
93   {{M*H,0,0}  ,{P*H,M*H,M},{0,P*H,0}  ,{0,0,0}    ,{M*L,0,0}  ,{P*L,M*L,P},{0,P*L,0}  ,{0,0,0}    },
94   {{0,0,0}    ,{0,M*H,0}  ,{P*H,P*H,M},{M*H,0,0}  ,{0,0,0}    ,{0,M*L,0}  ,{P*L,P*L,P},{M*L,0,0}  },
95   {{0,M*H,0}  ,{0,0,0}    ,{P*H,0,0}  ,{M*H,P*H,M},{0,M*L,0}  ,{0,0,0}    ,{P*L,0,0}  ,{M*L,P*L,P}},
96   {{M*L,M*L,M},{P*L,0,0}  ,{0,0,0}    ,{0,P*L,0}  ,{M*H,M*H,P},{P*H,0,0}  ,{0,0,0}    ,{0,P*H,0}  },
97   {{M*L,0,0}  ,{P*L,M*L,M},{0,P*L,0}  ,{0,0,0}    ,{M*H,0,0}  ,{P*H,M*H,P},{0,P*H,0}  ,{0,0,0}    },
98   {{0,0,0}    ,{0,M*L,0}  ,{P*L,P*L,M},{M*L,0,0}  ,{0,0,0}    ,{0,M*H,0}  ,{P*H,P*H,P},{M*H,0,0}  },
99   {{0,M*L,0}  ,{0,0,0}    ,{P*L,0,0}  ,{M*L,P*L,M},{0,M*H,0}  ,{0,0,0}    ,{P*H,0,0}  ,{M*H,P*H,P}}};
100 /* Stanndard Gauss */
101 static const PetscReal HexQInterp_Gauss[8][8] = {{H*H*H,L*H*H,L*L*H,H*L*H, H*H*L,L*H*L,L*L*L,H*L*L},
102                                                  {L*H*H,H*H*H,H*L*H,L*L*H, L*H*L,H*H*L,H*L*L,L*L*L},
103                                                  {L*L*H,H*L*H,H*H*H,L*H*H, L*L*L,H*L*L,H*H*L,L*H*L},
104                                                  {H*L*H,L*L*H,L*H*H,H*H*H, H*L*L,L*L*L,L*H*L,H*H*L},
105                                                  {H*H*L,L*H*L,L*L*L,H*L*L, H*H*H,L*H*H,L*L*H,H*L*H},
106                                                  {L*H*L,H*H*L,H*L*L,L*L*L, L*H*H,H*H*H,H*L*H,L*L*H},
107                                                  {L*L*L,H*L*L,H*H*L,L*H*L, L*L*H,H*L*H,H*H*H,L*H*H},
108                                                  {H*L*L,L*L*L,L*H*L,H*H*L, H*L*H,L*L*H,L*H*H,H*H*H}};
109 static const PetscReal HexQDeriv_Gauss[8][8][3] = {
110   {{M*H*H,H*M*H,H*H*M},{P*H*H,L*M*H,L*H*M},{P*L*H,L*P*H,L*L*M},{M*L*H,H*P*H,H*L*M}, {M*H*L,H*M*L,H*H*P},{P*H*L,L*M*L,L*H*P},{P*L*L,L*P*L,L*L*P},{M*L*L,H*P*L,H*L*P}},
111   {{M*H*H,L*M*H,L*H*M},{P*H*H,H*M*H,H*H*M},{P*L*H,H*P*H,H*L*M},{M*L*H,L*P*H,L*L*M}, {M*H*L,L*M*L,L*H*P},{P*H*L,H*M*L,H*H*P},{P*L*L,H*P*L,H*L*P},{M*L*L,L*P*L,L*L*P}},
112   {{M*L*H,L*M*H,L*L*M},{P*L*H,H*M*H,H*L*M},{P*H*H,H*P*H,H*H*M},{M*H*H,L*P*H,L*H*M}, {M*L*L,L*M*L,L*L*P},{P*L*L,H*M*L,H*L*P},{P*H*L,H*P*L,H*H*P},{M*H*L,L*P*L,L*H*P}},
113   {{M*L*H,H*M*H,H*L*M},{P*L*H,L*M*H,L*L*M},{P*H*H,L*P*H,L*H*M},{M*H*H,H*P*H,H*H*M}, {M*L*L,H*M*L,H*L*P},{P*L*L,L*M*L,L*L*P},{P*H*L,L*P*L,L*H*P},{M*H*L,H*P*L,H*H*P}},
114   {{M*H*L,H*M*L,H*H*M},{P*H*L,L*M*L,L*H*M},{P*L*L,L*P*L,L*L*M},{M*L*L,H*P*L,H*L*M}, {M*H*H,H*M*H,H*H*P},{P*H*H,L*M*H,L*H*P},{P*L*H,L*P*H,L*L*P},{M*L*H,H*P*H,H*L*P}},
115   {{M*H*L,L*M*L,L*H*M},{P*H*L,H*M*L,H*H*M},{P*L*L,H*P*L,H*L*M},{M*L*L,L*P*L,L*L*M}, {M*H*H,L*M*H,L*H*P},{P*H*H,H*M*H,H*H*P},{P*L*H,H*P*H,H*L*P},{M*L*H,L*P*H,L*L*P}},
116   {{M*L*L,L*M*L,L*L*M},{P*L*L,H*M*L,H*L*M},{P*H*L,H*P*L,H*H*M},{M*H*L,L*P*L,L*H*M}, {M*L*H,L*M*H,L*L*P},{P*L*H,H*M*H,H*L*P},{P*H*H,H*P*H,H*H*P},{M*H*H,L*P*H,L*H*P}},
117   {{M*L*L,H*M*L,H*L*M},{P*L*L,L*M*L,L*L*M},{P*H*L,L*P*L,L*H*M},{M*H*L,H*P*L,H*H*M}, {M*L*H,H*M*H,H*L*P},{P*L*H,L*M*H,L*L*P},{P*H*H,L*P*H,L*H*P},{M*H*H,H*P*H,H*H*P}}};
118 static const PetscReal (*HexQInterp)[8],(*HexQDeriv)[8][3];
119 /* Standard 2x2 Gauss quadrature for the bottom layer. */
120 static const PetscReal QuadQInterp[4][4] = {{H*H,L*H,L*L,H*L},
121                                             {L*H,H*H,H*L,L*L},
122                                             {L*L,H*L,H*H,L*H},
123                                             {H*L,L*L,L*H,H*H}};
124 static const PetscReal QuadQDeriv[4][4][2] = {
125   {{M*H,M*H},{P*H,M*L},{P*L,P*L},{M*L,P*H}},
126   {{M*H,M*L},{P*H,M*H},{P*L,P*H},{M*L,P*L}},
127   {{M*L,M*L},{P*L,M*H},{P*H,P*H},{M*H,P*L}},
128   {{M*L,M*H},{P*L,M*L},{P*H,P*L},{M*H,P*H}}};
129 #undef G
130 #undef H
131 #undef L
132 #undef M
133 #undef P
134 
135 #define HexExtract(x,i,j,k,n) do {              \
136     (n)[0] = (x)[i][j][k];                      \
137     (n)[1] = (x)[i+1][j][k];                    \
138     (n)[2] = (x)[i+1][j+1][k];                  \
139     (n)[3] = (x)[i][j+1][k];                    \
140     (n)[4] = (x)[i][j][k+1];                    \
141     (n)[5] = (x)[i+1][j][k+1];                  \
142     (n)[6] = (x)[i+1][j+1][k+1];                \
143     (n)[7] = (x)[i][j+1][k+1];                  \
144   } while (0)
145 
146 #define HexExtractRef(x,i,j,k,n) do {           \
147     (n)[0] = &(x)[i][j][k];                     \
148     (n)[1] = &(x)[i+1][j][k];                   \
149     (n)[2] = &(x)[i+1][j+1][k];                 \
150     (n)[3] = &(x)[i][j+1][k];                   \
151     (n)[4] = &(x)[i][j][k+1];                   \
152     (n)[5] = &(x)[i+1][j][k+1];                 \
153     (n)[6] = &(x)[i+1][j+1][k+1];               \
154     (n)[7] = &(x)[i][j+1][k+1];                 \
155   } while (0)
156 
157 #define QuadExtract(x,i,j,n) do {               \
158     (n)[0] = (x)[i][j];                         \
159     (n)[1] = (x)[i+1][j];                       \
160     (n)[2] = (x)[i+1][j+1];                     \
161     (n)[3] = (x)[i][j+1];                       \
162   } while (0)
163 
164 static PetscScalar Sqr(PetscScalar a) {return a*a;}
165 
166 static void HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[])
167 {
168   PetscInt i;
169   dz[0] = dz[1] = dz[2] = 0;
170   for (i=0; i<8; i++) {
171     dz[0] += dphi[i][0] * zn[i];
172     dz[1] += dphi[i][1] * zn[i];
173     dz[2] += dphi[i][2] * zn[i];
174   }
175 }
176 
177 static void HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[restrict],PetscReal phi[restrict],PetscReal dphi[restrict][3],PetscReal *restrict jw)
178 {
179   const PetscReal
180     jac[3][3] = {{hx/2,0,0}, {0,hy/2,0}, {dz[0],dz[1],dz[2]}}
181   ,ijac[3][3] = {{1/jac[0][0],0,0}, {0,1/jac[1][1],0}, {-jac[2][0]/(jac[0][0]*jac[2][2]),-jac[2][1]/(jac[1][1]*jac[2][2]),1/jac[2][2]}}
182   ,jdet = jac[0][0]*jac[1][1]*jac[2][2];
183   PetscInt i;
184 
185   for (i=0; i<8; i++) {
186     const PetscReal *dphir = HexQDeriv[q][i];
187     phi[i] = HexQInterp[q][i];
188     dphi[i][0] = dphir[0]*ijac[0][0] + dphir[1]*ijac[1][0] + dphir[2]*ijac[2][0];
189     dphi[i][1] = dphir[0]*ijac[0][1] + dphir[1]*ijac[1][1] + dphir[2]*ijac[2][1];
190     dphi[i][2] = dphir[0]*ijac[0][2] + dphir[1]*ijac[1][2] + dphir[2]*ijac[2][2];
191   }
192   *jw = 1.0 * jdet;
193 }
194 
195 typedef struct _p_THI   *THI;
196 typedef struct _n_Units *Units;
197 
198 typedef struct {
199   PetscScalar u,v;
200 } Node;
201 
202 typedef struct {
203   PetscScalar b;                /* bed */
204   PetscScalar h;                /* thickness */
205   PetscScalar beta2;            /* friction */
206 } PrmNode;
207 
208 #define FieldSize(ntype) ((PetscInt)(sizeof(ntype)/sizeof(PetscScalar)))
209 #define FieldOffset(ntype,member) ((PetscInt)(offsetof(ntype,member)/sizeof(PetscScalar)))
210 #define FieldIndex(ntype,i,member) ((PetscInt)((i)*FieldSize(ntype) + FieldOffset(ntype,member)))
211 #define NODE_SIZE FieldSize(Node)
212 #define PRMNODE_SIZE FieldSize(PrmNode)
213 
214 typedef struct {
215   PetscReal min,max,cmin,cmax;
216 } PRange;
217 
218 struct _p_THI {
219   PETSCHEADER(int);
220   void      (*initialize)(THI,PetscReal x,PetscReal y,PrmNode *p);
221   PetscInt  nlevels;
222   PetscInt  zlevels;
223   PetscReal Lx,Ly,Lz;           /* Model domain */
224   PetscReal alpha;              /* Bed angle */
225   Units     units;
226   PetscReal dirichlet_scale;
227   PetscReal ssa_friction_scale;
228   PetscReal inertia;
229   PRange    eta;
230   PRange    beta2;
231   struct {
232     PetscReal Bd2,eps,exponent,glen_n;
233   } viscosity;
234   struct {
235     PetscReal irefgam,eps2,exponent;
236   } friction;
237   struct {
238     PetscReal rate,exponent,refvel;
239   } erosion;
240   PetscReal rhog;
241   PetscBool no_slip;
242   PetscBool verbose;
243   char      *mattype;
244   char      *monitor_basename;
245   PetscInt  monitor_interval;
246 };
247 
248 struct _n_Units {
249   /* fundamental */
250   PetscReal meter;
251   PetscReal kilogram;
252   PetscReal second;
253   /* derived */
254   PetscReal Pascal;
255   PetscReal year;
256 };
257 
258 static void PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[])
259 {
260   const PetscScalar zm1 = zm-1,
261     znl[8] = {pn[0].b + pn[0].h*(PetscScalar)k/zm1,
262               pn[1].b + pn[1].h*(PetscScalar)k/zm1,
263               pn[2].b + pn[2].h*(PetscScalar)k/zm1,
264               pn[3].b + pn[3].h*(PetscScalar)k/zm1,
265               pn[0].b + pn[0].h*(PetscScalar)(k+1)/zm1,
266               pn[1].b + pn[1].h*(PetscScalar)(k+1)/zm1,
267               pn[2].b + pn[2].h*(PetscScalar)(k+1)/zm1,
268               pn[3].b + pn[3].h*(PetscScalar)(k+1)/zm1};
269   PetscInt i;
270   for (i=0; i<8; i++) zn[i] = PetscRealPart(znl[i]);
271 }
272 
273 /* Compute a gradient of all the 2D fields at four quadrature points.  Output for [quadrature_point][direction].field_name */
274 static PetscErrorCode QuadComputeGrad4(const PetscReal dphi[][4][2],PetscReal hx,PetscReal hy,const PrmNode pn[4],PrmNode dp[4][2])
275 {
276   PetscErrorCode ierr;
277   PetscInt       q,i,f;
278   const PetscScalar (*restrict pg)[PRMNODE_SIZE] = (const PetscScalar(*)[PRMNODE_SIZE])pn; /* Get generic array pointers to the node */
279   PetscScalar (*restrict dpg)[2][PRMNODE_SIZE]   = (PetscScalar(*)[2][PRMNODE_SIZE])dp;
280 
281   PetscFunctionBeginUser;
282   ierr = PetscArrayzero(dpg,4);CHKERRQ(ierr);
283   for (q=0; q<4; q++) {
284     for (i=0; i<4; i++) {
285       for (f=0; f<PRMNODE_SIZE; f++) {
286         dpg[q][0][f] += dphi[q][i][0]/hx * pg[i][f];
287         dpg[q][1][f] += dphi[q][i][1]/hy * pg[i][f];
288       }
289     }
290   }
291   PetscFunctionReturn(0);
292 }
293 
294 static inline PetscReal StaggeredMidpoint2D(PetscScalar a,PetscScalar b,PetscScalar c,PetscScalar d)
295 {return 0.5*PetscRealPart(0.75*a + 0.75*b + 0.25*c + 0.25*d);}
296 static inline PetscReal UpwindFlux1D(PetscReal u,PetscReal hL,PetscReal hR)
297 {return (u > 0) ? hL*u : hR*u;}
298 
299 #define UpwindFluxXW(x3,x2,h,i,j,k,dj) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u,x3[i-1][j][k].u, x3[i-1][j+dj][k].u,x3[i][k+dj][k].u), \
300                                                     PetscRealPart(0.75*x2[i-1][j  ].h+0.25*x2[i-1][j+dj].h), PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i  ][j+dj].h))
301 #define UpwindFluxXE(x3,x2,h,i,j,k,dj) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u,x3[i+1][j][k].u, x3[i+1][j+dj][k].u,x3[i][k+dj][k].u), \
302                                                     PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i  ][j+dj].h), PetscRealPart(0.75*x2[i+1][j  ].h+0.25*x2[i+1][j+dj].h))
303 #define UpwindFluxYS(x3,x2,h,i,j,k,di) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v,x3[i][j-1][k].v, x3[i+di][j-1][k].v,x3[i+di][j][k].v), \
304                                                     PetscRealPart(0.75*x2[i  ][j-1].h+0.25*x2[i+di][j-1].h), PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i+di][j  ].h))
305 #define UpwindFluxYN(x3,x2,h,i,j,k,di) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v,x3[i][j+1][k].v, x3[i+di][j+1][k].v,x3[i+di][j][k].v), \
306                                                     PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i+di][j  ].h), PetscRealPart(0.75*x2[i  ][j+1].h+0.25*x2[i+di][j+1].h))
307 
308 static void PrmNodeGetFaceMeasure(const PrmNode **p,PetscInt i,PetscInt j,PetscScalar h[])
309 {
310   /* West */
311   h[0] = StaggeredMidpoint2D(p[i][j].h,p[i-1][j].h,p[i-1][j-1].h,p[i][j-1].h);
312   h[1] = StaggeredMidpoint2D(p[i][j].h,p[i-1][j].h,p[i-1][j+1].h,p[i][j+1].h);
313   /* East */
314   h[2] = StaggeredMidpoint2D(p[i][j].h,p[i+1][j].h,p[i+1][j+1].h,p[i][j+1].h);
315   h[3] = StaggeredMidpoint2D(p[i][j].h,p[i+1][j].h,p[i+1][j-1].h,p[i][j-1].h);
316   /* South */
317   h[4] = StaggeredMidpoint2D(p[i][j].h,p[i][j-1].h,p[i+1][j-1].h,p[i+1][j].h);
318   h[5] = StaggeredMidpoint2D(p[i][j].h,p[i][j-1].h,p[i-1][j-1].h,p[i-1][j].h);
319   /* North */
320   h[6] = StaggeredMidpoint2D(p[i][j].h,p[i][j+1].h,p[i-1][j+1].h,p[i-1][j].h);
321   h[7] = StaggeredMidpoint2D(p[i][j].h,p[i][j+1].h,p[i+1][j+1].h,p[i+1][j].h);
322 }
323 
324 /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
325 static void THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode *p)
326 {
327   Units units = thi->units;
328   PetscReal s = -x*PetscSinReal(thi->alpha);
329   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx) * PetscSinReal(y*2*PETSC_PI/thi->Ly);
330   p->h = s - p->b;
331   p->beta2 = -1e-10;             /* This value is not used, but it should not be huge because that would change the finite difference step size  */
332 }
333 
334 static void THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode *p)
335 {
336   Units units = thi->units;
337   PetscReal s = -x*PetscSinReal(thi->alpha);
338   p->b = s - 1000*units->meter;
339   p->h = s - p->b;
340   /* tau_b = beta2 v   is a stress (Pa).
341    * This is a big number in our units (it needs to balance the driving force from the surface), so we scale it by 1/rhog, just like the residual. */
342   p->beta2 = 1000 * (1 + PetscSinReal(x*2*PETSC_PI/thi->Lx)*PetscSinReal(y*2*PETSC_PI/thi->Ly)) * units->Pascal * units->year / units->meter / thi->rhog;
343 }
344 
345 /* These are just toys */
346 
347 /* From Fred Herman */
348 static void THIInitialize_HOM_F(THI thi,PetscReal x,PetscReal y,PrmNode *p)
349 {
350   Units units = thi->units;
351   PetscReal s = -x*PetscSinReal(thi->alpha);
352   p->b = s - 1000*units->meter + 100*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx);/* * sin(y*2*PETSC_PI/thi->Ly); */
353   p->h = s - p->b;
354   p->h = (1-(atan((x-thi->Lx/2)/1.)+PETSC_PI/2.)/PETSC_PI)*500*units->meter+1*units->meter;
355   s = PetscRealPart(p->b + p->h);
356   p->beta2 = -1e-10;
357   /*  p->beta2 = 1000 * units->Pascal * units->year / units->meter; */
358 }
359 
360 /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
361 static void THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
362 {
363   Units units = thi->units;
364   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
365   PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
366   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
367   p->h = s - p->b;
368   p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter / thi->rhog;
369 }
370 
371 /* Like Z, but with 200 meter cliffs */
372 static void THIInitialize_HOM_Y(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
373 {
374   Units units = thi->units;
375   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
376   PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
377   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
378   if (PetscRealPart(p->b) > -700*units->meter) p->b += 200*units->meter;
379   p->h = s - p->b;
380   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter / thi->rhog;
381 }
382 
383 /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
384 static void THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
385 {
386   Units units = thi->units;
387   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
388   PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
389   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
390   p->h = s - p->b;
391   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter / thi->rhog;
392 }
393 
394 static void THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal *beta2,PetscReal *dbeta2)
395 {
396   if (thi->friction.irefgam == 0) {
397     Units units = thi->units;
398     thi->friction.irefgam = 1./(0.5*PetscSqr(100 * units->meter / units->year));
399     thi->friction.eps2 = 0.5*PetscSqr(1.e-4 / thi->friction.irefgam);
400   }
401   if (thi->friction.exponent == 0) {
402     *beta2  = rbeta2;
403     *dbeta2 = 0;
404   } else {
405     *beta2  = rbeta2 * PetscPowReal(thi->friction.eps2 + gam*thi->friction.irefgam,thi->friction.exponent);
406     *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam*thi->friction.irefgam) * thi->friction.irefgam;
407   }
408 }
409 
410 static void THIViscosity(THI thi,PetscReal gam,PetscReal *eta,PetscReal *deta)
411 {
412   PetscReal Bd2,eps,exponent;
413   if (thi->viscosity.Bd2 == 0) {
414     Units units = thi->units;
415     const PetscReal
416       n = thi->viscosity.glen_n,                        /* Glen exponent */
417       p = 1. + 1./n,                                    /* for Stokes */
418       A = 1.e-16 * PetscPowReal(units->Pascal,-n) / units->year, /* softness parameter (Pa^{-n}/s) */
419       B = PetscPowReal(A,-1./n);                                 /* hardness parameter */
420     thi->viscosity.Bd2      = B/2;
421     thi->viscosity.exponent = (p-2)/2;
422     thi->viscosity.eps      = 0.5*PetscSqr(1e-5 / units->year);
423   }
424   Bd2      = thi->viscosity.Bd2;
425   exponent = thi->viscosity.exponent;
426   eps      = thi->viscosity.eps;
427   *eta     = Bd2 * PetscPowReal(eps + gam,exponent);
428   *deta    = exponent * (*eta) / (eps + gam);
429 }
430 
431 static void THIErosion(THI thi,const Node *vel,PetscScalar *erate,Node *derate)
432 {
433   const PetscScalar magref2 = 1.e-10 + (PetscSqr(vel->u) + PetscSqr(vel->v)) / PetscSqr(thi->erosion.refvel),
434                     rate    = -thi->erosion.rate*PetscPowScalar(magref2, 0.5*thi->erosion.exponent);
435   if (erate) *erate = rate;
436   if (derate) {
437     if (thi->erosion.exponent == 1) {
438       derate->u = 0;
439       derate->v = 0;
440     } else {
441       derate->u = 0.5*thi->erosion.exponent * rate / magref2 * 2. * vel->u / PetscSqr(thi->erosion.refvel);
442       derate->v = 0.5*thi->erosion.exponent * rate / magref2 * 2. * vel->v / PetscSqr(thi->erosion.refvel);
443     }
444   }
445 }
446 
447 static void RangeUpdate(PetscReal *min,PetscReal *max,PetscReal x)
448 {
449   if (x < *min) *min = x;
450   if (x > *max) *max = x;
451 }
452 
453 static void PRangeClear(PRange *p)
454 {
455   p->cmin = p->min = 1e100;
456   p->cmax = p->max = -1e100;
457 }
458 
459 static PetscErrorCode PRangeMinMax(PRange *p,PetscReal min,PetscReal max)
460 {
461 
462   PetscFunctionBeginUser;
463   p->cmin = min;
464   p->cmax = max;
465   if (min < p->min) p->min = min;
466   if (max > p->max) p->max = max;
467   PetscFunctionReturn(0);
468 }
469 
470 static PetscErrorCode THIDestroy(THI *thi)
471 {
472   PetscErrorCode ierr;
473 
474   PetscFunctionBeginUser;
475   if (--((PetscObject)(*thi))->refct > 0) PetscFunctionReturn(0);
476   ierr = PetscFree((*thi)->units);CHKERRQ(ierr);
477   ierr = PetscFree((*thi)->mattype);CHKERRQ(ierr);
478   ierr = PetscFree((*thi)->monitor_basename);CHKERRQ(ierr);
479   ierr = PetscHeaderDestroy(thi);CHKERRQ(ierr);
480   PetscFunctionReturn(0);
481 }
482 
483 static PetscErrorCode THICreate(MPI_Comm comm,THI *inthi)
484 {
485   static PetscBool registered = PETSC_FALSE;
486   THI              thi;
487   Units            units;
488   char             monitor_basename[PETSC_MAX_PATH_LEN] = "thi-";
489   PetscErrorCode   ierr;
490 
491   PetscFunctionBeginUser;
492   *inthi = 0;
493   if (!registered) {
494     ierr = PetscClassIdRegister("Toy Hydrostatic Ice",&THI_CLASSID);CHKERRQ(ierr);
495     registered = PETSC_TRUE;
496   }
497   ierr = PetscHeaderCreate(thi,THI_CLASSID,"THI","Toy Hydrostatic Ice","THI",comm,THIDestroy,0);CHKERRQ(ierr);
498 
499   ierr = PetscNew(&thi->units);CHKERRQ(ierr);
500 
501   units           = thi->units;
502   units->meter    = 1e-2;
503   units->second   = 1e-7;
504   units->kilogram = 1e-12;
505 
506   ierr = PetscOptionsBegin(comm,NULL,"Scaled units options","");CHKERRQ(ierr);
507   {
508     ierr = PetscOptionsReal("-units_meter","1 meter in scaled length units","",units->meter,&units->meter,NULL);CHKERRQ(ierr);
509     ierr = PetscOptionsReal("-units_second","1 second in scaled time units","",units->second,&units->second,NULL);CHKERRQ(ierr);
510     ierr = PetscOptionsReal("-units_kilogram","1 kilogram in scaled mass units","",units->kilogram,&units->kilogram,NULL);CHKERRQ(ierr);
511   }
512   ierr = PetscOptionsEnd();CHKERRQ(ierr);
513   units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
514   units->year   = 31556926. * units->second, /* seconds per year */
515 
516   thi->Lx              = 10.e3;
517   thi->Ly              = 10.e3;
518   thi->Lz              = 1000;
519   thi->nlevels         = 1;
520   thi->dirichlet_scale = 1;
521   thi->verbose         = PETSC_FALSE;
522 
523   thi->viscosity.glen_n = 3.;
524   thi->erosion.rate     = 1e-3; /* m/a */
525   thi->erosion.exponent = 1.;
526   thi->erosion.refvel   = 1.;   /* m/a */
527 
528   ierr = PetscOptionsBegin(comm,NULL,"Toy Hydrostatic Ice options","");CHKERRQ(ierr);
529   {
530     QuadratureType quad       = QUAD_GAUSS;
531     char           homexp[]   = "A";
532     char           mtype[256] = MATSBAIJ;
533     PetscReal      L,m = 1.0;
534     PetscBool      flg;
535     L    = thi->Lx;
536     ierr = PetscOptionsReal("-thi_L","Domain size (m)","",L,&L,&flg);CHKERRQ(ierr);
537     if (flg) thi->Lx = thi->Ly = L;
538     ierr = PetscOptionsReal("-thi_Lx","X Domain size (m)","",thi->Lx,&thi->Lx,NULL);CHKERRQ(ierr);
539     ierr = PetscOptionsReal("-thi_Ly","Y Domain size (m)","",thi->Ly,&thi->Ly,NULL);CHKERRQ(ierr);
540     ierr = PetscOptionsReal("-thi_Lz","Z Domain size (m)","",thi->Lz,&thi->Lz,NULL);CHKERRQ(ierr);
541     ierr = PetscOptionsString("-thi_hom","ISMIP-HOM experiment (A or C)","",homexp,homexp,sizeof(homexp),NULL);CHKERRQ(ierr);
542     switch (homexp[0] = toupper(homexp[0])) {
543     case 'A':
544       thi->initialize = THIInitialize_HOM_A;
545       thi->no_slip    = PETSC_TRUE;
546       thi->alpha      = 0.5;
547       break;
548     case 'C':
549       thi->initialize = THIInitialize_HOM_C;
550       thi->no_slip    = PETSC_FALSE;
551       thi->alpha      = 0.1;
552       break;
553     case 'F':
554       thi->initialize = THIInitialize_HOM_F;
555       thi->no_slip    = PETSC_FALSE;
556       thi->alpha      = 0.5;
557       break;
558     case 'X':
559       thi->initialize = THIInitialize_HOM_X;
560       thi->no_slip    = PETSC_FALSE;
561       thi->alpha      = 0.3;
562       break;
563     case 'Y':
564       thi->initialize = THIInitialize_HOM_Y;
565       thi->no_slip    = PETSC_FALSE;
566       thi->alpha      = 0.5;
567       break;
568     case 'Z':
569       thi->initialize = THIInitialize_HOM_Z;
570       thi->no_slip    = PETSC_FALSE;
571       thi->alpha      = 0.5;
572       break;
573     default:
574       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"HOM experiment '%c' not implemented",homexp[0]);
575     }
576     ierr = PetscOptionsEnum("-thi_quadrature","Quadrature to use for 3D elements","",QuadratureTypes,(PetscEnum)quad,(PetscEnum*)&quad,NULL);CHKERRQ(ierr);
577     switch (quad) {
578     case QUAD_GAUSS:
579       HexQInterp = HexQInterp_Gauss;
580       HexQDeriv  = HexQDeriv_Gauss;
581       break;
582     case QUAD_LOBATTO:
583       HexQInterp = HexQInterp_Lobatto;
584       HexQDeriv  = HexQDeriv_Lobatto;
585       break;
586     }
587     ierr = PetscOptionsReal("-thi_alpha","Bed angle (degrees)","",thi->alpha,&thi->alpha,NULL);CHKERRQ(ierr);
588     ierr = PetscOptionsReal("-thi_viscosity_glen_n","Exponent in Glen flow law, 1=linear, infty=ideal plastic",NULL,thi->viscosity.glen_n,&thi->viscosity.glen_n,NULL);CHKERRQ(ierr);
589     ierr = PetscOptionsReal("-thi_friction_m","Friction exponent, 0=Coulomb, 1=Navier","",m,&m,NULL);CHKERRQ(ierr);
590     thi->friction.exponent = (m-1)/2;
591     ierr = PetscOptionsReal("-thi_erosion_rate","Rate of erosion relative to sliding velocity at reference velocity (m/a)",NULL,thi->erosion.rate,&thi->erosion.rate,NULL);CHKERRQ(ierr);
592     ierr = PetscOptionsReal("-thi_erosion_exponent","Power of sliding velocity appearing in erosion relation",NULL,thi->erosion.exponent,&thi->erosion.exponent,NULL);CHKERRQ(ierr);
593     ierr = PetscOptionsReal("-thi_erosion_refvel","Reference sliding velocity for erosion (m/a)",NULL,thi->erosion.refvel,&thi->erosion.refvel,NULL);CHKERRQ(ierr);
594     thi->erosion.rate   *= units->meter / units->year;
595     thi->erosion.refvel *= units->meter / units->year;
596     ierr = PetscOptionsReal("-thi_dirichlet_scale","Scale Dirichlet boundary conditions by this factor","",thi->dirichlet_scale,&thi->dirichlet_scale,NULL);CHKERRQ(ierr);
597     ierr = PetscOptionsReal("-thi_ssa_friction_scale","Scale slip boundary conditions by this factor in SSA (2D) assembly","",thi->ssa_friction_scale,&thi->ssa_friction_scale,NULL);CHKERRQ(ierr);
598     ierr = PetscOptionsReal("-thi_inertia","Coefficient of accelaration term in velocity system, physical is almost zero",NULL,thi->inertia,&thi->inertia,NULL);CHKERRQ(ierr);
599     ierr = PetscOptionsInt("-thi_nlevels","Number of levels of refinement","",thi->nlevels,&thi->nlevels,NULL);CHKERRQ(ierr);
600     ierr = PetscOptionsFList("-thi_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)mtype,sizeof(mtype),NULL);CHKERRQ(ierr);
601     ierr = PetscStrallocpy(mtype,&thi->mattype);CHKERRQ(ierr);
602     ierr = PetscOptionsBool("-thi_verbose","Enable verbose output (like matrix sizes and statistics)","",thi->verbose,&thi->verbose,NULL);CHKERRQ(ierr);
603     ierr = PetscOptionsString("-thi_monitor","Basename to write state files to",NULL,monitor_basename,monitor_basename,sizeof(monitor_basename),&flg);CHKERRQ(ierr);
604     if (flg) {
605       ierr = PetscStrallocpy(monitor_basename,&thi->monitor_basename);CHKERRQ(ierr);
606       thi->monitor_interval = 1;
607       ierr = PetscOptionsInt("-thi_monitor_interval","Frequency at which to write state files",NULL,thi->monitor_interval,&thi->monitor_interval,NULL);CHKERRQ(ierr);
608     }
609   }
610   ierr = PetscOptionsEnd();CHKERRQ(ierr);
611 
612   /* dimensionalize */
613   thi->Lx    *= units->meter;
614   thi->Ly    *= units->meter;
615   thi->Lz    *= units->meter;
616   thi->alpha *= PETSC_PI / 180;
617 
618   PRangeClear(&thi->eta);
619   PRangeClear(&thi->beta2);
620 
621   {
622     PetscReal u       = 1000*units->meter/(3e7*units->second),
623               gradu   = u / (100*units->meter),eta,deta,
624               rho     = 910 * units->kilogram/PetscPowRealInt(units->meter,3),
625               grav    = 9.81 * units->meter/PetscSqr(units->second),
626               driving = rho * grav * PetscSinReal(thi->alpha) * 1000*units->meter;
627     THIViscosity(thi,0.5*gradu*gradu,&eta,&deta);
628     thi->rhog = rho * grav;
629     if (thi->verbose) {
630       ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Units: meter %8.2g  second %8.2g  kg %8.2g  Pa %8.2g\n",units->meter,units->second,units->kilogram,units->Pascal);CHKERRQ(ierr);
631       ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n",thi->Lx,thi->Ly,thi->Lz,rho*grav*1e3*units->meter,driving);CHKERRQ(ierr);
632       ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",u,gradu,eta,2*eta*gradu,2*eta*gradu/driving);CHKERRQ(ierr);
633       THIViscosity(thi,0.5*PetscSqr(1e-3*gradu),&eta,&deta);
634       ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Small velocity 1m/a  %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",1e-3*u,1e-3*gradu,eta,2*eta*1e-3*gradu,2*eta*1e-3*gradu/driving);CHKERRQ(ierr);
635     }
636   }
637 
638   *inthi = thi;
639   PetscFunctionReturn(0);
640 }
641 
642 /* Our problem is periodic, but the domain has a mean slope of alpha so the bed does not line up between the upstream
643  * and downstream ends of the domain.  This function fixes the ghost values so that the domain appears truly periodic in
644  * the horizontal. */
645 static PetscErrorCode THIFixGhosts(THI thi,DM da3,DM da2,Vec X3,Vec X2)
646 {
647   PetscErrorCode ierr;
648   DMDALocalInfo  info;
649   PrmNode        **x2;
650   PetscInt       i,j;
651 
652   PetscFunctionBeginUser;
653   ierr = DMDAGetLocalInfo(da3,&info);CHKERRQ(ierr);
654   /* ierr = VecView(X2,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */
655   ierr = DMDAVecGetArray(da2,X2,&x2);CHKERRQ(ierr);
656   for (i=info.gzs; i<info.gzs+info.gzm; i++) {
657     if (i > -1 && i < info.mz) continue;
658     for (j=info.gys; j<info.gys+info.gym; j++) {
659       x2[i][j].b += PetscSinReal(thi->alpha) * thi->Lx * (i<0 ? 1.0 : -1.0);
660     }
661   }
662   ierr = DMDAVecRestoreArray(da2,X2,&x2);CHKERRQ(ierr);
663   /* ierr = VecView(X2,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */
664   PetscFunctionReturn(0);
665 }
666 
667 static PetscErrorCode THIInitializePrm(THI thi,DM da2prm,PrmNode **p)
668 {
669   PetscInt       i,j,xs,xm,ys,ym,mx,my;
670   PetscErrorCode ierr;
671 
672   PetscFunctionBeginUser;
673   ierr = DMDAGetGhostCorners(da2prm,&ys,&xs,0,&ym,&xm,0);CHKERRQ(ierr);
674   ierr = DMDAGetInfo(da2prm,0, &my,&mx,0, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
675   for (i=xs; i<xs+xm; i++) {
676     for (j=ys; j<ys+ym; j++) {
677       PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my;
678       thi->initialize(thi,xx,yy,&p[i][j]);
679     }
680   }
681   PetscFunctionReturn(0);
682 }
683 
684 static PetscErrorCode THIInitial(THI thi,DM pack,Vec X)
685 {
686   DM             da3,da2;
687   PetscInt       i,j,k,xs,xm,ys,ym,zs,zm,mx,my;
688   PetscReal      hx,hy;
689   PrmNode        **prm;
690   Node           ***x;
691   Vec            X3g,X2g,X2;
692   PetscErrorCode ierr;
693 
694   PetscFunctionBeginUser;
695   ierr = DMCompositeGetEntries(pack,&da3,&da2);CHKERRQ(ierr);
696   ierr = DMCompositeGetAccess(pack,X,&X3g,&X2g);CHKERRQ(ierr);
697   ierr = DMGetLocalVector(da2,&X2);CHKERRQ(ierr);
698 
699   ierr = DMDAGetInfo(da3,0, 0,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
700   ierr = DMDAGetCorners(da3,&zs,&ys,&xs,&zm,&ym,&xm);CHKERRQ(ierr);
701   ierr = DMDAVecGetArray(da3,X3g,&x);CHKERRQ(ierr);
702   ierr = DMDAVecGetArray(da2,X2,&prm);CHKERRQ(ierr);
703 
704   ierr = THIInitializePrm(thi,da2,prm);CHKERRQ(ierr);
705 
706   hx = thi->Lx / mx;
707   hy = thi->Ly / my;
708   for (i=xs; i<xs+xm; i++) {
709     for (j=ys; j<ys+ym; j++) {
710       for (k=zs; k<zs+zm; k++) {
711         const PetscScalar zm1      = zm-1,
712                           drivingx = thi->rhog * (prm[i+1][j].b+prm[i+1][j].h - prm[i-1][j].b-prm[i-1][j].h) / (2*hx),
713                           drivingy = thi->rhog * (prm[i][j+1].b+prm[i][j+1].h - prm[i][j-1].b-prm[i][j-1].h) / (2*hy);
714         x[i][j][k].u = 0. * drivingx * prm[i][j].h*(PetscScalar)k/zm1;
715         x[i][j][k].v = 0. * drivingy * prm[i][j].h*(PetscScalar)k/zm1;
716       }
717     }
718   }
719 
720   ierr = DMDAVecRestoreArray(da3,X3g,&x);CHKERRQ(ierr);
721   ierr = DMDAVecRestoreArray(da2,X2,&prm);CHKERRQ(ierr);
722 
723   ierr = DMLocalToGlobalBegin(da2,X2,INSERT_VALUES,X2g);CHKERRQ(ierr);
724   ierr = DMLocalToGlobalEnd  (da2,X2,INSERT_VALUES,X2g);CHKERRQ(ierr);
725   ierr = DMRestoreLocalVector(da2,&X2);CHKERRQ(ierr);
726 
727   ierr = DMCompositeRestoreAccess(pack,X,&X3g,&X2g);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 static void PointwiseNonlinearity(THI thi,const Node n[restrict 8],const PetscReal phi[restrict 3],PetscReal dphi[restrict 8][3],PetscScalar *restrict u,PetscScalar *restrict v,PetscScalar du[restrict 3],PetscScalar dv[restrict 3],PetscReal *eta,PetscReal *deta)
732 {
733   PetscInt    l,ll;
734   PetscScalar gam;
735 
736   du[0] = du[1] = du[2] = 0;
737   dv[0] = dv[1] = dv[2] = 0;
738   *u    = 0;
739   *v    = 0;
740   for (l=0; l<8; l++) {
741     *u += phi[l] * n[l].u;
742     *v += phi[l] * n[l].v;
743     for (ll=0; ll<3; ll++) {
744       du[ll] += dphi[l][ll] * n[l].u;
745       dv[ll] += dphi[l][ll] * n[l].v;
746     }
747   }
748   gam = Sqr(du[0]) + Sqr(dv[1]) + du[0]*dv[1] + 0.25*Sqr(du[1]+dv[0]) + 0.25*Sqr(du[2]) + 0.25*Sqr(dv[2]);
749   THIViscosity(thi,PetscRealPart(gam),eta,deta);
750 }
751 
752 static PetscErrorCode THIFunctionLocal_3D(DMDALocalInfo *info,const Node ***x,const PrmNode **prm,const Node ***xdot,Node ***f,THI thi)
753 {
754   PetscInt       xs,ys,xm,ym,zm,i,j,k,q,l;
755   PetscReal      hx,hy,etamin,etamax,beta2min,beta2max;
756   PetscErrorCode ierr;
757 
758   PetscFunctionBeginUser;
759   xs = info->zs;
760   ys = info->ys;
761   xm = info->zm;
762   ym = info->ym;
763   zm = info->xm;
764   hx = thi->Lx / info->mz;
765   hy = thi->Ly / info->my;
766 
767   etamin   = 1e100;
768   etamax   = 0;
769   beta2min = 1e100;
770   beta2max = 0;
771 
772   for (i=xs; i<xs+xm; i++) {
773     for (j=ys; j<ys+ym; j++) {
774       PrmNode pn[4],dpn[4][2];
775       QuadExtract(prm,i,j,pn);
776       ierr = QuadComputeGrad4(QuadQDeriv,hx,hy,pn,dpn);CHKERRQ(ierr);
777       for (k=0; k<zm-1; k++) {
778         PetscInt  ls = 0;
779         Node      n[8],ndot[8],*fn[8];
780         PetscReal zn[8],etabase = 0;
781         PrmHexGetZ(pn,k,zm,zn);
782         HexExtract(x,i,j,k,n);
783         HexExtract(xdot,i,j,k,ndot);CHKERRQ(ierr);
784         HexExtractRef(f,i,j,k,fn);
785         if (thi->no_slip && k == 0) {
786           for (l=0; l<4; l++) n[l].u = n[l].v = 0;
787           /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
788           ls = 4;
789         }
790         for (q=0; q<8; q++) {
791           PetscReal   dz[3],phi[8],dphi[8][3],jw,eta,deta;
792           PetscScalar du[3],dv[3],u,v,udot=0,vdot=0;
793           for (l=ls; l<8; l++) {
794             udot += HexQInterp[q][l]*ndot[l].u;
795             vdot += HexQInterp[q][l]*ndot[l].v;
796           }
797           HexGrad(HexQDeriv[q],zn,dz);
798           HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
799           PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
800           jw /= thi->rhog;      /* scales residuals to be O(1) */
801           if (q == 0) etabase = eta;
802           RangeUpdate(&etamin,&etamax,eta);
803           for (l=ls; l<8; l++) { /* test functions */
804             const PetscScalar ds[2] = {dpn[q%4][0].h+dpn[q%4][0].b, dpn[q%4][1].h+dpn[q%4][1].b};
805             const PetscReal   pp    = phi[l],*dp = dphi[l];
806             fn[l]->u += dp[0]*jw*eta*(4.*du[0]+2.*dv[1]) + dp[1]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*du[2] + pp*jw*thi->rhog*ds[0];
807             fn[l]->v += dp[1]*jw*eta*(2.*du[0]+4.*dv[1]) + dp[0]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*dv[2] + pp*jw*thi->rhog*ds[1];
808             fn[l]->u += pp*jw*udot*thi->inertia*pp;
809             fn[l]->v += pp*jw*vdot*thi->inertia*pp;
810           }
811         }
812         if (k == 0) { /* we are on a bottom face */
813           if (thi->no_slip) {
814             /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
815             * conditions.  After shenanigans above, etabase contains the effective viscosity at the closest quadrature
816             * point to the bed.  We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
817             * diagonal entry corresponding to the adjacent node.  The fundamental scaling of the viscous part is in
818             * diagu, diagv below.  This scaling is easy to recognize by considering the finite difference operator after
819             * scaling by element size.  The no-slip Dirichlet condition is scaled by this factor, and also in the
820             * assembled matrix (see the similar block in THIJacobianLocal).
821             *
822             * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
823             * (nonlinearly in general) on the neighboring interior nodes through the local viscosity.  This will make
824             * a matrix-free Jacobian have extra entries in the corresponding row.  We assemble only the diagonal part,
825             * so the solution will exactly satisfy the boundary condition after the first linear iteration.
826             */
827             const PetscReal   hz    = PetscRealPart(pn[0].h)/(zm-1.);
828             const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
829             fn[0]->u = thi->dirichlet_scale*diagu*x[i][j][k].u;
830             fn[0]->v = thi->dirichlet_scale*diagv*x[i][j][k].v;
831           } else {              /* Integrate over bottom face to apply boundary condition */
832             for (q=0; q<4; q++) { /* We remove the explicit scaling of the residual by 1/rhog because beta2 already has that scaling to be O(1) */
833               const PetscReal jw = 0.25*hx*hy,*phi = QuadQInterp[q];
834               PetscScalar     u  =0,v=0,rbeta2=0;
835               PetscReal       beta2,dbeta2;
836               for (l=0; l<4; l++) {
837                 u     += phi[l]*n[l].u;
838                 v     += phi[l]*n[l].v;
839                 rbeta2 += phi[l]*pn[l].beta2;
840               }
841               THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
842               RangeUpdate(&beta2min,&beta2max,beta2);
843               for (l=0; l<4; l++) {
844                 const PetscReal pp = phi[l];
845                 fn[ls+l]->u += pp*jw*beta2*u;
846                 fn[ls+l]->v += pp*jw*beta2*v;
847               }
848             }
849           }
850         }
851       }
852     }
853   }
854 
855   ierr = PRangeMinMax(&thi->eta,etamin,etamax);CHKERRQ(ierr);
856   ierr = PRangeMinMax(&thi->beta2,beta2min,beta2max);CHKERRQ(ierr);
857   PetscFunctionReturn(0);
858 }
859 
860 static PetscErrorCode THIFunctionLocal_2D(DMDALocalInfo *info,const Node ***x,const PrmNode **prm,const PrmNode **prmdot,PrmNode **f,THI thi)
861 {
862   PetscInt xs,ys,xm,ym,zm,i,j,k;
863 
864   PetscFunctionBeginUser;
865   xs = info->zs;
866   ys = info->ys;
867   xm = info->zm;
868   ym = info->ym;
869   zm = info->xm;
870 
871   for (i=xs; i<xs+xm; i++) {
872     for (j=ys; j<ys+ym; j++) {
873       PetscScalar div = 0,erate,h[8];
874       PrmNodeGetFaceMeasure(prm,i,j,h);
875       for (k=0; k<zm; k++) {
876         PetscScalar weight = (k==0 || k == zm-1) ? 0.5/(zm-1) : 1.0/(zm-1);
877         if (0) {                /* centered flux */
878           div += (- weight*h[0] * StaggeredMidpoint2D(x[i][j][k].u,x[i-1][j][k].u, x[i-1][j-1][k].u,x[i][j-1][k].u)
879                   - weight*h[1] * StaggeredMidpoint2D(x[i][j][k].u,x[i-1][j][k].u, x[i-1][j+1][k].u,x[i][j+1][k].u)
880                   + weight*h[2] * StaggeredMidpoint2D(x[i][j][k].u,x[i+1][j][k].u, x[i+1][j+1][k].u,x[i][j+1][k].u)
881                   + weight*h[3] * StaggeredMidpoint2D(x[i][j][k].u,x[i+1][j][k].u, x[i+1][j-1][k].u,x[i][j-1][k].u)
882                   - weight*h[4] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j-1][k].v, x[i+1][j-1][k].v,x[i+1][j][k].v)
883                   - weight*h[5] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j-1][k].v, x[i-1][j-1][k].v,x[i-1][j][k].v)
884                   + weight*h[6] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j+1][k].v, x[i-1][j+1][k].v,x[i-1][j][k].v)
885                   + weight*h[7] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j+1][k].v, x[i+1][j+1][k].v,x[i+1][j][k].v));
886         } else {                /* Upwind flux */
887           div += weight*(-UpwindFluxXW(x,prm,h,i,j,k, 1)
888                          -UpwindFluxXW(x,prm,h,i,j,k,-1)
889                          +UpwindFluxXE(x,prm,h,i,j,k, 1)
890                          +UpwindFluxXE(x,prm,h,i,j,k,-1)
891                          -UpwindFluxYS(x,prm,h,i,j,k, 1)
892                          -UpwindFluxYS(x,prm,h,i,j,k,-1)
893                          +UpwindFluxYN(x,prm,h,i,j,k, 1)
894                          +UpwindFluxYN(x,prm,h,i,j,k,-1));
895         }
896       }
897       /* printf("div[%d][%d] %g\n",i,j,div); */
898       THIErosion(thi,&x[i][j][0],&erate,NULL);
899       f[i][j].b     = prmdot[i][j].b - erate;
900       f[i][j].h     = prmdot[i][j].h + div;
901       f[i][j].beta2 = prmdot[i][j].beta2;
902     }
903   }
904   PetscFunctionReturn(0);
905 }
906 
907 static PetscErrorCode THIFunction(TS ts,PetscReal t,Vec X,Vec Xdot,Vec F,void *ctx)
908 {
909   PetscErrorCode ierr;
910   THI            thi = (THI)ctx;
911   DM             pack,da3,da2;
912   Vec            X3,X2,Xdot3,Xdot2,F3,F2,F3g,F2g;
913   const Node     ***x3,***xdot3;
914   const PrmNode  **x2,**xdot2;
915   Node           ***f3;
916   PrmNode        **f2;
917   DMDALocalInfo  info3;
918 
919   PetscFunctionBeginUser;
920   ierr = TSGetDM(ts,&pack);CHKERRQ(ierr);
921   ierr = DMCompositeGetEntries(pack,&da3,&da2);CHKERRQ(ierr);
922   ierr = DMDAGetLocalInfo(da3,&info3);CHKERRQ(ierr);
923   ierr = DMCompositeGetLocalVectors(pack,&X3,&X2);CHKERRQ(ierr);
924   ierr = DMCompositeGetLocalVectors(pack,&Xdot3,&Xdot2);CHKERRQ(ierr);
925   ierr = DMCompositeScatter(pack,X,X3,X2);CHKERRQ(ierr);
926   ierr = THIFixGhosts(thi,da3,da2,X3,X2);CHKERRQ(ierr);
927   ierr = DMCompositeScatter(pack,Xdot,Xdot3,Xdot2);CHKERRQ(ierr);
928 
929   ierr = DMGetLocalVector(da3,&F3);CHKERRQ(ierr);
930   ierr = DMGetLocalVector(da2,&F2);CHKERRQ(ierr);
931   ierr = VecZeroEntries(F3);CHKERRQ(ierr);
932 
933   ierr = DMDAVecGetArray(da3,X3,&x3);CHKERRQ(ierr);
934   ierr = DMDAVecGetArray(da2,X2,&x2);CHKERRQ(ierr);
935   ierr = DMDAVecGetArray(da3,Xdot3,&xdot3);CHKERRQ(ierr);
936   ierr = DMDAVecGetArray(da2,Xdot2,&xdot2);CHKERRQ(ierr);
937   ierr = DMDAVecGetArray(da3,F3,&f3);CHKERRQ(ierr);
938   ierr = DMDAVecGetArray(da2,F2,&f2);CHKERRQ(ierr);
939 
940   ierr = THIFunctionLocal_3D(&info3,x3,x2,xdot3,f3,thi);CHKERRQ(ierr);
941   ierr = THIFunctionLocal_2D(&info3,x3,x2,xdot2,f2,thi);CHKERRQ(ierr);
942 
943   ierr = DMDAVecRestoreArray(da3,X3,&x3);CHKERRQ(ierr);
944   ierr = DMDAVecRestoreArray(da2,X2,&x2);CHKERRQ(ierr);
945   ierr = DMDAVecRestoreArray(da3,Xdot3,&xdot3);CHKERRQ(ierr);
946   ierr = DMDAVecRestoreArray(da2,Xdot2,&xdot2);CHKERRQ(ierr);
947   ierr = DMDAVecRestoreArray(da3,F3,&f3);CHKERRQ(ierr);
948   ierr = DMDAVecRestoreArray(da2,F2,&f2);CHKERRQ(ierr);
949 
950   ierr = DMCompositeRestoreLocalVectors(pack,&X3,&X2);CHKERRQ(ierr);
951   ierr = DMCompositeRestoreLocalVectors(pack,&Xdot3,&Xdot2);CHKERRQ(ierr);
952 
953   ierr = VecZeroEntries(F);CHKERRQ(ierr);
954   ierr = DMCompositeGetAccess(pack,F,&F3g,&F2g);CHKERRQ(ierr);
955   ierr = DMLocalToGlobalBegin(da3,F3,ADD_VALUES,F3g);CHKERRQ(ierr);
956   ierr = DMLocalToGlobalEnd  (da3,F3,ADD_VALUES,F3g);CHKERRQ(ierr);
957   ierr = DMLocalToGlobalBegin(da2,F2,INSERT_VALUES,F2g);CHKERRQ(ierr);
958   ierr = DMLocalToGlobalEnd  (da2,F2,INSERT_VALUES,F2g);CHKERRQ(ierr);
959 
960   if (thi->verbose) {
961     PetscViewer viewer;
962     ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)thi),&viewer);CHKERRQ(ierr);
963     ierr = PetscViewerASCIIPrintf(viewer,"3D_Velocity residual (bs=2):\n");CHKERRQ(ierr);
964     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
965     ierr = VecView(F3,viewer);CHKERRQ(ierr);
966     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
967     ierr = PetscViewerASCIIPrintf(viewer,"2D_Fields residual (bs=3):\n");CHKERRQ(ierr);
968     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
969     ierr = VecView(F2,viewer);CHKERRQ(ierr);
970     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
971   }
972 
973   ierr = DMCompositeRestoreAccess(pack,F,&F3g,&F2g);CHKERRQ(ierr);
974 
975   ierr = DMRestoreLocalVector(da3,&F3);CHKERRQ(ierr);
976   ierr = DMRestoreLocalVector(da2,&F2);CHKERRQ(ierr);
977   PetscFunctionReturn(0);
978 }
979 
980 static PetscErrorCode THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer)
981 {
982   PetscErrorCode ierr;
983   PetscReal      nrm;
984   PetscInt       m;
985   PetscMPIInt    rank;
986 
987   PetscFunctionBeginUser;
988   ierr = MatNorm(B,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
989   ierr = MatGetSize(B,&m,0);CHKERRQ(ierr);
990   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&rank);CHKERRQ(ierr);
991   if (!rank) {
992     PetscScalar val0,val2;
993     ierr = MatGetValue(B,0,0,&val0);CHKERRQ(ierr);
994     ierr = MatGetValue(B,2,2,&val2);CHKERRQ(ierr);
995     ierr = PetscViewerASCIIPrintf(viewer,"Matrix dim %8d  norm %8.2e, (0,0) %8.2e  (2,2) %8.2e, eta [%8.2e,%8.2e] beta2 [%8.2e,%8.2e]\n",m,nrm,PetscRealPart(val0),PetscRealPart(val2),thi->eta.cmin,thi->eta.cmax,thi->beta2.cmin,thi->beta2.cmax);CHKERRQ(ierr);
996   }
997   PetscFunctionReturn(0);
998 }
999 
1000 static PetscErrorCode THISurfaceStatistics(DM pack,Vec X,PetscReal *min,PetscReal *max,PetscReal *mean)
1001 {
1002   PetscErrorCode ierr;
1003   DM             da3,da2;
1004   Vec            X3,X2;
1005   Node           ***x;
1006   PetscInt       i,j,xs,ys,zs,xm,ym,zm,mx,my,mz;
1007   PetscReal      umin = 1e100,umax=-1e100;
1008   PetscScalar    usum =0.0,gusum;
1009 
1010   PetscFunctionBeginUser;
1011   ierr = DMCompositeGetEntries(pack,&da3,&da2);CHKERRQ(ierr);
1012   ierr = DMCompositeGetAccess(pack,X,&X3,&X2);CHKERRQ(ierr);
1013   *min = *max = *mean = 0;
1014   ierr = DMDAGetInfo(da3,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
1015   ierr = DMDAGetCorners(da3,&zs,&ys,&xs,&zm,&ym,&xm);CHKERRQ(ierr);
1016   if (zs != 0 || zm != mz) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_PLIB,"Unexpected decomposition");
1017   ierr = DMDAVecGetArray(da3,X3,&x);CHKERRQ(ierr);
1018   for (i=xs; i<xs+xm; i++) {
1019     for (j=ys; j<ys+ym; j++) {
1020       PetscReal u = PetscRealPart(x[i][j][zm-1].u);
1021       RangeUpdate(&umin,&umax,u);
1022       usum += u;
1023     }
1024   }
1025   ierr = DMDAVecRestoreArray(da3,X3,&x);CHKERRQ(ierr);
1026   ierr = DMCompositeRestoreAccess(pack,X,&X3,&X2);CHKERRQ(ierr);
1027 
1028   ierr  = MPI_Allreduce(&umin,min,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)da3));CHKERRQ(ierr);
1029   ierr  = MPI_Allreduce(&umax,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)da3));CHKERRQ(ierr);
1030   ierr  = MPI_Allreduce(&usum,&gusum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)da3));CHKERRQ(ierr);
1031   *mean = PetscRealPart(gusum) / (mx*my);
1032   PetscFunctionReturn(0);
1033 }
1034 
1035 static PetscErrorCode THISolveStatistics(THI thi,TS ts,PetscInt coarsened,const char name[])
1036 {
1037   MPI_Comm       comm;
1038   DM             pack;
1039   Vec            X,X3,X2;
1040   PetscErrorCode ierr;
1041 
1042   PetscFunctionBeginUser;
1043   ierr = PetscObjectGetComm((PetscObject)thi,&comm);CHKERRQ(ierr);
1044   ierr = TSGetDM(ts,&pack);CHKERRQ(ierr);
1045   ierr = TSGetSolution(ts,&X);CHKERRQ(ierr);
1046   ierr = DMCompositeGetAccess(pack,X,&X3,&X2);CHKERRQ(ierr);
1047   ierr = PetscPrintf(comm,"Solution statistics after solve: %s\n",name);CHKERRQ(ierr);
1048   {
1049     PetscInt            its,lits;
1050     SNESConvergedReason reason;
1051     SNES                snes;
1052     ierr = TSGetSNES(ts,&snes);CHKERRQ(ierr);
1053     ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr);
1054     ierr = SNESGetConvergedReason(snes,&reason);CHKERRQ(ierr);
1055     ierr = SNESGetLinearSolveIterations(snes,&lits);CHKERRQ(ierr);
1056     ierr = PetscPrintf(comm,"%s: Number of SNES iterations = %d, total linear iterations = %d\n",SNESConvergedReasons[reason],its,lits);CHKERRQ(ierr);
1057   }
1058   {
1059     PetscReal   nrm2,tmin[3]={1e100,1e100,1e100},tmax[3]={-1e100,-1e100,-1e100},min[3],max[3];
1060     PetscInt    i,j,m;
1061     PetscScalar *x;
1062     ierr = VecNorm(X3,NORM_2,&nrm2);CHKERRQ(ierr);
1063     ierr = VecGetLocalSize(X3,&m);CHKERRQ(ierr);
1064     ierr = VecGetArray(X3,&x);CHKERRQ(ierr);
1065     for (i=0; i<m; i+=2) {
1066       PetscReal u = PetscRealPart(x[i]),v = PetscRealPart(x[i+1]),c = PetscSqrtReal(u*u+v*v);
1067       tmin[0] = PetscMin(u,tmin[0]);
1068       tmin[1] = PetscMin(v,tmin[1]);
1069       tmin[2] = PetscMin(c,tmin[2]);
1070       tmax[0] = PetscMax(u,tmax[0]);
1071       tmax[1] = PetscMax(v,tmax[1]);
1072       tmax[2] = PetscMax(c,tmax[2]);
1073     }
1074     ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);
1075     ierr = MPI_Allreduce(tmin,min,3,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)thi));CHKERRQ(ierr);
1076     ierr = MPI_Allreduce(tmax,max,3,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)thi));CHKERRQ(ierr);
1077     /* Dimensionalize to meters/year */
1078     nrm2 *= thi->units->year / thi->units->meter;
1079     for (j=0; j<3; j++) {
1080       min[j] *= thi->units->year / thi->units->meter;
1081       max[j] *= thi->units->year / thi->units->meter;
1082     }
1083     ierr = PetscPrintf(comm,"|X|_2 %g   u in [%g, %g]   v in [%g, %g]   c in [%g, %g] \n",nrm2,min[0],max[0],min[1],max[1],min[2],max[2]);CHKERRQ(ierr);
1084     {
1085       PetscReal umin,umax,umean;
1086       ierr   = THISurfaceStatistics(pack,X,&umin,&umax,&umean);CHKERRQ(ierr);
1087       umin  *= thi->units->year / thi->units->meter;
1088       umax  *= thi->units->year / thi->units->meter;
1089       umean *= thi->units->year / thi->units->meter;
1090       ierr   = PetscPrintf(comm,"Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n",umin,umax,umean);CHKERRQ(ierr);
1091     }
1092     /* These values stay nondimensional */
1093     ierr = PetscPrintf(comm,"Global eta range   [%g, %g], converged range [%g, %g]\n",thi->eta.min,thi->eta.max,thi->eta.cmin,thi->eta.cmax);CHKERRQ(ierr);
1094     ierr = PetscPrintf(comm,"Global beta2 range [%g, %g], converged range [%g, %g]\n",thi->beta2.min,thi->beta2.max,thi->beta2.cmin,thi->beta2.cmax);CHKERRQ(ierr);
1095   }
1096   ierr = PetscPrintf(comm,"\n");CHKERRQ(ierr);
1097   ierr = DMCompositeRestoreAccess(pack,X,&X3,&X2);CHKERRQ(ierr);
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 static inline PetscInt DMDALocalIndex3D(DMDALocalInfo *info,PetscInt i,PetscInt j,PetscInt k)
1102 {return ((i-info->gzs)*info->gym + (j-info->gys))*info->gxm + (k-info->gxs);}
1103 static inline PetscInt DMDALocalIndex2D(DMDALocalInfo *info,PetscInt i,PetscInt j)
1104 {return (i-info->gzs)*info->gym + (j-info->gys);}
1105 
1106 static PetscErrorCode THIJacobianLocal_Momentum(DMDALocalInfo *info,const Node ***x,const PrmNode **prm,Mat B,Mat Bcpl,THI thi)
1107 {
1108   PetscInt       xs,ys,xm,ym,zm,i,j,k,q,l,ll;
1109   PetscReal      hx,hy;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBeginUser;
1113   xs = info->zs;
1114   ys = info->ys;
1115   xm = info->zm;
1116   ym = info->ym;
1117   zm = info->xm;
1118   hx = thi->Lx / info->mz;
1119   hy = thi->Ly / info->my;
1120 
1121   for (i=xs; i<xs+xm; i++) {
1122     for (j=ys; j<ys+ym; j++) {
1123       PrmNode pn[4],dpn[4][2];
1124       QuadExtract(prm,i,j,pn);
1125       ierr = QuadComputeGrad4(QuadQDeriv,hx,hy,pn,dpn);CHKERRQ(ierr);
1126       for (k=0; k<zm-1; k++) {
1127         Node        n[8];
1128         PetscReal   zn[8],etabase = 0;
1129         PetscScalar Ke[8*NODE_SIZE][8*NODE_SIZE],Kcpl[8*NODE_SIZE][4*PRMNODE_SIZE];
1130         PetscInt    ls = 0;
1131 
1132         PrmHexGetZ(pn,k,zm,zn);
1133         HexExtract(x,i,j,k,n);
1134         ierr = PetscMemzero(Ke,sizeof(Ke));CHKERRQ(ierr);
1135         ierr = PetscMemzero(Kcpl,sizeof(Kcpl));CHKERRQ(ierr);
1136         if (thi->no_slip && k == 0) {
1137           for (l=0; l<4; l++) n[l].u = n[l].v = 0;
1138           ls = 4;
1139         }
1140         for (q=0; q<8; q++) {
1141           PetscReal   dz[3],phi[8],dphi[8][3],jw,eta,deta;
1142           PetscScalar du[3],dv[3],u,v;
1143           HexGrad(HexQDeriv[q],zn,dz);
1144           HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
1145           PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
1146           jw /= thi->rhog;      /* residuals are scaled by this factor */
1147           if (q == 0) etabase = eta;
1148           for (l=ls; l<8; l++) { /* test functions */
1149             const PetscReal pp=phi[l],*restrict dp = dphi[l];
1150             for (ll=ls; ll<8; ll++) { /* trial functions */
1151               const PetscReal *restrict dpl = dphi[ll];
1152               PetscScalar dgdu,dgdv;
1153               dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1] + 0.5*du[2]*dpl[2];
1154               dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0] + 0.5*dv[2]*dpl[2];
1155               /* Picard part */
1156               Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + dp[2]*jw*eta*dpl[2];
1157               Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
1158               Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
1159               Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + dp[2]*jw*eta*dpl[2];
1160               /* extra Newton terms */
1161               Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*du[2];
1162               Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*du[2];
1163               Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*dv[2];
1164               Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*dv[2];
1165               /* inertial part */
1166               Ke[l*2+0][ll*2+0] += pp*jw*thi->inertia*pp;
1167               Ke[l*2+1][ll*2+1] += pp*jw*thi->inertia*pp;
1168             }
1169             for (ll=0; ll<4; ll++) { /* Trial functions for surface/bed */
1170               const PetscReal dpl[] = {QuadQDeriv[q%4][ll][0]/hx, QuadQDeriv[q%4][ll][1]/hy}; /* surface = h + b */
1171               Kcpl[FieldIndex(Node,l,u)][FieldIndex(PrmNode,ll,h)] += pp*jw*thi->rhog*dpl[0];
1172               Kcpl[FieldIndex(Node,l,u)][FieldIndex(PrmNode,ll,b)] += pp*jw*thi->rhog*dpl[0];
1173               Kcpl[FieldIndex(Node,l,v)][FieldIndex(PrmNode,ll,h)] += pp*jw*thi->rhog*dpl[1];
1174               Kcpl[FieldIndex(Node,l,v)][FieldIndex(PrmNode,ll,b)] += pp*jw*thi->rhog*dpl[1];
1175             }
1176           }
1177         }
1178         if (k == 0) { /* on a bottom face */
1179           if (thi->no_slip) {
1180             const PetscReal   hz    = PetscRealPart(pn[0].h)/(zm-1);
1181             const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
1182             Ke[0][0] = thi->dirichlet_scale*diagu;
1183             Ke[0][1] = 0;
1184             Ke[1][0] = 0;
1185             Ke[1][1] = thi->dirichlet_scale*diagv;
1186           } else {
1187             for (q=0; q<4; q++) { /* We remove the explicit scaling by 1/rhog because beta2 already has that scaling to be O(1) */
1188               const PetscReal jw = 0.25*hx*hy,*phi = QuadQInterp[q];
1189               PetscScalar     u  =0,v=0,rbeta2=0;
1190               PetscReal       beta2,dbeta2;
1191               for (l=0; l<4; l++) {
1192                 u      += phi[l]*n[l].u;
1193                 v      += phi[l]*n[l].v;
1194                 rbeta2 += phi[l]*pn[l].beta2;
1195               }
1196               THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
1197               for (l=0; l<4; l++) {
1198                 const PetscReal pp = phi[l];
1199                 for (ll=0; ll<4; ll++) {
1200                   const PetscReal ppl = phi[ll];
1201                   Ke[l*2+0][ll*2+0] += pp*jw*beta2*ppl + pp*jw*dbeta2*u*u*ppl;
1202                   Ke[l*2+0][ll*2+1] +=                   pp*jw*dbeta2*u*v*ppl;
1203                   Ke[l*2+1][ll*2+0] +=                   pp*jw*dbeta2*v*u*ppl;
1204                   Ke[l*2+1][ll*2+1] += pp*jw*beta2*ppl + pp*jw*dbeta2*v*v*ppl;
1205                 }
1206               }
1207             }
1208           }
1209         }
1210         {
1211           const PetscInt rc3blocked[8] = {
1212             DMDALocalIndex3D(info,i+0,j+0,k+0),
1213             DMDALocalIndex3D(info,i+1,j+0,k+0),
1214             DMDALocalIndex3D(info,i+1,j+1,k+0),
1215             DMDALocalIndex3D(info,i+0,j+1,k+0),
1216             DMDALocalIndex3D(info,i+0,j+0,k+1),
1217             DMDALocalIndex3D(info,i+1,j+0,k+1),
1218             DMDALocalIndex3D(info,i+1,j+1,k+1),
1219             DMDALocalIndex3D(info,i+0,j+1,k+1)
1220           },col2blocked[PRMNODE_SIZE*4] = {
1221             DMDALocalIndex2D(info,i+0,j+0),
1222             DMDALocalIndex2D(info,i+1,j+0),
1223             DMDALocalIndex2D(info,i+1,j+1),
1224             DMDALocalIndex2D(info,i+0,j+1)
1225           };
1226 #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1227           for (l=0; l<8; l++) {
1228             for (ll=l+1; ll<8; ll++) {
1229               Ke[ll*2+0][l*2+0] = Ke[l*2+0][ll*2+0];
1230               Ke[ll*2+1][l*2+0] = Ke[l*2+0][ll*2+1];
1231               Ke[ll*2+0][l*2+1] = Ke[l*2+1][ll*2+0];
1232               Ke[ll*2+1][l*2+1] = Ke[l*2+1][ll*2+1];
1233             }
1234           }
1235 #endif
1236           ierr = MatSetValuesBlockedLocal(B,8,rc3blocked,8,rc3blocked,&Ke[0][0],ADD_VALUES);CHKERRQ(ierr); /* velocity-velocity coupling can use blocked insertion */
1237           {                     /* The off-diagonal part cannot (yet) */
1238             PetscInt row3scalar[NODE_SIZE*8],col2scalar[PRMNODE_SIZE*4];
1239             for (l=0; l<8; l++) for (ll=0; ll<NODE_SIZE; ll++) row3scalar[l*NODE_SIZE+ll] = rc3blocked[l]*NODE_SIZE+ll;
1240             for (l=0; l<4; l++) for (ll=0; ll<PRMNODE_SIZE; ll++) col2scalar[l*PRMNODE_SIZE+ll] = col2blocked[l]*PRMNODE_SIZE+ll;
1241             ierr = MatSetValuesLocal(Bcpl,8*NODE_SIZE,row3scalar,4*PRMNODE_SIZE,col2scalar,&Kcpl[0][0],ADD_VALUES);CHKERRQ(ierr);
1242           }
1243         }
1244       }
1245     }
1246   }
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info,const Node ***x3,const PrmNode **x2,const PrmNode **xdot2,PetscReal a,Mat B22,Mat B21,THI thi)
1251 {
1252   PetscErrorCode ierr;
1253   PetscInt       xs,ys,xm,ym,zm,i,j,k;
1254 
1255   PetscFunctionBeginUser;
1256   xs = info->zs;
1257   ys = info->ys;
1258   xm = info->zm;
1259   ym = info->ym;
1260   zm = info->xm;
1261 
1262   if (zm > 1024) SETERRQ(((PetscObject)info->da)->comm,PETSC_ERR_SUP,"Need to allocate more space");
1263   for (i=xs; i<xs+xm; i++) {
1264     for (j=ys; j<ys+ym; j++) {
1265       {                         /* Self-coupling */
1266         const PetscInt    row[]  = {DMDALocalIndex2D(info,i,j)};
1267         const PetscInt    col[]  = {DMDALocalIndex2D(info,i,j)};
1268         const PetscScalar vals[] = {
1269           a,0,0,
1270           0,a,0,
1271           0,0,a
1272         };
1273         ierr = MatSetValuesBlockedLocal(B22,1,row,1,col,vals,INSERT_VALUES);CHKERRQ(ierr);
1274       }
1275       for (k=0; k<zm; k++) {    /* Coupling to velocity problem */
1276         /* Use a cheaper quadrature than for residual evaluation, because it is much sparser */
1277         const PetscInt row[]  = {FieldIndex(PrmNode,DMDALocalIndex2D(info,i,j),h)};
1278         const PetscInt cols[] = {
1279           FieldIndex(Node,DMDALocalIndex3D(info,i-1,j,k),u),
1280           FieldIndex(Node,DMDALocalIndex3D(info,i  ,j,k),u),
1281           FieldIndex(Node,DMDALocalIndex3D(info,i+1,j,k),u),
1282           FieldIndex(Node,DMDALocalIndex3D(info,i,j-1,k),v),
1283           FieldIndex(Node,DMDALocalIndex3D(info,i,j  ,k),v),
1284           FieldIndex(Node,DMDALocalIndex3D(info,i,j+1,k),v)
1285         };
1286         const PetscScalar
1287           w  = (k && k<zm-1) ? 0.5 : 0.25,
1288           hW = w*(x2[i-1][j  ].h+x2[i  ][j  ].h)/(zm-1.),
1289           hE = w*(x2[i  ][j  ].h+x2[i+1][j  ].h)/(zm-1.),
1290           hS = w*(x2[i  ][j-1].h+x2[i  ][j  ].h)/(zm-1.),
1291           hN = w*(x2[i  ][j  ].h+x2[i  ][j+1].h)/(zm-1.);
1292         PetscScalar *vals,
1293                      vals_upwind[] = {((PetscRealPart(x3[i][j][k].u) > 0) ? -hW : 0),
1294                                       ((PetscRealPart(x3[i][j][k].u) > 0) ? +hE : -hW),
1295                                       ((PetscRealPart(x3[i][j][k].u) > 0) ?  0  : +hE),
1296                                       ((PetscRealPart(x3[i][j][k].v) > 0) ? -hS : 0),
1297                                       ((PetscRealPart(x3[i][j][k].v) > 0) ? +hN : -hS),
1298                                       ((PetscRealPart(x3[i][j][k].v) > 0) ?  0  : +hN)},
1299                      vals_centered[] = {-0.5*hW, 0.5*(-hW+hE), 0.5*hE,
1300                                         -0.5*hS, 0.5*(-hS+hN), 0.5*hN};
1301         vals = 1 ? vals_upwind : vals_centered;
1302         if (k == 0) {
1303           Node derate;
1304           THIErosion(thi,&x3[i][j][0],NULL,&derate);
1305           vals[1] -= derate.u;
1306           vals[4] -= derate.v;
1307         }
1308         ierr = MatSetValuesLocal(B21,1,row,6,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1309       }
1310     }
1311   }
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 static PetscErrorCode THIJacobian(TS ts,PetscReal t,Vec X,Vec Xdot,PetscReal a,Mat A,Mat B,void *ctx)
1316 {
1317   PetscErrorCode ierr;
1318   THI            thi = (THI)ctx;
1319   DM             pack,da3,da2;
1320   Vec            X3,X2,Xdot2;
1321   Mat            B11,B12,B21,B22;
1322   DMDALocalInfo  info3;
1323   IS             *isloc;
1324   const Node     ***x3;
1325   const PrmNode  **x2,**xdot2;
1326 
1327   PetscFunctionBeginUser;
1328   ierr = TSGetDM(ts,&pack);CHKERRQ(ierr);
1329   ierr = DMCompositeGetEntries(pack,&da3,&da2);CHKERRQ(ierr);
1330   ierr = DMDAGetLocalInfo(da3,&info3);CHKERRQ(ierr);
1331   ierr = DMCompositeGetLocalVectors(pack,&X3,&X2);CHKERRQ(ierr);
1332   ierr = DMCompositeGetLocalVectors(pack,NULL,&Xdot2);CHKERRQ(ierr);
1333   ierr = DMCompositeScatter(pack,X,X3,X2);CHKERRQ(ierr);
1334   ierr = THIFixGhosts(thi,da3,da2,X3,X2);CHKERRQ(ierr);
1335   ierr = DMCompositeScatter(pack,Xdot,NULL,Xdot2);CHKERRQ(ierr);
1336 
1337   ierr = MatZeroEntries(B);CHKERRQ(ierr);
1338 
1339   ierr = DMCompositeGetLocalISs(pack,&isloc);CHKERRQ(ierr);
1340   ierr = MatGetLocalSubMatrix(B,isloc[0],isloc[0],&B11);CHKERRQ(ierr);
1341   ierr = MatGetLocalSubMatrix(B,isloc[0],isloc[1],&B12);CHKERRQ(ierr);
1342   ierr = MatGetLocalSubMatrix(B,isloc[1],isloc[0],&B21);CHKERRQ(ierr);
1343   ierr = MatGetLocalSubMatrix(B,isloc[1],isloc[1],&B22);CHKERRQ(ierr);
1344 
1345   ierr = DMDAVecGetArray(da3,X3,&x3);CHKERRQ(ierr);
1346   ierr = DMDAVecGetArray(da2,X2,&x2);CHKERRQ(ierr);
1347   ierr = DMDAVecGetArray(da2,Xdot2,&xdot2);CHKERRQ(ierr);
1348 
1349   ierr = THIJacobianLocal_Momentum(&info3,x3,x2,B11,B12,thi);CHKERRQ(ierr);
1350 
1351   /* Need to switch from ADD_VALUES to INSERT_VALUES */
1352   ierr = MatAssemblyBegin(B,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
1353   ierr = MatAssemblyEnd(B,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
1354 
1355   ierr = THIJacobianLocal_2D(&info3,x3,x2,xdot2,a,B22,B21,thi);CHKERRQ(ierr);
1356 
1357   ierr = DMDAVecRestoreArray(da3,X3,&x3);CHKERRQ(ierr);
1358   ierr = DMDAVecRestoreArray(da2,X2,&x2);CHKERRQ(ierr);
1359   ierr = DMDAVecRestoreArray(da2,Xdot2,&xdot2);CHKERRQ(ierr);
1360 
1361   ierr = MatRestoreLocalSubMatrix(B,isloc[0],isloc[0],&B11);CHKERRQ(ierr);
1362   ierr = MatRestoreLocalSubMatrix(B,isloc[0],isloc[1],&B12);CHKERRQ(ierr);
1363   ierr = MatRestoreLocalSubMatrix(B,isloc[1],isloc[0],&B21);CHKERRQ(ierr);
1364   ierr = MatRestoreLocalSubMatrix(B,isloc[1],isloc[1],&B22);CHKERRQ(ierr);
1365   ierr = ISDestroy(&isloc[0]);CHKERRQ(ierr);
1366   ierr = ISDestroy(&isloc[1]);CHKERRQ(ierr);
1367   ierr = PetscFree(isloc);CHKERRQ(ierr);
1368 
1369   ierr = DMCompositeRestoreLocalVectors(pack,&X3,&X2);CHKERRQ(ierr);
1370   ierr = DMCompositeRestoreLocalVectors(pack,0,&Xdot2);CHKERRQ(ierr);
1371 
1372   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1373   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1374   if (A != B) {
1375     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1376     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1377   }
1378   if (thi->verbose) {ierr = THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 /* VTK's XML formats are so brain-dead that they can't handle multiple grids in the same file.  Since the communication
1383  * can be shared between the two grids, we write two files at once, one for velocity (living on a 3D grid defined by
1384  * h=thickness and b=bed) and another for all properties living on the 2D grid.
1385  */
1386 static PetscErrorCode THIDAVecView_VTK_XML(THI thi,DM pack,Vec X,const char filename[],const char filename2[])
1387 {
1388   const PetscInt dof   = NODE_SIZE,dof2 = PRMNODE_SIZE;
1389   Units          units = thi->units;
1390   MPI_Comm       comm;
1391   PetscErrorCode ierr;
1392   PetscViewer    viewer3,viewer2;
1393   PetscMPIInt    rank,size,tag,nn,nmax,nn2,nmax2;
1394   PetscInt       mx,my,mz,r,range[6];
1395   PetscScalar    *x,*x2;
1396   DM             da3,da2;
1397   Vec            X3,X2;
1398 
1399   PetscFunctionBeginUser;
1400   ierr = PetscObjectGetComm((PetscObject)thi,&comm);CHKERRQ(ierr);
1401   ierr = DMCompositeGetEntries(pack,&da3,&da2);CHKERRQ(ierr);
1402   ierr = DMCompositeGetAccess(pack,X,&X3,&X2);CHKERRQ(ierr);
1403   ierr = DMDAGetInfo(da3,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
1404   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1405   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1406   ierr = PetscViewerASCIIOpen(comm,filename,&viewer3);CHKERRQ(ierr);
1407   ierr = PetscViewerASCIIOpen(comm,filename2,&viewer2);CHKERRQ(ierr);
1408   ierr = PetscViewerASCIIPrintf(viewer3,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n");CHKERRQ(ierr);
1409   ierr = PetscViewerASCIIPrintf(viewer2,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n");CHKERRQ(ierr);
1410   ierr = PetscViewerASCIIPrintf(viewer3,"  <StructuredGrid WholeExtent=\"%d %d %d %d %d %d\">\n",0,mz-1,0,my-1,0,mx-1);CHKERRQ(ierr);
1411   ierr = PetscViewerASCIIPrintf(viewer2,"  <StructuredGrid WholeExtent=\"%d %d %d %d %d %d\">\n",0,0,0,my-1,0,mx-1);CHKERRQ(ierr);
1412 
1413   ierr = DMDAGetCorners(da3,range,range+1,range+2,range+3,range+4,range+5);CHKERRQ(ierr);
1414   ierr = PetscMPIIntCast(range[3]*range[4]*range[5]*dof,&nn);CHKERRQ(ierr);
1415   ierr = MPI_Reduce(&nn,&nmax,1,MPI_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1416   ierr = PetscMPIIntCast(range[4]*range[5]*dof2,&nn2);CHKERRQ(ierr);
1417   ierr = MPI_Reduce(&nn2,&nmax2,1,MPI_INT,MPI_MAX,0,comm);CHKERRQ(ierr);
1418   tag  = ((PetscObject)viewer3)->tag;
1419   ierr = VecGetArrayRead(X3,(const PetscScalar**)&x);CHKERRQ(ierr);
1420   ierr = VecGetArrayRead(X2,(const PetscScalar**)&x2);CHKERRQ(ierr);
1421   if (!rank) {
1422     PetscScalar *array,*array2;
1423     ierr = PetscMalloc2(nmax,&array,nmax2,&array2);CHKERRQ(ierr);
1424     for (r=0; r<size; r++) {
1425       PetscInt    i,j,k,f,xs,xm,ys,ym,zs,zm;
1426       Node        *y3;
1427       PetscScalar (*y2)[PRMNODE_SIZE];
1428       MPI_Status status;
1429 
1430       if (r) {
1431         ierr = MPI_Recv(range,6,MPIU_INT,r,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr);
1432       }
1433       zs = range[0];ys = range[1];xs = range[2];zm = range[3];ym = range[4];xm = range[5];
1434       if (xm*ym*zm*dof > nmax) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen");
1435       if (r) {
1436         ierr = MPI_Recv(array,nmax,MPIU_SCALAR,r,tag,comm,&status);CHKERRQ(ierr);
1437         ierr = MPI_Get_count(&status,MPIU_SCALAR,&nn);CHKERRQ(ierr);
1438         if (nn != xm*ym*zm*dof) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"corrupt da3 send");
1439         y3   = (Node*)array;
1440         ierr = MPI_Recv(array2,nmax2,MPIU_SCALAR,r,tag,comm,&status);CHKERRQ(ierr);
1441         ierr = MPI_Get_count(&status,MPIU_SCALAR,&nn2);CHKERRQ(ierr);
1442         if (nn2 != xm*ym*dof2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"corrupt da2 send");
1443         y2 = (PetscScalar(*)[PRMNODE_SIZE])array2;
1444       } else {
1445         y3 = (Node*)x;
1446         y2 = (PetscScalar(*)[PRMNODE_SIZE])x2;
1447       }
1448       ierr = PetscViewerASCIIPrintf(viewer3,"    <Piece Extent=\"%D %D %D %D %D %D\">\n",zs,zs+zm-1,ys,ys+ym-1,xs,xs+xm-1);CHKERRQ(ierr);
1449       ierr = PetscViewerASCIIPrintf(viewer2,"    <Piece Extent=\"%d %d %D %D %D %D\">\n",0,0,ys,ys+ym-1,xs,xs+xm-1);CHKERRQ(ierr);
1450 
1451       ierr = PetscViewerASCIIPrintf(viewer3,"      <Points>\n");CHKERRQ(ierr);
1452       ierr = PetscViewerASCIIPrintf(viewer2,"      <Points>\n");CHKERRQ(ierr);
1453       ierr = PetscViewerASCIIPrintf(viewer3,"        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n");CHKERRQ(ierr);
1454       ierr = PetscViewerASCIIPrintf(viewer2,"        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n");CHKERRQ(ierr);
1455       for (i=xs; i<xs+xm; i++) {
1456         for (j=ys; j<ys+ym; j++) {
1457           PetscReal
1458             xx = thi->Lx*i/mx,
1459             yy = thi->Ly*j/my,
1460             b  = PetscRealPart(y2[i*ym+j][FieldOffset(PrmNode,b)]),
1461             h  = PetscRealPart(y2[i*ym+j][FieldOffset(PrmNode,h)]);
1462           for (k=zs; k<zs+zm; k++) {
1463             PetscReal zz = b + h*k/(mz-1.);
1464             ierr = PetscViewerASCIIPrintf(viewer3,"%f %f %f\n",xx,yy,zz);CHKERRQ(ierr);
1465           }
1466           ierr = PetscViewerASCIIPrintf(viewer2,"%f %f %f\n",xx,yy,(double)0.0);CHKERRQ(ierr);
1467         }
1468       }
1469       ierr = PetscViewerASCIIPrintf(viewer3,"        </DataArray>\n");CHKERRQ(ierr);
1470       ierr = PetscViewerASCIIPrintf(viewer2,"        </DataArray>\n");CHKERRQ(ierr);
1471       ierr = PetscViewerASCIIPrintf(viewer3,"      </Points>\n");CHKERRQ(ierr);
1472       ierr = PetscViewerASCIIPrintf(viewer2,"      </Points>\n");CHKERRQ(ierr);
1473 
1474       {                         /* Velocity and rank (3D) */
1475         ierr = PetscViewerASCIIPrintf(viewer3,"      <PointData>\n");CHKERRQ(ierr);
1476         ierr = PetscViewerASCIIPrintf(viewer3,"        <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n");CHKERRQ(ierr);
1477         for (i=0; i<nn/dof; i++) {
1478           ierr = PetscViewerASCIIPrintf(viewer3,"%f %f %f\n",PetscRealPart(y3[i].u)*units->year/units->meter,PetscRealPart(y3[i].v)*units->year/units->meter,0.0);CHKERRQ(ierr);
1479         }
1480         ierr = PetscViewerASCIIPrintf(viewer3,"        </DataArray>\n");CHKERRQ(ierr);
1481 
1482         ierr = PetscViewerASCIIPrintf(viewer3,"        <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n");CHKERRQ(ierr);
1483         for (i=0; i<nn; i+=dof) {
1484           ierr = PetscViewerASCIIPrintf(viewer3,"%D\n",r);CHKERRQ(ierr);
1485         }
1486         ierr = PetscViewerASCIIPrintf(viewer3,"        </DataArray>\n");CHKERRQ(ierr);
1487         ierr = PetscViewerASCIIPrintf(viewer3,"      </PointData>\n");CHKERRQ(ierr);
1488       }
1489 
1490       {                         /* 2D */
1491         ierr = PetscViewerASCIIPrintf(viewer2,"      <PointData>\n");CHKERRQ(ierr);
1492         for (f=0; f<PRMNODE_SIZE; f++) {
1493           const char *fieldname;
1494           ierr = DMDAGetFieldName(da2,f,&fieldname);CHKERRQ(ierr);
1495           ierr = PetscViewerASCIIPrintf(viewer2,"        <DataArray type=\"Float32\" Name=\"%s\" format=\"ascii\">\n",fieldname);CHKERRQ(ierr);
1496           for (i=0; i<nn2/PRMNODE_SIZE; i++) {
1497             ierr = PetscViewerASCIIPrintf(viewer2,"%g\n",y2[i][f]);CHKERRQ(ierr);
1498           }
1499           ierr = PetscViewerASCIIPrintf(viewer2,"        </DataArray>\n");CHKERRQ(ierr);
1500         }
1501         ierr = PetscViewerASCIIPrintf(viewer2,"      </PointData>\n");CHKERRQ(ierr);
1502       }
1503 
1504       ierr = PetscViewerASCIIPrintf(viewer3,"    </Piece>\n");CHKERRQ(ierr);
1505       ierr = PetscViewerASCIIPrintf(viewer2,"    </Piece>\n");CHKERRQ(ierr);
1506     }
1507     ierr = PetscFree2(array,array2);CHKERRQ(ierr);
1508   } else {
1509     ierr = MPI_Send(range,6,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
1510     ierr = MPI_Send(x,nn,MPIU_SCALAR,0,tag,comm);CHKERRQ(ierr);
1511     ierr = MPI_Send(x2,nn2,MPIU_SCALAR,0,tag,comm);CHKERRQ(ierr);
1512   }
1513   ierr = VecRestoreArrayRead(X3,(const PetscScalar**)&x);CHKERRQ(ierr);
1514   ierr = VecRestoreArrayRead(X2,(const PetscScalar**)&x2);CHKERRQ(ierr);
1515   ierr = PetscViewerASCIIPrintf(viewer3,"  </StructuredGrid>\n");CHKERRQ(ierr);
1516   ierr = PetscViewerASCIIPrintf(viewer2,"  </StructuredGrid>\n");CHKERRQ(ierr);
1517 
1518   ierr = DMCompositeRestoreAccess(pack,X,&X3,&X2);CHKERRQ(ierr);
1519   ierr = PetscViewerASCIIPrintf(viewer3,"</VTKFile>\n");CHKERRQ(ierr);
1520   ierr = PetscViewerASCIIPrintf(viewer2,"</VTKFile>\n");CHKERRQ(ierr);
1521   ierr = PetscViewerDestroy(&viewer3);CHKERRQ(ierr);
1522   ierr = PetscViewerDestroy(&viewer2);CHKERRQ(ierr);
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 static PetscErrorCode THITSMonitor(TS ts,PetscInt step,PetscReal t,Vec X,void *ctx)
1527 {
1528   PetscErrorCode ierr;
1529   THI            thi = (THI)ctx;
1530   DM             pack;
1531   char           filename3[PETSC_MAX_PATH_LEN],filename2[PETSC_MAX_PATH_LEN];
1532 
1533   PetscFunctionBeginUser;
1534   if (step < 0) PetscFunctionReturn(0); /* negative one is used to indicate an interpolated solution */
1535   ierr = PetscPrintf(PetscObjectComm((PetscObject)ts),"%3D: t=%g\n",step,(double)t);CHKERRQ(ierr);
1536   if (thi->monitor_interval && step % thi->monitor_interval) PetscFunctionReturn(0);
1537   ierr = TSGetDM(ts,&pack);CHKERRQ(ierr);
1538   ierr = PetscSNPrintf(filename3,sizeof(filename3),"%s-3d-%03d.vts",thi->monitor_basename,step);CHKERRQ(ierr);
1539   ierr = PetscSNPrintf(filename2,sizeof(filename2),"%s-2d-%03d.vts",thi->monitor_basename,step);CHKERRQ(ierr);
1540   ierr = THIDAVecView_VTK_XML(thi,pack,X,filename3,filename2);CHKERRQ(ierr);
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 
1545 static PetscErrorCode THICreateDM3d(THI thi,DM *dm3d)
1546 {
1547   MPI_Comm       comm;
1548   PetscInt       M    = 3,N = 3,P = 2;
1549   DM             da;
1550   PetscErrorCode ierr;
1551 
1552   PetscFunctionBeginUser;
1553   ierr = PetscObjectGetComm((PetscObject)thi,&comm);CHKERRQ(ierr);
1554   ierr = PetscOptionsBegin(comm,NULL,"Grid resolution options","");CHKERRQ(ierr);
1555   {
1556     ierr = PetscOptionsInt("-M","Number of elements in x-direction on coarse level","",M,&M,NULL);CHKERRQ(ierr);
1557     N    = M;
1558     ierr = PetscOptionsInt("-N","Number of elements in y-direction on coarse level (if different from M)","",N,&N,NULL);CHKERRQ(ierr);
1559     ierr = PetscOptionsInt("-P","Number of elements in z-direction on coarse level","",P,&P,NULL);CHKERRQ(ierr);
1560   }
1561   ierr  = PetscOptionsEnd();CHKERRQ(ierr);
1562   ierr  = DMDACreate3d(comm,DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_BOX,P,N,M,1,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,0,&da);CHKERRQ(ierr);
1563   ierr = DMSetFromOptions(da);CHKERRQ(ierr);
1564   ierr = DMSetUp(da);CHKERRQ(ierr);
1565   ierr  = DMDASetFieldName(da,0,"x-velocity");CHKERRQ(ierr);
1566   ierr  = DMDASetFieldName(da,1,"y-velocity");CHKERRQ(ierr);
1567   *dm3d = da;
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 int main(int argc,char *argv[])
1572 {
1573   MPI_Comm       comm;
1574   DM             pack,da3,da2;
1575   TS             ts;
1576   THI            thi;
1577   Vec            X;
1578   Mat            B;
1579   PetscInt       i,steps;
1580   PetscReal      ftime;
1581   PetscErrorCode ierr;
1582 
1583   ierr = PetscInitialize(&argc,&argv,0,help);if (ierr) return ierr;
1584   comm = PETSC_COMM_WORLD;
1585 
1586   ierr = THICreate(comm,&thi);CHKERRQ(ierr);
1587   ierr = THICreateDM3d(thi,&da3);CHKERRQ(ierr);
1588   {
1589     PetscInt        Mx,My,mx,my,s;
1590     DMDAStencilType st;
1591     ierr = DMDAGetInfo(da3,0, 0,&My,&Mx, 0,&my,&mx, 0,&s,0,0,0,&st);CHKERRQ(ierr);
1592     ierr = DMDACreate2d(PetscObjectComm((PetscObject)thi),DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,My,Mx,my,mx,sizeof(PrmNode)/sizeof(PetscScalar),s,0,0,&da2);CHKERRQ(ierr);
1593     ierr = DMSetUp(da2);CHKERRQ(ierr);
1594   }
1595 
1596   ierr = PetscObjectSetName((PetscObject)da3,"3D_Velocity");CHKERRQ(ierr);
1597   ierr = DMSetOptionsPrefix(da3,"f3d_");CHKERRQ(ierr);
1598   ierr = DMDASetFieldName(da3,0,"u");CHKERRQ(ierr);
1599   ierr = DMDASetFieldName(da3,1,"v");CHKERRQ(ierr);
1600   ierr = PetscObjectSetName((PetscObject)da2,"2D_Fields");CHKERRQ(ierr);
1601   ierr = DMSetOptionsPrefix(da2,"f2d_");CHKERRQ(ierr);
1602   ierr = DMDASetFieldName(da2,0,"b");CHKERRQ(ierr);
1603   ierr = DMDASetFieldName(da2,1,"h");CHKERRQ(ierr);
1604   ierr = DMDASetFieldName(da2,2,"beta2");CHKERRQ(ierr);
1605   ierr = DMCompositeCreate(comm,&pack);CHKERRQ(ierr);
1606   ierr = DMCompositeAddDM(pack,da3);CHKERRQ(ierr);
1607   ierr = DMCompositeAddDM(pack,da2);CHKERRQ(ierr);
1608   ierr = DMDestroy(&da3);CHKERRQ(ierr);
1609   ierr = DMDestroy(&da2);CHKERRQ(ierr);
1610   ierr = DMSetUp(pack);CHKERRQ(ierr);
1611   ierr = DMCreateMatrix(pack,&B);CHKERRQ(ierr);
1612   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1613   ierr = MatSetOptionsPrefix(B,"thi_");CHKERRQ(ierr);
1614 
1615   for (i=0; i<thi->nlevels; i++) {
1616     PetscReal Lx = thi->Lx / thi->units->meter,Ly = thi->Ly / thi->units->meter,Lz = thi->Lz / thi->units->meter;
1617     PetscInt  Mx,My,Mz;
1618     ierr = DMCompositeGetEntries(pack,&da3,&da2);CHKERRQ(ierr);
1619     ierr = DMDAGetInfo(da3,0, &Mz,&My,&Mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
1620     ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g x %8.2g, num elements %3d x %3d x %3d (%8d), size (m) %g x %g x %g\n",i,Lx,Ly,Lz,Mx,My,Mz,Mx*My*Mz,Lx/Mx,Ly/My,1000./(Mz-1));CHKERRQ(ierr);
1621   }
1622 
1623   ierr = DMCreateGlobalVector(pack,&X);CHKERRQ(ierr);
1624   ierr = THIInitial(thi,pack,X);CHKERRQ(ierr);
1625 
1626   ierr = TSCreate(comm,&ts);CHKERRQ(ierr);
1627   ierr = TSSetDM(ts,pack);CHKERRQ(ierr);
1628   ierr = TSSetProblemType(ts,TS_NONLINEAR);CHKERRQ(ierr);
1629   ierr = TSMonitorSet(ts,THITSMonitor,thi,NULL);CHKERRQ(ierr);
1630   ierr = TSSetType(ts,TSTHETA);CHKERRQ(ierr);
1631   ierr = TSSetIFunction(ts,NULL,THIFunction,thi);CHKERRQ(ierr);
1632   ierr = TSSetIJacobian(ts,B,B,THIJacobian,thi);CHKERRQ(ierr);
1633   ierr = TSSetMaxTime(ts,10.0);CHKERRQ(ierr);
1634   ierr = TSSetExactFinalTime(ts,TS_EXACTFINALTIME_STEPOVER);CHKERRQ(ierr);
1635   ierr = TSSetSolution(ts,X);CHKERRQ(ierr);
1636   ierr = TSSetTimeStep(ts,1e-3);CHKERRQ(ierr);
1637   ierr = TSSetFromOptions(ts);CHKERRQ(ierr);
1638 
1639   ierr = TSSolve(ts,X);CHKERRQ(ierr);
1640   ierr = TSGetSolveTime(ts,&ftime);CHKERRQ(ierr);
1641   ierr = TSGetStepNumber(ts,&steps);CHKERRQ(ierr);
1642   ierr = PetscPrintf(PETSC_COMM_WORLD,"Steps %D  final time %g\n",steps,(double)ftime);CHKERRQ(ierr);
1643 
1644   if (0) {ierr = THISolveStatistics(thi,ts,0,"Full");CHKERRQ(ierr);}
1645 
1646   {
1647     PetscBool flg;
1648     char      filename[PETSC_MAX_PATH_LEN] = "";
1649     ierr = PetscOptionsGetString(NULL,NULL,"-o",filename,sizeof(filename),&flg);CHKERRQ(ierr);
1650     if (flg) {
1651       ierr = THIDAVecView_VTK_XML(thi,pack,X,filename,NULL);CHKERRQ(ierr);
1652     }
1653   }
1654 
1655   ierr = VecDestroy(&X);CHKERRQ(ierr);
1656   ierr = MatDestroy(&B);CHKERRQ(ierr);
1657   ierr = DMDestroy(&pack);CHKERRQ(ierr);
1658   ierr = TSDestroy(&ts);CHKERRQ(ierr);
1659   ierr = THIDestroy(&thi);CHKERRQ(ierr);
1660   ierr = PetscFinalize();
1661   return ierr;
1662 }
1663