xref: /petsc/src/ts/tutorials/ex14.c (revision 1241a2435f426d5975ea82fba64fedc0910e0d42)
1 static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
2 \n\
3 Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
4 using multigrid.  The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
5 to p=4/3 in a p-Laplacian).  The focus is on ISMIP-HOM experiments which assume periodic\n\
6 boundary conditions in the x- and y-directions.\n\
7 \n\
8 Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
9 can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
10 \n\
11 A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
12 \n\n";
13 
14 /*
15 The equations for horizontal velocity (u,v) are
16 
17   - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
18   - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0
19 
20 where
21 
22   eta = B/2 (epsilon + gamma)^((p-2)/2)
23 
24 is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
25 written in terms of the second invariant
26 
27   gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2
28 
29 The surface boundary conditions are the natural conditions.  The basal boundary conditions
30 are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.
31 
32 In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).
33 
34 The discretization is Q1 finite elements, managed by a DMDA.  The grid is never distorted in the
35 map (x,y) plane, but the bed and surface may be bumpy.  This is handled as usual in FEM, through
36 the Jacobian of the coordinate transformation from a reference element to the physical element.
37 
38 Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
39 specially so that columns are never distributed, and are always contiguous in memory.
40 This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
41 and then indexing as vec[i][j][k].  The exotic coarse spaces require 2D DMDAs which are made to
42 use compatible domain decomposition relative to the 3D DMDAs.
43 
44 */
45 
46 #include <petscts.h>
47 #include <petscdm.h>
48 #include <petscdmda.h>
49 #include <petscdmcomposite.h>
50 #include <ctype.h>              /* toupper() */
51 #include <petsc/private/petscimpl.h>
52 
53 #if defined __SSE2__
54 #  include <emmintrin.h>
55 #endif
56 
57 /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
58 #define USE_SSE2_KERNELS (!defined NO_SSE2                              \
59                           && !defined PETSC_USE_COMPLEX                 \
60                           && !defined PETSC_USE_REAL_SINGLE           \
61                           && defined __SSE2__)
62 
63 #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L
64 #  if defined __cplusplus       /* C++ restrict is nonstandard and compilers have inconsistent rules about where it can be used */
65 #    define restrict
66 #  else
67 #    define restrict PETSC_RESTRICT
68 #  endif
69 #endif
70 
71 static PetscClassId THI_CLASSID;
72 
73 typedef enum {QUAD_GAUSS,QUAD_LOBATTO} QuadratureType;
74 static const char *QuadratureTypes[] = {"gauss","lobatto","QuadratureType","QUAD_",0};
75 static const PetscReal HexQWeights[8] = {1,1,1,1,1,1,1,1};
76 static const PetscReal HexQNodes[]    = {-0.57735026918962573, 0.57735026918962573};
77 #define G 0.57735026918962573
78 #define H (0.5*(1.+G))
79 #define L (0.5*(1.-G))
80 #define M (-0.5)
81 #define P (0.5)
82 /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
83 static const PetscReal HexQInterp_Lobatto[8][8] = {{H,0,0,0,L,0,0,0},
84                                                    {0,H,0,0,0,L,0,0},
85                                                    {0,0,H,0,0,0,L,0},
86                                                    {0,0,0,H,0,0,0,L},
87                                                    {L,0,0,0,H,0,0,0},
88                                                    {0,L,0,0,0,H,0,0},
89                                                    {0,0,L,0,0,0,H,0},
90                                                    {0,0,0,L,0,0,0,H}};
91 static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
92   {{M*H,M*H,M},{P*H,0,0}  ,{0,0,0}    ,{0,P*H,0}  ,{M*L,M*L,P},{P*L,0,0}  ,{0,0,0}    ,{0,P*L,0}  },
93   {{M*H,0,0}  ,{P*H,M*H,M},{0,P*H,0}  ,{0,0,0}    ,{M*L,0,0}  ,{P*L,M*L,P},{0,P*L,0}  ,{0,0,0}    },
94   {{0,0,0}    ,{0,M*H,0}  ,{P*H,P*H,M},{M*H,0,0}  ,{0,0,0}    ,{0,M*L,0}  ,{P*L,P*L,P},{M*L,0,0}  },
95   {{0,M*H,0}  ,{0,0,0}    ,{P*H,0,0}  ,{M*H,P*H,M},{0,M*L,0}  ,{0,0,0}    ,{P*L,0,0}  ,{M*L,P*L,P}},
96   {{M*L,M*L,M},{P*L,0,0}  ,{0,0,0}    ,{0,P*L,0}  ,{M*H,M*H,P},{P*H,0,0}  ,{0,0,0}    ,{0,P*H,0}  },
97   {{M*L,0,0}  ,{P*L,M*L,M},{0,P*L,0}  ,{0,0,0}    ,{M*H,0,0}  ,{P*H,M*H,P},{0,P*H,0}  ,{0,0,0}    },
98   {{0,0,0}    ,{0,M*L,0}  ,{P*L,P*L,M},{M*L,0,0}  ,{0,0,0}    ,{0,M*H,0}  ,{P*H,P*H,P},{M*H,0,0}  },
99   {{0,M*L,0}  ,{0,0,0}    ,{P*L,0,0}  ,{M*L,P*L,M},{0,M*H,0}  ,{0,0,0}    ,{P*H,0,0}  ,{M*H,P*H,P}}};
100 /* Stanndard Gauss */
101 static const PetscReal HexQInterp_Gauss[8][8] = {{H*H*H,L*H*H,L*L*H,H*L*H, H*H*L,L*H*L,L*L*L,H*L*L},
102                                                  {L*H*H,H*H*H,H*L*H,L*L*H, L*H*L,H*H*L,H*L*L,L*L*L},
103                                                  {L*L*H,H*L*H,H*H*H,L*H*H, L*L*L,H*L*L,H*H*L,L*H*L},
104                                                  {H*L*H,L*L*H,L*H*H,H*H*H, H*L*L,L*L*L,L*H*L,H*H*L},
105                                                  {H*H*L,L*H*L,L*L*L,H*L*L, H*H*H,L*H*H,L*L*H,H*L*H},
106                                                  {L*H*L,H*H*L,H*L*L,L*L*L, L*H*H,H*H*H,H*L*H,L*L*H},
107                                                  {L*L*L,H*L*L,H*H*L,L*H*L, L*L*H,H*L*H,H*H*H,L*H*H},
108                                                  {H*L*L,L*L*L,L*H*L,H*H*L, H*L*H,L*L*H,L*H*H,H*H*H}};
109 static const PetscReal HexQDeriv_Gauss[8][8][3] = {
110   {{M*H*H,H*M*H,H*H*M},{P*H*H,L*M*H,L*H*M},{P*L*H,L*P*H,L*L*M},{M*L*H,H*P*H,H*L*M}, {M*H*L,H*M*L,H*H*P},{P*H*L,L*M*L,L*H*P},{P*L*L,L*P*L,L*L*P},{M*L*L,H*P*L,H*L*P}},
111   {{M*H*H,L*M*H,L*H*M},{P*H*H,H*M*H,H*H*M},{P*L*H,H*P*H,H*L*M},{M*L*H,L*P*H,L*L*M}, {M*H*L,L*M*L,L*H*P},{P*H*L,H*M*L,H*H*P},{P*L*L,H*P*L,H*L*P},{M*L*L,L*P*L,L*L*P}},
112   {{M*L*H,L*M*H,L*L*M},{P*L*H,H*M*H,H*L*M},{P*H*H,H*P*H,H*H*M},{M*H*H,L*P*H,L*H*M}, {M*L*L,L*M*L,L*L*P},{P*L*L,H*M*L,H*L*P},{P*H*L,H*P*L,H*H*P},{M*H*L,L*P*L,L*H*P}},
113   {{M*L*H,H*M*H,H*L*M},{P*L*H,L*M*H,L*L*M},{P*H*H,L*P*H,L*H*M},{M*H*H,H*P*H,H*H*M}, {M*L*L,H*M*L,H*L*P},{P*L*L,L*M*L,L*L*P},{P*H*L,L*P*L,L*H*P},{M*H*L,H*P*L,H*H*P}},
114   {{M*H*L,H*M*L,H*H*M},{P*H*L,L*M*L,L*H*M},{P*L*L,L*P*L,L*L*M},{M*L*L,H*P*L,H*L*M}, {M*H*H,H*M*H,H*H*P},{P*H*H,L*M*H,L*H*P},{P*L*H,L*P*H,L*L*P},{M*L*H,H*P*H,H*L*P}},
115   {{M*H*L,L*M*L,L*H*M},{P*H*L,H*M*L,H*H*M},{P*L*L,H*P*L,H*L*M},{M*L*L,L*P*L,L*L*M}, {M*H*H,L*M*H,L*H*P},{P*H*H,H*M*H,H*H*P},{P*L*H,H*P*H,H*L*P},{M*L*H,L*P*H,L*L*P}},
116   {{M*L*L,L*M*L,L*L*M},{P*L*L,H*M*L,H*L*M},{P*H*L,H*P*L,H*H*M},{M*H*L,L*P*L,L*H*M}, {M*L*H,L*M*H,L*L*P},{P*L*H,H*M*H,H*L*P},{P*H*H,H*P*H,H*H*P},{M*H*H,L*P*H,L*H*P}},
117   {{M*L*L,H*M*L,H*L*M},{P*L*L,L*M*L,L*L*M},{P*H*L,L*P*L,L*H*M},{M*H*L,H*P*L,H*H*M}, {M*L*H,H*M*H,H*L*P},{P*L*H,L*M*H,L*L*P},{P*H*H,L*P*H,L*H*P},{M*H*H,H*P*H,H*H*P}}};
118 static const PetscReal (*HexQInterp)[8],(*HexQDeriv)[8][3];
119 /* Standard 2x2 Gauss quadrature for the bottom layer. */
120 static const PetscReal QuadQInterp[4][4] = {{H*H,L*H,L*L,H*L},
121                                             {L*H,H*H,H*L,L*L},
122                                             {L*L,H*L,H*H,L*H},
123                                             {H*L,L*L,L*H,H*H}};
124 static const PetscReal QuadQDeriv[4][4][2] = {
125   {{M*H,M*H},{P*H,M*L},{P*L,P*L},{M*L,P*H}},
126   {{M*H,M*L},{P*H,M*H},{P*L,P*H},{M*L,P*L}},
127   {{M*L,M*L},{P*L,M*H},{P*H,P*H},{M*H,P*L}},
128   {{M*L,M*H},{P*L,M*L},{P*H,P*L},{M*H,P*H}}};
129 #undef G
130 #undef H
131 #undef L
132 #undef M
133 #undef P
134 
135 #define HexExtract(x,i,j,k,n) do {              \
136     (n)[0] = (x)[i][j][k];                      \
137     (n)[1] = (x)[i+1][j][k];                    \
138     (n)[2] = (x)[i+1][j+1][k];                  \
139     (n)[3] = (x)[i][j+1][k];                    \
140     (n)[4] = (x)[i][j][k+1];                    \
141     (n)[5] = (x)[i+1][j][k+1];                  \
142     (n)[6] = (x)[i+1][j+1][k+1];                \
143     (n)[7] = (x)[i][j+1][k+1];                  \
144   } while (0)
145 
146 #define HexExtractRef(x,i,j,k,n) do {           \
147     (n)[0] = &(x)[i][j][k];                     \
148     (n)[1] = &(x)[i+1][j][k];                   \
149     (n)[2] = &(x)[i+1][j+1][k];                 \
150     (n)[3] = &(x)[i][j+1][k];                   \
151     (n)[4] = &(x)[i][j][k+1];                   \
152     (n)[5] = &(x)[i+1][j][k+1];                 \
153     (n)[6] = &(x)[i+1][j+1][k+1];               \
154     (n)[7] = &(x)[i][j+1][k+1];                 \
155   } while (0)
156 
157 #define QuadExtract(x,i,j,n) do {               \
158     (n)[0] = (x)[i][j];                         \
159     (n)[1] = (x)[i+1][j];                       \
160     (n)[2] = (x)[i+1][j+1];                     \
161     (n)[3] = (x)[i][j+1];                       \
162   } while (0)
163 
164 static PetscScalar Sqr(PetscScalar a) {return a*a;}
165 
166 static void HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[])
167 {
168   PetscInt i;
169   dz[0] = dz[1] = dz[2] = 0;
170   for (i=0; i<8; i++) {
171     dz[0] += dphi[i][0] * zn[i];
172     dz[1] += dphi[i][1] * zn[i];
173     dz[2] += dphi[i][2] * zn[i];
174   }
175 }
176 
177 static void HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[restrict],PetscReal phi[restrict],PetscReal dphi[restrict][3],PetscReal *restrict jw)
178 {
179   const PetscReal
180     jac[3][3] = {{hx/2,0,0}, {0,hy/2,0}, {dz[0],dz[1],dz[2]}}
181   ,ijac[3][3] = {{1/jac[0][0],0,0}, {0,1/jac[1][1],0}, {-jac[2][0]/(jac[0][0]*jac[2][2]),-jac[2][1]/(jac[1][1]*jac[2][2]),1/jac[2][2]}}
182   ,jdet = jac[0][0]*jac[1][1]*jac[2][2];
183   PetscInt i;
184 
185   for (i=0; i<8; i++) {
186     const PetscReal *dphir = HexQDeriv[q][i];
187     phi[i] = HexQInterp[q][i];
188     dphi[i][0] = dphir[0]*ijac[0][0] + dphir[1]*ijac[1][0] + dphir[2]*ijac[2][0];
189     dphi[i][1] = dphir[0]*ijac[0][1] + dphir[1]*ijac[1][1] + dphir[2]*ijac[2][1];
190     dphi[i][2] = dphir[0]*ijac[0][2] + dphir[1]*ijac[1][2] + dphir[2]*ijac[2][2];
191   }
192   *jw = 1.0 * jdet;
193 }
194 
195 typedef struct _p_THI   *THI;
196 typedef struct _n_Units *Units;
197 
198 typedef struct {
199   PetscScalar u,v;
200 } Node;
201 
202 typedef struct {
203   PetscScalar b;                /* bed */
204   PetscScalar h;                /* thickness */
205   PetscScalar beta2;            /* friction */
206 } PrmNode;
207 
208 #define FieldSize(ntype) ((PetscInt)(sizeof(ntype)/sizeof(PetscScalar)))
209 #define FieldOffset(ntype,member) ((PetscInt)(offsetof(ntype,member)/sizeof(PetscScalar)))
210 #define FieldIndex(ntype,i,member) ((PetscInt)((i)*FieldSize(ntype) + FieldOffset(ntype,member)))
211 #define NODE_SIZE FieldSize(Node)
212 #define PRMNODE_SIZE FieldSize(PrmNode)
213 
214 typedef struct {
215   PetscReal min,max,cmin,cmax;
216 } PRange;
217 
218 struct _p_THI {
219   PETSCHEADER(int);
220   void      (*initialize)(THI,PetscReal x,PetscReal y,PrmNode *p);
221   PetscInt  nlevels;
222   PetscInt  zlevels;
223   PetscReal Lx,Ly,Lz;           /* Model domain */
224   PetscReal alpha;              /* Bed angle */
225   Units     units;
226   PetscReal dirichlet_scale;
227   PetscReal ssa_friction_scale;
228   PetscReal inertia;
229   PRange    eta;
230   PRange    beta2;
231   struct {
232     PetscReal Bd2,eps,exponent,glen_n;
233   } viscosity;
234   struct {
235     PetscReal irefgam,eps2,exponent;
236   } friction;
237   struct {
238     PetscReal rate,exponent,refvel;
239   } erosion;
240   PetscReal rhog;
241   PetscBool no_slip;
242   PetscBool verbose;
243   char      *mattype;
244   char      *monitor_basename;
245   PetscInt  monitor_interval;
246 };
247 
248 struct _n_Units {
249   /* fundamental */
250   PetscReal meter;
251   PetscReal kilogram;
252   PetscReal second;
253   /* derived */
254   PetscReal Pascal;
255   PetscReal year;
256 };
257 
258 static void PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[])
259 {
260   const PetscScalar zm1 = zm-1,
261     znl[8] = {pn[0].b + pn[0].h*(PetscScalar)k/zm1,
262               pn[1].b + pn[1].h*(PetscScalar)k/zm1,
263               pn[2].b + pn[2].h*(PetscScalar)k/zm1,
264               pn[3].b + pn[3].h*(PetscScalar)k/zm1,
265               pn[0].b + pn[0].h*(PetscScalar)(k+1)/zm1,
266               pn[1].b + pn[1].h*(PetscScalar)(k+1)/zm1,
267               pn[2].b + pn[2].h*(PetscScalar)(k+1)/zm1,
268               pn[3].b + pn[3].h*(PetscScalar)(k+1)/zm1};
269   PetscInt i;
270   for (i=0; i<8; i++) zn[i] = PetscRealPart(znl[i]);
271 }
272 
273 /* Compute a gradient of all the 2D fields at four quadrature points.  Output for [quadrature_point][direction].field_name */
274 static PetscErrorCode QuadComputeGrad4(const PetscReal dphi[][4][2],PetscReal hx,PetscReal hy,const PrmNode pn[4],PrmNode dp[4][2])
275 {
276   PetscErrorCode ierr;
277   PetscInt       q,i,f;
278   const PetscScalar (*restrict pg)[PRMNODE_SIZE] = (const PetscScalar(*)[PRMNODE_SIZE])pn; /* Get generic array pointers to the node */
279   PetscScalar (*restrict dpg)[2][PRMNODE_SIZE]   = (PetscScalar(*)[2][PRMNODE_SIZE])dp;
280 
281   PetscFunctionBeginUser;
282   ierr = PetscArrayzero(dpg,4);CHKERRQ(ierr);
283   for (q=0; q<4; q++) {
284     for (i=0; i<4; i++) {
285       for (f=0; f<PRMNODE_SIZE; f++) {
286         dpg[q][0][f] += dphi[q][i][0]/hx * pg[i][f];
287         dpg[q][1][f] += dphi[q][i][1]/hy * pg[i][f];
288       }
289     }
290   }
291   PetscFunctionReturn(0);
292 }
293 
294 static inline PetscReal StaggeredMidpoint2D(PetscScalar a,PetscScalar b,PetscScalar c,PetscScalar d)
295 {return 0.5*PetscRealPart(0.75*a + 0.75*b + 0.25*c + 0.25*d);}
296 static inline PetscReal UpwindFlux1D(PetscReal u,PetscReal hL,PetscReal hR)
297 {return (u > 0) ? hL*u : hR*u;}
298 
299 #define UpwindFluxXW(x3,x2,h,i,j,k,dj) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u,x3[i-1][j][k].u, x3[i-1][j+dj][k].u,x3[i][k+dj][k].u), \
300                                                     PetscRealPart(0.75*x2[i-1][j  ].h+0.25*x2[i-1][j+dj].h), PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i  ][j+dj].h))
301 #define UpwindFluxXE(x3,x2,h,i,j,k,dj) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u,x3[i+1][j][k].u, x3[i+1][j+dj][k].u,x3[i][k+dj][k].u), \
302                                                     PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i  ][j+dj].h), PetscRealPart(0.75*x2[i+1][j  ].h+0.25*x2[i+1][j+dj].h))
303 #define UpwindFluxYS(x3,x2,h,i,j,k,di) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v,x3[i][j-1][k].v, x3[i+di][j-1][k].v,x3[i+di][j][k].v), \
304                                                     PetscRealPart(0.75*x2[i  ][j-1].h+0.25*x2[i+di][j-1].h), PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i+di][j  ].h))
305 #define UpwindFluxYN(x3,x2,h,i,j,k,di) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v,x3[i][j+1][k].v, x3[i+di][j+1][k].v,x3[i+di][j][k].v), \
306                                                     PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i+di][j  ].h), PetscRealPart(0.75*x2[i  ][j+1].h+0.25*x2[i+di][j+1].h))
307 
308 static void PrmNodeGetFaceMeasure(const PrmNode **p,PetscInt i,PetscInt j,PetscScalar h[])
309 {
310   /* West */
311   h[0] = StaggeredMidpoint2D(p[i][j].h,p[i-1][j].h,p[i-1][j-1].h,p[i][j-1].h);
312   h[1] = StaggeredMidpoint2D(p[i][j].h,p[i-1][j].h,p[i-1][j+1].h,p[i][j+1].h);
313   /* East */
314   h[2] = StaggeredMidpoint2D(p[i][j].h,p[i+1][j].h,p[i+1][j+1].h,p[i][j+1].h);
315   h[3] = StaggeredMidpoint2D(p[i][j].h,p[i+1][j].h,p[i+1][j-1].h,p[i][j-1].h);
316   /* South */
317   h[4] = StaggeredMidpoint2D(p[i][j].h,p[i][j-1].h,p[i+1][j-1].h,p[i+1][j].h);
318   h[5] = StaggeredMidpoint2D(p[i][j].h,p[i][j-1].h,p[i-1][j-1].h,p[i-1][j].h);
319   /* North */
320   h[6] = StaggeredMidpoint2D(p[i][j].h,p[i][j+1].h,p[i-1][j+1].h,p[i-1][j].h);
321   h[7] = StaggeredMidpoint2D(p[i][j].h,p[i][j+1].h,p[i+1][j+1].h,p[i+1][j].h);
322 }
323 
324 /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
325 static void THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode *p)
326 {
327   Units units = thi->units;
328   PetscReal s = -x*PetscSinReal(thi->alpha);
329   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx) * PetscSinReal(y*2*PETSC_PI/thi->Ly);
330   p->h = s - p->b;
331   p->beta2 = -1e-10;             /* This value is not used, but it should not be huge because that would change the finite difference step size  */
332 }
333 
334 static void THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode *p)
335 {
336   Units units = thi->units;
337   PetscReal s = -x*PetscSinReal(thi->alpha);
338   p->b = s - 1000*units->meter;
339   p->h = s - p->b;
340   /* tau_b = beta2 v   is a stress (Pa).
341    * This is a big number in our units (it needs to balance the driving force from the surface), so we scale it by 1/rhog, just like the residual. */
342   p->beta2 = 1000 * (1 + PetscSinReal(x*2*PETSC_PI/thi->Lx)*PetscSinReal(y*2*PETSC_PI/thi->Ly)) * units->Pascal * units->year / units->meter / thi->rhog;
343 }
344 
345 /* These are just toys */
346 
347 /* From Fred Herman */
348 static void THIInitialize_HOM_F(THI thi,PetscReal x,PetscReal y,PrmNode *p)
349 {
350   Units units = thi->units;
351   PetscReal s = -x*PetscSinReal(thi->alpha);
352   p->b = s - 1000*units->meter + 100*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx);/* * sin(y*2*PETSC_PI/thi->Ly); */
353   p->h = s - p->b;
354   p->h = (1-(atan((x-thi->Lx/2)/1.)+PETSC_PI/2.)/PETSC_PI)*500*units->meter+1*units->meter;
355   s = PetscRealPart(p->b + p->h);
356   p->beta2 = -1e-10;
357   /*  p->beta2 = 1000 * units->Pascal * units->year / units->meter; */
358 }
359 
360 /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
361 static void THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
362 {
363   Units units = thi->units;
364   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
365   PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
366   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
367   p->h = s - p->b;
368   p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter / thi->rhog;
369 }
370 
371 /* Like Z, but with 200 meter cliffs */
372 static void THIInitialize_HOM_Y(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
373 {
374   Units units = thi->units;
375   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
376   PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
377   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
378   if (PetscRealPart(p->b) > -700*units->meter) p->b += 200*units->meter;
379   p->h = s - p->b;
380   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter / thi->rhog;
381 }
382 
383 /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
384 static void THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
385 {
386   Units units = thi->units;
387   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
388   PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
389   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
390   p->h = s - p->b;
391   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter / thi->rhog;
392 }
393 
394 static void THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal *beta2,PetscReal *dbeta2)
395 {
396   if (thi->friction.irefgam == 0) {
397     Units units = thi->units;
398     thi->friction.irefgam = 1./(0.5*PetscSqr(100 * units->meter / units->year));
399     thi->friction.eps2 = 0.5*PetscSqr(1.e-4 / thi->friction.irefgam);
400   }
401   if (thi->friction.exponent == 0) {
402     *beta2  = rbeta2;
403     *dbeta2 = 0;
404   } else {
405     *beta2  = rbeta2 * PetscPowReal(thi->friction.eps2 + gam*thi->friction.irefgam,thi->friction.exponent);
406     *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam*thi->friction.irefgam) * thi->friction.irefgam;
407   }
408 }
409 
410 static void THIViscosity(THI thi,PetscReal gam,PetscReal *eta,PetscReal *deta)
411 {
412   PetscReal Bd2,eps,exponent;
413   if (thi->viscosity.Bd2 == 0) {
414     Units units = thi->units;
415     const PetscReal
416       n = thi->viscosity.glen_n,                        /* Glen exponent */
417       p = 1. + 1./n,                                    /* for Stokes */
418       A = 1.e-16 * PetscPowReal(units->Pascal,-n) / units->year, /* softness parameter (Pa^{-n}/s) */
419       B = PetscPowReal(A,-1./n);                                 /* hardness parameter */
420     thi->viscosity.Bd2      = B/2;
421     thi->viscosity.exponent = (p-2)/2;
422     thi->viscosity.eps      = 0.5*PetscSqr(1e-5 / units->year);
423   }
424   Bd2      = thi->viscosity.Bd2;
425   exponent = thi->viscosity.exponent;
426   eps      = thi->viscosity.eps;
427   *eta     = Bd2 * PetscPowReal(eps + gam,exponent);
428   *deta    = exponent * (*eta) / (eps + gam);
429 }
430 
431 static void THIErosion(THI thi,const Node *vel,PetscScalar *erate,Node *derate)
432 {
433   const PetscScalar magref2 = 1.e-10 + (PetscSqr(vel->u) + PetscSqr(vel->v)) / PetscSqr(thi->erosion.refvel),
434                     rate    = -thi->erosion.rate*PetscPowScalar(magref2, 0.5*thi->erosion.exponent);
435   if (erate) *erate = rate;
436   if (derate) {
437     if (thi->erosion.exponent == 1) {
438       derate->u = 0;
439       derate->v = 0;
440     } else {
441       derate->u = 0.5*thi->erosion.exponent * rate / magref2 * 2. * vel->u / PetscSqr(thi->erosion.refvel);
442       derate->v = 0.5*thi->erosion.exponent * rate / magref2 * 2. * vel->v / PetscSqr(thi->erosion.refvel);
443     }
444   }
445 }
446 
447 static void RangeUpdate(PetscReal *min,PetscReal *max,PetscReal x)
448 {
449   if (x < *min) *min = x;
450   if (x > *max) *max = x;
451 }
452 
453 static void PRangeClear(PRange *p)
454 {
455   p->cmin = p->min = 1e100;
456   p->cmax = p->max = -1e100;
457 }
458 
459 static PetscErrorCode PRangeMinMax(PRange *p,PetscReal min,PetscReal max)
460 {
461 
462   PetscFunctionBeginUser;
463   p->cmin = min;
464   p->cmax = max;
465   if (min < p->min) p->min = min;
466   if (max > p->max) p->max = max;
467   PetscFunctionReturn(0);
468 }
469 
470 static PetscErrorCode THIDestroy(THI *thi)
471 {
472   PetscErrorCode ierr;
473 
474   PetscFunctionBeginUser;
475   if (--((PetscObject)(*thi))->refct > 0) PetscFunctionReturn(0);
476   ierr = PetscFree((*thi)->units);CHKERRQ(ierr);
477   ierr = PetscFree((*thi)->mattype);CHKERRQ(ierr);
478   ierr = PetscFree((*thi)->monitor_basename);CHKERRQ(ierr);
479   ierr = PetscHeaderDestroy(thi);CHKERRQ(ierr);
480   PetscFunctionReturn(0);
481 }
482 
483 static PetscErrorCode THICreate(MPI_Comm comm,THI *inthi)
484 {
485   static PetscBool registered = PETSC_FALSE;
486   THI              thi;
487   Units            units;
488   char             monitor_basename[PETSC_MAX_PATH_LEN] = "thi-";
489   PetscErrorCode   ierr;
490 
491   PetscFunctionBeginUser;
492   *inthi = 0;
493   if (!registered) {
494     ierr = PetscClassIdRegister("Toy Hydrostatic Ice",&THI_CLASSID);CHKERRQ(ierr);
495     registered = PETSC_TRUE;
496   }
497   ierr = PetscHeaderCreate(thi,THI_CLASSID,"THI","Toy Hydrostatic Ice","THI",comm,THIDestroy,0);CHKERRQ(ierr);
498 
499   ierr = PetscNew(&thi->units);CHKERRQ(ierr);
500 
501   units           = thi->units;
502   units->meter    = 1e-2;
503   units->second   = 1e-7;
504   units->kilogram = 1e-12;
505 
506   ierr = PetscOptionsBegin(comm,NULL,"Scaled units options","");CHKERRQ(ierr);
507   {
508     ierr = PetscOptionsReal("-units_meter","1 meter in scaled length units","",units->meter,&units->meter,NULL);CHKERRQ(ierr);
509     ierr = PetscOptionsReal("-units_second","1 second in scaled time units","",units->second,&units->second,NULL);CHKERRQ(ierr);
510     ierr = PetscOptionsReal("-units_kilogram","1 kilogram in scaled mass units","",units->kilogram,&units->kilogram,NULL);CHKERRQ(ierr);
511   }
512   ierr = PetscOptionsEnd();CHKERRQ(ierr);
513   units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
514   units->year   = 31556926. * units->second, /* seconds per year */
515 
516   thi->Lx              = 10.e3;
517   thi->Ly              = 10.e3;
518   thi->Lz              = 1000;
519   thi->nlevels         = 1;
520   thi->dirichlet_scale = 1;
521   thi->verbose         = PETSC_FALSE;
522 
523   thi->viscosity.glen_n = 3.;
524   thi->erosion.rate     = 1e-3; /* m/a */
525   thi->erosion.exponent = 1.;
526   thi->erosion.refvel   = 1.;   /* m/a */
527 
528   ierr = PetscOptionsBegin(comm,NULL,"Toy Hydrostatic Ice options","");CHKERRQ(ierr);
529   {
530     QuadratureType quad       = QUAD_GAUSS;
531     char           homexp[]   = "A";
532     char           mtype[256] = MATSBAIJ;
533     PetscReal      L,m = 1.0;
534     PetscBool      flg;
535     L    = thi->Lx;
536     ierr = PetscOptionsReal("-thi_L","Domain size (m)","",L,&L,&flg);CHKERRQ(ierr);
537     if (flg) thi->Lx = thi->Ly = L;
538     ierr = PetscOptionsReal("-thi_Lx","X Domain size (m)","",thi->Lx,&thi->Lx,NULL);CHKERRQ(ierr);
539     ierr = PetscOptionsReal("-thi_Ly","Y Domain size (m)","",thi->Ly,&thi->Ly,NULL);CHKERRQ(ierr);
540     ierr = PetscOptionsReal("-thi_Lz","Z Domain size (m)","",thi->Lz,&thi->Lz,NULL);CHKERRQ(ierr);
541     ierr = PetscOptionsString("-thi_hom","ISMIP-HOM experiment (A or C)","",homexp,homexp,sizeof(homexp),NULL);CHKERRQ(ierr);
542     switch (homexp[0] = toupper(homexp[0])) {
543     case 'A':
544       thi->initialize = THIInitialize_HOM_A;
545       thi->no_slip    = PETSC_TRUE;
546       thi->alpha      = 0.5;
547       break;
548     case 'C':
549       thi->initialize = THIInitialize_HOM_C;
550       thi->no_slip    = PETSC_FALSE;
551       thi->alpha      = 0.1;
552       break;
553     case 'F':
554       thi->initialize = THIInitialize_HOM_F;
555       thi->no_slip    = PETSC_FALSE;
556       thi->alpha      = 0.5;
557       break;
558     case 'X':
559       thi->initialize = THIInitialize_HOM_X;
560       thi->no_slip    = PETSC_FALSE;
561       thi->alpha      = 0.3;
562       break;
563     case 'Y':
564       thi->initialize = THIInitialize_HOM_Y;
565       thi->no_slip    = PETSC_FALSE;
566       thi->alpha      = 0.5;
567       break;
568     case 'Z':
569       thi->initialize = THIInitialize_HOM_Z;
570       thi->no_slip    = PETSC_FALSE;
571       thi->alpha      = 0.5;
572       break;
573     default:
574       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HOM experiment '%c' not implemented",homexp[0]);
575     }
576     ierr = PetscOptionsEnum("-thi_quadrature","Quadrature to use for 3D elements","",QuadratureTypes,(PetscEnum)quad,(PetscEnum*)&quad,NULL);CHKERRQ(ierr);
577     switch (quad) {
578     case QUAD_GAUSS:
579       HexQInterp = HexQInterp_Gauss;
580       HexQDeriv  = HexQDeriv_Gauss;
581       break;
582     case QUAD_LOBATTO:
583       HexQInterp = HexQInterp_Lobatto;
584       HexQDeriv  = HexQDeriv_Lobatto;
585       break;
586     }
587     ierr = PetscOptionsReal("-thi_alpha","Bed angle (degrees)","",thi->alpha,&thi->alpha,NULL);CHKERRQ(ierr);
588     ierr = PetscOptionsReal("-thi_viscosity_glen_n","Exponent in Glen flow law, 1=linear, infty=ideal plastic",NULL,thi->viscosity.glen_n,&thi->viscosity.glen_n,NULL);CHKERRQ(ierr);
589     ierr = PetscOptionsReal("-thi_friction_m","Friction exponent, 0=Coulomb, 1=Navier","",m,&m,NULL);CHKERRQ(ierr);
590     thi->friction.exponent = (m-1)/2;
591     ierr = PetscOptionsReal("-thi_erosion_rate","Rate of erosion relative to sliding velocity at reference velocity (m/a)",NULL,thi->erosion.rate,&thi->erosion.rate,NULL);CHKERRQ(ierr);
592     ierr = PetscOptionsReal("-thi_erosion_exponent","Power of sliding velocity appearing in erosion relation",NULL,thi->erosion.exponent,&thi->erosion.exponent,NULL);CHKERRQ(ierr);
593     ierr = PetscOptionsReal("-thi_erosion_refvel","Reference sliding velocity for erosion (m/a)",NULL,thi->erosion.refvel,&thi->erosion.refvel,NULL);CHKERRQ(ierr);
594     thi->erosion.rate   *= units->meter / units->year;
595     thi->erosion.refvel *= units->meter / units->year;
596     ierr = PetscOptionsReal("-thi_dirichlet_scale","Scale Dirichlet boundary conditions by this factor","",thi->dirichlet_scale,&thi->dirichlet_scale,NULL);CHKERRQ(ierr);
597     ierr = PetscOptionsReal("-thi_ssa_friction_scale","Scale slip boundary conditions by this factor in SSA (2D) assembly","",thi->ssa_friction_scale,&thi->ssa_friction_scale,NULL);CHKERRQ(ierr);
598     ierr = PetscOptionsReal("-thi_inertia","Coefficient of accelaration term in velocity system, physical is almost zero",NULL,thi->inertia,&thi->inertia,NULL);CHKERRQ(ierr);
599     ierr = PetscOptionsInt("-thi_nlevels","Number of levels of refinement","",thi->nlevels,&thi->nlevels,NULL);CHKERRQ(ierr);
600     ierr = PetscOptionsFList("-thi_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)mtype,sizeof(mtype),NULL);CHKERRQ(ierr);
601     ierr = PetscStrallocpy(mtype,&thi->mattype);CHKERRQ(ierr);
602     ierr = PetscOptionsBool("-thi_verbose","Enable verbose output (like matrix sizes and statistics)","",thi->verbose,&thi->verbose,NULL);CHKERRQ(ierr);
603     ierr = PetscOptionsString("-thi_monitor","Basename to write state files to",NULL,monitor_basename,monitor_basename,sizeof(monitor_basename),&flg);CHKERRQ(ierr);
604     if (flg) {
605       ierr = PetscStrallocpy(monitor_basename,&thi->monitor_basename);CHKERRQ(ierr);
606       thi->monitor_interval = 1;
607       ierr = PetscOptionsInt("-thi_monitor_interval","Frequency at which to write state files",NULL,thi->monitor_interval,&thi->monitor_interval,NULL);CHKERRQ(ierr);
608     }
609   }
610   ierr = PetscOptionsEnd();CHKERRQ(ierr);
611 
612   /* dimensionalize */
613   thi->Lx    *= units->meter;
614   thi->Ly    *= units->meter;
615   thi->Lz    *= units->meter;
616   thi->alpha *= PETSC_PI / 180;
617 
618   PRangeClear(&thi->eta);
619   PRangeClear(&thi->beta2);
620 
621   {
622     PetscReal u       = 1000*units->meter/(3e7*units->second),
623               gradu   = u / (100*units->meter),eta,deta,
624               rho     = 910 * units->kilogram/PetscPowRealInt(units->meter,3),
625               grav    = 9.81 * units->meter/PetscSqr(units->second),
626               driving = rho * grav * PetscSinReal(thi->alpha) * 1000*units->meter;
627     THIViscosity(thi,0.5*gradu*gradu,&eta,&deta);
628     thi->rhog = rho * grav;
629     if (thi->verbose) {
630       ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Units: meter %8.2g  second %8.2g  kg %8.2g  Pa %8.2g\n",units->meter,units->second,units->kilogram,units->Pascal);CHKERRQ(ierr);
631       ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n",thi->Lx,thi->Ly,thi->Lz,rho*grav*1e3*units->meter,driving);CHKERRQ(ierr);
632       ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",u,gradu,eta,2*eta*gradu,2*eta*gradu/driving);CHKERRQ(ierr);
633       THIViscosity(thi,0.5*PetscSqr(1e-3*gradu),&eta,&deta);
634       ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Small velocity 1m/a  %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",1e-3*u,1e-3*gradu,eta,2*eta*1e-3*gradu,2*eta*1e-3*gradu/driving);CHKERRQ(ierr);
635     }
636   }
637 
638   *inthi = thi;
639   PetscFunctionReturn(0);
640 }
641 
642 /* Our problem is periodic, but the domain has a mean slope of alpha so the bed does not line up between the upstream
643  * and downstream ends of the domain.  This function fixes the ghost values so that the domain appears truly periodic in
644  * the horizontal. */
645 static PetscErrorCode THIFixGhosts(THI thi,DM da3,DM da2,Vec X3,Vec X2)
646 {
647   PetscErrorCode ierr;
648   DMDALocalInfo  info;
649   PrmNode        **x2;
650   PetscInt       i,j;
651 
652   PetscFunctionBeginUser;
653   ierr = DMDAGetLocalInfo(da3,&info);CHKERRQ(ierr);
654   /* ierr = VecView(X2,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */
655   ierr = DMDAVecGetArray(da2,X2,&x2);CHKERRQ(ierr);
656   for (i=info.gzs; i<info.gzs+info.gzm; i++) {
657     if (i > -1 && i < info.mz) continue;
658     for (j=info.gys; j<info.gys+info.gym; j++) {
659       x2[i][j].b += PetscSinReal(thi->alpha) * thi->Lx * (i<0 ? 1.0 : -1.0);
660     }
661   }
662   ierr = DMDAVecRestoreArray(da2,X2,&x2);CHKERRQ(ierr);
663   /* ierr = VecView(X2,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); */
664   PetscFunctionReturn(0);
665 }
666 
667 static PetscErrorCode THIInitializePrm(THI thi,DM da2prm,PrmNode **p)
668 {
669   PetscInt       i,j,xs,xm,ys,ym,mx,my;
670   PetscErrorCode ierr;
671 
672   PetscFunctionBeginUser;
673   ierr = DMDAGetGhostCorners(da2prm,&ys,&xs,0,&ym,&xm,0);CHKERRQ(ierr);
674   ierr = DMDAGetInfo(da2prm,0, &my,&mx,0, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
675   for (i=xs; i<xs+xm; i++) {
676     for (j=ys; j<ys+ym; j++) {
677       PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my;
678       thi->initialize(thi,xx,yy,&p[i][j]);
679     }
680   }
681   PetscFunctionReturn(0);
682 }
683 
684 static PetscErrorCode THIInitial(THI thi,DM pack,Vec X)
685 {
686   DM             da3,da2;
687   PetscInt       i,j,k,xs,xm,ys,ym,zs,zm,mx,my;
688   PetscReal      hx,hy;
689   PrmNode        **prm;
690   Node           ***x;
691   Vec            X3g,X2g,X2;
692   PetscErrorCode ierr;
693 
694   PetscFunctionBeginUser;
695   ierr = DMCompositeGetEntries(pack,&da3,&da2);CHKERRQ(ierr);
696   ierr = DMCompositeGetAccess(pack,X,&X3g,&X2g);CHKERRQ(ierr);
697   ierr = DMGetLocalVector(da2,&X2);CHKERRQ(ierr);
698 
699   ierr = DMDAGetInfo(da3,0, 0,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
700   ierr = DMDAGetCorners(da3,&zs,&ys,&xs,&zm,&ym,&xm);CHKERRQ(ierr);
701   ierr = DMDAVecGetArray(da3,X3g,&x);CHKERRQ(ierr);
702   ierr = DMDAVecGetArray(da2,X2,&prm);CHKERRQ(ierr);
703 
704   ierr = THIInitializePrm(thi,da2,prm);CHKERRQ(ierr);
705 
706   hx = thi->Lx / mx;
707   hy = thi->Ly / my;
708   for (i=xs; i<xs+xm; i++) {
709     for (j=ys; j<ys+ym; j++) {
710       for (k=zs; k<zs+zm; k++) {
711         const PetscScalar zm1      = zm-1,
712                           drivingx = thi->rhog * (prm[i+1][j].b+prm[i+1][j].h - prm[i-1][j].b-prm[i-1][j].h) / (2*hx),
713                           drivingy = thi->rhog * (prm[i][j+1].b+prm[i][j+1].h - prm[i][j-1].b-prm[i][j-1].h) / (2*hy);
714         x[i][j][k].u = 0. * drivingx * prm[i][j].h*(PetscScalar)k/zm1;
715         x[i][j][k].v = 0. * drivingy * prm[i][j].h*(PetscScalar)k/zm1;
716       }
717     }
718   }
719 
720   ierr = DMDAVecRestoreArray(da3,X3g,&x);CHKERRQ(ierr);
721   ierr = DMDAVecRestoreArray(da2,X2,&prm);CHKERRQ(ierr);
722 
723   ierr = DMLocalToGlobalBegin(da2,X2,INSERT_VALUES,X2g);CHKERRQ(ierr);
724   ierr = DMLocalToGlobalEnd  (da2,X2,INSERT_VALUES,X2g);CHKERRQ(ierr);
725   ierr = DMRestoreLocalVector(da2,&X2);CHKERRQ(ierr);
726 
727   ierr = DMCompositeRestoreAccess(pack,X,&X3g,&X2g);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 static void PointwiseNonlinearity(THI thi,const Node n[restrict 8],const PetscReal phi[restrict 3],PetscReal dphi[restrict 8][3],PetscScalar *restrict u,PetscScalar *restrict v,PetscScalar du[restrict 3],PetscScalar dv[restrict 3],PetscReal *eta,PetscReal *deta)
732 {
733   PetscInt    l,ll;
734   PetscScalar gam;
735 
736   du[0] = du[1] = du[2] = 0;
737   dv[0] = dv[1] = dv[2] = 0;
738   *u    = 0;
739   *v    = 0;
740   for (l=0; l<8; l++) {
741     *u += phi[l] * n[l].u;
742     *v += phi[l] * n[l].v;
743     for (ll=0; ll<3; ll++) {
744       du[ll] += dphi[l][ll] * n[l].u;
745       dv[ll] += dphi[l][ll] * n[l].v;
746     }
747   }
748   gam = Sqr(du[0]) + Sqr(dv[1]) + du[0]*dv[1] + 0.25*Sqr(du[1]+dv[0]) + 0.25*Sqr(du[2]) + 0.25*Sqr(dv[2]);
749   THIViscosity(thi,PetscRealPart(gam),eta,deta);
750 }
751 
752 static PetscErrorCode THIFunctionLocal_3D(DMDALocalInfo *info,const Node ***x,const PrmNode **prm,const Node ***xdot,Node ***f,THI thi)
753 {
754   PetscInt       xs,ys,xm,ym,zm,i,j,k,q,l;
755   PetscReal      hx,hy,etamin,etamax,beta2min,beta2max;
756   PetscErrorCode ierr;
757 
758   PetscFunctionBeginUser;
759   xs = info->zs;
760   ys = info->ys;
761   xm = info->zm;
762   ym = info->ym;
763   zm = info->xm;
764   hx = thi->Lx / info->mz;
765   hy = thi->Ly / info->my;
766 
767   etamin   = 1e100;
768   etamax   = 0;
769   beta2min = 1e100;
770   beta2max = 0;
771 
772   for (i=xs; i<xs+xm; i++) {
773     for (j=ys; j<ys+ym; j++) {
774       PrmNode pn[4],dpn[4][2];
775       QuadExtract(prm,i,j,pn);
776       ierr = QuadComputeGrad4(QuadQDeriv,hx,hy,pn,dpn);CHKERRQ(ierr);
777       for (k=0; k<zm-1; k++) {
778         PetscInt  ls = 0;
779         Node      n[8],ndot[8],*fn[8];
780         PetscReal zn[8],etabase = 0;
781 
782         PrmHexGetZ(pn,k,zm,zn);
783         HexExtract(x,i,j,k,n);
784         HexExtract(xdot,i,j,k,ndot);
785         HexExtractRef(f,i,j,k,fn);
786         if (thi->no_slip && k == 0) {
787           for (l=0; l<4; l++) n[l].u = n[l].v = 0;
788           /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
789           ls = 4;
790         }
791         for (q=0; q<8; q++) {
792           PetscReal   dz[3],phi[8],dphi[8][3],jw,eta,deta;
793           PetscScalar du[3],dv[3],u,v,udot=0,vdot=0;
794           for (l=ls; l<8; l++) {
795             udot += HexQInterp[q][l]*ndot[l].u;
796             vdot += HexQInterp[q][l]*ndot[l].v;
797           }
798           HexGrad(HexQDeriv[q],zn,dz);
799           HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
800           PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
801           jw /= thi->rhog;      /* scales residuals to be O(1) */
802           if (q == 0) etabase = eta;
803           RangeUpdate(&etamin,&etamax,eta);
804           for (l=ls; l<8; l++) { /* test functions */
805             const PetscScalar ds[2] = {dpn[q%4][0].h+dpn[q%4][0].b, dpn[q%4][1].h+dpn[q%4][1].b};
806             const PetscReal   pp    = phi[l],*dp = dphi[l];
807             fn[l]->u += dp[0]*jw*eta*(4.*du[0]+2.*dv[1]) + dp[1]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*du[2] + pp*jw*thi->rhog*ds[0];
808             fn[l]->v += dp[1]*jw*eta*(2.*du[0]+4.*dv[1]) + dp[0]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*dv[2] + pp*jw*thi->rhog*ds[1];
809             fn[l]->u += pp*jw*udot*thi->inertia*pp;
810             fn[l]->v += pp*jw*vdot*thi->inertia*pp;
811           }
812         }
813         if (k == 0) { /* we are on a bottom face */
814           if (thi->no_slip) {
815             /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
816             * conditions.  After shenanigans above, etabase contains the effective viscosity at the closest quadrature
817             * point to the bed.  We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
818             * diagonal entry corresponding to the adjacent node.  The fundamental scaling of the viscous part is in
819             * diagu, diagv below.  This scaling is easy to recognize by considering the finite difference operator after
820             * scaling by element size.  The no-slip Dirichlet condition is scaled by this factor, and also in the
821             * assembled matrix (see the similar block in THIJacobianLocal).
822             *
823             * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
824             * (nonlinearly in general) on the neighboring interior nodes through the local viscosity.  This will make
825             * a matrix-free Jacobian have extra entries in the corresponding row.  We assemble only the diagonal part,
826             * so the solution will exactly satisfy the boundary condition after the first linear iteration.
827             */
828             const PetscReal   hz    = PetscRealPart(pn[0].h)/(zm-1.);
829             const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
830             fn[0]->u = thi->dirichlet_scale*diagu*x[i][j][k].u;
831             fn[0]->v = thi->dirichlet_scale*diagv*x[i][j][k].v;
832           } else {              /* Integrate over bottom face to apply boundary condition */
833             for (q=0; q<4; q++) { /* We remove the explicit scaling of the residual by 1/rhog because beta2 already has that scaling to be O(1) */
834               const PetscReal jw = 0.25*hx*hy,*phi = QuadQInterp[q];
835               PetscScalar     u  =0,v=0,rbeta2=0;
836               PetscReal       beta2,dbeta2;
837               for (l=0; l<4; l++) {
838                 u     += phi[l]*n[l].u;
839                 v     += phi[l]*n[l].v;
840                 rbeta2 += phi[l]*pn[l].beta2;
841               }
842               THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
843               RangeUpdate(&beta2min,&beta2max,beta2);
844               for (l=0; l<4; l++) {
845                 const PetscReal pp = phi[l];
846                 fn[ls+l]->u += pp*jw*beta2*u;
847                 fn[ls+l]->v += pp*jw*beta2*v;
848               }
849             }
850           }
851         }
852       }
853     }
854   }
855 
856   ierr = PRangeMinMax(&thi->eta,etamin,etamax);CHKERRQ(ierr);
857   ierr = PRangeMinMax(&thi->beta2,beta2min,beta2max);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 static PetscErrorCode THIFunctionLocal_2D(DMDALocalInfo *info,const Node ***x,const PrmNode **prm,const PrmNode **prmdot,PrmNode **f,THI thi)
862 {
863   PetscInt xs,ys,xm,ym,zm,i,j,k;
864 
865   PetscFunctionBeginUser;
866   xs = info->zs;
867   ys = info->ys;
868   xm = info->zm;
869   ym = info->ym;
870   zm = info->xm;
871 
872   for (i=xs; i<xs+xm; i++) {
873     for (j=ys; j<ys+ym; j++) {
874       PetscScalar div = 0,erate,h[8];
875       PrmNodeGetFaceMeasure(prm,i,j,h);
876       for (k=0; k<zm; k++) {
877         PetscScalar weight = (k==0 || k == zm-1) ? 0.5/(zm-1) : 1.0/(zm-1);
878         if (0) {                /* centered flux */
879           div += (- weight*h[0] * StaggeredMidpoint2D(x[i][j][k].u,x[i-1][j][k].u, x[i-1][j-1][k].u,x[i][j-1][k].u)
880                   - weight*h[1] * StaggeredMidpoint2D(x[i][j][k].u,x[i-1][j][k].u, x[i-1][j+1][k].u,x[i][j+1][k].u)
881                   + weight*h[2] * StaggeredMidpoint2D(x[i][j][k].u,x[i+1][j][k].u, x[i+1][j+1][k].u,x[i][j+1][k].u)
882                   + weight*h[3] * StaggeredMidpoint2D(x[i][j][k].u,x[i+1][j][k].u, x[i+1][j-1][k].u,x[i][j-1][k].u)
883                   - weight*h[4] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j-1][k].v, x[i+1][j-1][k].v,x[i+1][j][k].v)
884                   - weight*h[5] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j-1][k].v, x[i-1][j-1][k].v,x[i-1][j][k].v)
885                   + weight*h[6] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j+1][k].v, x[i-1][j+1][k].v,x[i-1][j][k].v)
886                   + weight*h[7] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j+1][k].v, x[i+1][j+1][k].v,x[i+1][j][k].v));
887         } else {                /* Upwind flux */
888           div += weight*(-UpwindFluxXW(x,prm,h,i,j,k, 1)
889                          -UpwindFluxXW(x,prm,h,i,j,k,-1)
890                          +UpwindFluxXE(x,prm,h,i,j,k, 1)
891                          +UpwindFluxXE(x,prm,h,i,j,k,-1)
892                          -UpwindFluxYS(x,prm,h,i,j,k, 1)
893                          -UpwindFluxYS(x,prm,h,i,j,k,-1)
894                          +UpwindFluxYN(x,prm,h,i,j,k, 1)
895                          +UpwindFluxYN(x,prm,h,i,j,k,-1));
896         }
897       }
898       /* printf("div[%d][%d] %g\n",i,j,div); */
899       THIErosion(thi,&x[i][j][0],&erate,NULL);
900       f[i][j].b     = prmdot[i][j].b - erate;
901       f[i][j].h     = prmdot[i][j].h + div;
902       f[i][j].beta2 = prmdot[i][j].beta2;
903     }
904   }
905   PetscFunctionReturn(0);
906 }
907 
908 static PetscErrorCode THIFunction(TS ts,PetscReal t,Vec X,Vec Xdot,Vec F,void *ctx)
909 {
910   PetscErrorCode ierr;
911   THI            thi = (THI)ctx;
912   DM             pack,da3,da2;
913   Vec            X3,X2,Xdot3,Xdot2,F3,F2,F3g,F2g;
914   const Node     ***x3,***xdot3;
915   const PrmNode  **x2,**xdot2;
916   Node           ***f3;
917   PrmNode        **f2;
918   DMDALocalInfo  info3;
919 
920   PetscFunctionBeginUser;
921   ierr = TSGetDM(ts,&pack);CHKERRQ(ierr);
922   ierr = DMCompositeGetEntries(pack,&da3,&da2);CHKERRQ(ierr);
923   ierr = DMDAGetLocalInfo(da3,&info3);CHKERRQ(ierr);
924   ierr = DMCompositeGetLocalVectors(pack,&X3,&X2);CHKERRQ(ierr);
925   ierr = DMCompositeGetLocalVectors(pack,&Xdot3,&Xdot2);CHKERRQ(ierr);
926   ierr = DMCompositeScatter(pack,X,X3,X2);CHKERRQ(ierr);
927   ierr = THIFixGhosts(thi,da3,da2,X3,X2);CHKERRQ(ierr);
928   ierr = DMCompositeScatter(pack,Xdot,Xdot3,Xdot2);CHKERRQ(ierr);
929 
930   ierr = DMGetLocalVector(da3,&F3);CHKERRQ(ierr);
931   ierr = DMGetLocalVector(da2,&F2);CHKERRQ(ierr);
932   ierr = VecZeroEntries(F3);CHKERRQ(ierr);
933 
934   ierr = DMDAVecGetArray(da3,X3,&x3);CHKERRQ(ierr);
935   ierr = DMDAVecGetArray(da2,X2,&x2);CHKERRQ(ierr);
936   ierr = DMDAVecGetArray(da3,Xdot3,&xdot3);CHKERRQ(ierr);
937   ierr = DMDAVecGetArray(da2,Xdot2,&xdot2);CHKERRQ(ierr);
938   ierr = DMDAVecGetArray(da3,F3,&f3);CHKERRQ(ierr);
939   ierr = DMDAVecGetArray(da2,F2,&f2);CHKERRQ(ierr);
940 
941   ierr = THIFunctionLocal_3D(&info3,x3,x2,xdot3,f3,thi);CHKERRQ(ierr);
942   ierr = THIFunctionLocal_2D(&info3,x3,x2,xdot2,f2,thi);CHKERRQ(ierr);
943 
944   ierr = DMDAVecRestoreArray(da3,X3,&x3);CHKERRQ(ierr);
945   ierr = DMDAVecRestoreArray(da2,X2,&x2);CHKERRQ(ierr);
946   ierr = DMDAVecRestoreArray(da3,Xdot3,&xdot3);CHKERRQ(ierr);
947   ierr = DMDAVecRestoreArray(da2,Xdot2,&xdot2);CHKERRQ(ierr);
948   ierr = DMDAVecRestoreArray(da3,F3,&f3);CHKERRQ(ierr);
949   ierr = DMDAVecRestoreArray(da2,F2,&f2);CHKERRQ(ierr);
950 
951   ierr = DMCompositeRestoreLocalVectors(pack,&X3,&X2);CHKERRQ(ierr);
952   ierr = DMCompositeRestoreLocalVectors(pack,&Xdot3,&Xdot2);CHKERRQ(ierr);
953 
954   ierr = VecZeroEntries(F);CHKERRQ(ierr);
955   ierr = DMCompositeGetAccess(pack,F,&F3g,&F2g);CHKERRQ(ierr);
956   ierr = DMLocalToGlobalBegin(da3,F3,ADD_VALUES,F3g);CHKERRQ(ierr);
957   ierr = DMLocalToGlobalEnd  (da3,F3,ADD_VALUES,F3g);CHKERRQ(ierr);
958   ierr = DMLocalToGlobalBegin(da2,F2,INSERT_VALUES,F2g);CHKERRQ(ierr);
959   ierr = DMLocalToGlobalEnd  (da2,F2,INSERT_VALUES,F2g);CHKERRQ(ierr);
960 
961   if (thi->verbose) {
962     PetscViewer viewer;
963     ierr = PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)thi),&viewer);CHKERRQ(ierr);
964     ierr = PetscViewerASCIIPrintf(viewer,"3D_Velocity residual (bs=2):\n");CHKERRQ(ierr);
965     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
966     ierr = VecView(F3,viewer);CHKERRQ(ierr);
967     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
968     ierr = PetscViewerASCIIPrintf(viewer,"2D_Fields residual (bs=3):\n");CHKERRQ(ierr);
969     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
970     ierr = VecView(F2,viewer);CHKERRQ(ierr);
971     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
972   }
973 
974   ierr = DMCompositeRestoreAccess(pack,F,&F3g,&F2g);CHKERRQ(ierr);
975 
976   ierr = DMRestoreLocalVector(da3,&F3);CHKERRQ(ierr);
977   ierr = DMRestoreLocalVector(da2,&F2);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 static PetscErrorCode THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer)
982 {
983   PetscErrorCode ierr;
984   PetscReal      nrm;
985   PetscInt       m;
986   PetscMPIInt    rank;
987 
988   PetscFunctionBeginUser;
989   ierr = MatNorm(B,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
990   ierr = MatGetSize(B,&m,0);CHKERRQ(ierr);
991   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&rank);CHKERRMPI(ierr);
992   if (rank == 0) {
993     PetscScalar val0,val2;
994     ierr = MatGetValue(B,0,0,&val0);CHKERRQ(ierr);
995     ierr = MatGetValue(B,2,2,&val2);CHKERRQ(ierr);
996     ierr = PetscViewerASCIIPrintf(viewer,"Matrix dim %8d  norm %8.2e, (0,0) %8.2e  (2,2) %8.2e, eta [%8.2e,%8.2e] beta2 [%8.2e,%8.2e]\n",m,nrm,PetscRealPart(val0),PetscRealPart(val2),thi->eta.cmin,thi->eta.cmax,thi->beta2.cmin,thi->beta2.cmax);CHKERRQ(ierr);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 static PetscErrorCode THISurfaceStatistics(DM pack,Vec X,PetscReal *min,PetscReal *max,PetscReal *mean)
1002 {
1003   PetscErrorCode ierr;
1004   DM             da3,da2;
1005   Vec            X3,X2;
1006   Node           ***x;
1007   PetscInt       i,j,xs,ys,zs,xm,ym,zm,mx,my,mz;
1008   PetscReal      umin = 1e100,umax=-1e100;
1009   PetscScalar    usum =0.0,gusum;
1010 
1011   PetscFunctionBeginUser;
1012   ierr = DMCompositeGetEntries(pack,&da3,&da2);CHKERRQ(ierr);
1013   ierr = DMCompositeGetAccess(pack,X,&X3,&X2);CHKERRQ(ierr);
1014   *min = *max = *mean = 0;
1015   ierr = DMDAGetInfo(da3,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
1016   ierr = DMDAGetCorners(da3,&zs,&ys,&xs,&zm,&ym,&xm);CHKERRQ(ierr);
1017   PetscCheckFalse(zs != 0 || zm != mz,PETSC_COMM_WORLD,PETSC_ERR_PLIB,"Unexpected decomposition");
1018   ierr = DMDAVecGetArray(da3,X3,&x);CHKERRQ(ierr);
1019   for (i=xs; i<xs+xm; i++) {
1020     for (j=ys; j<ys+ym; j++) {
1021       PetscReal u = PetscRealPart(x[i][j][zm-1].u);
1022       RangeUpdate(&umin,&umax,u);
1023       usum += u;
1024     }
1025   }
1026   ierr = DMDAVecRestoreArray(da3,X3,&x);CHKERRQ(ierr);
1027   ierr = DMCompositeRestoreAccess(pack,X,&X3,&X2);CHKERRQ(ierr);
1028 
1029   ierr  = MPI_Allreduce(&umin,min,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)da3));CHKERRMPI(ierr);
1030   ierr  = MPI_Allreduce(&umax,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)da3));CHKERRMPI(ierr);
1031   ierr  = MPI_Allreduce(&usum,&gusum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)da3));CHKERRMPI(ierr);
1032   *mean = PetscRealPart(gusum) / (mx*my);
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 static PetscErrorCode THISolveStatistics(THI thi,TS ts,PetscInt coarsened,const char name[])
1037 {
1038   MPI_Comm       comm;
1039   DM             pack;
1040   Vec            X,X3,X2;
1041   PetscErrorCode ierr;
1042 
1043   PetscFunctionBeginUser;
1044   ierr = PetscObjectGetComm((PetscObject)thi,&comm);CHKERRQ(ierr);
1045   ierr = TSGetDM(ts,&pack);CHKERRQ(ierr);
1046   ierr = TSGetSolution(ts,&X);CHKERRQ(ierr);
1047   ierr = DMCompositeGetAccess(pack,X,&X3,&X2);CHKERRQ(ierr);
1048   ierr = PetscPrintf(comm,"Solution statistics after solve: %s\n",name);CHKERRQ(ierr);
1049   {
1050     PetscInt            its,lits;
1051     SNESConvergedReason reason;
1052     SNES                snes;
1053     ierr = TSGetSNES(ts,&snes);CHKERRQ(ierr);
1054     ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr);
1055     ierr = SNESGetConvergedReason(snes,&reason);CHKERRQ(ierr);
1056     ierr = SNESGetLinearSolveIterations(snes,&lits);CHKERRQ(ierr);
1057     ierr = PetscPrintf(comm,"%s: Number of SNES iterations = %d, total linear iterations = %d\n",SNESConvergedReasons[reason],its,lits);CHKERRQ(ierr);
1058   }
1059   {
1060     PetscReal   nrm2,tmin[3]={1e100,1e100,1e100},tmax[3]={-1e100,-1e100,-1e100},min[3],max[3];
1061     PetscInt    i,j,m;
1062     PetscScalar *x;
1063     ierr = VecNorm(X3,NORM_2,&nrm2);CHKERRQ(ierr);
1064     ierr = VecGetLocalSize(X3,&m);CHKERRQ(ierr);
1065     ierr = VecGetArray(X3,&x);CHKERRQ(ierr);
1066     for (i=0; i<m; i+=2) {
1067       PetscReal u = PetscRealPart(x[i]),v = PetscRealPart(x[i+1]),c = PetscSqrtReal(u*u+v*v);
1068       tmin[0] = PetscMin(u,tmin[0]);
1069       tmin[1] = PetscMin(v,tmin[1]);
1070       tmin[2] = PetscMin(c,tmin[2]);
1071       tmax[0] = PetscMax(u,tmax[0]);
1072       tmax[1] = PetscMax(v,tmax[1]);
1073       tmax[2] = PetscMax(c,tmax[2]);
1074     }
1075     ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);
1076     ierr = MPI_Allreduce(tmin,min,3,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)thi));CHKERRMPI(ierr);
1077     ierr = MPI_Allreduce(tmax,max,3,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)thi));CHKERRMPI(ierr);
1078     /* Dimensionalize to meters/year */
1079     nrm2 *= thi->units->year / thi->units->meter;
1080     for (j=0; j<3; j++) {
1081       min[j] *= thi->units->year / thi->units->meter;
1082       max[j] *= thi->units->year / thi->units->meter;
1083     }
1084     ierr = PetscPrintf(comm,"|X|_2 %g   u in [%g, %g]   v in [%g, %g]   c in [%g, %g] \n",nrm2,min[0],max[0],min[1],max[1],min[2],max[2]);CHKERRQ(ierr);
1085     {
1086       PetscReal umin,umax,umean;
1087       ierr   = THISurfaceStatistics(pack,X,&umin,&umax,&umean);CHKERRQ(ierr);
1088       umin  *= thi->units->year / thi->units->meter;
1089       umax  *= thi->units->year / thi->units->meter;
1090       umean *= thi->units->year / thi->units->meter;
1091       ierr   = PetscPrintf(comm,"Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n",umin,umax,umean);CHKERRQ(ierr);
1092     }
1093     /* These values stay nondimensional */
1094     ierr = PetscPrintf(comm,"Global eta range   [%g, %g], converged range [%g, %g]\n",thi->eta.min,thi->eta.max,thi->eta.cmin,thi->eta.cmax);CHKERRQ(ierr);
1095     ierr = PetscPrintf(comm,"Global beta2 range [%g, %g], converged range [%g, %g]\n",thi->beta2.min,thi->beta2.max,thi->beta2.cmin,thi->beta2.cmax);CHKERRQ(ierr);
1096   }
1097   ierr = PetscPrintf(comm,"\n");CHKERRQ(ierr);
1098   ierr = DMCompositeRestoreAccess(pack,X,&X3,&X2);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 static inline PetscInt DMDALocalIndex3D(DMDALocalInfo *info,PetscInt i,PetscInt j,PetscInt k)
1103 {return ((i-info->gzs)*info->gym + (j-info->gys))*info->gxm + (k-info->gxs);}
1104 static inline PetscInt DMDALocalIndex2D(DMDALocalInfo *info,PetscInt i,PetscInt j)
1105 {return (i-info->gzs)*info->gym + (j-info->gys);}
1106 
1107 static PetscErrorCode THIJacobianLocal_Momentum(DMDALocalInfo *info,const Node ***x,const PrmNode **prm,Mat B,Mat Bcpl,THI thi)
1108 {
1109   PetscInt       xs,ys,xm,ym,zm,i,j,k,q,l,ll;
1110   PetscReal      hx,hy;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBeginUser;
1114   xs = info->zs;
1115   ys = info->ys;
1116   xm = info->zm;
1117   ym = info->ym;
1118   zm = info->xm;
1119   hx = thi->Lx / info->mz;
1120   hy = thi->Ly / info->my;
1121 
1122   for (i=xs; i<xs+xm; i++) {
1123     for (j=ys; j<ys+ym; j++) {
1124       PrmNode pn[4],dpn[4][2];
1125       QuadExtract(prm,i,j,pn);
1126       ierr = QuadComputeGrad4(QuadQDeriv,hx,hy,pn,dpn);CHKERRQ(ierr);
1127       for (k=0; k<zm-1; k++) {
1128         Node        n[8];
1129         PetscReal   zn[8],etabase = 0;
1130         PetscScalar Ke[8*NODE_SIZE][8*NODE_SIZE],Kcpl[8*NODE_SIZE][4*PRMNODE_SIZE];
1131         PetscInt    ls = 0;
1132 
1133         PrmHexGetZ(pn,k,zm,zn);
1134         HexExtract(x,i,j,k,n);
1135         ierr = PetscMemzero(Ke,sizeof(Ke));CHKERRQ(ierr);
1136         ierr = PetscMemzero(Kcpl,sizeof(Kcpl));CHKERRQ(ierr);
1137         if (thi->no_slip && k == 0) {
1138           for (l=0; l<4; l++) n[l].u = n[l].v = 0;
1139           ls = 4;
1140         }
1141         for (q=0; q<8; q++) {
1142           PetscReal   dz[3],phi[8],dphi[8][3],jw,eta,deta;
1143           PetscScalar du[3],dv[3],u,v;
1144           HexGrad(HexQDeriv[q],zn,dz);
1145           HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
1146           PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
1147           jw /= thi->rhog;      /* residuals are scaled by this factor */
1148           if (q == 0) etabase = eta;
1149           for (l=ls; l<8; l++) { /* test functions */
1150             const PetscReal pp=phi[l],*restrict dp = dphi[l];
1151             for (ll=ls; ll<8; ll++) { /* trial functions */
1152               const PetscReal *restrict dpl = dphi[ll];
1153               PetscScalar dgdu,dgdv;
1154               dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1] + 0.5*du[2]*dpl[2];
1155               dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0] + 0.5*dv[2]*dpl[2];
1156               /* Picard part */
1157               Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + dp[2]*jw*eta*dpl[2];
1158               Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
1159               Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
1160               Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + dp[2]*jw*eta*dpl[2];
1161               /* extra Newton terms */
1162               Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*du[2];
1163               Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*du[2];
1164               Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*dv[2];
1165               Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*dv[2];
1166               /* inertial part */
1167               Ke[l*2+0][ll*2+0] += pp*jw*thi->inertia*pp;
1168               Ke[l*2+1][ll*2+1] += pp*jw*thi->inertia*pp;
1169             }
1170             for (ll=0; ll<4; ll++) { /* Trial functions for surface/bed */
1171               const PetscReal dpl[] = {QuadQDeriv[q%4][ll][0]/hx, QuadQDeriv[q%4][ll][1]/hy}; /* surface = h + b */
1172               Kcpl[FieldIndex(Node,l,u)][FieldIndex(PrmNode,ll,h)] += pp*jw*thi->rhog*dpl[0];
1173               Kcpl[FieldIndex(Node,l,u)][FieldIndex(PrmNode,ll,b)] += pp*jw*thi->rhog*dpl[0];
1174               Kcpl[FieldIndex(Node,l,v)][FieldIndex(PrmNode,ll,h)] += pp*jw*thi->rhog*dpl[1];
1175               Kcpl[FieldIndex(Node,l,v)][FieldIndex(PrmNode,ll,b)] += pp*jw*thi->rhog*dpl[1];
1176             }
1177           }
1178         }
1179         if (k == 0) { /* on a bottom face */
1180           if (thi->no_slip) {
1181             const PetscReal   hz    = PetscRealPart(pn[0].h)/(zm-1);
1182             const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
1183             Ke[0][0] = thi->dirichlet_scale*diagu;
1184             Ke[0][1] = 0;
1185             Ke[1][0] = 0;
1186             Ke[1][1] = thi->dirichlet_scale*diagv;
1187           } else {
1188             for (q=0; q<4; q++) { /* We remove the explicit scaling by 1/rhog because beta2 already has that scaling to be O(1) */
1189               const PetscReal jw = 0.25*hx*hy,*phi = QuadQInterp[q];
1190               PetscScalar     u  =0,v=0,rbeta2=0;
1191               PetscReal       beta2,dbeta2;
1192               for (l=0; l<4; l++) {
1193                 u      += phi[l]*n[l].u;
1194                 v      += phi[l]*n[l].v;
1195                 rbeta2 += phi[l]*pn[l].beta2;
1196               }
1197               THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
1198               for (l=0; l<4; l++) {
1199                 const PetscReal pp = phi[l];
1200                 for (ll=0; ll<4; ll++) {
1201                   const PetscReal ppl = phi[ll];
1202                   Ke[l*2+0][ll*2+0] += pp*jw*beta2*ppl + pp*jw*dbeta2*u*u*ppl;
1203                   Ke[l*2+0][ll*2+1] +=                   pp*jw*dbeta2*u*v*ppl;
1204                   Ke[l*2+1][ll*2+0] +=                   pp*jw*dbeta2*v*u*ppl;
1205                   Ke[l*2+1][ll*2+1] += pp*jw*beta2*ppl + pp*jw*dbeta2*v*v*ppl;
1206                 }
1207               }
1208             }
1209           }
1210         }
1211         {
1212           const PetscInt rc3blocked[8] = {
1213             DMDALocalIndex3D(info,i+0,j+0,k+0),
1214             DMDALocalIndex3D(info,i+1,j+0,k+0),
1215             DMDALocalIndex3D(info,i+1,j+1,k+0),
1216             DMDALocalIndex3D(info,i+0,j+1,k+0),
1217             DMDALocalIndex3D(info,i+0,j+0,k+1),
1218             DMDALocalIndex3D(info,i+1,j+0,k+1),
1219             DMDALocalIndex3D(info,i+1,j+1,k+1),
1220             DMDALocalIndex3D(info,i+0,j+1,k+1)
1221           },col2blocked[PRMNODE_SIZE*4] = {
1222             DMDALocalIndex2D(info,i+0,j+0),
1223             DMDALocalIndex2D(info,i+1,j+0),
1224             DMDALocalIndex2D(info,i+1,j+1),
1225             DMDALocalIndex2D(info,i+0,j+1)
1226           };
1227 #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1228           for (l=0; l<8; l++) {
1229             for (ll=l+1; ll<8; ll++) {
1230               Ke[ll*2+0][l*2+0] = Ke[l*2+0][ll*2+0];
1231               Ke[ll*2+1][l*2+0] = Ke[l*2+0][ll*2+1];
1232               Ke[ll*2+0][l*2+1] = Ke[l*2+1][ll*2+0];
1233               Ke[ll*2+1][l*2+1] = Ke[l*2+1][ll*2+1];
1234             }
1235           }
1236 #endif
1237           ierr = MatSetValuesBlockedLocal(B,8,rc3blocked,8,rc3blocked,&Ke[0][0],ADD_VALUES);CHKERRQ(ierr); /* velocity-velocity coupling can use blocked insertion */
1238           {                     /* The off-diagonal part cannot (yet) */
1239             PetscInt row3scalar[NODE_SIZE*8],col2scalar[PRMNODE_SIZE*4];
1240             for (l=0; l<8; l++) for (ll=0; ll<NODE_SIZE; ll++) row3scalar[l*NODE_SIZE+ll] = rc3blocked[l]*NODE_SIZE+ll;
1241             for (l=0; l<4; l++) for (ll=0; ll<PRMNODE_SIZE; ll++) col2scalar[l*PRMNODE_SIZE+ll] = col2blocked[l]*PRMNODE_SIZE+ll;
1242             ierr = MatSetValuesLocal(Bcpl,8*NODE_SIZE,row3scalar,4*PRMNODE_SIZE,col2scalar,&Kcpl[0][0],ADD_VALUES);CHKERRQ(ierr);
1243           }
1244         }
1245       }
1246     }
1247   }
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info,const Node ***x3,const PrmNode **x2,const PrmNode **xdot2,PetscReal a,Mat B22,Mat B21,THI thi)
1252 {
1253   PetscErrorCode ierr;
1254   PetscInt       xs,ys,xm,ym,zm,i,j,k;
1255 
1256   PetscFunctionBeginUser;
1257   xs = info->zs;
1258   ys = info->ys;
1259   xm = info->zm;
1260   ym = info->ym;
1261   zm = info->xm;
1262 
1263   PetscCheckFalse(zm > 1024,((PetscObject)info->da)->comm,PETSC_ERR_SUP,"Need to allocate more space");
1264   for (i=xs; i<xs+xm; i++) {
1265     for (j=ys; j<ys+ym; j++) {
1266       {                         /* Self-coupling */
1267         const PetscInt    row[]  = {DMDALocalIndex2D(info,i,j)};
1268         const PetscInt    col[]  = {DMDALocalIndex2D(info,i,j)};
1269         const PetscScalar vals[] = {
1270           a,0,0,
1271           0,a,0,
1272           0,0,a
1273         };
1274         ierr = MatSetValuesBlockedLocal(B22,1,row,1,col,vals,INSERT_VALUES);CHKERRQ(ierr);
1275       }
1276       for (k=0; k<zm; k++) {    /* Coupling to velocity problem */
1277         /* Use a cheaper quadrature than for residual evaluation, because it is much sparser */
1278         const PetscInt row[]  = {FieldIndex(PrmNode,DMDALocalIndex2D(info,i,j),h)};
1279         const PetscInt cols[] = {
1280           FieldIndex(Node,DMDALocalIndex3D(info,i-1,j,k),u),
1281           FieldIndex(Node,DMDALocalIndex3D(info,i  ,j,k),u),
1282           FieldIndex(Node,DMDALocalIndex3D(info,i+1,j,k),u),
1283           FieldIndex(Node,DMDALocalIndex3D(info,i,j-1,k),v),
1284           FieldIndex(Node,DMDALocalIndex3D(info,i,j  ,k),v),
1285           FieldIndex(Node,DMDALocalIndex3D(info,i,j+1,k),v)
1286         };
1287         const PetscScalar
1288           w  = (k && k<zm-1) ? 0.5 : 0.25,
1289           hW = w*(x2[i-1][j  ].h+x2[i  ][j  ].h)/(zm-1.),
1290           hE = w*(x2[i  ][j  ].h+x2[i+1][j  ].h)/(zm-1.),
1291           hS = w*(x2[i  ][j-1].h+x2[i  ][j  ].h)/(zm-1.),
1292           hN = w*(x2[i  ][j  ].h+x2[i  ][j+1].h)/(zm-1.);
1293         PetscScalar *vals,
1294                      vals_upwind[] = {((PetscRealPart(x3[i][j][k].u) > 0) ? -hW : 0),
1295                                       ((PetscRealPart(x3[i][j][k].u) > 0) ? +hE : -hW),
1296                                       ((PetscRealPart(x3[i][j][k].u) > 0) ?  0  : +hE),
1297                                       ((PetscRealPart(x3[i][j][k].v) > 0) ? -hS : 0),
1298                                       ((PetscRealPart(x3[i][j][k].v) > 0) ? +hN : -hS),
1299                                       ((PetscRealPart(x3[i][j][k].v) > 0) ?  0  : +hN)},
1300                      vals_centered[] = {-0.5*hW, 0.5*(-hW+hE), 0.5*hE,
1301                                         -0.5*hS, 0.5*(-hS+hN), 0.5*hN};
1302         vals = 1 ? vals_upwind : vals_centered;
1303         if (k == 0) {
1304           Node derate;
1305           THIErosion(thi,&x3[i][j][0],NULL,&derate);
1306           vals[1] -= derate.u;
1307           vals[4] -= derate.v;
1308         }
1309         ierr = MatSetValuesLocal(B21,1,row,6,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
1310       }
1311     }
1312   }
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 static PetscErrorCode THIJacobian(TS ts,PetscReal t,Vec X,Vec Xdot,PetscReal a,Mat A,Mat B,void *ctx)
1317 {
1318   PetscErrorCode ierr;
1319   THI            thi = (THI)ctx;
1320   DM             pack,da3,da2;
1321   Vec            X3,X2,Xdot2;
1322   Mat            B11,B12,B21,B22;
1323   DMDALocalInfo  info3;
1324   IS             *isloc;
1325   const Node     ***x3;
1326   const PrmNode  **x2,**xdot2;
1327 
1328   PetscFunctionBeginUser;
1329   ierr = TSGetDM(ts,&pack);CHKERRQ(ierr);
1330   ierr = DMCompositeGetEntries(pack,&da3,&da2);CHKERRQ(ierr);
1331   ierr = DMDAGetLocalInfo(da3,&info3);CHKERRQ(ierr);
1332   ierr = DMCompositeGetLocalVectors(pack,&X3,&X2);CHKERRQ(ierr);
1333   ierr = DMCompositeGetLocalVectors(pack,NULL,&Xdot2);CHKERRQ(ierr);
1334   ierr = DMCompositeScatter(pack,X,X3,X2);CHKERRQ(ierr);
1335   ierr = THIFixGhosts(thi,da3,da2,X3,X2);CHKERRQ(ierr);
1336   ierr = DMCompositeScatter(pack,Xdot,NULL,Xdot2);CHKERRQ(ierr);
1337 
1338   ierr = MatZeroEntries(B);CHKERRQ(ierr);
1339 
1340   ierr = DMCompositeGetLocalISs(pack,&isloc);CHKERRQ(ierr);
1341   ierr = MatGetLocalSubMatrix(B,isloc[0],isloc[0],&B11);CHKERRQ(ierr);
1342   ierr = MatGetLocalSubMatrix(B,isloc[0],isloc[1],&B12);CHKERRQ(ierr);
1343   ierr = MatGetLocalSubMatrix(B,isloc[1],isloc[0],&B21);CHKERRQ(ierr);
1344   ierr = MatGetLocalSubMatrix(B,isloc[1],isloc[1],&B22);CHKERRQ(ierr);
1345 
1346   ierr = DMDAVecGetArray(da3,X3,&x3);CHKERRQ(ierr);
1347   ierr = DMDAVecGetArray(da2,X2,&x2);CHKERRQ(ierr);
1348   ierr = DMDAVecGetArray(da2,Xdot2,&xdot2);CHKERRQ(ierr);
1349 
1350   ierr = THIJacobianLocal_Momentum(&info3,x3,x2,B11,B12,thi);CHKERRQ(ierr);
1351 
1352   /* Need to switch from ADD_VALUES to INSERT_VALUES */
1353   ierr = MatAssemblyBegin(B,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
1354   ierr = MatAssemblyEnd(B,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
1355 
1356   ierr = THIJacobianLocal_2D(&info3,x3,x2,xdot2,a,B22,B21,thi);CHKERRQ(ierr);
1357 
1358   ierr = DMDAVecRestoreArray(da3,X3,&x3);CHKERRQ(ierr);
1359   ierr = DMDAVecRestoreArray(da2,X2,&x2);CHKERRQ(ierr);
1360   ierr = DMDAVecRestoreArray(da2,Xdot2,&xdot2);CHKERRQ(ierr);
1361 
1362   ierr = MatRestoreLocalSubMatrix(B,isloc[0],isloc[0],&B11);CHKERRQ(ierr);
1363   ierr = MatRestoreLocalSubMatrix(B,isloc[0],isloc[1],&B12);CHKERRQ(ierr);
1364   ierr = MatRestoreLocalSubMatrix(B,isloc[1],isloc[0],&B21);CHKERRQ(ierr);
1365   ierr = MatRestoreLocalSubMatrix(B,isloc[1],isloc[1],&B22);CHKERRQ(ierr);
1366   ierr = ISDestroy(&isloc[0]);CHKERRQ(ierr);
1367   ierr = ISDestroy(&isloc[1]);CHKERRQ(ierr);
1368   ierr = PetscFree(isloc);CHKERRQ(ierr);
1369 
1370   ierr = DMCompositeRestoreLocalVectors(pack,&X3,&X2);CHKERRQ(ierr);
1371   ierr = DMCompositeRestoreLocalVectors(pack,0,&Xdot2);CHKERRQ(ierr);
1372 
1373   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1374   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1375   if (A != B) {
1376     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1377     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1378   }
1379   if (thi->verbose) {ierr = THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);}
1380   PetscFunctionReturn(0);
1381 }
1382 
1383 /* VTK's XML formats are so brain-dead that they can't handle multiple grids in the same file.  Since the communication
1384  * can be shared between the two grids, we write two files at once, one for velocity (living on a 3D grid defined by
1385  * h=thickness and b=bed) and another for all properties living on the 2D grid.
1386  */
1387 static PetscErrorCode THIDAVecView_VTK_XML(THI thi,DM pack,Vec X,const char filename[],const char filename2[])
1388 {
1389   const PetscInt dof   = NODE_SIZE,dof2 = PRMNODE_SIZE;
1390   Units          units = thi->units;
1391   MPI_Comm       comm;
1392   PetscErrorCode ierr;
1393   PetscViewer    viewer3,viewer2;
1394   PetscMPIInt    rank,size,tag,nn,nmax,nn2,nmax2;
1395   PetscInt       mx,my,mz,r,range[6];
1396   PetscScalar    *x,*x2;
1397   DM             da3,da2;
1398   Vec            X3,X2;
1399 
1400   PetscFunctionBeginUser;
1401   ierr = PetscObjectGetComm((PetscObject)thi,&comm);CHKERRQ(ierr);
1402   ierr = DMCompositeGetEntries(pack,&da3,&da2);CHKERRQ(ierr);
1403   ierr = DMCompositeGetAccess(pack,X,&X3,&X2);CHKERRQ(ierr);
1404   ierr = DMDAGetInfo(da3,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
1405   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1406   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
1407   ierr = PetscViewerASCIIOpen(comm,filename,&viewer3);CHKERRQ(ierr);
1408   ierr = PetscViewerASCIIOpen(comm,filename2,&viewer2);CHKERRQ(ierr);
1409   ierr = PetscViewerASCIIPrintf(viewer3,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n");CHKERRQ(ierr);
1410   ierr = PetscViewerASCIIPrintf(viewer2,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n");CHKERRQ(ierr);
1411   ierr = PetscViewerASCIIPrintf(viewer3,"  <StructuredGrid WholeExtent=\"%d %d %d %d %d %d\">\n",0,mz-1,0,my-1,0,mx-1);CHKERRQ(ierr);
1412   ierr = PetscViewerASCIIPrintf(viewer2,"  <StructuredGrid WholeExtent=\"%d %d %d %d %d %d\">\n",0,0,0,my-1,0,mx-1);CHKERRQ(ierr);
1413 
1414   ierr = DMDAGetCorners(da3,range,range+1,range+2,range+3,range+4,range+5);CHKERRQ(ierr);
1415   ierr = PetscMPIIntCast(range[3]*range[4]*range[5]*dof,&nn);CHKERRQ(ierr);
1416   ierr = MPI_Reduce(&nn,&nmax,1,MPI_INT,MPI_MAX,0,comm);CHKERRMPI(ierr);
1417   ierr = PetscMPIIntCast(range[4]*range[5]*dof2,&nn2);CHKERRQ(ierr);
1418   ierr = MPI_Reduce(&nn2,&nmax2,1,MPI_INT,MPI_MAX,0,comm);CHKERRMPI(ierr);
1419   tag  = ((PetscObject)viewer3)->tag;
1420   ierr = VecGetArrayRead(X3,(const PetscScalar**)&x);CHKERRQ(ierr);
1421   ierr = VecGetArrayRead(X2,(const PetscScalar**)&x2);CHKERRQ(ierr);
1422   if (rank == 0) {
1423     PetscScalar *array,*array2;
1424     ierr = PetscMalloc2(nmax,&array,nmax2,&array2);CHKERRQ(ierr);
1425     for (r=0; r<size; r++) {
1426       PetscInt    i,j,k,f,xs,xm,ys,ym,zs,zm;
1427       Node        *y3;
1428       PetscScalar (*y2)[PRMNODE_SIZE];
1429       MPI_Status status;
1430 
1431       if (r) {
1432         ierr = MPI_Recv(range,6,MPIU_INT,r,tag,comm,MPI_STATUS_IGNORE);CHKERRMPI(ierr);
1433       }
1434       zs = range[0];ys = range[1];xs = range[2];zm = range[3];ym = range[4];xm = range[5];
1435       PetscCheckFalse(xm*ym*zm*dof > nmax,PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen");
1436       if (r) {
1437         ierr = MPI_Recv(array,nmax,MPIU_SCALAR,r,tag,comm,&status);CHKERRMPI(ierr);
1438         ierr = MPI_Get_count(&status,MPIU_SCALAR,&nn);CHKERRMPI(ierr);
1439         PetscCheckFalse(nn != xm*ym*zm*dof,PETSC_COMM_SELF,PETSC_ERR_PLIB,"corrupt da3 send");
1440         y3   = (Node*)array;
1441         ierr = MPI_Recv(array2,nmax2,MPIU_SCALAR,r,tag,comm,&status);CHKERRMPI(ierr);
1442         ierr = MPI_Get_count(&status,MPIU_SCALAR,&nn2);CHKERRMPI(ierr);
1443         PetscCheckFalse(nn2 != xm*ym*dof2,PETSC_COMM_SELF,PETSC_ERR_PLIB,"corrupt da2 send");
1444         y2 = (PetscScalar(*)[PRMNODE_SIZE])array2;
1445       } else {
1446         y3 = (Node*)x;
1447         y2 = (PetscScalar(*)[PRMNODE_SIZE])x2;
1448       }
1449       ierr = PetscViewerASCIIPrintf(viewer3,"    <Piece Extent=\"%D %D %D %D %D %D\">\n",zs,zs+zm-1,ys,ys+ym-1,xs,xs+xm-1);CHKERRQ(ierr);
1450       ierr = PetscViewerASCIIPrintf(viewer2,"    <Piece Extent=\"%d %d %D %D %D %D\">\n",0,0,ys,ys+ym-1,xs,xs+xm-1);CHKERRQ(ierr);
1451 
1452       ierr = PetscViewerASCIIPrintf(viewer3,"      <Points>\n");CHKERRQ(ierr);
1453       ierr = PetscViewerASCIIPrintf(viewer2,"      <Points>\n");CHKERRQ(ierr);
1454       ierr = PetscViewerASCIIPrintf(viewer3,"        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n");CHKERRQ(ierr);
1455       ierr = PetscViewerASCIIPrintf(viewer2,"        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n");CHKERRQ(ierr);
1456       for (i=xs; i<xs+xm; i++) {
1457         for (j=ys; j<ys+ym; j++) {
1458           PetscReal
1459             xx = thi->Lx*i/mx,
1460             yy = thi->Ly*j/my,
1461             b  = PetscRealPart(y2[i*ym+j][FieldOffset(PrmNode,b)]),
1462             h  = PetscRealPart(y2[i*ym+j][FieldOffset(PrmNode,h)]);
1463           for (k=zs; k<zs+zm; k++) {
1464             PetscReal zz = b + h*k/(mz-1.);
1465             ierr = PetscViewerASCIIPrintf(viewer3,"%f %f %f\n",xx,yy,zz);CHKERRQ(ierr);
1466           }
1467           ierr = PetscViewerASCIIPrintf(viewer2,"%f %f %f\n",xx,yy,(double)0.0);CHKERRQ(ierr);
1468         }
1469       }
1470       ierr = PetscViewerASCIIPrintf(viewer3,"        </DataArray>\n");CHKERRQ(ierr);
1471       ierr = PetscViewerASCIIPrintf(viewer2,"        </DataArray>\n");CHKERRQ(ierr);
1472       ierr = PetscViewerASCIIPrintf(viewer3,"      </Points>\n");CHKERRQ(ierr);
1473       ierr = PetscViewerASCIIPrintf(viewer2,"      </Points>\n");CHKERRQ(ierr);
1474 
1475       {                         /* Velocity and rank (3D) */
1476         ierr = PetscViewerASCIIPrintf(viewer3,"      <PointData>\n");CHKERRQ(ierr);
1477         ierr = PetscViewerASCIIPrintf(viewer3,"        <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n");CHKERRQ(ierr);
1478         for (i=0; i<nn/dof; i++) {
1479           ierr = PetscViewerASCIIPrintf(viewer3,"%f %f %f\n",PetscRealPart(y3[i].u)*units->year/units->meter,PetscRealPart(y3[i].v)*units->year/units->meter,0.0);CHKERRQ(ierr);
1480         }
1481         ierr = PetscViewerASCIIPrintf(viewer3,"        </DataArray>\n");CHKERRQ(ierr);
1482 
1483         ierr = PetscViewerASCIIPrintf(viewer3,"        <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n");CHKERRQ(ierr);
1484         for (i=0; i<nn; i+=dof) {
1485           ierr = PetscViewerASCIIPrintf(viewer3,"%D\n",r);CHKERRQ(ierr);
1486         }
1487         ierr = PetscViewerASCIIPrintf(viewer3,"        </DataArray>\n");CHKERRQ(ierr);
1488         ierr = PetscViewerASCIIPrintf(viewer3,"      </PointData>\n");CHKERRQ(ierr);
1489       }
1490 
1491       {                         /* 2D */
1492         ierr = PetscViewerASCIIPrintf(viewer2,"      <PointData>\n");CHKERRQ(ierr);
1493         for (f=0; f<PRMNODE_SIZE; f++) {
1494           const char *fieldname;
1495           ierr = DMDAGetFieldName(da2,f,&fieldname);CHKERRQ(ierr);
1496           ierr = PetscViewerASCIIPrintf(viewer2,"        <DataArray type=\"Float32\" Name=\"%s\" format=\"ascii\">\n",fieldname);CHKERRQ(ierr);
1497           for (i=0; i<nn2/PRMNODE_SIZE; i++) {
1498             ierr = PetscViewerASCIIPrintf(viewer2,"%g\n",y2[i][f]);CHKERRQ(ierr);
1499           }
1500           ierr = PetscViewerASCIIPrintf(viewer2,"        </DataArray>\n");CHKERRQ(ierr);
1501         }
1502         ierr = PetscViewerASCIIPrintf(viewer2,"      </PointData>\n");CHKERRQ(ierr);
1503       }
1504 
1505       ierr = PetscViewerASCIIPrintf(viewer3,"    </Piece>\n");CHKERRQ(ierr);
1506       ierr = PetscViewerASCIIPrintf(viewer2,"    </Piece>\n");CHKERRQ(ierr);
1507     }
1508     ierr = PetscFree2(array,array2);CHKERRQ(ierr);
1509   } else {
1510     ierr = MPI_Send(range,6,MPIU_INT,0,tag,comm);CHKERRMPI(ierr);
1511     ierr = MPI_Send(x,nn,MPIU_SCALAR,0,tag,comm);CHKERRMPI(ierr);
1512     ierr = MPI_Send(x2,nn2,MPIU_SCALAR,0,tag,comm);CHKERRMPI(ierr);
1513   }
1514   ierr = VecRestoreArrayRead(X3,(const PetscScalar**)&x);CHKERRQ(ierr);
1515   ierr = VecRestoreArrayRead(X2,(const PetscScalar**)&x2);CHKERRQ(ierr);
1516   ierr = PetscViewerASCIIPrintf(viewer3,"  </StructuredGrid>\n");CHKERRQ(ierr);
1517   ierr = PetscViewerASCIIPrintf(viewer2,"  </StructuredGrid>\n");CHKERRQ(ierr);
1518 
1519   ierr = DMCompositeRestoreAccess(pack,X,&X3,&X2);CHKERRQ(ierr);
1520   ierr = PetscViewerASCIIPrintf(viewer3,"</VTKFile>\n");CHKERRQ(ierr);
1521   ierr = PetscViewerASCIIPrintf(viewer2,"</VTKFile>\n");CHKERRQ(ierr);
1522   ierr = PetscViewerDestroy(&viewer3);CHKERRQ(ierr);
1523   ierr = PetscViewerDestroy(&viewer2);CHKERRQ(ierr);
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 static PetscErrorCode THITSMonitor(TS ts,PetscInt step,PetscReal t,Vec X,void *ctx)
1528 {
1529   PetscErrorCode ierr;
1530   THI            thi = (THI)ctx;
1531   DM             pack;
1532   char           filename3[PETSC_MAX_PATH_LEN],filename2[PETSC_MAX_PATH_LEN];
1533 
1534   PetscFunctionBeginUser;
1535   if (step < 0) PetscFunctionReturn(0); /* negative one is used to indicate an interpolated solution */
1536   ierr = PetscPrintf(PetscObjectComm((PetscObject)ts),"%3D: t=%g\n",step,(double)t);CHKERRQ(ierr);
1537   if (thi->monitor_interval && step % thi->monitor_interval) PetscFunctionReturn(0);
1538   ierr = TSGetDM(ts,&pack);CHKERRQ(ierr);
1539   ierr = PetscSNPrintf(filename3,sizeof(filename3),"%s-3d-%03d.vts",thi->monitor_basename,step);CHKERRQ(ierr);
1540   ierr = PetscSNPrintf(filename2,sizeof(filename2),"%s-2d-%03d.vts",thi->monitor_basename,step);CHKERRQ(ierr);
1541   ierr = THIDAVecView_VTK_XML(thi,pack,X,filename3,filename2);CHKERRQ(ierr);
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 static PetscErrorCode THICreateDM3d(THI thi,DM *dm3d)
1546 {
1547   MPI_Comm       comm;
1548   PetscInt       M    = 3,N = 3,P = 2;
1549   DM             da;
1550   PetscErrorCode ierr;
1551 
1552   PetscFunctionBeginUser;
1553   ierr = PetscObjectGetComm((PetscObject)thi,&comm);CHKERRQ(ierr);
1554   ierr = PetscOptionsBegin(comm,NULL,"Grid resolution options","");CHKERRQ(ierr);
1555   {
1556     ierr = PetscOptionsInt("-M","Number of elements in x-direction on coarse level","",M,&M,NULL);CHKERRQ(ierr);
1557     N    = M;
1558     ierr = PetscOptionsInt("-N","Number of elements in y-direction on coarse level (if different from M)","",N,&N,NULL);CHKERRQ(ierr);
1559     ierr = PetscOptionsInt("-P","Number of elements in z-direction on coarse level","",P,&P,NULL);CHKERRQ(ierr);
1560   }
1561   ierr  = PetscOptionsEnd();CHKERRQ(ierr);
1562   ierr  = DMDACreate3d(comm,DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_BOX,P,N,M,1,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,0,&da);CHKERRQ(ierr);
1563   ierr = DMSetFromOptions(da);CHKERRQ(ierr);
1564   ierr = DMSetUp(da);CHKERRQ(ierr);
1565   ierr  = DMDASetFieldName(da,0,"x-velocity");CHKERRQ(ierr);
1566   ierr  = DMDASetFieldName(da,1,"y-velocity");CHKERRQ(ierr);
1567   *dm3d = da;
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 int main(int argc,char *argv[])
1572 {
1573   MPI_Comm       comm;
1574   DM             pack,da3,da2;
1575   TS             ts;
1576   THI            thi;
1577   Vec            X;
1578   Mat            B;
1579   PetscInt       i,steps;
1580   PetscReal      ftime;
1581   PetscErrorCode ierr;
1582 
1583   ierr = PetscInitialize(&argc,&argv,0,help);if (ierr) return ierr;
1584   comm = PETSC_COMM_WORLD;
1585 
1586   ierr = THICreate(comm,&thi);CHKERRQ(ierr);
1587   ierr = THICreateDM3d(thi,&da3);CHKERRQ(ierr);
1588   {
1589     PetscInt        Mx,My,mx,my,s;
1590     DMDAStencilType st;
1591     ierr = DMDAGetInfo(da3,0, 0,&My,&Mx, 0,&my,&mx, 0,&s,0,0,0,&st);CHKERRQ(ierr);
1592     ierr = DMDACreate2d(PetscObjectComm((PetscObject)thi),DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,My,Mx,my,mx,sizeof(PrmNode)/sizeof(PetscScalar),s,0,0,&da2);CHKERRQ(ierr);
1593     ierr = DMSetUp(da2);CHKERRQ(ierr);
1594   }
1595 
1596   ierr = PetscObjectSetName((PetscObject)da3,"3D_Velocity");CHKERRQ(ierr);
1597   ierr = DMSetOptionsPrefix(da3,"f3d_");CHKERRQ(ierr);
1598   ierr = DMDASetFieldName(da3,0,"u");CHKERRQ(ierr);
1599   ierr = DMDASetFieldName(da3,1,"v");CHKERRQ(ierr);
1600   ierr = PetscObjectSetName((PetscObject)da2,"2D_Fields");CHKERRQ(ierr);
1601   ierr = DMSetOptionsPrefix(da2,"f2d_");CHKERRQ(ierr);
1602   ierr = DMDASetFieldName(da2,0,"b");CHKERRQ(ierr);
1603   ierr = DMDASetFieldName(da2,1,"h");CHKERRQ(ierr);
1604   ierr = DMDASetFieldName(da2,2,"beta2");CHKERRQ(ierr);
1605   ierr = DMCompositeCreate(comm,&pack);CHKERRQ(ierr);
1606   ierr = DMCompositeAddDM(pack,da3);CHKERRQ(ierr);
1607   ierr = DMCompositeAddDM(pack,da2);CHKERRQ(ierr);
1608   ierr = DMDestroy(&da3);CHKERRQ(ierr);
1609   ierr = DMDestroy(&da2);CHKERRQ(ierr);
1610   ierr = DMSetUp(pack);CHKERRQ(ierr);
1611   ierr = DMCreateMatrix(pack,&B);CHKERRQ(ierr);
1612   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1613   ierr = MatSetOptionsPrefix(B,"thi_");CHKERRQ(ierr);
1614 
1615   for (i=0; i<thi->nlevels; i++) {
1616     PetscReal Lx = thi->Lx / thi->units->meter,Ly = thi->Ly / thi->units->meter,Lz = thi->Lz / thi->units->meter;
1617     PetscInt  Mx,My,Mz;
1618     ierr = DMCompositeGetEntries(pack,&da3,&da2);CHKERRQ(ierr);
1619     ierr = DMDAGetInfo(da3,0, &Mz,&My,&Mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr);
1620     ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g x %8.2g, num elements %3d x %3d x %3d (%8d), size (m) %g x %g x %g\n",i,Lx,Ly,Lz,Mx,My,Mz,Mx*My*Mz,Lx/Mx,Ly/My,1000./(Mz-1));CHKERRQ(ierr);
1621   }
1622 
1623   ierr = DMCreateGlobalVector(pack,&X);CHKERRQ(ierr);
1624   ierr = THIInitial(thi,pack,X);CHKERRQ(ierr);
1625 
1626   ierr = TSCreate(comm,&ts);CHKERRQ(ierr);
1627   ierr = TSSetDM(ts,pack);CHKERRQ(ierr);
1628   ierr = TSSetProblemType(ts,TS_NONLINEAR);CHKERRQ(ierr);
1629   ierr = TSMonitorSet(ts,THITSMonitor,thi,NULL);CHKERRQ(ierr);
1630   ierr = TSSetType(ts,TSTHETA);CHKERRQ(ierr);
1631   ierr = TSSetIFunction(ts,NULL,THIFunction,thi);CHKERRQ(ierr);
1632   ierr = TSSetIJacobian(ts,B,B,THIJacobian,thi);CHKERRQ(ierr);
1633   ierr = TSSetMaxTime(ts,10.0);CHKERRQ(ierr);
1634   ierr = TSSetExactFinalTime(ts,TS_EXACTFINALTIME_STEPOVER);CHKERRQ(ierr);
1635   ierr = TSSetSolution(ts,X);CHKERRQ(ierr);
1636   ierr = TSSetTimeStep(ts,1e-3);CHKERRQ(ierr);
1637   ierr = TSSetFromOptions(ts);CHKERRQ(ierr);
1638 
1639   ierr = TSSolve(ts,X);CHKERRQ(ierr);
1640   ierr = TSGetSolveTime(ts,&ftime);CHKERRQ(ierr);
1641   ierr = TSGetStepNumber(ts,&steps);CHKERRQ(ierr);
1642   ierr = PetscPrintf(PETSC_COMM_WORLD,"Steps %D  final time %g\n",steps,(double)ftime);CHKERRQ(ierr);
1643 
1644   if (0) {ierr = THISolveStatistics(thi,ts,0,"Full");CHKERRQ(ierr);}
1645 
1646   {
1647     PetscBool flg;
1648     char      filename[PETSC_MAX_PATH_LEN] = "";
1649     ierr = PetscOptionsGetString(NULL,NULL,"-o",filename,sizeof(filename),&flg);CHKERRQ(ierr);
1650     if (flg) {
1651       ierr = THIDAVecView_VTK_XML(thi,pack,X,filename,NULL);CHKERRQ(ierr);
1652     }
1653   }
1654 
1655   ierr = VecDestroy(&X);CHKERRQ(ierr);
1656   ierr = MatDestroy(&B);CHKERRQ(ierr);
1657   ierr = DMDestroy(&pack);CHKERRQ(ierr);
1658   ierr = TSDestroy(&ts);CHKERRQ(ierr);
1659   ierr = THIDestroy(&thi);CHKERRQ(ierr);
1660   ierr = PetscFinalize();
1661   return ierr;
1662 }
1663