xref: /petsc/src/ts/tutorials/ex14.c (revision 5f80ce2ab25dff0f4601e710601cbbcecf323266)
1 static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
2 \n\
3 Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
4 using multigrid.  The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
5 to p=4/3 in a p-Laplacian).  The focus is on ISMIP-HOM experiments which assume periodic\n\
6 boundary conditions in the x- and y-directions.\n\
7 \n\
8 Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
9 can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
10 \n\
11 A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
12 \n\n";
13 
14 /*
15 The equations for horizontal velocity (u,v) are
16 
17   - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
18   - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0
19 
20 where
21 
22   eta = B/2 (epsilon + gamma)^((p-2)/2)
23 
24 is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
25 written in terms of the second invariant
26 
27   gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2
28 
29 The surface boundary conditions are the natural conditions.  The basal boundary conditions
30 are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.
31 
32 In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).
33 
34 The discretization is Q1 finite elements, managed by a DMDA.  The grid is never distorted in the
35 map (x,y) plane, but the bed and surface may be bumpy.  This is handled as usual in FEM, through
36 the Jacobian of the coordinate transformation from a reference element to the physical element.
37 
38 Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
39 specially so that columns are never distributed, and are always contiguous in memory.
40 This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
41 and then indexing as vec[i][j][k].  The exotic coarse spaces require 2D DMDAs which are made to
42 use compatible domain decomposition relative to the 3D DMDAs.
43 
44 */
45 
46 #include <petscts.h>
47 #include <petscdm.h>
48 #include <petscdmda.h>
49 #include <petscdmcomposite.h>
50 #include <ctype.h>              /* toupper() */
51 #include <petsc/private/petscimpl.h>
52 
53 #if defined __SSE2__
54 #  include <emmintrin.h>
55 #endif
56 
57 /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
58 #define USE_SSE2_KERNELS (!defined NO_SSE2                              \
59                           && !defined PETSC_USE_COMPLEX                 \
60                           && !defined PETSC_USE_REAL_SINGLE           \
61                           && defined __SSE2__)
62 
63 #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L
64 #  if defined __cplusplus       /* C++ restrict is nonstandard and compilers have inconsistent rules about where it can be used */
65 #    define restrict
66 #  else
67 #    define restrict PETSC_RESTRICT
68 #  endif
69 #endif
70 
71 static PetscClassId THI_CLASSID;
72 
73 typedef enum {QUAD_GAUSS,QUAD_LOBATTO} QuadratureType;
74 static const char *QuadratureTypes[] = {"gauss","lobatto","QuadratureType","QUAD_",0};
75 static const PetscReal HexQWeights[8] = {1,1,1,1,1,1,1,1};
76 static const PetscReal HexQNodes[]    = {-0.57735026918962573, 0.57735026918962573};
77 #define G 0.57735026918962573
78 #define H (0.5*(1.+G))
79 #define L (0.5*(1.-G))
80 #define M (-0.5)
81 #define P (0.5)
82 /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
83 static const PetscReal HexQInterp_Lobatto[8][8] = {{H,0,0,0,L,0,0,0},
84                                                    {0,H,0,0,0,L,0,0},
85                                                    {0,0,H,0,0,0,L,0},
86                                                    {0,0,0,H,0,0,0,L},
87                                                    {L,0,0,0,H,0,0,0},
88                                                    {0,L,0,0,0,H,0,0},
89                                                    {0,0,L,0,0,0,H,0},
90                                                    {0,0,0,L,0,0,0,H}};
91 static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
92   {{M*H,M*H,M},{P*H,0,0}  ,{0,0,0}    ,{0,P*H,0}  ,{M*L,M*L,P},{P*L,0,0}  ,{0,0,0}    ,{0,P*L,0}  },
93   {{M*H,0,0}  ,{P*H,M*H,M},{0,P*H,0}  ,{0,0,0}    ,{M*L,0,0}  ,{P*L,M*L,P},{0,P*L,0}  ,{0,0,0}    },
94   {{0,0,0}    ,{0,M*H,0}  ,{P*H,P*H,M},{M*H,0,0}  ,{0,0,0}    ,{0,M*L,0}  ,{P*L,P*L,P},{M*L,0,0}  },
95   {{0,M*H,0}  ,{0,0,0}    ,{P*H,0,0}  ,{M*H,P*H,M},{0,M*L,0}  ,{0,0,0}    ,{P*L,0,0}  ,{M*L,P*L,P}},
96   {{M*L,M*L,M},{P*L,0,0}  ,{0,0,0}    ,{0,P*L,0}  ,{M*H,M*H,P},{P*H,0,0}  ,{0,0,0}    ,{0,P*H,0}  },
97   {{M*L,0,0}  ,{P*L,M*L,M},{0,P*L,0}  ,{0,0,0}    ,{M*H,0,0}  ,{P*H,M*H,P},{0,P*H,0}  ,{0,0,0}    },
98   {{0,0,0}    ,{0,M*L,0}  ,{P*L,P*L,M},{M*L,0,0}  ,{0,0,0}    ,{0,M*H,0}  ,{P*H,P*H,P},{M*H,0,0}  },
99   {{0,M*L,0}  ,{0,0,0}    ,{P*L,0,0}  ,{M*L,P*L,M},{0,M*H,0}  ,{0,0,0}    ,{P*H,0,0}  ,{M*H,P*H,P}}};
100 /* Stanndard Gauss */
101 static const PetscReal HexQInterp_Gauss[8][8] = {{H*H*H,L*H*H,L*L*H,H*L*H, H*H*L,L*H*L,L*L*L,H*L*L},
102                                                  {L*H*H,H*H*H,H*L*H,L*L*H, L*H*L,H*H*L,H*L*L,L*L*L},
103                                                  {L*L*H,H*L*H,H*H*H,L*H*H, L*L*L,H*L*L,H*H*L,L*H*L},
104                                                  {H*L*H,L*L*H,L*H*H,H*H*H, H*L*L,L*L*L,L*H*L,H*H*L},
105                                                  {H*H*L,L*H*L,L*L*L,H*L*L, H*H*H,L*H*H,L*L*H,H*L*H},
106                                                  {L*H*L,H*H*L,H*L*L,L*L*L, L*H*H,H*H*H,H*L*H,L*L*H},
107                                                  {L*L*L,H*L*L,H*H*L,L*H*L, L*L*H,H*L*H,H*H*H,L*H*H},
108                                                  {H*L*L,L*L*L,L*H*L,H*H*L, H*L*H,L*L*H,L*H*H,H*H*H}};
109 static const PetscReal HexQDeriv_Gauss[8][8][3] = {
110   {{M*H*H,H*M*H,H*H*M},{P*H*H,L*M*H,L*H*M},{P*L*H,L*P*H,L*L*M},{M*L*H,H*P*H,H*L*M}, {M*H*L,H*M*L,H*H*P},{P*H*L,L*M*L,L*H*P},{P*L*L,L*P*L,L*L*P},{M*L*L,H*P*L,H*L*P}},
111   {{M*H*H,L*M*H,L*H*M},{P*H*H,H*M*H,H*H*M},{P*L*H,H*P*H,H*L*M},{M*L*H,L*P*H,L*L*M}, {M*H*L,L*M*L,L*H*P},{P*H*L,H*M*L,H*H*P},{P*L*L,H*P*L,H*L*P},{M*L*L,L*P*L,L*L*P}},
112   {{M*L*H,L*M*H,L*L*M},{P*L*H,H*M*H,H*L*M},{P*H*H,H*P*H,H*H*M},{M*H*H,L*P*H,L*H*M}, {M*L*L,L*M*L,L*L*P},{P*L*L,H*M*L,H*L*P},{P*H*L,H*P*L,H*H*P},{M*H*L,L*P*L,L*H*P}},
113   {{M*L*H,H*M*H,H*L*M},{P*L*H,L*M*H,L*L*M},{P*H*H,L*P*H,L*H*M},{M*H*H,H*P*H,H*H*M}, {M*L*L,H*M*L,H*L*P},{P*L*L,L*M*L,L*L*P},{P*H*L,L*P*L,L*H*P},{M*H*L,H*P*L,H*H*P}},
114   {{M*H*L,H*M*L,H*H*M},{P*H*L,L*M*L,L*H*M},{P*L*L,L*P*L,L*L*M},{M*L*L,H*P*L,H*L*M}, {M*H*H,H*M*H,H*H*P},{P*H*H,L*M*H,L*H*P},{P*L*H,L*P*H,L*L*P},{M*L*H,H*P*H,H*L*P}},
115   {{M*H*L,L*M*L,L*H*M},{P*H*L,H*M*L,H*H*M},{P*L*L,H*P*L,H*L*M},{M*L*L,L*P*L,L*L*M}, {M*H*H,L*M*H,L*H*P},{P*H*H,H*M*H,H*H*P},{P*L*H,H*P*H,H*L*P},{M*L*H,L*P*H,L*L*P}},
116   {{M*L*L,L*M*L,L*L*M},{P*L*L,H*M*L,H*L*M},{P*H*L,H*P*L,H*H*M},{M*H*L,L*P*L,L*H*M}, {M*L*H,L*M*H,L*L*P},{P*L*H,H*M*H,H*L*P},{P*H*H,H*P*H,H*H*P},{M*H*H,L*P*H,L*H*P}},
117   {{M*L*L,H*M*L,H*L*M},{P*L*L,L*M*L,L*L*M},{P*H*L,L*P*L,L*H*M},{M*H*L,H*P*L,H*H*M}, {M*L*H,H*M*H,H*L*P},{P*L*H,L*M*H,L*L*P},{P*H*H,L*P*H,L*H*P},{M*H*H,H*P*H,H*H*P}}};
118 static const PetscReal (*HexQInterp)[8],(*HexQDeriv)[8][3];
119 /* Standard 2x2 Gauss quadrature for the bottom layer. */
120 static const PetscReal QuadQInterp[4][4] = {{H*H,L*H,L*L,H*L},
121                                             {L*H,H*H,H*L,L*L},
122                                             {L*L,H*L,H*H,L*H},
123                                             {H*L,L*L,L*H,H*H}};
124 static const PetscReal QuadQDeriv[4][4][2] = {
125   {{M*H,M*H},{P*H,M*L},{P*L,P*L},{M*L,P*H}},
126   {{M*H,M*L},{P*H,M*H},{P*L,P*H},{M*L,P*L}},
127   {{M*L,M*L},{P*L,M*H},{P*H,P*H},{M*H,P*L}},
128   {{M*L,M*H},{P*L,M*L},{P*H,P*L},{M*H,P*H}}};
129 #undef G
130 #undef H
131 #undef L
132 #undef M
133 #undef P
134 
135 #define HexExtract(x,i,j,k,n) do {              \
136     (n)[0] = (x)[i][j][k];                      \
137     (n)[1] = (x)[i+1][j][k];                    \
138     (n)[2] = (x)[i+1][j+1][k];                  \
139     (n)[3] = (x)[i][j+1][k];                    \
140     (n)[4] = (x)[i][j][k+1];                    \
141     (n)[5] = (x)[i+1][j][k+1];                  \
142     (n)[6] = (x)[i+1][j+1][k+1];                \
143     (n)[7] = (x)[i][j+1][k+1];                  \
144   } while (0)
145 
146 #define HexExtractRef(x,i,j,k,n) do {           \
147     (n)[0] = &(x)[i][j][k];                     \
148     (n)[1] = &(x)[i+1][j][k];                   \
149     (n)[2] = &(x)[i+1][j+1][k];                 \
150     (n)[3] = &(x)[i][j+1][k];                   \
151     (n)[4] = &(x)[i][j][k+1];                   \
152     (n)[5] = &(x)[i+1][j][k+1];                 \
153     (n)[6] = &(x)[i+1][j+1][k+1];               \
154     (n)[7] = &(x)[i][j+1][k+1];                 \
155   } while (0)
156 
157 #define QuadExtract(x,i,j,n) do {               \
158     (n)[0] = (x)[i][j];                         \
159     (n)[1] = (x)[i+1][j];                       \
160     (n)[2] = (x)[i+1][j+1];                     \
161     (n)[3] = (x)[i][j+1];                       \
162   } while (0)
163 
164 static PetscScalar Sqr(PetscScalar a) {return a*a;}
165 
166 static void HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[])
167 {
168   PetscInt i;
169   dz[0] = dz[1] = dz[2] = 0;
170   for (i=0; i<8; i++) {
171     dz[0] += dphi[i][0] * zn[i];
172     dz[1] += dphi[i][1] * zn[i];
173     dz[2] += dphi[i][2] * zn[i];
174   }
175 }
176 
177 static void HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[restrict],PetscReal phi[restrict],PetscReal dphi[restrict][3],PetscReal *restrict jw)
178 {
179   const PetscReal
180     jac[3][3] = {{hx/2,0,0}, {0,hy/2,0}, {dz[0],dz[1],dz[2]}}
181   ,ijac[3][3] = {{1/jac[0][0],0,0}, {0,1/jac[1][1],0}, {-jac[2][0]/(jac[0][0]*jac[2][2]),-jac[2][1]/(jac[1][1]*jac[2][2]),1/jac[2][2]}}
182   ,jdet = jac[0][0]*jac[1][1]*jac[2][2];
183   PetscInt i;
184 
185   for (i=0; i<8; i++) {
186     const PetscReal *dphir = HexQDeriv[q][i];
187     phi[i] = HexQInterp[q][i];
188     dphi[i][0] = dphir[0]*ijac[0][0] + dphir[1]*ijac[1][0] + dphir[2]*ijac[2][0];
189     dphi[i][1] = dphir[0]*ijac[0][1] + dphir[1]*ijac[1][1] + dphir[2]*ijac[2][1];
190     dphi[i][2] = dphir[0]*ijac[0][2] + dphir[1]*ijac[1][2] + dphir[2]*ijac[2][2];
191   }
192   *jw = 1.0 * jdet;
193 }
194 
195 typedef struct _p_THI   *THI;
196 typedef struct _n_Units *Units;
197 
198 typedef struct {
199   PetscScalar u,v;
200 } Node;
201 
202 typedef struct {
203   PetscScalar b;                /* bed */
204   PetscScalar h;                /* thickness */
205   PetscScalar beta2;            /* friction */
206 } PrmNode;
207 
208 #define FieldSize(ntype) ((PetscInt)(sizeof(ntype)/sizeof(PetscScalar)))
209 #define FieldOffset(ntype,member) ((PetscInt)(offsetof(ntype,member)/sizeof(PetscScalar)))
210 #define FieldIndex(ntype,i,member) ((PetscInt)((i)*FieldSize(ntype) + FieldOffset(ntype,member)))
211 #define NODE_SIZE FieldSize(Node)
212 #define PRMNODE_SIZE FieldSize(PrmNode)
213 
214 typedef struct {
215   PetscReal min,max,cmin,cmax;
216 } PRange;
217 
218 struct _p_THI {
219   PETSCHEADER(int);
220   void      (*initialize)(THI,PetscReal x,PetscReal y,PrmNode *p);
221   PetscInt  nlevels;
222   PetscInt  zlevels;
223   PetscReal Lx,Ly,Lz;           /* Model domain */
224   PetscReal alpha;              /* Bed angle */
225   Units     units;
226   PetscReal dirichlet_scale;
227   PetscReal ssa_friction_scale;
228   PetscReal inertia;
229   PRange    eta;
230   PRange    beta2;
231   struct {
232     PetscReal Bd2,eps,exponent,glen_n;
233   } viscosity;
234   struct {
235     PetscReal irefgam,eps2,exponent;
236   } friction;
237   struct {
238     PetscReal rate,exponent,refvel;
239   } erosion;
240   PetscReal rhog;
241   PetscBool no_slip;
242   PetscBool verbose;
243   char      *mattype;
244   char      *monitor_basename;
245   PetscInt  monitor_interval;
246 };
247 
248 struct _n_Units {
249   /* fundamental */
250   PetscReal meter;
251   PetscReal kilogram;
252   PetscReal second;
253   /* derived */
254   PetscReal Pascal;
255   PetscReal year;
256 };
257 
258 static void PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[])
259 {
260   const PetscScalar zm1 = zm-1,
261     znl[8] = {pn[0].b + pn[0].h*(PetscScalar)k/zm1,
262               pn[1].b + pn[1].h*(PetscScalar)k/zm1,
263               pn[2].b + pn[2].h*(PetscScalar)k/zm1,
264               pn[3].b + pn[3].h*(PetscScalar)k/zm1,
265               pn[0].b + pn[0].h*(PetscScalar)(k+1)/zm1,
266               pn[1].b + pn[1].h*(PetscScalar)(k+1)/zm1,
267               pn[2].b + pn[2].h*(PetscScalar)(k+1)/zm1,
268               pn[3].b + pn[3].h*(PetscScalar)(k+1)/zm1};
269   PetscInt i;
270   for (i=0; i<8; i++) zn[i] = PetscRealPart(znl[i]);
271 }
272 
273 /* Compute a gradient of all the 2D fields at four quadrature points.  Output for [quadrature_point][direction].field_name */
274 static PetscErrorCode QuadComputeGrad4(const PetscReal dphi[][4][2],PetscReal hx,PetscReal hy,const PrmNode pn[4],PrmNode dp[4][2])
275 {
276   PetscErrorCode ierr;
277   PetscInt       q,i,f;
278   const PetscScalar (*restrict pg)[PRMNODE_SIZE] = (const PetscScalar(*)[PRMNODE_SIZE])pn; /* Get generic array pointers to the node */
279   PetscScalar (*restrict dpg)[2][PRMNODE_SIZE]   = (PetscScalar(*)[2][PRMNODE_SIZE])dp;
280 
281   PetscFunctionBeginUser;
282   CHKERRQ(PetscArrayzero(dpg,4));
283   for (q=0; q<4; q++) {
284     for (i=0; i<4; i++) {
285       for (f=0; f<PRMNODE_SIZE; f++) {
286         dpg[q][0][f] += dphi[q][i][0]/hx * pg[i][f];
287         dpg[q][1][f] += dphi[q][i][1]/hy * pg[i][f];
288       }
289     }
290   }
291   PetscFunctionReturn(0);
292 }
293 
294 static inline PetscReal StaggeredMidpoint2D(PetscScalar a,PetscScalar b,PetscScalar c,PetscScalar d)
295 {return 0.5*PetscRealPart(0.75*a + 0.75*b + 0.25*c + 0.25*d);}
296 static inline PetscReal UpwindFlux1D(PetscReal u,PetscReal hL,PetscReal hR)
297 {return (u > 0) ? hL*u : hR*u;}
298 
299 #define UpwindFluxXW(x3,x2,h,i,j,k,dj) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u,x3[i-1][j][k].u, x3[i-1][j+dj][k].u,x3[i][k+dj][k].u), \
300                                                     PetscRealPart(0.75*x2[i-1][j  ].h+0.25*x2[i-1][j+dj].h), PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i  ][j+dj].h))
301 #define UpwindFluxXE(x3,x2,h,i,j,k,dj) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u,x3[i+1][j][k].u, x3[i+1][j+dj][k].u,x3[i][k+dj][k].u), \
302                                                     PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i  ][j+dj].h), PetscRealPart(0.75*x2[i+1][j  ].h+0.25*x2[i+1][j+dj].h))
303 #define UpwindFluxYS(x3,x2,h,i,j,k,di) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v,x3[i][j-1][k].v, x3[i+di][j-1][k].v,x3[i+di][j][k].v), \
304                                                     PetscRealPart(0.75*x2[i  ][j-1].h+0.25*x2[i+di][j-1].h), PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i+di][j  ].h))
305 #define UpwindFluxYN(x3,x2,h,i,j,k,di) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v,x3[i][j+1][k].v, x3[i+di][j+1][k].v,x3[i+di][j][k].v), \
306                                                     PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i+di][j  ].h), PetscRealPart(0.75*x2[i  ][j+1].h+0.25*x2[i+di][j+1].h))
307 
308 static void PrmNodeGetFaceMeasure(const PrmNode **p,PetscInt i,PetscInt j,PetscScalar h[])
309 {
310   /* West */
311   h[0] = StaggeredMidpoint2D(p[i][j].h,p[i-1][j].h,p[i-1][j-1].h,p[i][j-1].h);
312   h[1] = StaggeredMidpoint2D(p[i][j].h,p[i-1][j].h,p[i-1][j+1].h,p[i][j+1].h);
313   /* East */
314   h[2] = StaggeredMidpoint2D(p[i][j].h,p[i+1][j].h,p[i+1][j+1].h,p[i][j+1].h);
315   h[3] = StaggeredMidpoint2D(p[i][j].h,p[i+1][j].h,p[i+1][j-1].h,p[i][j-1].h);
316   /* South */
317   h[4] = StaggeredMidpoint2D(p[i][j].h,p[i][j-1].h,p[i+1][j-1].h,p[i+1][j].h);
318   h[5] = StaggeredMidpoint2D(p[i][j].h,p[i][j-1].h,p[i-1][j-1].h,p[i-1][j].h);
319   /* North */
320   h[6] = StaggeredMidpoint2D(p[i][j].h,p[i][j+1].h,p[i-1][j+1].h,p[i-1][j].h);
321   h[7] = StaggeredMidpoint2D(p[i][j].h,p[i][j+1].h,p[i+1][j+1].h,p[i+1][j].h);
322 }
323 
324 /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
325 static void THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode *p)
326 {
327   Units units = thi->units;
328   PetscReal s = -x*PetscSinReal(thi->alpha);
329   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx) * PetscSinReal(y*2*PETSC_PI/thi->Ly);
330   p->h = s - p->b;
331   p->beta2 = -1e-10;             /* This value is not used, but it should not be huge because that would change the finite difference step size  */
332 }
333 
334 static void THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode *p)
335 {
336   Units units = thi->units;
337   PetscReal s = -x*PetscSinReal(thi->alpha);
338   p->b = s - 1000*units->meter;
339   p->h = s - p->b;
340   /* tau_b = beta2 v   is a stress (Pa).
341    * This is a big number in our units (it needs to balance the driving force from the surface), so we scale it by 1/rhog, just like the residual. */
342   p->beta2 = 1000 * (1 + PetscSinReal(x*2*PETSC_PI/thi->Lx)*PetscSinReal(y*2*PETSC_PI/thi->Ly)) * units->Pascal * units->year / units->meter / thi->rhog;
343 }
344 
345 /* These are just toys */
346 
347 /* From Fred Herman */
348 static void THIInitialize_HOM_F(THI thi,PetscReal x,PetscReal y,PrmNode *p)
349 {
350   Units units = thi->units;
351   PetscReal s = -x*PetscSinReal(thi->alpha);
352   p->b = s - 1000*units->meter + 100*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx);/* * sin(y*2*PETSC_PI/thi->Ly); */
353   p->h = s - p->b;
354   p->h = (1-(atan((x-thi->Lx/2)/1.)+PETSC_PI/2.)/PETSC_PI)*500*units->meter+1*units->meter;
355   s = PetscRealPart(p->b + p->h);
356   p->beta2 = -1e-10;
357   /*  p->beta2 = 1000 * units->Pascal * units->year / units->meter; */
358 }
359 
360 /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
361 static void THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
362 {
363   Units units = thi->units;
364   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
365   PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
366   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
367   p->h = s - p->b;
368   p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter / thi->rhog;
369 }
370 
371 /* Like Z, but with 200 meter cliffs */
372 static void THIInitialize_HOM_Y(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
373 {
374   Units units = thi->units;
375   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
376   PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
377   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
378   if (PetscRealPart(p->b) > -700*units->meter) p->b += 200*units->meter;
379   p->h = s - p->b;
380   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter / thi->rhog;
381 }
382 
383 /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
384 static void THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
385 {
386   Units units = thi->units;
387   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
388   PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
389   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
390   p->h = s - p->b;
391   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter / thi->rhog;
392 }
393 
394 static void THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal *beta2,PetscReal *dbeta2)
395 {
396   if (thi->friction.irefgam == 0) {
397     Units units = thi->units;
398     thi->friction.irefgam = 1./(0.5*PetscSqr(100 * units->meter / units->year));
399     thi->friction.eps2 = 0.5*PetscSqr(1.e-4 / thi->friction.irefgam);
400   }
401   if (thi->friction.exponent == 0) {
402     *beta2  = rbeta2;
403     *dbeta2 = 0;
404   } else {
405     *beta2  = rbeta2 * PetscPowReal(thi->friction.eps2 + gam*thi->friction.irefgam,thi->friction.exponent);
406     *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam*thi->friction.irefgam) * thi->friction.irefgam;
407   }
408 }
409 
410 static void THIViscosity(THI thi,PetscReal gam,PetscReal *eta,PetscReal *deta)
411 {
412   PetscReal Bd2,eps,exponent;
413   if (thi->viscosity.Bd2 == 0) {
414     Units units = thi->units;
415     const PetscReal
416       n = thi->viscosity.glen_n,                        /* Glen exponent */
417       p = 1. + 1./n,                                    /* for Stokes */
418       A = 1.e-16 * PetscPowReal(units->Pascal,-n) / units->year, /* softness parameter (Pa^{-n}/s) */
419       B = PetscPowReal(A,-1./n);                                 /* hardness parameter */
420     thi->viscosity.Bd2      = B/2;
421     thi->viscosity.exponent = (p-2)/2;
422     thi->viscosity.eps      = 0.5*PetscSqr(1e-5 / units->year);
423   }
424   Bd2      = thi->viscosity.Bd2;
425   exponent = thi->viscosity.exponent;
426   eps      = thi->viscosity.eps;
427   *eta     = Bd2 * PetscPowReal(eps + gam,exponent);
428   *deta    = exponent * (*eta) / (eps + gam);
429 }
430 
431 static void THIErosion(THI thi,const Node *vel,PetscScalar *erate,Node *derate)
432 {
433   const PetscScalar magref2 = 1.e-10 + (PetscSqr(vel->u) + PetscSqr(vel->v)) / PetscSqr(thi->erosion.refvel),
434                     rate    = -thi->erosion.rate*PetscPowScalar(magref2, 0.5*thi->erosion.exponent);
435   if (erate) *erate = rate;
436   if (derate) {
437     if (thi->erosion.exponent == 1) {
438       derate->u = 0;
439       derate->v = 0;
440     } else {
441       derate->u = 0.5*thi->erosion.exponent * rate / magref2 * 2. * vel->u / PetscSqr(thi->erosion.refvel);
442       derate->v = 0.5*thi->erosion.exponent * rate / magref2 * 2. * vel->v / PetscSqr(thi->erosion.refvel);
443     }
444   }
445 }
446 
447 static void RangeUpdate(PetscReal *min,PetscReal *max,PetscReal x)
448 {
449   if (x < *min) *min = x;
450   if (x > *max) *max = x;
451 }
452 
453 static void PRangeClear(PRange *p)
454 {
455   p->cmin = p->min = 1e100;
456   p->cmax = p->max = -1e100;
457 }
458 
459 static PetscErrorCode PRangeMinMax(PRange *p,PetscReal min,PetscReal max)
460 {
461   PetscFunctionBeginUser;
462   p->cmin = min;
463   p->cmax = max;
464   if (min < p->min) p->min = min;
465   if (max > p->max) p->max = max;
466   PetscFunctionReturn(0);
467 }
468 
469 static PetscErrorCode THIDestroy(THI *thi)
470 {
471   PetscErrorCode ierr;
472 
473   PetscFunctionBeginUser;
474   if (--((PetscObject)(*thi))->refct > 0) PetscFunctionReturn(0);
475   CHKERRQ(PetscFree((*thi)->units));
476   CHKERRQ(PetscFree((*thi)->mattype));
477   CHKERRQ(PetscFree((*thi)->monitor_basename));
478   CHKERRQ(PetscHeaderDestroy(thi));
479   PetscFunctionReturn(0);
480 }
481 
482 static PetscErrorCode THICreate(MPI_Comm comm,THI *inthi)
483 {
484   static PetscBool registered = PETSC_FALSE;
485   THI              thi;
486   Units            units;
487   char             monitor_basename[PETSC_MAX_PATH_LEN] = "thi-";
488   PetscErrorCode   ierr;
489 
490   PetscFunctionBeginUser;
491   *inthi = 0;
492   if (!registered) {
493     CHKERRQ(PetscClassIdRegister("Toy Hydrostatic Ice",&THI_CLASSID));
494     registered = PETSC_TRUE;
495   }
496   CHKERRQ(PetscHeaderCreate(thi,THI_CLASSID,"THI","Toy Hydrostatic Ice","THI",comm,THIDestroy,0));
497 
498   CHKERRQ(PetscNew(&thi->units));
499 
500   units           = thi->units;
501   units->meter    = 1e-2;
502   units->second   = 1e-7;
503   units->kilogram = 1e-12;
504 
505   ierr = PetscOptionsBegin(comm,NULL,"Scaled units options","");CHKERRQ(ierr);
506   {
507     CHKERRQ(PetscOptionsReal("-units_meter","1 meter in scaled length units","",units->meter,&units->meter,NULL));
508     CHKERRQ(PetscOptionsReal("-units_second","1 second in scaled time units","",units->second,&units->second,NULL));
509     CHKERRQ(PetscOptionsReal("-units_kilogram","1 kilogram in scaled mass units","",units->kilogram,&units->kilogram,NULL));
510   }
511   ierr = PetscOptionsEnd();CHKERRQ(ierr);
512   units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
513   units->year   = 31556926. * units->second, /* seconds per year */
514 
515   thi->Lx              = 10.e3;
516   thi->Ly              = 10.e3;
517   thi->Lz              = 1000;
518   thi->nlevels         = 1;
519   thi->dirichlet_scale = 1;
520   thi->verbose         = PETSC_FALSE;
521 
522   thi->viscosity.glen_n = 3.;
523   thi->erosion.rate     = 1e-3; /* m/a */
524   thi->erosion.exponent = 1.;
525   thi->erosion.refvel   = 1.;   /* m/a */
526 
527   ierr = PetscOptionsBegin(comm,NULL,"Toy Hydrostatic Ice options","");CHKERRQ(ierr);
528   {
529     QuadratureType quad       = QUAD_GAUSS;
530     char           homexp[]   = "A";
531     char           mtype[256] = MATSBAIJ;
532     PetscReal      L,m = 1.0;
533     PetscBool      flg;
534     L    = thi->Lx;
535     CHKERRQ(PetscOptionsReal("-thi_L","Domain size (m)","",L,&L,&flg));
536     if (flg) thi->Lx = thi->Ly = L;
537     CHKERRQ(PetscOptionsReal("-thi_Lx","X Domain size (m)","",thi->Lx,&thi->Lx,NULL));
538     CHKERRQ(PetscOptionsReal("-thi_Ly","Y Domain size (m)","",thi->Ly,&thi->Ly,NULL));
539     CHKERRQ(PetscOptionsReal("-thi_Lz","Z Domain size (m)","",thi->Lz,&thi->Lz,NULL));
540     CHKERRQ(PetscOptionsString("-thi_hom","ISMIP-HOM experiment (A or C)","",homexp,homexp,sizeof(homexp),NULL));
541     switch (homexp[0] = toupper(homexp[0])) {
542     case 'A':
543       thi->initialize = THIInitialize_HOM_A;
544       thi->no_slip    = PETSC_TRUE;
545       thi->alpha      = 0.5;
546       break;
547     case 'C':
548       thi->initialize = THIInitialize_HOM_C;
549       thi->no_slip    = PETSC_FALSE;
550       thi->alpha      = 0.1;
551       break;
552     case 'F':
553       thi->initialize = THIInitialize_HOM_F;
554       thi->no_slip    = PETSC_FALSE;
555       thi->alpha      = 0.5;
556       break;
557     case 'X':
558       thi->initialize = THIInitialize_HOM_X;
559       thi->no_slip    = PETSC_FALSE;
560       thi->alpha      = 0.3;
561       break;
562     case 'Y':
563       thi->initialize = THIInitialize_HOM_Y;
564       thi->no_slip    = PETSC_FALSE;
565       thi->alpha      = 0.5;
566       break;
567     case 'Z':
568       thi->initialize = THIInitialize_HOM_Z;
569       thi->no_slip    = PETSC_FALSE;
570       thi->alpha      = 0.5;
571       break;
572     default:
573       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HOM experiment '%c' not implemented",homexp[0]);
574     }
575     CHKERRQ(PetscOptionsEnum("-thi_quadrature","Quadrature to use for 3D elements","",QuadratureTypes,(PetscEnum)quad,(PetscEnum*)&quad,NULL));
576     switch (quad) {
577     case QUAD_GAUSS:
578       HexQInterp = HexQInterp_Gauss;
579       HexQDeriv  = HexQDeriv_Gauss;
580       break;
581     case QUAD_LOBATTO:
582       HexQInterp = HexQInterp_Lobatto;
583       HexQDeriv  = HexQDeriv_Lobatto;
584       break;
585     }
586     CHKERRQ(PetscOptionsReal("-thi_alpha","Bed angle (degrees)","",thi->alpha,&thi->alpha,NULL));
587     CHKERRQ(PetscOptionsReal("-thi_viscosity_glen_n","Exponent in Glen flow law, 1=linear, infty=ideal plastic",NULL,thi->viscosity.glen_n,&thi->viscosity.glen_n,NULL));
588     CHKERRQ(PetscOptionsReal("-thi_friction_m","Friction exponent, 0=Coulomb, 1=Navier","",m,&m,NULL));
589     thi->friction.exponent = (m-1)/2;
590     CHKERRQ(PetscOptionsReal("-thi_erosion_rate","Rate of erosion relative to sliding velocity at reference velocity (m/a)",NULL,thi->erosion.rate,&thi->erosion.rate,NULL));
591     CHKERRQ(PetscOptionsReal("-thi_erosion_exponent","Power of sliding velocity appearing in erosion relation",NULL,thi->erosion.exponent,&thi->erosion.exponent,NULL));
592     CHKERRQ(PetscOptionsReal("-thi_erosion_refvel","Reference sliding velocity for erosion (m/a)",NULL,thi->erosion.refvel,&thi->erosion.refvel,NULL));
593     thi->erosion.rate   *= units->meter / units->year;
594     thi->erosion.refvel *= units->meter / units->year;
595     CHKERRQ(PetscOptionsReal("-thi_dirichlet_scale","Scale Dirichlet boundary conditions by this factor","",thi->dirichlet_scale,&thi->dirichlet_scale,NULL));
596     CHKERRQ(PetscOptionsReal("-thi_ssa_friction_scale","Scale slip boundary conditions by this factor in SSA (2D) assembly","",thi->ssa_friction_scale,&thi->ssa_friction_scale,NULL));
597     CHKERRQ(PetscOptionsReal("-thi_inertia","Coefficient of accelaration term in velocity system, physical is almost zero",NULL,thi->inertia,&thi->inertia,NULL));
598     CHKERRQ(PetscOptionsInt("-thi_nlevels","Number of levels of refinement","",thi->nlevels,&thi->nlevels,NULL));
599     CHKERRQ(PetscOptionsFList("-thi_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)mtype,sizeof(mtype),NULL));
600     CHKERRQ(PetscStrallocpy(mtype,&thi->mattype));
601     CHKERRQ(PetscOptionsBool("-thi_verbose","Enable verbose output (like matrix sizes and statistics)","",thi->verbose,&thi->verbose,NULL));
602     CHKERRQ(PetscOptionsString("-thi_monitor","Basename to write state files to",NULL,monitor_basename,monitor_basename,sizeof(monitor_basename),&flg));
603     if (flg) {
604       CHKERRQ(PetscStrallocpy(monitor_basename,&thi->monitor_basename));
605       thi->monitor_interval = 1;
606       CHKERRQ(PetscOptionsInt("-thi_monitor_interval","Frequency at which to write state files",NULL,thi->monitor_interval,&thi->monitor_interval,NULL));
607     }
608   }
609   ierr = PetscOptionsEnd();CHKERRQ(ierr);
610 
611   /* dimensionalize */
612   thi->Lx    *= units->meter;
613   thi->Ly    *= units->meter;
614   thi->Lz    *= units->meter;
615   thi->alpha *= PETSC_PI / 180;
616 
617   PRangeClear(&thi->eta);
618   PRangeClear(&thi->beta2);
619 
620   {
621     PetscReal u       = 1000*units->meter/(3e7*units->second),
622               gradu   = u / (100*units->meter),eta,deta,
623               rho     = 910 * units->kilogram/PetscPowRealInt(units->meter,3),
624               grav    = 9.81 * units->meter/PetscSqr(units->second),
625               driving = rho * grav * PetscSinReal(thi->alpha) * 1000*units->meter;
626     THIViscosity(thi,0.5*gradu*gradu,&eta,&deta);
627     thi->rhog = rho * grav;
628     if (thi->verbose) {
629       CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)thi),"Units: meter %8.2g  second %8.2g  kg %8.2g  Pa %8.2g\n",units->meter,units->second,units->kilogram,units->Pascal));
630       CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)thi),"Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n",thi->Lx,thi->Ly,thi->Lz,rho*grav*1e3*units->meter,driving));
631       CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)thi),"Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",u,gradu,eta,2*eta*gradu,2*eta*gradu/driving));
632       THIViscosity(thi,0.5*PetscSqr(1e-3*gradu),&eta,&deta);
633       CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)thi),"Small velocity 1m/a  %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",1e-3*u,1e-3*gradu,eta,2*eta*1e-3*gradu,2*eta*1e-3*gradu/driving));
634     }
635   }
636 
637   *inthi = thi;
638   PetscFunctionReturn(0);
639 }
640 
641 /* Our problem is periodic, but the domain has a mean slope of alpha so the bed does not line up between the upstream
642  * and downstream ends of the domain.  This function fixes the ghost values so that the domain appears truly periodic in
643  * the horizontal. */
644 static PetscErrorCode THIFixGhosts(THI thi,DM da3,DM da2,Vec X3,Vec X2)
645 {
646   PetscErrorCode ierr;
647   DMDALocalInfo  info;
648   PrmNode        **x2;
649   PetscInt       i,j;
650 
651   PetscFunctionBeginUser;
652   CHKERRQ(DMDAGetLocalInfo(da3,&info));
653   /* CHKERRQ(VecView(X2,PETSC_VIEWER_STDOUT_WORLD)); */
654   CHKERRQ(DMDAVecGetArray(da2,X2,&x2));
655   for (i=info.gzs; i<info.gzs+info.gzm; i++) {
656     if (i > -1 && i < info.mz) continue;
657     for (j=info.gys; j<info.gys+info.gym; j++) {
658       x2[i][j].b += PetscSinReal(thi->alpha) * thi->Lx * (i<0 ? 1.0 : -1.0);
659     }
660   }
661   CHKERRQ(DMDAVecRestoreArray(da2,X2,&x2));
662   /* CHKERRQ(VecView(X2,PETSC_VIEWER_STDOUT_WORLD)); */
663   PetscFunctionReturn(0);
664 }
665 
666 static PetscErrorCode THIInitializePrm(THI thi,DM da2prm,PrmNode **p)
667 {
668   PetscInt       i,j,xs,xm,ys,ym,mx,my;
669   PetscErrorCode ierr;
670 
671   PetscFunctionBeginUser;
672   CHKERRQ(DMDAGetGhostCorners(da2prm,&ys,&xs,0,&ym,&xm,0));
673   CHKERRQ(DMDAGetInfo(da2prm,0, &my,&mx,0, 0,0,0, 0,0,0,0,0,0));
674   for (i=xs; i<xs+xm; i++) {
675     for (j=ys; j<ys+ym; j++) {
676       PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my;
677       thi->initialize(thi,xx,yy,&p[i][j]);
678     }
679   }
680   PetscFunctionReturn(0);
681 }
682 
683 static PetscErrorCode THIInitial(THI thi,DM pack,Vec X)
684 {
685   DM             da3,da2;
686   PetscInt       i,j,k,xs,xm,ys,ym,zs,zm,mx,my;
687   PetscReal      hx,hy;
688   PrmNode        **prm;
689   Node           ***x;
690   Vec            X3g,X2g,X2;
691   PetscErrorCode ierr;
692 
693   PetscFunctionBeginUser;
694   CHKERRQ(DMCompositeGetEntries(pack,&da3,&da2));
695   CHKERRQ(DMCompositeGetAccess(pack,X,&X3g,&X2g));
696   CHKERRQ(DMGetLocalVector(da2,&X2));
697 
698   CHKERRQ(DMDAGetInfo(da3,0, 0,&my,&mx, 0,0,0, 0,0,0,0,0,0));
699   CHKERRQ(DMDAGetCorners(da3,&zs,&ys,&xs,&zm,&ym,&xm));
700   CHKERRQ(DMDAVecGetArray(da3,X3g,&x));
701   CHKERRQ(DMDAVecGetArray(da2,X2,&prm));
702 
703   CHKERRQ(THIInitializePrm(thi,da2,prm));
704 
705   hx = thi->Lx / mx;
706   hy = thi->Ly / my;
707   for (i=xs; i<xs+xm; i++) {
708     for (j=ys; j<ys+ym; j++) {
709       for (k=zs; k<zs+zm; k++) {
710         const PetscScalar zm1      = zm-1,
711                           drivingx = thi->rhog * (prm[i+1][j].b+prm[i+1][j].h - prm[i-1][j].b-prm[i-1][j].h) / (2*hx),
712                           drivingy = thi->rhog * (prm[i][j+1].b+prm[i][j+1].h - prm[i][j-1].b-prm[i][j-1].h) / (2*hy);
713         x[i][j][k].u = 0. * drivingx * prm[i][j].h*(PetscScalar)k/zm1;
714         x[i][j][k].v = 0. * drivingy * prm[i][j].h*(PetscScalar)k/zm1;
715       }
716     }
717   }
718 
719   CHKERRQ(DMDAVecRestoreArray(da3,X3g,&x));
720   CHKERRQ(DMDAVecRestoreArray(da2,X2,&prm));
721 
722   CHKERRQ(DMLocalToGlobalBegin(da2,X2,INSERT_VALUES,X2g));
723   CHKERRQ(DMLocalToGlobalEnd  (da2,X2,INSERT_VALUES,X2g));
724   CHKERRQ(DMRestoreLocalVector(da2,&X2));
725 
726   CHKERRQ(DMCompositeRestoreAccess(pack,X,&X3g,&X2g));
727   PetscFunctionReturn(0);
728 }
729 
730 static void PointwiseNonlinearity(THI thi,const Node n[restrict 8],const PetscReal phi[restrict 3],PetscReal dphi[restrict 8][3],PetscScalar *restrict u,PetscScalar *restrict v,PetscScalar du[restrict 3],PetscScalar dv[restrict 3],PetscReal *eta,PetscReal *deta)
731 {
732   PetscInt    l,ll;
733   PetscScalar gam;
734 
735   du[0] = du[1] = du[2] = 0;
736   dv[0] = dv[1] = dv[2] = 0;
737   *u    = 0;
738   *v    = 0;
739   for (l=0; l<8; l++) {
740     *u += phi[l] * n[l].u;
741     *v += phi[l] * n[l].v;
742     for (ll=0; ll<3; ll++) {
743       du[ll] += dphi[l][ll] * n[l].u;
744       dv[ll] += dphi[l][ll] * n[l].v;
745     }
746   }
747   gam = Sqr(du[0]) + Sqr(dv[1]) + du[0]*dv[1] + 0.25*Sqr(du[1]+dv[0]) + 0.25*Sqr(du[2]) + 0.25*Sqr(dv[2]);
748   THIViscosity(thi,PetscRealPart(gam),eta,deta);
749 }
750 
751 static PetscErrorCode THIFunctionLocal_3D(DMDALocalInfo *info,const Node ***x,const PrmNode **prm,const Node ***xdot,Node ***f,THI thi)
752 {
753   PetscInt       xs,ys,xm,ym,zm,i,j,k,q,l;
754   PetscReal      hx,hy,etamin,etamax,beta2min,beta2max;
755   PetscErrorCode ierr;
756 
757   PetscFunctionBeginUser;
758   xs = info->zs;
759   ys = info->ys;
760   xm = info->zm;
761   ym = info->ym;
762   zm = info->xm;
763   hx = thi->Lx / info->mz;
764   hy = thi->Ly / info->my;
765 
766   etamin   = 1e100;
767   etamax   = 0;
768   beta2min = 1e100;
769   beta2max = 0;
770 
771   for (i=xs; i<xs+xm; i++) {
772     for (j=ys; j<ys+ym; j++) {
773       PrmNode pn[4],dpn[4][2];
774       QuadExtract(prm,i,j,pn);
775       CHKERRQ(QuadComputeGrad4(QuadQDeriv,hx,hy,pn,dpn));
776       for (k=0; k<zm-1; k++) {
777         PetscInt  ls = 0;
778         Node      n[8],ndot[8],*fn[8];
779         PetscReal zn[8],etabase = 0;
780 
781         PrmHexGetZ(pn,k,zm,zn);
782         HexExtract(x,i,j,k,n);
783         HexExtract(xdot,i,j,k,ndot);
784         HexExtractRef(f,i,j,k,fn);
785         if (thi->no_slip && k == 0) {
786           for (l=0; l<4; l++) n[l].u = n[l].v = 0;
787           /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
788           ls = 4;
789         }
790         for (q=0; q<8; q++) {
791           PetscReal   dz[3],phi[8],dphi[8][3],jw,eta,deta;
792           PetscScalar du[3],dv[3],u,v,udot=0,vdot=0;
793           for (l=ls; l<8; l++) {
794             udot += HexQInterp[q][l]*ndot[l].u;
795             vdot += HexQInterp[q][l]*ndot[l].v;
796           }
797           HexGrad(HexQDeriv[q],zn,dz);
798           HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
799           PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
800           jw /= thi->rhog;      /* scales residuals to be O(1) */
801           if (q == 0) etabase = eta;
802           RangeUpdate(&etamin,&etamax,eta);
803           for (l=ls; l<8; l++) { /* test functions */
804             const PetscScalar ds[2] = {dpn[q%4][0].h+dpn[q%4][0].b, dpn[q%4][1].h+dpn[q%4][1].b};
805             const PetscReal   pp    = phi[l],*dp = dphi[l];
806             fn[l]->u += dp[0]*jw*eta*(4.*du[0]+2.*dv[1]) + dp[1]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*du[2] + pp*jw*thi->rhog*ds[0];
807             fn[l]->v += dp[1]*jw*eta*(2.*du[0]+4.*dv[1]) + dp[0]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*dv[2] + pp*jw*thi->rhog*ds[1];
808             fn[l]->u += pp*jw*udot*thi->inertia*pp;
809             fn[l]->v += pp*jw*vdot*thi->inertia*pp;
810           }
811         }
812         if (k == 0) { /* we are on a bottom face */
813           if (thi->no_slip) {
814             /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
815             * conditions.  After shenanigans above, etabase contains the effective viscosity at the closest quadrature
816             * point to the bed.  We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
817             * diagonal entry corresponding to the adjacent node.  The fundamental scaling of the viscous part is in
818             * diagu, diagv below.  This scaling is easy to recognize by considering the finite difference operator after
819             * scaling by element size.  The no-slip Dirichlet condition is scaled by this factor, and also in the
820             * assembled matrix (see the similar block in THIJacobianLocal).
821             *
822             * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
823             * (nonlinearly in general) on the neighboring interior nodes through the local viscosity.  This will make
824             * a matrix-free Jacobian have extra entries in the corresponding row.  We assemble only the diagonal part,
825             * so the solution will exactly satisfy the boundary condition after the first linear iteration.
826             */
827             const PetscReal   hz    = PetscRealPart(pn[0].h)/(zm-1.);
828             const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
829             fn[0]->u = thi->dirichlet_scale*diagu*x[i][j][k].u;
830             fn[0]->v = thi->dirichlet_scale*diagv*x[i][j][k].v;
831           } else {              /* Integrate over bottom face to apply boundary condition */
832             for (q=0; q<4; q++) { /* We remove the explicit scaling of the residual by 1/rhog because beta2 already has that scaling to be O(1) */
833               const PetscReal jw = 0.25*hx*hy,*phi = QuadQInterp[q];
834               PetscScalar     u  =0,v=0,rbeta2=0;
835               PetscReal       beta2,dbeta2;
836               for (l=0; l<4; l++) {
837                 u     += phi[l]*n[l].u;
838                 v     += phi[l]*n[l].v;
839                 rbeta2 += phi[l]*pn[l].beta2;
840               }
841               THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
842               RangeUpdate(&beta2min,&beta2max,beta2);
843               for (l=0; l<4; l++) {
844                 const PetscReal pp = phi[l];
845                 fn[ls+l]->u += pp*jw*beta2*u;
846                 fn[ls+l]->v += pp*jw*beta2*v;
847               }
848             }
849           }
850         }
851       }
852     }
853   }
854 
855   CHKERRQ(PRangeMinMax(&thi->eta,etamin,etamax));
856   CHKERRQ(PRangeMinMax(&thi->beta2,beta2min,beta2max));
857   PetscFunctionReturn(0);
858 }
859 
860 static PetscErrorCode THIFunctionLocal_2D(DMDALocalInfo *info,const Node ***x,const PrmNode **prm,const PrmNode **prmdot,PrmNode **f,THI thi)
861 {
862   PetscInt xs,ys,xm,ym,zm,i,j,k;
863 
864   PetscFunctionBeginUser;
865   xs = info->zs;
866   ys = info->ys;
867   xm = info->zm;
868   ym = info->ym;
869   zm = info->xm;
870 
871   for (i=xs; i<xs+xm; i++) {
872     for (j=ys; j<ys+ym; j++) {
873       PetscScalar div = 0,erate,h[8];
874       PrmNodeGetFaceMeasure(prm,i,j,h);
875       for (k=0; k<zm; k++) {
876         PetscScalar weight = (k==0 || k == zm-1) ? 0.5/(zm-1) : 1.0/(zm-1);
877         if (0) {                /* centered flux */
878           div += (- weight*h[0] * StaggeredMidpoint2D(x[i][j][k].u,x[i-1][j][k].u, x[i-1][j-1][k].u,x[i][j-1][k].u)
879                   - weight*h[1] * StaggeredMidpoint2D(x[i][j][k].u,x[i-1][j][k].u, x[i-1][j+1][k].u,x[i][j+1][k].u)
880                   + weight*h[2] * StaggeredMidpoint2D(x[i][j][k].u,x[i+1][j][k].u, x[i+1][j+1][k].u,x[i][j+1][k].u)
881                   + weight*h[3] * StaggeredMidpoint2D(x[i][j][k].u,x[i+1][j][k].u, x[i+1][j-1][k].u,x[i][j-1][k].u)
882                   - weight*h[4] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j-1][k].v, x[i+1][j-1][k].v,x[i+1][j][k].v)
883                   - weight*h[5] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j-1][k].v, x[i-1][j-1][k].v,x[i-1][j][k].v)
884                   + weight*h[6] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j+1][k].v, x[i-1][j+1][k].v,x[i-1][j][k].v)
885                   + weight*h[7] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j+1][k].v, x[i+1][j+1][k].v,x[i+1][j][k].v));
886         } else {                /* Upwind flux */
887           div += weight*(-UpwindFluxXW(x,prm,h,i,j,k, 1)
888                          -UpwindFluxXW(x,prm,h,i,j,k,-1)
889                          +UpwindFluxXE(x,prm,h,i,j,k, 1)
890                          +UpwindFluxXE(x,prm,h,i,j,k,-1)
891                          -UpwindFluxYS(x,prm,h,i,j,k, 1)
892                          -UpwindFluxYS(x,prm,h,i,j,k,-1)
893                          +UpwindFluxYN(x,prm,h,i,j,k, 1)
894                          +UpwindFluxYN(x,prm,h,i,j,k,-1));
895         }
896       }
897       /* printf("div[%d][%d] %g\n",i,j,div); */
898       THIErosion(thi,&x[i][j][0],&erate,NULL);
899       f[i][j].b     = prmdot[i][j].b - erate;
900       f[i][j].h     = prmdot[i][j].h + div;
901       f[i][j].beta2 = prmdot[i][j].beta2;
902     }
903   }
904   PetscFunctionReturn(0);
905 }
906 
907 static PetscErrorCode THIFunction(TS ts,PetscReal t,Vec X,Vec Xdot,Vec F,void *ctx)
908 {
909   PetscErrorCode ierr;
910   THI            thi = (THI)ctx;
911   DM             pack,da3,da2;
912   Vec            X3,X2,Xdot3,Xdot2,F3,F2,F3g,F2g;
913   const Node     ***x3,***xdot3;
914   const PrmNode  **x2,**xdot2;
915   Node           ***f3;
916   PrmNode        **f2;
917   DMDALocalInfo  info3;
918 
919   PetscFunctionBeginUser;
920   CHKERRQ(TSGetDM(ts,&pack));
921   CHKERRQ(DMCompositeGetEntries(pack,&da3,&da2));
922   CHKERRQ(DMDAGetLocalInfo(da3,&info3));
923   CHKERRQ(DMCompositeGetLocalVectors(pack,&X3,&X2));
924   CHKERRQ(DMCompositeGetLocalVectors(pack,&Xdot3,&Xdot2));
925   CHKERRQ(DMCompositeScatter(pack,X,X3,X2));
926   CHKERRQ(THIFixGhosts(thi,da3,da2,X3,X2));
927   CHKERRQ(DMCompositeScatter(pack,Xdot,Xdot3,Xdot2));
928 
929   CHKERRQ(DMGetLocalVector(da3,&F3));
930   CHKERRQ(DMGetLocalVector(da2,&F2));
931   CHKERRQ(VecZeroEntries(F3));
932 
933   CHKERRQ(DMDAVecGetArray(da3,X3,&x3));
934   CHKERRQ(DMDAVecGetArray(da2,X2,&x2));
935   CHKERRQ(DMDAVecGetArray(da3,Xdot3,&xdot3));
936   CHKERRQ(DMDAVecGetArray(da2,Xdot2,&xdot2));
937   CHKERRQ(DMDAVecGetArray(da3,F3,&f3));
938   CHKERRQ(DMDAVecGetArray(da2,F2,&f2));
939 
940   CHKERRQ(THIFunctionLocal_3D(&info3,x3,x2,xdot3,f3,thi));
941   CHKERRQ(THIFunctionLocal_2D(&info3,x3,x2,xdot2,f2,thi));
942 
943   CHKERRQ(DMDAVecRestoreArray(da3,X3,&x3));
944   CHKERRQ(DMDAVecRestoreArray(da2,X2,&x2));
945   CHKERRQ(DMDAVecRestoreArray(da3,Xdot3,&xdot3));
946   CHKERRQ(DMDAVecRestoreArray(da2,Xdot2,&xdot2));
947   CHKERRQ(DMDAVecRestoreArray(da3,F3,&f3));
948   CHKERRQ(DMDAVecRestoreArray(da2,F2,&f2));
949 
950   CHKERRQ(DMCompositeRestoreLocalVectors(pack,&X3,&X2));
951   CHKERRQ(DMCompositeRestoreLocalVectors(pack,&Xdot3,&Xdot2));
952 
953   CHKERRQ(VecZeroEntries(F));
954   CHKERRQ(DMCompositeGetAccess(pack,F,&F3g,&F2g));
955   CHKERRQ(DMLocalToGlobalBegin(da3,F3,ADD_VALUES,F3g));
956   CHKERRQ(DMLocalToGlobalEnd  (da3,F3,ADD_VALUES,F3g));
957   CHKERRQ(DMLocalToGlobalBegin(da2,F2,INSERT_VALUES,F2g));
958   CHKERRQ(DMLocalToGlobalEnd  (da2,F2,INSERT_VALUES,F2g));
959 
960   if (thi->verbose) {
961     PetscViewer viewer;
962     CHKERRQ(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)thi),&viewer));
963     CHKERRQ(PetscViewerASCIIPrintf(viewer,"3D_Velocity residual (bs=2):\n"));
964     CHKERRQ(PetscViewerASCIIPushTab(viewer));
965     CHKERRQ(VecView(F3,viewer));
966     CHKERRQ(PetscViewerASCIIPopTab(viewer));
967     CHKERRQ(PetscViewerASCIIPrintf(viewer,"2D_Fields residual (bs=3):\n"));
968     CHKERRQ(PetscViewerASCIIPushTab(viewer));
969     CHKERRQ(VecView(F2,viewer));
970     CHKERRQ(PetscViewerASCIIPopTab(viewer));
971   }
972 
973   CHKERRQ(DMCompositeRestoreAccess(pack,F,&F3g,&F2g));
974 
975   CHKERRQ(DMRestoreLocalVector(da3,&F3));
976   CHKERRQ(DMRestoreLocalVector(da2,&F2));
977   PetscFunctionReturn(0);
978 }
979 
980 static PetscErrorCode THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer)
981 {
982   PetscErrorCode ierr;
983   PetscReal      nrm;
984   PetscInt       m;
985   PetscMPIInt    rank;
986 
987   PetscFunctionBeginUser;
988   CHKERRQ(MatNorm(B,NORM_FROBENIUS,&nrm));
989   CHKERRQ(MatGetSize(B,&m,0));
990   CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&rank));
991   if (rank == 0) {
992     PetscScalar val0,val2;
993     CHKERRQ(MatGetValue(B,0,0,&val0));
994     CHKERRQ(MatGetValue(B,2,2,&val2));
995     CHKERRQ(PetscViewerASCIIPrintf(viewer,"Matrix dim %8d  norm %8.2e, (0,0) %8.2e  (2,2) %8.2e, eta [%8.2e,%8.2e] beta2 [%8.2e,%8.2e]\n",m,nrm,PetscRealPart(val0),PetscRealPart(val2),thi->eta.cmin,thi->eta.cmax,thi->beta2.cmin,thi->beta2.cmax));
996   }
997   PetscFunctionReturn(0);
998 }
999 
1000 static PetscErrorCode THISurfaceStatistics(DM pack,Vec X,PetscReal *min,PetscReal *max,PetscReal *mean)
1001 {
1002   PetscErrorCode ierr;
1003   DM             da3,da2;
1004   Vec            X3,X2;
1005   Node           ***x;
1006   PetscInt       i,j,xs,ys,zs,xm,ym,zm,mx,my,mz;
1007   PetscReal      umin = 1e100,umax=-1e100;
1008   PetscScalar    usum =0.0,gusum;
1009 
1010   PetscFunctionBeginUser;
1011   CHKERRQ(DMCompositeGetEntries(pack,&da3,&da2));
1012   CHKERRQ(DMCompositeGetAccess(pack,X,&X3,&X2));
1013   *min = *max = *mean = 0;
1014   CHKERRQ(DMDAGetInfo(da3,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0));
1015   CHKERRQ(DMDAGetCorners(da3,&zs,&ys,&xs,&zm,&ym,&xm));
1016   PetscCheck(zs == 0 && zm == mz,PETSC_COMM_WORLD,PETSC_ERR_PLIB,"Unexpected decomposition");
1017   CHKERRQ(DMDAVecGetArray(da3,X3,&x));
1018   for (i=xs; i<xs+xm; i++) {
1019     for (j=ys; j<ys+ym; j++) {
1020       PetscReal u = PetscRealPart(x[i][j][zm-1].u);
1021       RangeUpdate(&umin,&umax,u);
1022       usum += u;
1023     }
1024   }
1025   CHKERRQ(DMDAVecRestoreArray(da3,X3,&x));
1026   CHKERRQ(DMCompositeRestoreAccess(pack,X,&X3,&X2));
1027 
1028   CHKERRMPI(MPI_Allreduce(&umin,min,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)da3)));
1029   CHKERRMPI(MPI_Allreduce(&umax,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)da3)));
1030   CHKERRMPI(MPI_Allreduce(&usum,&gusum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)da3)));
1031   *mean = PetscRealPart(gusum) / (mx*my);
1032   PetscFunctionReturn(0);
1033 }
1034 
1035 static PetscErrorCode THISolveStatistics(THI thi,TS ts,PetscInt coarsened,const char name[])
1036 {
1037   MPI_Comm       comm;
1038   DM             pack;
1039   Vec            X,X3,X2;
1040   PetscErrorCode ierr;
1041 
1042   PetscFunctionBeginUser;
1043   CHKERRQ(PetscObjectGetComm((PetscObject)thi,&comm));
1044   CHKERRQ(TSGetDM(ts,&pack));
1045   CHKERRQ(TSGetSolution(ts,&X));
1046   CHKERRQ(DMCompositeGetAccess(pack,X,&X3,&X2));
1047   CHKERRQ(PetscPrintf(comm,"Solution statistics after solve: %s\n",name));
1048   {
1049     PetscInt            its,lits;
1050     SNESConvergedReason reason;
1051     SNES                snes;
1052     CHKERRQ(TSGetSNES(ts,&snes));
1053     CHKERRQ(SNESGetIterationNumber(snes,&its));
1054     CHKERRQ(SNESGetConvergedReason(snes,&reason));
1055     CHKERRQ(SNESGetLinearSolveIterations(snes,&lits));
1056     CHKERRQ(PetscPrintf(comm,"%s: Number of SNES iterations = %d, total linear iterations = %d\n",SNESConvergedReasons[reason],its,lits));
1057   }
1058   {
1059     PetscReal   nrm2,tmin[3]={1e100,1e100,1e100},tmax[3]={-1e100,-1e100,-1e100},min[3],max[3];
1060     PetscInt    i,j,m;
1061     PetscScalar *x;
1062     CHKERRQ(VecNorm(X3,NORM_2,&nrm2));
1063     CHKERRQ(VecGetLocalSize(X3,&m));
1064     CHKERRQ(VecGetArray(X3,&x));
1065     for (i=0; i<m; i+=2) {
1066       PetscReal u = PetscRealPart(x[i]),v = PetscRealPart(x[i+1]),c = PetscSqrtReal(u*u+v*v);
1067       tmin[0] = PetscMin(u,tmin[0]);
1068       tmin[1] = PetscMin(v,tmin[1]);
1069       tmin[2] = PetscMin(c,tmin[2]);
1070       tmax[0] = PetscMax(u,tmax[0]);
1071       tmax[1] = PetscMax(v,tmax[1]);
1072       tmax[2] = PetscMax(c,tmax[2]);
1073     }
1074     CHKERRQ(VecRestoreArray(X,&x));
1075     CHKERRMPI(MPI_Allreduce(tmin,min,3,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)thi)));
1076     CHKERRMPI(MPI_Allreduce(tmax,max,3,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)thi)));
1077     /* Dimensionalize to meters/year */
1078     nrm2 *= thi->units->year / thi->units->meter;
1079     for (j=0; j<3; j++) {
1080       min[j] *= thi->units->year / thi->units->meter;
1081       max[j] *= thi->units->year / thi->units->meter;
1082     }
1083     CHKERRQ(PetscPrintf(comm,"|X|_2 %g   u in [%g, %g]   v in [%g, %g]   c in [%g, %g] \n",nrm2,min[0],max[0],min[1],max[1],min[2],max[2]));
1084     {
1085       PetscReal umin,umax,umean;
1086       CHKERRQ(THISurfaceStatistics(pack,X,&umin,&umax,&umean));
1087       umin  *= thi->units->year / thi->units->meter;
1088       umax  *= thi->units->year / thi->units->meter;
1089       umean *= thi->units->year / thi->units->meter;
1090       CHKERRQ(PetscPrintf(comm,"Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n",umin,umax,umean));
1091     }
1092     /* These values stay nondimensional */
1093     CHKERRQ(PetscPrintf(comm,"Global eta range   [%g, %g], converged range [%g, %g]\n",thi->eta.min,thi->eta.max,thi->eta.cmin,thi->eta.cmax));
1094     CHKERRQ(PetscPrintf(comm,"Global beta2 range [%g, %g], converged range [%g, %g]\n",thi->beta2.min,thi->beta2.max,thi->beta2.cmin,thi->beta2.cmax));
1095   }
1096   CHKERRQ(PetscPrintf(comm,"\n"));
1097   CHKERRQ(DMCompositeRestoreAccess(pack,X,&X3,&X2));
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 static inline PetscInt DMDALocalIndex3D(DMDALocalInfo *info,PetscInt i,PetscInt j,PetscInt k)
1102 {return ((i-info->gzs)*info->gym + (j-info->gys))*info->gxm + (k-info->gxs);}
1103 static inline PetscInt DMDALocalIndex2D(DMDALocalInfo *info,PetscInt i,PetscInt j)
1104 {return (i-info->gzs)*info->gym + (j-info->gys);}
1105 
1106 static PetscErrorCode THIJacobianLocal_Momentum(DMDALocalInfo *info,const Node ***x,const PrmNode **prm,Mat B,Mat Bcpl,THI thi)
1107 {
1108   PetscInt       xs,ys,xm,ym,zm,i,j,k,q,l,ll;
1109   PetscReal      hx,hy;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBeginUser;
1113   xs = info->zs;
1114   ys = info->ys;
1115   xm = info->zm;
1116   ym = info->ym;
1117   zm = info->xm;
1118   hx = thi->Lx / info->mz;
1119   hy = thi->Ly / info->my;
1120 
1121   for (i=xs; i<xs+xm; i++) {
1122     for (j=ys; j<ys+ym; j++) {
1123       PrmNode pn[4],dpn[4][2];
1124       QuadExtract(prm,i,j,pn);
1125       CHKERRQ(QuadComputeGrad4(QuadQDeriv,hx,hy,pn,dpn));
1126       for (k=0; k<zm-1; k++) {
1127         Node        n[8];
1128         PetscReal   zn[8],etabase = 0;
1129         PetscScalar Ke[8*NODE_SIZE][8*NODE_SIZE],Kcpl[8*NODE_SIZE][4*PRMNODE_SIZE];
1130         PetscInt    ls = 0;
1131 
1132         PrmHexGetZ(pn,k,zm,zn);
1133         HexExtract(x,i,j,k,n);
1134         CHKERRQ(PetscMemzero(Ke,sizeof(Ke)));
1135         CHKERRQ(PetscMemzero(Kcpl,sizeof(Kcpl)));
1136         if (thi->no_slip && k == 0) {
1137           for (l=0; l<4; l++) n[l].u = n[l].v = 0;
1138           ls = 4;
1139         }
1140         for (q=0; q<8; q++) {
1141           PetscReal   dz[3],phi[8],dphi[8][3],jw,eta,deta;
1142           PetscScalar du[3],dv[3],u,v;
1143           HexGrad(HexQDeriv[q],zn,dz);
1144           HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
1145           PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
1146           jw /= thi->rhog;      /* residuals are scaled by this factor */
1147           if (q == 0) etabase = eta;
1148           for (l=ls; l<8; l++) { /* test functions */
1149             const PetscReal pp=phi[l],*restrict dp = dphi[l];
1150             for (ll=ls; ll<8; ll++) { /* trial functions */
1151               const PetscReal *restrict dpl = dphi[ll];
1152               PetscScalar dgdu,dgdv;
1153               dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1] + 0.5*du[2]*dpl[2];
1154               dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0] + 0.5*dv[2]*dpl[2];
1155               /* Picard part */
1156               Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + dp[2]*jw*eta*dpl[2];
1157               Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
1158               Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
1159               Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + dp[2]*jw*eta*dpl[2];
1160               /* extra Newton terms */
1161               Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*du[2];
1162               Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*du[2];
1163               Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*dv[2];
1164               Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*dv[2];
1165               /* inertial part */
1166               Ke[l*2+0][ll*2+0] += pp*jw*thi->inertia*pp;
1167               Ke[l*2+1][ll*2+1] += pp*jw*thi->inertia*pp;
1168             }
1169             for (ll=0; ll<4; ll++) { /* Trial functions for surface/bed */
1170               const PetscReal dpl[] = {QuadQDeriv[q%4][ll][0]/hx, QuadQDeriv[q%4][ll][1]/hy}; /* surface = h + b */
1171               Kcpl[FieldIndex(Node,l,u)][FieldIndex(PrmNode,ll,h)] += pp*jw*thi->rhog*dpl[0];
1172               Kcpl[FieldIndex(Node,l,u)][FieldIndex(PrmNode,ll,b)] += pp*jw*thi->rhog*dpl[0];
1173               Kcpl[FieldIndex(Node,l,v)][FieldIndex(PrmNode,ll,h)] += pp*jw*thi->rhog*dpl[1];
1174               Kcpl[FieldIndex(Node,l,v)][FieldIndex(PrmNode,ll,b)] += pp*jw*thi->rhog*dpl[1];
1175             }
1176           }
1177         }
1178         if (k == 0) { /* on a bottom face */
1179           if (thi->no_slip) {
1180             const PetscReal   hz    = PetscRealPart(pn[0].h)/(zm-1);
1181             const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
1182             Ke[0][0] = thi->dirichlet_scale*diagu;
1183             Ke[0][1] = 0;
1184             Ke[1][0] = 0;
1185             Ke[1][1] = thi->dirichlet_scale*diagv;
1186           } else {
1187             for (q=0; q<4; q++) { /* We remove the explicit scaling by 1/rhog because beta2 already has that scaling to be O(1) */
1188               const PetscReal jw = 0.25*hx*hy,*phi = QuadQInterp[q];
1189               PetscScalar     u  =0,v=0,rbeta2=0;
1190               PetscReal       beta2,dbeta2;
1191               for (l=0; l<4; l++) {
1192                 u      += phi[l]*n[l].u;
1193                 v      += phi[l]*n[l].v;
1194                 rbeta2 += phi[l]*pn[l].beta2;
1195               }
1196               THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
1197               for (l=0; l<4; l++) {
1198                 const PetscReal pp = phi[l];
1199                 for (ll=0; ll<4; ll++) {
1200                   const PetscReal ppl = phi[ll];
1201                   Ke[l*2+0][ll*2+0] += pp*jw*beta2*ppl + pp*jw*dbeta2*u*u*ppl;
1202                   Ke[l*2+0][ll*2+1] +=                   pp*jw*dbeta2*u*v*ppl;
1203                   Ke[l*2+1][ll*2+0] +=                   pp*jw*dbeta2*v*u*ppl;
1204                   Ke[l*2+1][ll*2+1] += pp*jw*beta2*ppl + pp*jw*dbeta2*v*v*ppl;
1205                 }
1206               }
1207             }
1208           }
1209         }
1210         {
1211           const PetscInt rc3blocked[8] = {
1212             DMDALocalIndex3D(info,i+0,j+0,k+0),
1213             DMDALocalIndex3D(info,i+1,j+0,k+0),
1214             DMDALocalIndex3D(info,i+1,j+1,k+0),
1215             DMDALocalIndex3D(info,i+0,j+1,k+0),
1216             DMDALocalIndex3D(info,i+0,j+0,k+1),
1217             DMDALocalIndex3D(info,i+1,j+0,k+1),
1218             DMDALocalIndex3D(info,i+1,j+1,k+1),
1219             DMDALocalIndex3D(info,i+0,j+1,k+1)
1220           },col2blocked[PRMNODE_SIZE*4] = {
1221             DMDALocalIndex2D(info,i+0,j+0),
1222             DMDALocalIndex2D(info,i+1,j+0),
1223             DMDALocalIndex2D(info,i+1,j+1),
1224             DMDALocalIndex2D(info,i+0,j+1)
1225           };
1226 #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1227           for (l=0; l<8; l++) {
1228             for (ll=l+1; ll<8; ll++) {
1229               Ke[ll*2+0][l*2+0] = Ke[l*2+0][ll*2+0];
1230               Ke[ll*2+1][l*2+0] = Ke[l*2+0][ll*2+1];
1231               Ke[ll*2+0][l*2+1] = Ke[l*2+1][ll*2+0];
1232               Ke[ll*2+1][l*2+1] = Ke[l*2+1][ll*2+1];
1233             }
1234           }
1235 #endif
1236           CHKERRQ(MatSetValuesBlockedLocal(B,8,rc3blocked,8,rc3blocked,&Ke[0][0],ADD_VALUES)); /* velocity-velocity coupling can use blocked insertion */
1237           {                     /* The off-diagonal part cannot (yet) */
1238             PetscInt row3scalar[NODE_SIZE*8],col2scalar[PRMNODE_SIZE*4];
1239             for (l=0; l<8; l++) for (ll=0; ll<NODE_SIZE; ll++) row3scalar[l*NODE_SIZE+ll] = rc3blocked[l]*NODE_SIZE+ll;
1240             for (l=0; l<4; l++) for (ll=0; ll<PRMNODE_SIZE; ll++) col2scalar[l*PRMNODE_SIZE+ll] = col2blocked[l]*PRMNODE_SIZE+ll;
1241             CHKERRQ(MatSetValuesLocal(Bcpl,8*NODE_SIZE,row3scalar,4*PRMNODE_SIZE,col2scalar,&Kcpl[0][0],ADD_VALUES));
1242           }
1243         }
1244       }
1245     }
1246   }
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info,const Node ***x3,const PrmNode **x2,const PrmNode **xdot2,PetscReal a,Mat B22,Mat B21,THI thi)
1251 {
1252   PetscErrorCode ierr;
1253   PetscInt       xs,ys,xm,ym,zm,i,j,k;
1254 
1255   PetscFunctionBeginUser;
1256   xs = info->zs;
1257   ys = info->ys;
1258   xm = info->zm;
1259   ym = info->ym;
1260   zm = info->xm;
1261 
1262   PetscCheck(zm <= 1024,((PetscObject)info->da)->comm,PETSC_ERR_SUP,"Need to allocate more space");
1263   for (i=xs; i<xs+xm; i++) {
1264     for (j=ys; j<ys+ym; j++) {
1265       {                         /* Self-coupling */
1266         const PetscInt    row[]  = {DMDALocalIndex2D(info,i,j)};
1267         const PetscInt    col[]  = {DMDALocalIndex2D(info,i,j)};
1268         const PetscScalar vals[] = {
1269           a,0,0,
1270           0,a,0,
1271           0,0,a
1272         };
1273         CHKERRQ(MatSetValuesBlockedLocal(B22,1,row,1,col,vals,INSERT_VALUES));
1274       }
1275       for (k=0; k<zm; k++) {    /* Coupling to velocity problem */
1276         /* Use a cheaper quadrature than for residual evaluation, because it is much sparser */
1277         const PetscInt row[]  = {FieldIndex(PrmNode,DMDALocalIndex2D(info,i,j),h)};
1278         const PetscInt cols[] = {
1279           FieldIndex(Node,DMDALocalIndex3D(info,i-1,j,k),u),
1280           FieldIndex(Node,DMDALocalIndex3D(info,i  ,j,k),u),
1281           FieldIndex(Node,DMDALocalIndex3D(info,i+1,j,k),u),
1282           FieldIndex(Node,DMDALocalIndex3D(info,i,j-1,k),v),
1283           FieldIndex(Node,DMDALocalIndex3D(info,i,j  ,k),v),
1284           FieldIndex(Node,DMDALocalIndex3D(info,i,j+1,k),v)
1285         };
1286         const PetscScalar
1287           w  = (k && k<zm-1) ? 0.5 : 0.25,
1288           hW = w*(x2[i-1][j  ].h+x2[i  ][j  ].h)/(zm-1.),
1289           hE = w*(x2[i  ][j  ].h+x2[i+1][j  ].h)/(zm-1.),
1290           hS = w*(x2[i  ][j-1].h+x2[i  ][j  ].h)/(zm-1.),
1291           hN = w*(x2[i  ][j  ].h+x2[i  ][j+1].h)/(zm-1.);
1292         PetscScalar *vals,
1293                      vals_upwind[] = {((PetscRealPart(x3[i][j][k].u) > 0) ? -hW : 0),
1294                                       ((PetscRealPart(x3[i][j][k].u) > 0) ? +hE : -hW),
1295                                       ((PetscRealPart(x3[i][j][k].u) > 0) ?  0  : +hE),
1296                                       ((PetscRealPart(x3[i][j][k].v) > 0) ? -hS : 0),
1297                                       ((PetscRealPart(x3[i][j][k].v) > 0) ? +hN : -hS),
1298                                       ((PetscRealPart(x3[i][j][k].v) > 0) ?  0  : +hN)},
1299                      vals_centered[] = {-0.5*hW, 0.5*(-hW+hE), 0.5*hE,
1300                                         -0.5*hS, 0.5*(-hS+hN), 0.5*hN};
1301         vals = 1 ? vals_upwind : vals_centered;
1302         if (k == 0) {
1303           Node derate;
1304           THIErosion(thi,&x3[i][j][0],NULL,&derate);
1305           vals[1] -= derate.u;
1306           vals[4] -= derate.v;
1307         }
1308         CHKERRQ(MatSetValuesLocal(B21,1,row,6,cols,vals,INSERT_VALUES));
1309       }
1310     }
1311   }
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 static PetscErrorCode THIJacobian(TS ts,PetscReal t,Vec X,Vec Xdot,PetscReal a,Mat A,Mat B,void *ctx)
1316 {
1317   PetscErrorCode ierr;
1318   THI            thi = (THI)ctx;
1319   DM             pack,da3,da2;
1320   Vec            X3,X2,Xdot2;
1321   Mat            B11,B12,B21,B22;
1322   DMDALocalInfo  info3;
1323   IS             *isloc;
1324   const Node     ***x3;
1325   const PrmNode  **x2,**xdot2;
1326 
1327   PetscFunctionBeginUser;
1328   CHKERRQ(TSGetDM(ts,&pack));
1329   CHKERRQ(DMCompositeGetEntries(pack,&da3,&da2));
1330   CHKERRQ(DMDAGetLocalInfo(da3,&info3));
1331   CHKERRQ(DMCompositeGetLocalVectors(pack,&X3,&X2));
1332   CHKERRQ(DMCompositeGetLocalVectors(pack,NULL,&Xdot2));
1333   CHKERRQ(DMCompositeScatter(pack,X,X3,X2));
1334   CHKERRQ(THIFixGhosts(thi,da3,da2,X3,X2));
1335   CHKERRQ(DMCompositeScatter(pack,Xdot,NULL,Xdot2));
1336 
1337   CHKERRQ(MatZeroEntries(B));
1338 
1339   CHKERRQ(DMCompositeGetLocalISs(pack,&isloc));
1340   CHKERRQ(MatGetLocalSubMatrix(B,isloc[0],isloc[0],&B11));
1341   CHKERRQ(MatGetLocalSubMatrix(B,isloc[0],isloc[1],&B12));
1342   CHKERRQ(MatGetLocalSubMatrix(B,isloc[1],isloc[0],&B21));
1343   CHKERRQ(MatGetLocalSubMatrix(B,isloc[1],isloc[1],&B22));
1344 
1345   CHKERRQ(DMDAVecGetArray(da3,X3,&x3));
1346   CHKERRQ(DMDAVecGetArray(da2,X2,&x2));
1347   CHKERRQ(DMDAVecGetArray(da2,Xdot2,&xdot2));
1348 
1349   CHKERRQ(THIJacobianLocal_Momentum(&info3,x3,x2,B11,B12,thi));
1350 
1351   /* Need to switch from ADD_VALUES to INSERT_VALUES */
1352   CHKERRQ(MatAssemblyBegin(B,MAT_FLUSH_ASSEMBLY));
1353   CHKERRQ(MatAssemblyEnd(B,MAT_FLUSH_ASSEMBLY));
1354 
1355   CHKERRQ(THIJacobianLocal_2D(&info3,x3,x2,xdot2,a,B22,B21,thi));
1356 
1357   CHKERRQ(DMDAVecRestoreArray(da3,X3,&x3));
1358   CHKERRQ(DMDAVecRestoreArray(da2,X2,&x2));
1359   CHKERRQ(DMDAVecRestoreArray(da2,Xdot2,&xdot2));
1360 
1361   CHKERRQ(MatRestoreLocalSubMatrix(B,isloc[0],isloc[0],&B11));
1362   CHKERRQ(MatRestoreLocalSubMatrix(B,isloc[0],isloc[1],&B12));
1363   CHKERRQ(MatRestoreLocalSubMatrix(B,isloc[1],isloc[0],&B21));
1364   CHKERRQ(MatRestoreLocalSubMatrix(B,isloc[1],isloc[1],&B22));
1365   CHKERRQ(ISDestroy(&isloc[0]));
1366   CHKERRQ(ISDestroy(&isloc[1]));
1367   CHKERRQ(PetscFree(isloc));
1368 
1369   CHKERRQ(DMCompositeRestoreLocalVectors(pack,&X3,&X2));
1370   CHKERRQ(DMCompositeRestoreLocalVectors(pack,0,&Xdot2));
1371 
1372   CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
1373   CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
1374   if (A != B) {
1375     CHKERRQ(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
1376     CHKERRQ(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
1377   }
1378   if (thi->verbose) CHKERRQ(THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD));
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 /* VTK's XML formats are so brain-dead that they can't handle multiple grids in the same file.  Since the communication
1383  * can be shared between the two grids, we write two files at once, one for velocity (living on a 3D grid defined by
1384  * h=thickness and b=bed) and another for all properties living on the 2D grid.
1385  */
1386 static PetscErrorCode THIDAVecView_VTK_XML(THI thi,DM pack,Vec X,const char filename[],const char filename2[])
1387 {
1388   const PetscInt dof   = NODE_SIZE,dof2 = PRMNODE_SIZE;
1389   Units          units = thi->units;
1390   MPI_Comm       comm;
1391   PetscErrorCode ierr;
1392   PetscViewer    viewer3,viewer2;
1393   PetscMPIInt    rank,size,tag,nn,nmax,nn2,nmax2;
1394   PetscInt       mx,my,mz,r,range[6];
1395   PetscScalar    *x,*x2;
1396   DM             da3,da2;
1397   Vec            X3,X2;
1398 
1399   PetscFunctionBeginUser;
1400   CHKERRQ(PetscObjectGetComm((PetscObject)thi,&comm));
1401   CHKERRQ(DMCompositeGetEntries(pack,&da3,&da2));
1402   CHKERRQ(DMCompositeGetAccess(pack,X,&X3,&X2));
1403   CHKERRQ(DMDAGetInfo(da3,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0));
1404   CHKERRMPI(MPI_Comm_size(comm,&size));
1405   CHKERRMPI(MPI_Comm_rank(comm,&rank));
1406   CHKERRQ(PetscViewerASCIIOpen(comm,filename,&viewer3));
1407   CHKERRQ(PetscViewerASCIIOpen(comm,filename2,&viewer2));
1408   CHKERRQ(PetscViewerASCIIPrintf(viewer3,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n"));
1409   CHKERRQ(PetscViewerASCIIPrintf(viewer2,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n"));
1410   CHKERRQ(PetscViewerASCIIPrintf(viewer3,"  <StructuredGrid WholeExtent=\"%d %d %d %d %d %d\">\n",0,mz-1,0,my-1,0,mx-1));
1411   CHKERRQ(PetscViewerASCIIPrintf(viewer2,"  <StructuredGrid WholeExtent=\"%d %d %d %d %d %d\">\n",0,0,0,my-1,0,mx-1));
1412 
1413   CHKERRQ(DMDAGetCorners(da3,range,range+1,range+2,range+3,range+4,range+5));
1414   CHKERRQ(PetscMPIIntCast(range[3]*range[4]*range[5]*dof,&nn));
1415   CHKERRMPI(MPI_Reduce(&nn,&nmax,1,MPI_INT,MPI_MAX,0,comm));
1416   CHKERRQ(PetscMPIIntCast(range[4]*range[5]*dof2,&nn2));
1417   CHKERRMPI(MPI_Reduce(&nn2,&nmax2,1,MPI_INT,MPI_MAX,0,comm));
1418   tag  = ((PetscObject)viewer3)->tag;
1419   CHKERRQ(VecGetArrayRead(X3,(const PetscScalar**)&x));
1420   CHKERRQ(VecGetArrayRead(X2,(const PetscScalar**)&x2));
1421   if (rank == 0) {
1422     PetscScalar *array,*array2;
1423     CHKERRQ(PetscMalloc2(nmax,&array,nmax2,&array2));
1424     for (r=0; r<size; r++) {
1425       PetscInt    i,j,k,f,xs,xm,ys,ym,zs,zm;
1426       Node        *y3;
1427       PetscScalar (*y2)[PRMNODE_SIZE];
1428       MPI_Status status;
1429 
1430       if (r) {
1431         CHKERRMPI(MPI_Recv(range,6,MPIU_INT,r,tag,comm,MPI_STATUS_IGNORE));
1432       }
1433       zs = range[0];ys = range[1];xs = range[2];zm = range[3];ym = range[4];xm = range[5];
1434       PetscCheck(xm*ym*zm*dof <= nmax,PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen");
1435       if (r) {
1436         CHKERRMPI(MPI_Recv(array,nmax,MPIU_SCALAR,r,tag,comm,&status));
1437         CHKERRMPI(MPI_Get_count(&status,MPIU_SCALAR,&nn));
1438         PetscCheck(nn == xm*ym*zm*dof,PETSC_COMM_SELF,PETSC_ERR_PLIB,"corrupt da3 send");
1439         y3   = (Node*)array;
1440         CHKERRMPI(MPI_Recv(array2,nmax2,MPIU_SCALAR,r,tag,comm,&status));
1441         CHKERRMPI(MPI_Get_count(&status,MPIU_SCALAR,&nn2));
1442         PetscCheck(nn2 == xm*ym*dof2,PETSC_COMM_SELF,PETSC_ERR_PLIB,"corrupt da2 send");
1443         y2 = (PetscScalar(*)[PRMNODE_SIZE])array2;
1444       } else {
1445         y3 = (Node*)x;
1446         y2 = (PetscScalar(*)[PRMNODE_SIZE])x2;
1447       }
1448       CHKERRQ(PetscViewerASCIIPrintf(viewer3,"    <Piece Extent=\"%D %D %D %D %D %D\">\n",zs,zs+zm-1,ys,ys+ym-1,xs,xs+xm-1));
1449       CHKERRQ(PetscViewerASCIIPrintf(viewer2,"    <Piece Extent=\"%d %d %D %D %D %D\">\n",0,0,ys,ys+ym-1,xs,xs+xm-1));
1450 
1451       CHKERRQ(PetscViewerASCIIPrintf(viewer3,"      <Points>\n"));
1452       CHKERRQ(PetscViewerASCIIPrintf(viewer2,"      <Points>\n"));
1453       CHKERRQ(PetscViewerASCIIPrintf(viewer3,"        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1454       CHKERRQ(PetscViewerASCIIPrintf(viewer2,"        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1455       for (i=xs; i<xs+xm; i++) {
1456         for (j=ys; j<ys+ym; j++) {
1457           PetscReal
1458             xx = thi->Lx*i/mx,
1459             yy = thi->Ly*j/my,
1460             b  = PetscRealPart(y2[i*ym+j][FieldOffset(PrmNode,b)]),
1461             h  = PetscRealPart(y2[i*ym+j][FieldOffset(PrmNode,h)]);
1462           for (k=zs; k<zs+zm; k++) {
1463             PetscReal zz = b + h*k/(mz-1.);
1464             CHKERRQ(PetscViewerASCIIPrintf(viewer3,"%f %f %f\n",xx,yy,zz));
1465           }
1466           CHKERRQ(PetscViewerASCIIPrintf(viewer2,"%f %f %f\n",xx,yy,(double)0.0));
1467         }
1468       }
1469       CHKERRQ(PetscViewerASCIIPrintf(viewer3,"        </DataArray>\n"));
1470       CHKERRQ(PetscViewerASCIIPrintf(viewer2,"        </DataArray>\n"));
1471       CHKERRQ(PetscViewerASCIIPrintf(viewer3,"      </Points>\n"));
1472       CHKERRQ(PetscViewerASCIIPrintf(viewer2,"      </Points>\n"));
1473 
1474       {                         /* Velocity and rank (3D) */
1475         CHKERRQ(PetscViewerASCIIPrintf(viewer3,"      <PointData>\n"));
1476         CHKERRQ(PetscViewerASCIIPrintf(viewer3,"        <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1477         for (i=0; i<nn/dof; i++) {
1478           CHKERRQ(PetscViewerASCIIPrintf(viewer3,"%f %f %f\n",PetscRealPart(y3[i].u)*units->year/units->meter,PetscRealPart(y3[i].v)*units->year/units->meter,0.0));
1479         }
1480         CHKERRQ(PetscViewerASCIIPrintf(viewer3,"        </DataArray>\n"));
1481 
1482         CHKERRQ(PetscViewerASCIIPrintf(viewer3,"        <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n"));
1483         for (i=0; i<nn; i+=dof) {
1484           CHKERRQ(PetscViewerASCIIPrintf(viewer3,"%D\n",r));
1485         }
1486         CHKERRQ(PetscViewerASCIIPrintf(viewer3,"        </DataArray>\n"));
1487         CHKERRQ(PetscViewerASCIIPrintf(viewer3,"      </PointData>\n"));
1488       }
1489 
1490       {                         /* 2D */
1491         CHKERRQ(PetscViewerASCIIPrintf(viewer2,"      <PointData>\n"));
1492         for (f=0; f<PRMNODE_SIZE; f++) {
1493           const char *fieldname;
1494           CHKERRQ(DMDAGetFieldName(da2,f,&fieldname));
1495           CHKERRQ(PetscViewerASCIIPrintf(viewer2,"        <DataArray type=\"Float32\" Name=\"%s\" format=\"ascii\">\n",fieldname));
1496           for (i=0; i<nn2/PRMNODE_SIZE; i++) {
1497             CHKERRQ(PetscViewerASCIIPrintf(viewer2,"%g\n",y2[i][f]));
1498           }
1499           CHKERRQ(PetscViewerASCIIPrintf(viewer2,"        </DataArray>\n"));
1500         }
1501         CHKERRQ(PetscViewerASCIIPrintf(viewer2,"      </PointData>\n"));
1502       }
1503 
1504       CHKERRQ(PetscViewerASCIIPrintf(viewer3,"    </Piece>\n"));
1505       CHKERRQ(PetscViewerASCIIPrintf(viewer2,"    </Piece>\n"));
1506     }
1507     CHKERRQ(PetscFree2(array,array2));
1508   } else {
1509     CHKERRMPI(MPI_Send(range,6,MPIU_INT,0,tag,comm));
1510     CHKERRMPI(MPI_Send(x,nn,MPIU_SCALAR,0,tag,comm));
1511     CHKERRMPI(MPI_Send(x2,nn2,MPIU_SCALAR,0,tag,comm));
1512   }
1513   CHKERRQ(VecRestoreArrayRead(X3,(const PetscScalar**)&x));
1514   CHKERRQ(VecRestoreArrayRead(X2,(const PetscScalar**)&x2));
1515   CHKERRQ(PetscViewerASCIIPrintf(viewer3,"  </StructuredGrid>\n"));
1516   CHKERRQ(PetscViewerASCIIPrintf(viewer2,"  </StructuredGrid>\n"));
1517 
1518   CHKERRQ(DMCompositeRestoreAccess(pack,X,&X3,&X2));
1519   CHKERRQ(PetscViewerASCIIPrintf(viewer3,"</VTKFile>\n"));
1520   CHKERRQ(PetscViewerASCIIPrintf(viewer2,"</VTKFile>\n"));
1521   CHKERRQ(PetscViewerDestroy(&viewer3));
1522   CHKERRQ(PetscViewerDestroy(&viewer2));
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 static PetscErrorCode THITSMonitor(TS ts,PetscInt step,PetscReal t,Vec X,void *ctx)
1527 {
1528   PetscErrorCode ierr;
1529   THI            thi = (THI)ctx;
1530   DM             pack;
1531   char           filename3[PETSC_MAX_PATH_LEN],filename2[PETSC_MAX_PATH_LEN];
1532 
1533   PetscFunctionBeginUser;
1534   if (step < 0) PetscFunctionReturn(0); /* negative one is used to indicate an interpolated solution */
1535   CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)ts),"%3D: t=%g\n",step,(double)t));
1536   if (thi->monitor_interval && step % thi->monitor_interval) PetscFunctionReturn(0);
1537   CHKERRQ(TSGetDM(ts,&pack));
1538   CHKERRQ(PetscSNPrintf(filename3,sizeof(filename3),"%s-3d-%03d.vts",thi->monitor_basename,step));
1539   CHKERRQ(PetscSNPrintf(filename2,sizeof(filename2),"%s-2d-%03d.vts",thi->monitor_basename,step));
1540   CHKERRQ(THIDAVecView_VTK_XML(thi,pack,X,filename3,filename2));
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 static PetscErrorCode THICreateDM3d(THI thi,DM *dm3d)
1545 {
1546   MPI_Comm       comm;
1547   PetscInt       M    = 3,N = 3,P = 2;
1548   DM             da;
1549   PetscErrorCode ierr;
1550 
1551   PetscFunctionBeginUser;
1552   CHKERRQ(PetscObjectGetComm((PetscObject)thi,&comm));
1553   ierr = PetscOptionsBegin(comm,NULL,"Grid resolution options","");CHKERRQ(ierr);
1554   {
1555     CHKERRQ(PetscOptionsInt("-M","Number of elements in x-direction on coarse level","",M,&M,NULL));
1556     N    = M;
1557     CHKERRQ(PetscOptionsInt("-N","Number of elements in y-direction on coarse level (if different from M)","",N,&N,NULL));
1558     CHKERRQ(PetscOptionsInt("-P","Number of elements in z-direction on coarse level","",P,&P,NULL));
1559   }
1560   ierr  = PetscOptionsEnd();CHKERRQ(ierr);
1561   CHKERRQ(DMDACreate3d(comm,DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_BOX,P,N,M,1,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,0,&da));
1562   CHKERRQ(DMSetFromOptions(da));
1563   CHKERRQ(DMSetUp(da));
1564   CHKERRQ(DMDASetFieldName(da,0,"x-velocity"));
1565   CHKERRQ(DMDASetFieldName(da,1,"y-velocity"));
1566   *dm3d = da;
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 int main(int argc,char *argv[])
1571 {
1572   MPI_Comm       comm;
1573   DM             pack,da3,da2;
1574   TS             ts;
1575   THI            thi;
1576   Vec            X;
1577   Mat            B;
1578   PetscInt       i,steps;
1579   PetscReal      ftime;
1580   PetscErrorCode ierr;
1581 
1582   ierr = PetscInitialize(&argc,&argv,0,help);if (ierr) return ierr;
1583   comm = PETSC_COMM_WORLD;
1584 
1585   CHKERRQ(THICreate(comm,&thi));
1586   CHKERRQ(THICreateDM3d(thi,&da3));
1587   {
1588     PetscInt        Mx,My,mx,my,s;
1589     DMDAStencilType st;
1590     CHKERRQ(DMDAGetInfo(da3,0, 0,&My,&Mx, 0,&my,&mx, 0,&s,0,0,0,&st));
1591     CHKERRQ(DMDACreate2d(PetscObjectComm((PetscObject)thi),DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,My,Mx,my,mx,sizeof(PrmNode)/sizeof(PetscScalar),s,0,0,&da2));
1592     CHKERRQ(DMSetUp(da2));
1593   }
1594 
1595   CHKERRQ(PetscObjectSetName((PetscObject)da3,"3D_Velocity"));
1596   CHKERRQ(DMSetOptionsPrefix(da3,"f3d_"));
1597   CHKERRQ(DMDASetFieldName(da3,0,"u"));
1598   CHKERRQ(DMDASetFieldName(da3,1,"v"));
1599   CHKERRQ(PetscObjectSetName((PetscObject)da2,"2D_Fields"));
1600   CHKERRQ(DMSetOptionsPrefix(da2,"f2d_"));
1601   CHKERRQ(DMDASetFieldName(da2,0,"b"));
1602   CHKERRQ(DMDASetFieldName(da2,1,"h"));
1603   CHKERRQ(DMDASetFieldName(da2,2,"beta2"));
1604   CHKERRQ(DMCompositeCreate(comm,&pack));
1605   CHKERRQ(DMCompositeAddDM(pack,da3));
1606   CHKERRQ(DMCompositeAddDM(pack,da2));
1607   CHKERRQ(DMDestroy(&da3));
1608   CHKERRQ(DMDestroy(&da2));
1609   CHKERRQ(DMSetUp(pack));
1610   CHKERRQ(DMCreateMatrix(pack,&B));
1611   CHKERRQ(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
1612   CHKERRQ(MatSetOptionsPrefix(B,"thi_"));
1613 
1614   for (i=0; i<thi->nlevels; i++) {
1615     PetscReal Lx = thi->Lx / thi->units->meter,Ly = thi->Ly / thi->units->meter,Lz = thi->Lz / thi->units->meter;
1616     PetscInt  Mx,My,Mz;
1617     CHKERRQ(DMCompositeGetEntries(pack,&da3,&da2));
1618     CHKERRQ(DMDAGetInfo(da3,0, &Mz,&My,&Mx, 0,0,0, 0,0,0,0,0,0));
1619     CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g x %8.2g, num elements %3d x %3d x %3d (%8d), size (m) %g x %g x %g\n",i,Lx,Ly,Lz,Mx,My,Mz,Mx*My*Mz,Lx/Mx,Ly/My,1000./(Mz-1)));
1620   }
1621 
1622   CHKERRQ(DMCreateGlobalVector(pack,&X));
1623   CHKERRQ(THIInitial(thi,pack,X));
1624 
1625   CHKERRQ(TSCreate(comm,&ts));
1626   CHKERRQ(TSSetDM(ts,pack));
1627   CHKERRQ(TSSetProblemType(ts,TS_NONLINEAR));
1628   CHKERRQ(TSMonitorSet(ts,THITSMonitor,thi,NULL));
1629   CHKERRQ(TSSetType(ts,TSTHETA));
1630   CHKERRQ(TSSetIFunction(ts,NULL,THIFunction,thi));
1631   CHKERRQ(TSSetIJacobian(ts,B,B,THIJacobian,thi));
1632   CHKERRQ(TSSetMaxTime(ts,10.0));
1633   CHKERRQ(TSSetExactFinalTime(ts,TS_EXACTFINALTIME_STEPOVER));
1634   CHKERRQ(TSSetSolution(ts,X));
1635   CHKERRQ(TSSetTimeStep(ts,1e-3));
1636   CHKERRQ(TSSetFromOptions(ts));
1637 
1638   CHKERRQ(TSSolve(ts,X));
1639   CHKERRQ(TSGetSolveTime(ts,&ftime));
1640   CHKERRQ(TSGetStepNumber(ts,&steps));
1641   CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Steps %D  final time %g\n",steps,(double)ftime));
1642 
1643   if (0) CHKERRQ(THISolveStatistics(thi,ts,0,"Full"));
1644 
1645   {
1646     PetscBool flg;
1647     char      filename[PETSC_MAX_PATH_LEN] = "";
1648     CHKERRQ(PetscOptionsGetString(NULL,NULL,"-o",filename,sizeof(filename),&flg));
1649     if (flg) {
1650       CHKERRQ(THIDAVecView_VTK_XML(thi,pack,X,filename,NULL));
1651     }
1652   }
1653 
1654   CHKERRQ(VecDestroy(&X));
1655   CHKERRQ(MatDestroy(&B));
1656   CHKERRQ(DMDestroy(&pack));
1657   CHKERRQ(TSDestroy(&ts));
1658   CHKERRQ(THIDestroy(&thi));
1659   ierr = PetscFinalize();
1660   return ierr;
1661 }
1662