xref: /petsc/src/ts/tutorials/ex14.c (revision b94d7ded0a05f1bbd5e48daa6f92b28259c75b44)
1 static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
2 \n\
3 Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
4 using multigrid.  The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
5 to p=4/3 in a p-Laplacian).  The focus is on ISMIP-HOM experiments which assume periodic\n\
6 boundary conditions in the x- and y-directions.\n\
7 \n\
8 Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
9 can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
10 \n\
11 A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
12 \n\n";
13 
14 /*
15 The equations for horizontal velocity (u,v) are
16 
17   - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
18   - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0
19 
20 where
21 
22   eta = B/2 (epsilon + gamma)^((p-2)/2)
23 
24 is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
25 written in terms of the second invariant
26 
27   gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2
28 
29 The surface boundary conditions are the natural conditions.  The basal boundary conditions
30 are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.
31 
32 In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).
33 
34 The discretization is Q1 finite elements, managed by a DMDA.  The grid is never distorted in the
35 map (x,y) plane, but the bed and surface may be bumpy.  This is handled as usual in FEM, through
36 the Jacobian of the coordinate transformation from a reference element to the physical element.
37 
38 Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
39 specially so that columns are never distributed, and are always contiguous in memory.
40 This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
41 and then indexing as vec[i][j][k].  The exotic coarse spaces require 2D DMDAs which are made to
42 use compatible domain decomposition relative to the 3D DMDAs.
43 
44 */
45 
46 #include <petscts.h>
47 #include <petscdm.h>
48 #include <petscdmda.h>
49 #include <petscdmcomposite.h>
50 #include <ctype.h>              /* toupper() */
51 #include <petsc/private/petscimpl.h>
52 
53 #if defined __SSE2__
54 #  include <emmintrin.h>
55 #endif
56 
57 /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
58 #define USE_SSE2_KERNELS (!defined NO_SSE2                              \
59                           && !defined PETSC_USE_COMPLEX                 \
60                           && !defined PETSC_USE_REAL_SINGLE           \
61                           && defined __SSE2__)
62 
63 #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L
64 #  if defined __cplusplus       /* C++ restrict is nonstandard and compilers have inconsistent rules about where it can be used */
65 #    define restrict
66 #  else
67 #    define restrict PETSC_RESTRICT
68 #  endif
69 #endif
70 
71 static PetscClassId THI_CLASSID;
72 
73 typedef enum {QUAD_GAUSS,QUAD_LOBATTO} QuadratureType;
74 static const char *QuadratureTypes[] = {"gauss","lobatto","QuadratureType","QUAD_",0};
75 static const PetscReal HexQWeights[8] = {1,1,1,1,1,1,1,1};
76 static const PetscReal HexQNodes[]    = {-0.57735026918962573, 0.57735026918962573};
77 #define G 0.57735026918962573
78 #define H (0.5*(1.+G))
79 #define L (0.5*(1.-G))
80 #define M (-0.5)
81 #define P (0.5)
82 /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
83 static const PetscReal HexQInterp_Lobatto[8][8] = {{H,0,0,0,L,0,0,0},
84                                                    {0,H,0,0,0,L,0,0},
85                                                    {0,0,H,0,0,0,L,0},
86                                                    {0,0,0,H,0,0,0,L},
87                                                    {L,0,0,0,H,0,0,0},
88                                                    {0,L,0,0,0,H,0,0},
89                                                    {0,0,L,0,0,0,H,0},
90                                                    {0,0,0,L,0,0,0,H}};
91 static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
92   {{M*H,M*H,M},{P*H,0,0}  ,{0,0,0}    ,{0,P*H,0}  ,{M*L,M*L,P},{P*L,0,0}  ,{0,0,0}    ,{0,P*L,0}  },
93   {{M*H,0,0}  ,{P*H,M*H,M},{0,P*H,0}  ,{0,0,0}    ,{M*L,0,0}  ,{P*L,M*L,P},{0,P*L,0}  ,{0,0,0}    },
94   {{0,0,0}    ,{0,M*H,0}  ,{P*H,P*H,M},{M*H,0,0}  ,{0,0,0}    ,{0,M*L,0}  ,{P*L,P*L,P},{M*L,0,0}  },
95   {{0,M*H,0}  ,{0,0,0}    ,{P*H,0,0}  ,{M*H,P*H,M},{0,M*L,0}  ,{0,0,0}    ,{P*L,0,0}  ,{M*L,P*L,P}},
96   {{M*L,M*L,M},{P*L,0,0}  ,{0,0,0}    ,{0,P*L,0}  ,{M*H,M*H,P},{P*H,0,0}  ,{0,0,0}    ,{0,P*H,0}  },
97   {{M*L,0,0}  ,{P*L,M*L,M},{0,P*L,0}  ,{0,0,0}    ,{M*H,0,0}  ,{P*H,M*H,P},{0,P*H,0}  ,{0,0,0}    },
98   {{0,0,0}    ,{0,M*L,0}  ,{P*L,P*L,M},{M*L,0,0}  ,{0,0,0}    ,{0,M*H,0}  ,{P*H,P*H,P},{M*H,0,0}  },
99   {{0,M*L,0}  ,{0,0,0}    ,{P*L,0,0}  ,{M*L,P*L,M},{0,M*H,0}  ,{0,0,0}    ,{P*H,0,0}  ,{M*H,P*H,P}}};
100 /* Stanndard Gauss */
101 static const PetscReal HexQInterp_Gauss[8][8] = {{H*H*H,L*H*H,L*L*H,H*L*H, H*H*L,L*H*L,L*L*L,H*L*L},
102                                                  {L*H*H,H*H*H,H*L*H,L*L*H, L*H*L,H*H*L,H*L*L,L*L*L},
103                                                  {L*L*H,H*L*H,H*H*H,L*H*H, L*L*L,H*L*L,H*H*L,L*H*L},
104                                                  {H*L*H,L*L*H,L*H*H,H*H*H, H*L*L,L*L*L,L*H*L,H*H*L},
105                                                  {H*H*L,L*H*L,L*L*L,H*L*L, H*H*H,L*H*H,L*L*H,H*L*H},
106                                                  {L*H*L,H*H*L,H*L*L,L*L*L, L*H*H,H*H*H,H*L*H,L*L*H},
107                                                  {L*L*L,H*L*L,H*H*L,L*H*L, L*L*H,H*L*H,H*H*H,L*H*H},
108                                                  {H*L*L,L*L*L,L*H*L,H*H*L, H*L*H,L*L*H,L*H*H,H*H*H}};
109 static const PetscReal HexQDeriv_Gauss[8][8][3] = {
110   {{M*H*H,H*M*H,H*H*M},{P*H*H,L*M*H,L*H*M},{P*L*H,L*P*H,L*L*M},{M*L*H,H*P*H,H*L*M}, {M*H*L,H*M*L,H*H*P},{P*H*L,L*M*L,L*H*P},{P*L*L,L*P*L,L*L*P},{M*L*L,H*P*L,H*L*P}},
111   {{M*H*H,L*M*H,L*H*M},{P*H*H,H*M*H,H*H*M},{P*L*H,H*P*H,H*L*M},{M*L*H,L*P*H,L*L*M}, {M*H*L,L*M*L,L*H*P},{P*H*L,H*M*L,H*H*P},{P*L*L,H*P*L,H*L*P},{M*L*L,L*P*L,L*L*P}},
112   {{M*L*H,L*M*H,L*L*M},{P*L*H,H*M*H,H*L*M},{P*H*H,H*P*H,H*H*M},{M*H*H,L*P*H,L*H*M}, {M*L*L,L*M*L,L*L*P},{P*L*L,H*M*L,H*L*P},{P*H*L,H*P*L,H*H*P},{M*H*L,L*P*L,L*H*P}},
113   {{M*L*H,H*M*H,H*L*M},{P*L*H,L*M*H,L*L*M},{P*H*H,L*P*H,L*H*M},{M*H*H,H*P*H,H*H*M}, {M*L*L,H*M*L,H*L*P},{P*L*L,L*M*L,L*L*P},{P*H*L,L*P*L,L*H*P},{M*H*L,H*P*L,H*H*P}},
114   {{M*H*L,H*M*L,H*H*M},{P*H*L,L*M*L,L*H*M},{P*L*L,L*P*L,L*L*M},{M*L*L,H*P*L,H*L*M}, {M*H*H,H*M*H,H*H*P},{P*H*H,L*M*H,L*H*P},{P*L*H,L*P*H,L*L*P},{M*L*H,H*P*H,H*L*P}},
115   {{M*H*L,L*M*L,L*H*M},{P*H*L,H*M*L,H*H*M},{P*L*L,H*P*L,H*L*M},{M*L*L,L*P*L,L*L*M}, {M*H*H,L*M*H,L*H*P},{P*H*H,H*M*H,H*H*P},{P*L*H,H*P*H,H*L*P},{M*L*H,L*P*H,L*L*P}},
116   {{M*L*L,L*M*L,L*L*M},{P*L*L,H*M*L,H*L*M},{P*H*L,H*P*L,H*H*M},{M*H*L,L*P*L,L*H*M}, {M*L*H,L*M*H,L*L*P},{P*L*H,H*M*H,H*L*P},{P*H*H,H*P*H,H*H*P},{M*H*H,L*P*H,L*H*P}},
117   {{M*L*L,H*M*L,H*L*M},{P*L*L,L*M*L,L*L*M},{P*H*L,L*P*L,L*H*M},{M*H*L,H*P*L,H*H*M}, {M*L*H,H*M*H,H*L*P},{P*L*H,L*M*H,L*L*P},{P*H*H,L*P*H,L*H*P},{M*H*H,H*P*H,H*H*P}}};
118 static const PetscReal (*HexQInterp)[8],(*HexQDeriv)[8][3];
119 /* Standard 2x2 Gauss quadrature for the bottom layer. */
120 static const PetscReal QuadQInterp[4][4] = {{H*H,L*H,L*L,H*L},
121                                             {L*H,H*H,H*L,L*L},
122                                             {L*L,H*L,H*H,L*H},
123                                             {H*L,L*L,L*H,H*H}};
124 static const PetscReal QuadQDeriv[4][4][2] = {
125   {{M*H,M*H},{P*H,M*L},{P*L,P*L},{M*L,P*H}},
126   {{M*H,M*L},{P*H,M*H},{P*L,P*H},{M*L,P*L}},
127   {{M*L,M*L},{P*L,M*H},{P*H,P*H},{M*H,P*L}},
128   {{M*L,M*H},{P*L,M*L},{P*H,P*L},{M*H,P*H}}};
129 #undef G
130 #undef H
131 #undef L
132 #undef M
133 #undef P
134 
135 #define HexExtract(x,i,j,k,n) do {              \
136     (n)[0] = (x)[i][j][k];                      \
137     (n)[1] = (x)[i+1][j][k];                    \
138     (n)[2] = (x)[i+1][j+1][k];                  \
139     (n)[3] = (x)[i][j+1][k];                    \
140     (n)[4] = (x)[i][j][k+1];                    \
141     (n)[5] = (x)[i+1][j][k+1];                  \
142     (n)[6] = (x)[i+1][j+1][k+1];                \
143     (n)[7] = (x)[i][j+1][k+1];                  \
144   } while (0)
145 
146 #define HexExtractRef(x,i,j,k,n) do {           \
147     (n)[0] = &(x)[i][j][k];                     \
148     (n)[1] = &(x)[i+1][j][k];                   \
149     (n)[2] = &(x)[i+1][j+1][k];                 \
150     (n)[3] = &(x)[i][j+1][k];                   \
151     (n)[4] = &(x)[i][j][k+1];                   \
152     (n)[5] = &(x)[i+1][j][k+1];                 \
153     (n)[6] = &(x)[i+1][j+1][k+1];               \
154     (n)[7] = &(x)[i][j+1][k+1];                 \
155   } while (0)
156 
157 #define QuadExtract(x,i,j,n) do {               \
158     (n)[0] = (x)[i][j];                         \
159     (n)[1] = (x)[i+1][j];                       \
160     (n)[2] = (x)[i+1][j+1];                     \
161     (n)[3] = (x)[i][j+1];                       \
162   } while (0)
163 
164 static PetscScalar Sqr(PetscScalar a) {return a*a;}
165 
166 static void HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[])
167 {
168   PetscInt i;
169   dz[0] = dz[1] = dz[2] = 0;
170   for (i=0; i<8; i++) {
171     dz[0] += dphi[i][0] * zn[i];
172     dz[1] += dphi[i][1] * zn[i];
173     dz[2] += dphi[i][2] * zn[i];
174   }
175 }
176 
177 static void HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[restrict],PetscReal phi[restrict],PetscReal dphi[restrict][3],PetscReal *restrict jw)
178 {
179   const PetscReal
180     jac[3][3] = {{hx/2,0,0}, {0,hy/2,0}, {dz[0],dz[1],dz[2]}}
181   ,ijac[3][3] = {{1/jac[0][0],0,0}, {0,1/jac[1][1],0}, {-jac[2][0]/(jac[0][0]*jac[2][2]),-jac[2][1]/(jac[1][1]*jac[2][2]),1/jac[2][2]}}
182   ,jdet = jac[0][0]*jac[1][1]*jac[2][2];
183   PetscInt i;
184 
185   for (i=0; i<8; i++) {
186     const PetscReal *dphir = HexQDeriv[q][i];
187     phi[i] = HexQInterp[q][i];
188     dphi[i][0] = dphir[0]*ijac[0][0] + dphir[1]*ijac[1][0] + dphir[2]*ijac[2][0];
189     dphi[i][1] = dphir[0]*ijac[0][1] + dphir[1]*ijac[1][1] + dphir[2]*ijac[2][1];
190     dphi[i][2] = dphir[0]*ijac[0][2] + dphir[1]*ijac[1][2] + dphir[2]*ijac[2][2];
191   }
192   *jw = 1.0 * jdet;
193 }
194 
195 typedef struct _p_THI   *THI;
196 typedef struct _n_Units *Units;
197 
198 typedef struct {
199   PetscScalar u,v;
200 } Node;
201 
202 typedef struct {
203   PetscScalar b;                /* bed */
204   PetscScalar h;                /* thickness */
205   PetscScalar beta2;            /* friction */
206 } PrmNode;
207 
208 #define FieldSize(ntype) ((PetscInt)(sizeof(ntype)/sizeof(PetscScalar)))
209 #define FieldOffset(ntype,member) ((PetscInt)(offsetof(ntype,member)/sizeof(PetscScalar)))
210 #define FieldIndex(ntype,i,member) ((PetscInt)((i)*FieldSize(ntype) + FieldOffset(ntype,member)))
211 #define NODE_SIZE FieldSize(Node)
212 #define PRMNODE_SIZE FieldSize(PrmNode)
213 
214 typedef struct {
215   PetscReal min,max,cmin,cmax;
216 } PRange;
217 
218 struct _p_THI {
219   PETSCHEADER(int);
220   void      (*initialize)(THI,PetscReal x,PetscReal y,PrmNode *p);
221   PetscInt  nlevels;
222   PetscInt  zlevels;
223   PetscReal Lx,Ly,Lz;           /* Model domain */
224   PetscReal alpha;              /* Bed angle */
225   Units     units;
226   PetscReal dirichlet_scale;
227   PetscReal ssa_friction_scale;
228   PetscReal inertia;
229   PRange    eta;
230   PRange    beta2;
231   struct {
232     PetscReal Bd2,eps,exponent,glen_n;
233   } viscosity;
234   struct {
235     PetscReal irefgam,eps2,exponent;
236   } friction;
237   struct {
238     PetscReal rate,exponent,refvel;
239   } erosion;
240   PetscReal rhog;
241   PetscBool no_slip;
242   PetscBool verbose;
243   char      *mattype;
244   char      *monitor_basename;
245   PetscInt  monitor_interval;
246 };
247 
248 struct _n_Units {
249   /* fundamental */
250   PetscReal meter;
251   PetscReal kilogram;
252   PetscReal second;
253   /* derived */
254   PetscReal Pascal;
255   PetscReal year;
256 };
257 
258 static void PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[])
259 {
260   const PetscScalar zm1 = zm-1,
261     znl[8] = {pn[0].b + pn[0].h*(PetscScalar)k/zm1,
262               pn[1].b + pn[1].h*(PetscScalar)k/zm1,
263               pn[2].b + pn[2].h*(PetscScalar)k/zm1,
264               pn[3].b + pn[3].h*(PetscScalar)k/zm1,
265               pn[0].b + pn[0].h*(PetscScalar)(k+1)/zm1,
266               pn[1].b + pn[1].h*(PetscScalar)(k+1)/zm1,
267               pn[2].b + pn[2].h*(PetscScalar)(k+1)/zm1,
268               pn[3].b + pn[3].h*(PetscScalar)(k+1)/zm1};
269   PetscInt i;
270   for (i=0; i<8; i++) zn[i] = PetscRealPart(znl[i]);
271 }
272 
273 /* Compute a gradient of all the 2D fields at four quadrature points.  Output for [quadrature_point][direction].field_name */
274 static PetscErrorCode QuadComputeGrad4(const PetscReal dphi[][4][2],PetscReal hx,PetscReal hy,const PrmNode pn[4],PrmNode dp[4][2])
275 {
276   PetscInt       q,i,f;
277   const PetscScalar (*restrict pg)[PRMNODE_SIZE] = (const PetscScalar(*)[PRMNODE_SIZE])pn; /* Get generic array pointers to the node */
278   PetscScalar (*restrict dpg)[2][PRMNODE_SIZE]   = (PetscScalar(*)[2][PRMNODE_SIZE])dp;
279 
280   PetscFunctionBeginUser;
281   PetscCall(PetscArrayzero(dpg,4));
282   for (q=0; q<4; q++) {
283     for (i=0; i<4; i++) {
284       for (f=0; f<PRMNODE_SIZE; f++) {
285         dpg[q][0][f] += dphi[q][i][0]/hx * pg[i][f];
286         dpg[q][1][f] += dphi[q][i][1]/hy * pg[i][f];
287       }
288     }
289   }
290   PetscFunctionReturn(0);
291 }
292 
293 static inline PetscReal StaggeredMidpoint2D(PetscScalar a,PetscScalar b,PetscScalar c,PetscScalar d)
294 {return 0.5*PetscRealPart(0.75*a + 0.75*b + 0.25*c + 0.25*d);}
295 static inline PetscReal UpwindFlux1D(PetscReal u,PetscReal hL,PetscReal hR)
296 {return (u > 0) ? hL*u : hR*u;}
297 
298 #define UpwindFluxXW(x3,x2,h,i,j,k,dj) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u,x3[i-1][j][k].u, x3[i-1][j+dj][k].u,x3[i][k+dj][k].u), \
299                                                     PetscRealPart(0.75*x2[i-1][j  ].h+0.25*x2[i-1][j+dj].h), PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i  ][j+dj].h))
300 #define UpwindFluxXE(x3,x2,h,i,j,k,dj) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u,x3[i+1][j][k].u, x3[i+1][j+dj][k].u,x3[i][k+dj][k].u), \
301                                                     PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i  ][j+dj].h), PetscRealPart(0.75*x2[i+1][j  ].h+0.25*x2[i+1][j+dj].h))
302 #define UpwindFluxYS(x3,x2,h,i,j,k,di) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v,x3[i][j-1][k].v, x3[i+di][j-1][k].v,x3[i+di][j][k].v), \
303                                                     PetscRealPart(0.75*x2[i  ][j-1].h+0.25*x2[i+di][j-1].h), PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i+di][j  ].h))
304 #define UpwindFluxYN(x3,x2,h,i,j,k,di) UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v,x3[i][j+1][k].v, x3[i+di][j+1][k].v,x3[i+di][j][k].v), \
305                                                     PetscRealPart(0.75*x2[i  ][j  ].h+0.25*x2[i+di][j  ].h), PetscRealPart(0.75*x2[i  ][j+1].h+0.25*x2[i+di][j+1].h))
306 
307 static void PrmNodeGetFaceMeasure(const PrmNode **p,PetscInt i,PetscInt j,PetscScalar h[])
308 {
309   /* West */
310   h[0] = StaggeredMidpoint2D(p[i][j].h,p[i-1][j].h,p[i-1][j-1].h,p[i][j-1].h);
311   h[1] = StaggeredMidpoint2D(p[i][j].h,p[i-1][j].h,p[i-1][j+1].h,p[i][j+1].h);
312   /* East */
313   h[2] = StaggeredMidpoint2D(p[i][j].h,p[i+1][j].h,p[i+1][j+1].h,p[i][j+1].h);
314   h[3] = StaggeredMidpoint2D(p[i][j].h,p[i+1][j].h,p[i+1][j-1].h,p[i][j-1].h);
315   /* South */
316   h[4] = StaggeredMidpoint2D(p[i][j].h,p[i][j-1].h,p[i+1][j-1].h,p[i+1][j].h);
317   h[5] = StaggeredMidpoint2D(p[i][j].h,p[i][j-1].h,p[i-1][j-1].h,p[i-1][j].h);
318   /* North */
319   h[6] = StaggeredMidpoint2D(p[i][j].h,p[i][j+1].h,p[i-1][j+1].h,p[i-1][j].h);
320   h[7] = StaggeredMidpoint2D(p[i][j].h,p[i][j+1].h,p[i+1][j+1].h,p[i+1][j].h);
321 }
322 
323 /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
324 static void THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode *p)
325 {
326   Units units = thi->units;
327   PetscReal s = -x*PetscSinReal(thi->alpha);
328   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx) * PetscSinReal(y*2*PETSC_PI/thi->Ly);
329   p->h = s - p->b;
330   p->beta2 = -1e-10;             /* This value is not used, but it should not be huge because that would change the finite difference step size  */
331 }
332 
333 static void THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode *p)
334 {
335   Units units = thi->units;
336   PetscReal s = -x*PetscSinReal(thi->alpha);
337   p->b = s - 1000*units->meter;
338   p->h = s - p->b;
339   /* tau_b = beta2 v   is a stress (Pa).
340    * This is a big number in our units (it needs to balance the driving force from the surface), so we scale it by 1/rhog, just like the residual. */
341   p->beta2 = 1000 * (1 + PetscSinReal(x*2*PETSC_PI/thi->Lx)*PetscSinReal(y*2*PETSC_PI/thi->Ly)) * units->Pascal * units->year / units->meter / thi->rhog;
342 }
343 
344 /* These are just toys */
345 
346 /* From Fred Herman */
347 static void THIInitialize_HOM_F(THI thi,PetscReal x,PetscReal y,PrmNode *p)
348 {
349   Units units = thi->units;
350   PetscReal s = -x*PetscSinReal(thi->alpha);
351   p->b = s - 1000*units->meter + 100*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx);/* * sin(y*2*PETSC_PI/thi->Ly); */
352   p->h = s - p->b;
353   p->h = (1-(atan((x-thi->Lx/2)/1.)+PETSC_PI/2.)/PETSC_PI)*500*units->meter+1*units->meter;
354   s = PetscRealPart(p->b + p->h);
355   p->beta2 = -1e-10;
356   /*  p->beta2 = 1000 * units->Pascal * units->year / units->meter; */
357 }
358 
359 /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
360 static void THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
361 {
362   Units units = thi->units;
363   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
364   PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
365   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
366   p->h = s - p->b;
367   p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter / thi->rhog;
368 }
369 
370 /* Like Z, but with 200 meter cliffs */
371 static void THIInitialize_HOM_Y(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
372 {
373   Units units = thi->units;
374   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
375   PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
376   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
377   if (PetscRealPart(p->b) > -700*units->meter) p->b += 200*units->meter;
378   p->h = s - p->b;
379   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter / thi->rhog;
380 }
381 
382 /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
383 static void THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
384 {
385   Units units = thi->units;
386   PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
387   PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha);
388   p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
389   p->h = s - p->b;
390   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter / thi->rhog;
391 }
392 
393 static void THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal *beta2,PetscReal *dbeta2)
394 {
395   if (thi->friction.irefgam == 0) {
396     Units units = thi->units;
397     thi->friction.irefgam = 1./(0.5*PetscSqr(100 * units->meter / units->year));
398     thi->friction.eps2 = 0.5*PetscSqr(1.e-4 / thi->friction.irefgam);
399   }
400   if (thi->friction.exponent == 0) {
401     *beta2  = rbeta2;
402     *dbeta2 = 0;
403   } else {
404     *beta2  = rbeta2 * PetscPowReal(thi->friction.eps2 + gam*thi->friction.irefgam,thi->friction.exponent);
405     *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam*thi->friction.irefgam) * thi->friction.irefgam;
406   }
407 }
408 
409 static void THIViscosity(THI thi,PetscReal gam,PetscReal *eta,PetscReal *deta)
410 {
411   PetscReal Bd2,eps,exponent;
412   if (thi->viscosity.Bd2 == 0) {
413     Units units = thi->units;
414     const PetscReal
415       n = thi->viscosity.glen_n,                        /* Glen exponent */
416       p = 1. + 1./n,                                    /* for Stokes */
417       A = 1.e-16 * PetscPowReal(units->Pascal,-n) / units->year, /* softness parameter (Pa^{-n}/s) */
418       B = PetscPowReal(A,-1./n);                                 /* hardness parameter */
419     thi->viscosity.Bd2      = B/2;
420     thi->viscosity.exponent = (p-2)/2;
421     thi->viscosity.eps      = 0.5*PetscSqr(1e-5 / units->year);
422   }
423   Bd2      = thi->viscosity.Bd2;
424   exponent = thi->viscosity.exponent;
425   eps      = thi->viscosity.eps;
426   *eta     = Bd2 * PetscPowReal(eps + gam,exponent);
427   *deta    = exponent * (*eta) / (eps + gam);
428 }
429 
430 static void THIErosion(THI thi,const Node *vel,PetscScalar *erate,Node *derate)
431 {
432   const PetscScalar magref2 = 1.e-10 + (PetscSqr(vel->u) + PetscSqr(vel->v)) / PetscSqr(thi->erosion.refvel),
433                     rate    = -thi->erosion.rate*PetscPowScalar(magref2, 0.5*thi->erosion.exponent);
434   if (erate) *erate = rate;
435   if (derate) {
436     if (thi->erosion.exponent == 1) {
437       derate->u = 0;
438       derate->v = 0;
439     } else {
440       derate->u = 0.5*thi->erosion.exponent * rate / magref2 * 2. * vel->u / PetscSqr(thi->erosion.refvel);
441       derate->v = 0.5*thi->erosion.exponent * rate / magref2 * 2. * vel->v / PetscSqr(thi->erosion.refvel);
442     }
443   }
444 }
445 
446 static void RangeUpdate(PetscReal *min,PetscReal *max,PetscReal x)
447 {
448   if (x < *min) *min = x;
449   if (x > *max) *max = x;
450 }
451 
452 static void PRangeClear(PRange *p)
453 {
454   p->cmin = p->min = 1e100;
455   p->cmax = p->max = -1e100;
456 }
457 
458 static PetscErrorCode PRangeMinMax(PRange *p,PetscReal min,PetscReal max)
459 {
460   PetscFunctionBeginUser;
461   p->cmin = min;
462   p->cmax = max;
463   if (min < p->min) p->min = min;
464   if (max > p->max) p->max = max;
465   PetscFunctionReturn(0);
466 }
467 
468 static PetscErrorCode THIDestroy(THI *thi)
469 {
470   PetscFunctionBeginUser;
471   if (--((PetscObject)(*thi))->refct > 0) PetscFunctionReturn(0);
472   PetscCall(PetscFree((*thi)->units));
473   PetscCall(PetscFree((*thi)->mattype));
474   PetscCall(PetscFree((*thi)->monitor_basename));
475   PetscCall(PetscHeaderDestroy(thi));
476   PetscFunctionReturn(0);
477 }
478 
479 static PetscErrorCode THICreate(MPI_Comm comm,THI *inthi)
480 {
481   static PetscBool registered = PETSC_FALSE;
482   THI              thi;
483   Units            units;
484   char             monitor_basename[PETSC_MAX_PATH_LEN] = "thi-";
485   PetscErrorCode   ierr;
486 
487   PetscFunctionBeginUser;
488   *inthi = 0;
489   if (!registered) {
490     PetscCall(PetscClassIdRegister("Toy Hydrostatic Ice",&THI_CLASSID));
491     registered = PETSC_TRUE;
492   }
493   PetscCall(PetscHeaderCreate(thi,THI_CLASSID,"THI","Toy Hydrostatic Ice","THI",comm,THIDestroy,0));
494 
495   PetscCall(PetscNew(&thi->units));
496 
497   units           = thi->units;
498   units->meter    = 1e-2;
499   units->second   = 1e-7;
500   units->kilogram = 1e-12;
501 
502   PetscOptionsBegin(comm,NULL,"Scaled units options","");
503   {
504     PetscCall(PetscOptionsReal("-units_meter","1 meter in scaled length units","",units->meter,&units->meter,NULL));
505     PetscCall(PetscOptionsReal("-units_second","1 second in scaled time units","",units->second,&units->second,NULL));
506     PetscCall(PetscOptionsReal("-units_kilogram","1 kilogram in scaled mass units","",units->kilogram,&units->kilogram,NULL));
507   }
508   PetscOptionsEnd();
509   units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
510   units->year   = 31556926. * units->second, /* seconds per year */
511 
512   thi->Lx              = 10.e3;
513   thi->Ly              = 10.e3;
514   thi->Lz              = 1000;
515   thi->nlevels         = 1;
516   thi->dirichlet_scale = 1;
517   thi->verbose         = PETSC_FALSE;
518 
519   thi->viscosity.glen_n = 3.;
520   thi->erosion.rate     = 1e-3; /* m/a */
521   thi->erosion.exponent = 1.;
522   thi->erosion.refvel   = 1.;   /* m/a */
523 
524   PetscOptionsBegin(comm,NULL,"Toy Hydrostatic Ice options","");
525   {
526     QuadratureType quad       = QUAD_GAUSS;
527     char           homexp[]   = "A";
528     char           mtype[256] = MATSBAIJ;
529     PetscReal      L,m = 1.0;
530     PetscBool      flg;
531     L    = thi->Lx;
532     PetscCall(PetscOptionsReal("-thi_L","Domain size (m)","",L,&L,&flg));
533     if (flg) thi->Lx = thi->Ly = L;
534     PetscCall(PetscOptionsReal("-thi_Lx","X Domain size (m)","",thi->Lx,&thi->Lx,NULL));
535     PetscCall(PetscOptionsReal("-thi_Ly","Y Domain size (m)","",thi->Ly,&thi->Ly,NULL));
536     PetscCall(PetscOptionsReal("-thi_Lz","Z Domain size (m)","",thi->Lz,&thi->Lz,NULL));
537     PetscCall(PetscOptionsString("-thi_hom","ISMIP-HOM experiment (A or C)","",homexp,homexp,sizeof(homexp),NULL));
538     switch (homexp[0] = toupper(homexp[0])) {
539     case 'A':
540       thi->initialize = THIInitialize_HOM_A;
541       thi->no_slip    = PETSC_TRUE;
542       thi->alpha      = 0.5;
543       break;
544     case 'C':
545       thi->initialize = THIInitialize_HOM_C;
546       thi->no_slip    = PETSC_FALSE;
547       thi->alpha      = 0.1;
548       break;
549     case 'F':
550       thi->initialize = THIInitialize_HOM_F;
551       thi->no_slip    = PETSC_FALSE;
552       thi->alpha      = 0.5;
553       break;
554     case 'X':
555       thi->initialize = THIInitialize_HOM_X;
556       thi->no_slip    = PETSC_FALSE;
557       thi->alpha      = 0.3;
558       break;
559     case 'Y':
560       thi->initialize = THIInitialize_HOM_Y;
561       thi->no_slip    = PETSC_FALSE;
562       thi->alpha      = 0.5;
563       break;
564     case 'Z':
565       thi->initialize = THIInitialize_HOM_Z;
566       thi->no_slip    = PETSC_FALSE;
567       thi->alpha      = 0.5;
568       break;
569     default:
570       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HOM experiment '%c' not implemented",homexp[0]);
571     }
572     PetscCall(PetscOptionsEnum("-thi_quadrature","Quadrature to use for 3D elements","",QuadratureTypes,(PetscEnum)quad,(PetscEnum*)&quad,NULL));
573     switch (quad) {
574     case QUAD_GAUSS:
575       HexQInterp = HexQInterp_Gauss;
576       HexQDeriv  = HexQDeriv_Gauss;
577       break;
578     case QUAD_LOBATTO:
579       HexQInterp = HexQInterp_Lobatto;
580       HexQDeriv  = HexQDeriv_Lobatto;
581       break;
582     }
583     PetscCall(PetscOptionsReal("-thi_alpha","Bed angle (degrees)","",thi->alpha,&thi->alpha,NULL));
584     PetscCall(PetscOptionsReal("-thi_viscosity_glen_n","Exponent in Glen flow law, 1=linear, infty=ideal plastic",NULL,thi->viscosity.glen_n,&thi->viscosity.glen_n,NULL));
585     PetscCall(PetscOptionsReal("-thi_friction_m","Friction exponent, 0=Coulomb, 1=Navier","",m,&m,NULL));
586     thi->friction.exponent = (m-1)/2;
587     PetscCall(PetscOptionsReal("-thi_erosion_rate","Rate of erosion relative to sliding velocity at reference velocity (m/a)",NULL,thi->erosion.rate,&thi->erosion.rate,NULL));
588     PetscCall(PetscOptionsReal("-thi_erosion_exponent","Power of sliding velocity appearing in erosion relation",NULL,thi->erosion.exponent,&thi->erosion.exponent,NULL));
589     PetscCall(PetscOptionsReal("-thi_erosion_refvel","Reference sliding velocity for erosion (m/a)",NULL,thi->erosion.refvel,&thi->erosion.refvel,NULL));
590     thi->erosion.rate   *= units->meter / units->year;
591     thi->erosion.refvel *= units->meter / units->year;
592     PetscCall(PetscOptionsReal("-thi_dirichlet_scale","Scale Dirichlet boundary conditions by this factor","",thi->dirichlet_scale,&thi->dirichlet_scale,NULL));
593     PetscCall(PetscOptionsReal("-thi_ssa_friction_scale","Scale slip boundary conditions by this factor in SSA (2D) assembly","",thi->ssa_friction_scale,&thi->ssa_friction_scale,NULL));
594     PetscCall(PetscOptionsReal("-thi_inertia","Coefficient of accelaration term in velocity system, physical is almost zero",NULL,thi->inertia,&thi->inertia,NULL));
595     PetscCall(PetscOptionsInt("-thi_nlevels","Number of levels of refinement","",thi->nlevels,&thi->nlevels,NULL));
596     PetscCall(PetscOptionsFList("-thi_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)mtype,sizeof(mtype),NULL));
597     PetscCall(PetscStrallocpy(mtype,&thi->mattype));
598     PetscCall(PetscOptionsBool("-thi_verbose","Enable verbose output (like matrix sizes and statistics)","",thi->verbose,&thi->verbose,NULL));
599     PetscCall(PetscOptionsString("-thi_monitor","Basename to write state files to",NULL,monitor_basename,monitor_basename,sizeof(monitor_basename),&flg));
600     if (flg) {
601       PetscCall(PetscStrallocpy(monitor_basename,&thi->monitor_basename));
602       thi->monitor_interval = 1;
603       PetscCall(PetscOptionsInt("-thi_monitor_interval","Frequency at which to write state files",NULL,thi->monitor_interval,&thi->monitor_interval,NULL));
604     }
605   }
606   PetscOptionsEnd();
607 
608   /* dimensionalize */
609   thi->Lx    *= units->meter;
610   thi->Ly    *= units->meter;
611   thi->Lz    *= units->meter;
612   thi->alpha *= PETSC_PI / 180;
613 
614   PRangeClear(&thi->eta);
615   PRangeClear(&thi->beta2);
616 
617   {
618     PetscReal u       = 1000*units->meter/(3e7*units->second),
619               gradu   = u / (100*units->meter),eta,deta,
620               rho     = 910 * units->kilogram/PetscPowRealInt(units->meter,3),
621               grav    = 9.81 * units->meter/PetscSqr(units->second),
622               driving = rho * grav * PetscSinReal(thi->alpha) * 1000*units->meter;
623     THIViscosity(thi,0.5*gradu*gradu,&eta,&deta);
624     thi->rhog = rho * grav;
625     if (thi->verbose) {
626       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Units: meter %8.2g  second %8.2g  kg %8.2g  Pa %8.2g\n",(double)units->meter,(double)units->second,(double)units->kilogram,(double)units->Pascal));
627       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n",(double)thi->Lx,(double)thi->Ly,(double)thi->Lz,(double)(rho*grav*1e3*units->meter),(double)driving));
628       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)u,(double)gradu,(double)eta,(double)(2*eta*gradu,2*eta*gradu/driving)));
629       THIViscosity(thi,0.5*PetscSqr(1e-3*gradu),&eta,&deta);
630       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Small velocity 1m/a  %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)(1e-3*u),(double)(1e-3*gradu),(double)eta,(double)(2*eta*1e-3*gradu,2*eta*1e-3*gradu/driving)));
631     }
632   }
633 
634   *inthi = thi;
635   PetscFunctionReturn(0);
636 }
637 
638 /* Our problem is periodic, but the domain has a mean slope of alpha so the bed does not line up between the upstream
639  * and downstream ends of the domain.  This function fixes the ghost values so that the domain appears truly periodic in
640  * the horizontal. */
641 static PetscErrorCode THIFixGhosts(THI thi,DM da3,DM da2,Vec X3,Vec X2)
642 {
643   DMDALocalInfo  info;
644   PrmNode        **x2;
645   PetscInt       i,j;
646 
647   PetscFunctionBeginUser;
648   PetscCall(DMDAGetLocalInfo(da3,&info));
649   /* PetscCall(VecView(X2,PETSC_VIEWER_STDOUT_WORLD)); */
650   PetscCall(DMDAVecGetArray(da2,X2,&x2));
651   for (i=info.gzs; i<info.gzs+info.gzm; i++) {
652     if (i > -1 && i < info.mz) continue;
653     for (j=info.gys; j<info.gys+info.gym; j++) {
654       x2[i][j].b += PetscSinReal(thi->alpha) * thi->Lx * (i<0 ? 1.0 : -1.0);
655     }
656   }
657   PetscCall(DMDAVecRestoreArray(da2,X2,&x2));
658   /* PetscCall(VecView(X2,PETSC_VIEWER_STDOUT_WORLD)); */
659   PetscFunctionReturn(0);
660 }
661 
662 static PetscErrorCode THIInitializePrm(THI thi,DM da2prm,PrmNode **p)
663 {
664   PetscInt       i,j,xs,xm,ys,ym,mx,my;
665 
666   PetscFunctionBeginUser;
667   PetscCall(DMDAGetGhostCorners(da2prm,&ys,&xs,0,&ym,&xm,0));
668   PetscCall(DMDAGetInfo(da2prm,0, &my,&mx,0, 0,0,0, 0,0,0,0,0,0));
669   for (i=xs; i<xs+xm; i++) {
670     for (j=ys; j<ys+ym; j++) {
671       PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my;
672       thi->initialize(thi,xx,yy,&p[i][j]);
673     }
674   }
675   PetscFunctionReturn(0);
676 }
677 
678 static PetscErrorCode THIInitial(THI thi,DM pack,Vec X)
679 {
680   DM             da3,da2;
681   PetscInt       i,j,k,xs,xm,ys,ym,zs,zm,mx,my;
682   PetscReal      hx,hy;
683   PrmNode        **prm;
684   Node           ***x;
685   Vec            X3g,X2g,X2;
686 
687   PetscFunctionBeginUser;
688   PetscCall(DMCompositeGetEntries(pack,&da3,&da2));
689   PetscCall(DMCompositeGetAccess(pack,X,&X3g,&X2g));
690   PetscCall(DMGetLocalVector(da2,&X2));
691 
692   PetscCall(DMDAGetInfo(da3,0, 0,&my,&mx, 0,0,0, 0,0,0,0,0,0));
693   PetscCall(DMDAGetCorners(da3,&zs,&ys,&xs,&zm,&ym,&xm));
694   PetscCall(DMDAVecGetArray(da3,X3g,&x));
695   PetscCall(DMDAVecGetArray(da2,X2,&prm));
696 
697   PetscCall(THIInitializePrm(thi,da2,prm));
698 
699   hx = thi->Lx / mx;
700   hy = thi->Ly / my;
701   for (i=xs; i<xs+xm; i++) {
702     for (j=ys; j<ys+ym; j++) {
703       for (k=zs; k<zs+zm; k++) {
704         const PetscScalar zm1      = zm-1,
705                           drivingx = thi->rhog * (prm[i+1][j].b+prm[i+1][j].h - prm[i-1][j].b-prm[i-1][j].h) / (2*hx),
706                           drivingy = thi->rhog * (prm[i][j+1].b+prm[i][j+1].h - prm[i][j-1].b-prm[i][j-1].h) / (2*hy);
707         x[i][j][k].u = 0. * drivingx * prm[i][j].h*(PetscScalar)k/zm1;
708         x[i][j][k].v = 0. * drivingy * prm[i][j].h*(PetscScalar)k/zm1;
709       }
710     }
711   }
712 
713   PetscCall(DMDAVecRestoreArray(da3,X3g,&x));
714   PetscCall(DMDAVecRestoreArray(da2,X2,&prm));
715 
716   PetscCall(DMLocalToGlobalBegin(da2,X2,INSERT_VALUES,X2g));
717   PetscCall(DMLocalToGlobalEnd  (da2,X2,INSERT_VALUES,X2g));
718   PetscCall(DMRestoreLocalVector(da2,&X2));
719 
720   PetscCall(DMCompositeRestoreAccess(pack,X,&X3g,&X2g));
721   PetscFunctionReturn(0);
722 }
723 
724 static void PointwiseNonlinearity(THI thi,const Node n[restrict 8],const PetscReal phi[restrict 3],PetscReal dphi[restrict 8][3],PetscScalar *restrict u,PetscScalar *restrict v,PetscScalar du[restrict 3],PetscScalar dv[restrict 3],PetscReal *eta,PetscReal *deta)
725 {
726   PetscInt    l,ll;
727   PetscScalar gam;
728 
729   du[0] = du[1] = du[2] = 0;
730   dv[0] = dv[1] = dv[2] = 0;
731   *u    = 0;
732   *v    = 0;
733   for (l=0; l<8; l++) {
734     *u += phi[l] * n[l].u;
735     *v += phi[l] * n[l].v;
736     for (ll=0; ll<3; ll++) {
737       du[ll] += dphi[l][ll] * n[l].u;
738       dv[ll] += dphi[l][ll] * n[l].v;
739     }
740   }
741   gam = Sqr(du[0]) + Sqr(dv[1]) + du[0]*dv[1] + 0.25*Sqr(du[1]+dv[0]) + 0.25*Sqr(du[2]) + 0.25*Sqr(dv[2]);
742   THIViscosity(thi,PetscRealPart(gam),eta,deta);
743 }
744 
745 static PetscErrorCode THIFunctionLocal_3D(DMDALocalInfo *info,const Node ***x,const PrmNode **prm,const Node ***xdot,Node ***f,THI thi)
746 {
747   PetscInt       xs,ys,xm,ym,zm,i,j,k,q,l;
748   PetscReal      hx,hy,etamin,etamax,beta2min,beta2max;
749 
750   PetscFunctionBeginUser;
751   xs = info->zs;
752   ys = info->ys;
753   xm = info->zm;
754   ym = info->ym;
755   zm = info->xm;
756   hx = thi->Lx / info->mz;
757   hy = thi->Ly / info->my;
758 
759   etamin   = 1e100;
760   etamax   = 0;
761   beta2min = 1e100;
762   beta2max = 0;
763 
764   for (i=xs; i<xs+xm; i++) {
765     for (j=ys; j<ys+ym; j++) {
766       PrmNode pn[4],dpn[4][2];
767       QuadExtract(prm,i,j,pn);
768       PetscCall(QuadComputeGrad4(QuadQDeriv,hx,hy,pn,dpn));
769       for (k=0; k<zm-1; k++) {
770         PetscInt  ls = 0;
771         Node      n[8],ndot[8],*fn[8];
772         PetscReal zn[8],etabase = 0;
773 
774         PrmHexGetZ(pn,k,zm,zn);
775         HexExtract(x,i,j,k,n);
776         HexExtract(xdot,i,j,k,ndot);
777         HexExtractRef(f,i,j,k,fn);
778         if (thi->no_slip && k == 0) {
779           for (l=0; l<4; l++) n[l].u = n[l].v = 0;
780           /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
781           ls = 4;
782         }
783         for (q=0; q<8; q++) {
784           PetscReal   dz[3],phi[8],dphi[8][3],jw,eta,deta;
785           PetscScalar du[3],dv[3],u,v,udot=0,vdot=0;
786           for (l=ls; l<8; l++) {
787             udot += HexQInterp[q][l]*ndot[l].u;
788             vdot += HexQInterp[q][l]*ndot[l].v;
789           }
790           HexGrad(HexQDeriv[q],zn,dz);
791           HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
792           PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
793           jw /= thi->rhog;      /* scales residuals to be O(1) */
794           if (q == 0) etabase = eta;
795           RangeUpdate(&etamin,&etamax,eta);
796           for (l=ls; l<8; l++) { /* test functions */
797             const PetscScalar ds[2] = {dpn[q%4][0].h+dpn[q%4][0].b, dpn[q%4][1].h+dpn[q%4][1].b};
798             const PetscReal   pp    = phi[l],*dp = dphi[l];
799             fn[l]->u += dp[0]*jw*eta*(4.*du[0]+2.*dv[1]) + dp[1]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*du[2] + pp*jw*thi->rhog*ds[0];
800             fn[l]->v += dp[1]*jw*eta*(2.*du[0]+4.*dv[1]) + dp[0]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*dv[2] + pp*jw*thi->rhog*ds[1];
801             fn[l]->u += pp*jw*udot*thi->inertia*pp;
802             fn[l]->v += pp*jw*vdot*thi->inertia*pp;
803           }
804         }
805         if (k == 0) { /* we are on a bottom face */
806           if (thi->no_slip) {
807             /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
808             * conditions.  After shenanigans above, etabase contains the effective viscosity at the closest quadrature
809             * point to the bed.  We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
810             * diagonal entry corresponding to the adjacent node.  The fundamental scaling of the viscous part is in
811             * diagu, diagv below.  This scaling is easy to recognize by considering the finite difference operator after
812             * scaling by element size.  The no-slip Dirichlet condition is scaled by this factor, and also in the
813             * assembled matrix (see the similar block in THIJacobianLocal).
814             *
815             * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
816             * (nonlinearly in general) on the neighboring interior nodes through the local viscosity.  This will make
817             * a matrix-free Jacobian have extra entries in the corresponding row.  We assemble only the diagonal part,
818             * so the solution will exactly satisfy the boundary condition after the first linear iteration.
819             */
820             const PetscReal   hz    = PetscRealPart(pn[0].h)/(zm-1.);
821             const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
822             fn[0]->u = thi->dirichlet_scale*diagu*x[i][j][k].u;
823             fn[0]->v = thi->dirichlet_scale*diagv*x[i][j][k].v;
824           } else {              /* Integrate over bottom face to apply boundary condition */
825             for (q=0; q<4; q++) { /* We remove the explicit scaling of the residual by 1/rhog because beta2 already has that scaling to be O(1) */
826               const PetscReal jw = 0.25*hx*hy,*phi = QuadQInterp[q];
827               PetscScalar     u  =0,v=0,rbeta2=0;
828               PetscReal       beta2,dbeta2;
829               for (l=0; l<4; l++) {
830                 u     += phi[l]*n[l].u;
831                 v     += phi[l]*n[l].v;
832                 rbeta2 += phi[l]*pn[l].beta2;
833               }
834               THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
835               RangeUpdate(&beta2min,&beta2max,beta2);
836               for (l=0; l<4; l++) {
837                 const PetscReal pp = phi[l];
838                 fn[ls+l]->u += pp*jw*beta2*u;
839                 fn[ls+l]->v += pp*jw*beta2*v;
840               }
841             }
842           }
843         }
844       }
845     }
846   }
847 
848   PetscCall(PRangeMinMax(&thi->eta,etamin,etamax));
849   PetscCall(PRangeMinMax(&thi->beta2,beta2min,beta2max));
850   PetscFunctionReturn(0);
851 }
852 
853 static PetscErrorCode THIFunctionLocal_2D(DMDALocalInfo *info,const Node ***x,const PrmNode **prm,const PrmNode **prmdot,PrmNode **f,THI thi)
854 {
855   PetscInt xs,ys,xm,ym,zm,i,j,k;
856 
857   PetscFunctionBeginUser;
858   xs = info->zs;
859   ys = info->ys;
860   xm = info->zm;
861   ym = info->ym;
862   zm = info->xm;
863 
864   for (i=xs; i<xs+xm; i++) {
865     for (j=ys; j<ys+ym; j++) {
866       PetscScalar div = 0,erate,h[8];
867       PrmNodeGetFaceMeasure(prm,i,j,h);
868       for (k=0; k<zm; k++) {
869         PetscScalar weight = (k==0 || k == zm-1) ? 0.5/(zm-1) : 1.0/(zm-1);
870         if (0) {                /* centered flux */
871           div += (- weight*h[0] * StaggeredMidpoint2D(x[i][j][k].u,x[i-1][j][k].u, x[i-1][j-1][k].u,x[i][j-1][k].u)
872                   - weight*h[1] * StaggeredMidpoint2D(x[i][j][k].u,x[i-1][j][k].u, x[i-1][j+1][k].u,x[i][j+1][k].u)
873                   + weight*h[2] * StaggeredMidpoint2D(x[i][j][k].u,x[i+1][j][k].u, x[i+1][j+1][k].u,x[i][j+1][k].u)
874                   + weight*h[3] * StaggeredMidpoint2D(x[i][j][k].u,x[i+1][j][k].u, x[i+1][j-1][k].u,x[i][j-1][k].u)
875                   - weight*h[4] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j-1][k].v, x[i+1][j-1][k].v,x[i+1][j][k].v)
876                   - weight*h[5] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j-1][k].v, x[i-1][j-1][k].v,x[i-1][j][k].v)
877                   + weight*h[6] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j+1][k].v, x[i-1][j+1][k].v,x[i-1][j][k].v)
878                   + weight*h[7] * StaggeredMidpoint2D(x[i][j][k].v,x[i][j+1][k].v, x[i+1][j+1][k].v,x[i+1][j][k].v));
879         } else {                /* Upwind flux */
880           div += weight*(-UpwindFluxXW(x,prm,h,i,j,k, 1)
881                          -UpwindFluxXW(x,prm,h,i,j,k,-1)
882                          +UpwindFluxXE(x,prm,h,i,j,k, 1)
883                          +UpwindFluxXE(x,prm,h,i,j,k,-1)
884                          -UpwindFluxYS(x,prm,h,i,j,k, 1)
885                          -UpwindFluxYS(x,prm,h,i,j,k,-1)
886                          +UpwindFluxYN(x,prm,h,i,j,k, 1)
887                          +UpwindFluxYN(x,prm,h,i,j,k,-1));
888         }
889       }
890       /* printf("div[%d][%d] %g\n",i,j,div); */
891       THIErosion(thi,&x[i][j][0],&erate,NULL);
892       f[i][j].b     = prmdot[i][j].b - erate;
893       f[i][j].h     = prmdot[i][j].h + div;
894       f[i][j].beta2 = prmdot[i][j].beta2;
895     }
896   }
897   PetscFunctionReturn(0);
898 }
899 
900 static PetscErrorCode THIFunction(TS ts,PetscReal t,Vec X,Vec Xdot,Vec F,void *ctx)
901 {
902   THI            thi = (THI)ctx;
903   DM             pack,da3,da2;
904   Vec            X3,X2,Xdot3,Xdot2,F3,F2,F3g,F2g;
905   const Node     ***x3,***xdot3;
906   const PrmNode  **x2,**xdot2;
907   Node           ***f3;
908   PrmNode        **f2;
909   DMDALocalInfo  info3;
910 
911   PetscFunctionBeginUser;
912   PetscCall(TSGetDM(ts,&pack));
913   PetscCall(DMCompositeGetEntries(pack,&da3,&da2));
914   PetscCall(DMDAGetLocalInfo(da3,&info3));
915   PetscCall(DMCompositeGetLocalVectors(pack,&X3,&X2));
916   PetscCall(DMCompositeGetLocalVectors(pack,&Xdot3,&Xdot2));
917   PetscCall(DMCompositeScatter(pack,X,X3,X2));
918   PetscCall(THIFixGhosts(thi,da3,da2,X3,X2));
919   PetscCall(DMCompositeScatter(pack,Xdot,Xdot3,Xdot2));
920 
921   PetscCall(DMGetLocalVector(da3,&F3));
922   PetscCall(DMGetLocalVector(da2,&F2));
923   PetscCall(VecZeroEntries(F3));
924 
925   PetscCall(DMDAVecGetArray(da3,X3,&x3));
926   PetscCall(DMDAVecGetArray(da2,X2,&x2));
927   PetscCall(DMDAVecGetArray(da3,Xdot3,&xdot3));
928   PetscCall(DMDAVecGetArray(da2,Xdot2,&xdot2));
929   PetscCall(DMDAVecGetArray(da3,F3,&f3));
930   PetscCall(DMDAVecGetArray(da2,F2,&f2));
931 
932   PetscCall(THIFunctionLocal_3D(&info3,x3,x2,xdot3,f3,thi));
933   PetscCall(THIFunctionLocal_2D(&info3,x3,x2,xdot2,f2,thi));
934 
935   PetscCall(DMDAVecRestoreArray(da3,X3,&x3));
936   PetscCall(DMDAVecRestoreArray(da2,X2,&x2));
937   PetscCall(DMDAVecRestoreArray(da3,Xdot3,&xdot3));
938   PetscCall(DMDAVecRestoreArray(da2,Xdot2,&xdot2));
939   PetscCall(DMDAVecRestoreArray(da3,F3,&f3));
940   PetscCall(DMDAVecRestoreArray(da2,F2,&f2));
941 
942   PetscCall(DMCompositeRestoreLocalVectors(pack,&X3,&X2));
943   PetscCall(DMCompositeRestoreLocalVectors(pack,&Xdot3,&Xdot2));
944 
945   PetscCall(VecZeroEntries(F));
946   PetscCall(DMCompositeGetAccess(pack,F,&F3g,&F2g));
947   PetscCall(DMLocalToGlobalBegin(da3,F3,ADD_VALUES,F3g));
948   PetscCall(DMLocalToGlobalEnd  (da3,F3,ADD_VALUES,F3g));
949   PetscCall(DMLocalToGlobalBegin(da2,F2,INSERT_VALUES,F2g));
950   PetscCall(DMLocalToGlobalEnd  (da2,F2,INSERT_VALUES,F2g));
951 
952   if (thi->verbose) {
953     PetscViewer viewer;
954     PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)thi),&viewer));
955     PetscCall(PetscViewerASCIIPrintf(viewer,"3D_Velocity residual (bs=2):\n"));
956     PetscCall(PetscViewerASCIIPushTab(viewer));
957     PetscCall(VecView(F3,viewer));
958     PetscCall(PetscViewerASCIIPopTab(viewer));
959     PetscCall(PetscViewerASCIIPrintf(viewer,"2D_Fields residual (bs=3):\n"));
960     PetscCall(PetscViewerASCIIPushTab(viewer));
961     PetscCall(VecView(F2,viewer));
962     PetscCall(PetscViewerASCIIPopTab(viewer));
963   }
964 
965   PetscCall(DMCompositeRestoreAccess(pack,F,&F3g,&F2g));
966 
967   PetscCall(DMRestoreLocalVector(da3,&F3));
968   PetscCall(DMRestoreLocalVector(da2,&F2));
969   PetscFunctionReturn(0);
970 }
971 
972 static PetscErrorCode THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer)
973 {
974   PetscReal      nrm;
975   PetscInt       m;
976   PetscMPIInt    rank;
977 
978   PetscFunctionBeginUser;
979   PetscCall(MatNorm(B,NORM_FROBENIUS,&nrm));
980   PetscCall(MatGetSize(B,&m,0));
981   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&rank));
982   if (rank == 0) {
983     PetscScalar val0,val2;
984     PetscCall(MatGetValue(B,0,0,&val0));
985     PetscCall(MatGetValue(B,2,2,&val2));
986     PetscCall(PetscViewerASCIIPrintf(viewer,"Matrix dim %8" PetscInt_FMT "  norm %8.2e, (0,0) %8.2e  (2,2) %8.2e, eta [%8.2e,%8.2e] beta2 [%8.2e,%8.2e]\n",m,(double)nrm,(double)PetscRealPart(val0),(double)PetscRealPart(val2),(double)thi->eta.cmin,(double)thi->eta.cmax,(double)thi->beta2.cmin,(double)thi->beta2.cmax));
987   }
988   PetscFunctionReturn(0);
989 }
990 
991 static PetscErrorCode THISurfaceStatistics(DM pack,Vec X,PetscReal *min,PetscReal *max,PetscReal *mean)
992 {
993   DM             da3,da2;
994   Vec            X3,X2;
995   Node           ***x;
996   PetscInt       i,j,xs,ys,zs,xm,ym,zm,mx,my,mz;
997   PetscReal      umin = 1e100,umax=-1e100;
998   PetscScalar    usum =0.0,gusum;
999 
1000   PetscFunctionBeginUser;
1001   PetscCall(DMCompositeGetEntries(pack,&da3,&da2));
1002   PetscCall(DMCompositeGetAccess(pack,X,&X3,&X2));
1003   *min = *max = *mean = 0;
1004   PetscCall(DMDAGetInfo(da3,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0));
1005   PetscCall(DMDAGetCorners(da3,&zs,&ys,&xs,&zm,&ym,&xm));
1006   PetscCheck(zs == 0 && zm == mz,PETSC_COMM_WORLD,PETSC_ERR_PLIB,"Unexpected decomposition");
1007   PetscCall(DMDAVecGetArray(da3,X3,&x));
1008   for (i=xs; i<xs+xm; i++) {
1009     for (j=ys; j<ys+ym; j++) {
1010       PetscReal u = PetscRealPart(x[i][j][zm-1].u);
1011       RangeUpdate(&umin,&umax,u);
1012       usum += u;
1013     }
1014   }
1015   PetscCall(DMDAVecRestoreArray(da3,X3,&x));
1016   PetscCall(DMCompositeRestoreAccess(pack,X,&X3,&X2));
1017 
1018   PetscCallMPI(MPI_Allreduce(&umin,min,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)da3)));
1019   PetscCallMPI(MPI_Allreduce(&umax,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)da3)));
1020   PetscCallMPI(MPI_Allreduce(&usum,&gusum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)da3)));
1021   *mean = PetscRealPart(gusum) / (mx*my);
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 static PetscErrorCode THISolveStatistics(THI thi,TS ts,PetscInt coarsened,const char name[])
1026 {
1027   MPI_Comm       comm;
1028   DM             pack;
1029   Vec            X,X3,X2;
1030 
1031   PetscFunctionBeginUser;
1032   PetscCall(PetscObjectGetComm((PetscObject)thi,&comm));
1033   PetscCall(TSGetDM(ts,&pack));
1034   PetscCall(TSGetSolution(ts,&X));
1035   PetscCall(DMCompositeGetAccess(pack,X,&X3,&X2));
1036   PetscCall(PetscPrintf(comm,"Solution statistics after solve: %s\n",name));
1037   {
1038     PetscInt            its,lits;
1039     SNESConvergedReason reason;
1040     SNES                snes;
1041     PetscCall(TSGetSNES(ts,&snes));
1042     PetscCall(SNESGetIterationNumber(snes,&its));
1043     PetscCall(SNESGetConvergedReason(snes,&reason));
1044     PetscCall(SNESGetLinearSolveIterations(snes,&lits));
1045     PetscCall(PetscPrintf(comm,"%s: Number of SNES iterations = %" PetscInt_FMT ", total linear iterations = %" PetscInt_FMT "\n",SNESConvergedReasons[reason],its,lits));
1046   }
1047   {
1048     PetscReal   nrm2,tmin[3]={1e100,1e100,1e100},tmax[3]={-1e100,-1e100,-1e100},min[3],max[3];
1049     PetscInt    i,j,m;
1050     PetscScalar *x;
1051     PetscCall(VecNorm(X3,NORM_2,&nrm2));
1052     PetscCall(VecGetLocalSize(X3,&m));
1053     PetscCall(VecGetArray(X3,&x));
1054     for (i=0; i<m; i+=2) {
1055       PetscReal u = PetscRealPart(x[i]),v = PetscRealPart(x[i+1]),c = PetscSqrtReal(u*u+v*v);
1056       tmin[0] = PetscMin(u,tmin[0]);
1057       tmin[1] = PetscMin(v,tmin[1]);
1058       tmin[2] = PetscMin(c,tmin[2]);
1059       tmax[0] = PetscMax(u,tmax[0]);
1060       tmax[1] = PetscMax(v,tmax[1]);
1061       tmax[2] = PetscMax(c,tmax[2]);
1062     }
1063     PetscCall(VecRestoreArray(X,&x));
1064     PetscCallMPI(MPI_Allreduce(tmin,min,3,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)thi)));
1065     PetscCallMPI(MPI_Allreduce(tmax,max,3,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)thi)));
1066     /* Dimensionalize to meters/year */
1067     nrm2 *= thi->units->year / thi->units->meter;
1068     for (j=0; j<3; j++) {
1069       min[j] *= thi->units->year / thi->units->meter;
1070       max[j] *= thi->units->year / thi->units->meter;
1071     }
1072     PetscCall(PetscPrintf(comm,"|X|_2 %g   u in [%g, %g]   v in [%g, %g]   c in [%g, %g] \n",(double)nrm2,(double)min[0],(double)max[0],(double)min[1],(double)max[1],(double)min[2],(double)max[2]));
1073     {
1074       PetscReal umin,umax,umean;
1075       PetscCall(THISurfaceStatistics(pack,X,&umin,&umax,&umean));
1076       umin  *= thi->units->year / thi->units->meter;
1077       umax  *= thi->units->year / thi->units->meter;
1078       umean *= thi->units->year / thi->units->meter;
1079       PetscCall(PetscPrintf(comm,"Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n",(double)umin,(double)umax,(double)umean));
1080     }
1081     /* These values stay nondimensional */
1082     PetscCall(PetscPrintf(comm,"Global eta range   [%g, %g], converged range [%g, %g]\n",(double)thi->eta.min,(double)thi->eta.max,(double)thi->eta.cmin,(double)thi->eta.cmax));
1083     PetscCall(PetscPrintf(comm,"Global beta2 range [%g, %g], converged range [%g, %g]\n",(double)thi->beta2.min,(double)thi->beta2.max,(double)thi->beta2.cmin,(double)thi->beta2.cmax));
1084   }
1085   PetscCall(PetscPrintf(comm,"\n"));
1086   PetscCall(DMCompositeRestoreAccess(pack,X,&X3,&X2));
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 static inline PetscInt DMDALocalIndex3D(DMDALocalInfo *info,PetscInt i,PetscInt j,PetscInt k)
1091 {return ((i-info->gzs)*info->gym + (j-info->gys))*info->gxm + (k-info->gxs);}
1092 static inline PetscInt DMDALocalIndex2D(DMDALocalInfo *info,PetscInt i,PetscInt j)
1093 {return (i-info->gzs)*info->gym + (j-info->gys);}
1094 
1095 static PetscErrorCode THIJacobianLocal_Momentum(DMDALocalInfo *info,const Node ***x,const PrmNode **prm,Mat B,Mat Bcpl,THI thi)
1096 {
1097   PetscInt       xs,ys,xm,ym,zm,i,j,k,q,l,ll;
1098   PetscReal      hx,hy;
1099 
1100   PetscFunctionBeginUser;
1101   xs = info->zs;
1102   ys = info->ys;
1103   xm = info->zm;
1104   ym = info->ym;
1105   zm = info->xm;
1106   hx = thi->Lx / info->mz;
1107   hy = thi->Ly / info->my;
1108 
1109   for (i=xs; i<xs+xm; i++) {
1110     for (j=ys; j<ys+ym; j++) {
1111       PrmNode pn[4],dpn[4][2];
1112       QuadExtract(prm,i,j,pn);
1113       PetscCall(QuadComputeGrad4(QuadQDeriv,hx,hy,pn,dpn));
1114       for (k=0; k<zm-1; k++) {
1115         Node        n[8];
1116         PetscReal   zn[8],etabase = 0;
1117         PetscScalar Ke[8*NODE_SIZE][8*NODE_SIZE],Kcpl[8*NODE_SIZE][4*PRMNODE_SIZE];
1118         PetscInt    ls = 0;
1119 
1120         PrmHexGetZ(pn,k,zm,zn);
1121         HexExtract(x,i,j,k,n);
1122         PetscCall(PetscMemzero(Ke,sizeof(Ke)));
1123         PetscCall(PetscMemzero(Kcpl,sizeof(Kcpl)));
1124         if (thi->no_slip && k == 0) {
1125           for (l=0; l<4; l++) n[l].u = n[l].v = 0;
1126           ls = 4;
1127         }
1128         for (q=0; q<8; q++) {
1129           PetscReal   dz[3],phi[8],dphi[8][3],jw,eta,deta;
1130           PetscScalar du[3],dv[3],u,v;
1131           HexGrad(HexQDeriv[q],zn,dz);
1132           HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
1133           PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
1134           jw /= thi->rhog;      /* residuals are scaled by this factor */
1135           if (q == 0) etabase = eta;
1136           for (l=ls; l<8; l++) { /* test functions */
1137             const PetscReal pp=phi[l],*restrict dp = dphi[l];
1138             for (ll=ls; ll<8; ll++) { /* trial functions */
1139               const PetscReal *restrict dpl = dphi[ll];
1140               PetscScalar dgdu,dgdv;
1141               dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1] + 0.5*du[2]*dpl[2];
1142               dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0] + 0.5*dv[2]*dpl[2];
1143               /* Picard part */
1144               Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + dp[2]*jw*eta*dpl[2];
1145               Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
1146               Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
1147               Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + dp[2]*jw*eta*dpl[2];
1148               /* extra Newton terms */
1149               Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*du[2];
1150               Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*du[2];
1151               Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*dv[2];
1152               Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*dv[2];
1153               /* inertial part */
1154               Ke[l*2+0][ll*2+0] += pp*jw*thi->inertia*pp;
1155               Ke[l*2+1][ll*2+1] += pp*jw*thi->inertia*pp;
1156             }
1157             for (ll=0; ll<4; ll++) { /* Trial functions for surface/bed */
1158               const PetscReal dpl[] = {QuadQDeriv[q%4][ll][0]/hx, QuadQDeriv[q%4][ll][1]/hy}; /* surface = h + b */
1159               Kcpl[FieldIndex(Node,l,u)][FieldIndex(PrmNode,ll,h)] += pp*jw*thi->rhog*dpl[0];
1160               Kcpl[FieldIndex(Node,l,u)][FieldIndex(PrmNode,ll,b)] += pp*jw*thi->rhog*dpl[0];
1161               Kcpl[FieldIndex(Node,l,v)][FieldIndex(PrmNode,ll,h)] += pp*jw*thi->rhog*dpl[1];
1162               Kcpl[FieldIndex(Node,l,v)][FieldIndex(PrmNode,ll,b)] += pp*jw*thi->rhog*dpl[1];
1163             }
1164           }
1165         }
1166         if (k == 0) { /* on a bottom face */
1167           if (thi->no_slip) {
1168             const PetscReal   hz    = PetscRealPart(pn[0].h)/(zm-1);
1169             const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
1170             Ke[0][0] = thi->dirichlet_scale*diagu;
1171             Ke[0][1] = 0;
1172             Ke[1][0] = 0;
1173             Ke[1][1] = thi->dirichlet_scale*diagv;
1174           } else {
1175             for (q=0; q<4; q++) { /* We remove the explicit scaling by 1/rhog because beta2 already has that scaling to be O(1) */
1176               const PetscReal jw = 0.25*hx*hy,*phi = QuadQInterp[q];
1177               PetscScalar     u  =0,v=0,rbeta2=0;
1178               PetscReal       beta2,dbeta2;
1179               for (l=0; l<4; l++) {
1180                 u      += phi[l]*n[l].u;
1181                 v      += phi[l]*n[l].v;
1182                 rbeta2 += phi[l]*pn[l].beta2;
1183               }
1184               THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
1185               for (l=0; l<4; l++) {
1186                 const PetscReal pp = phi[l];
1187                 for (ll=0; ll<4; ll++) {
1188                   const PetscReal ppl = phi[ll];
1189                   Ke[l*2+0][ll*2+0] += pp*jw*beta2*ppl + pp*jw*dbeta2*u*u*ppl;
1190                   Ke[l*2+0][ll*2+1] +=                   pp*jw*dbeta2*u*v*ppl;
1191                   Ke[l*2+1][ll*2+0] +=                   pp*jw*dbeta2*v*u*ppl;
1192                   Ke[l*2+1][ll*2+1] += pp*jw*beta2*ppl + pp*jw*dbeta2*v*v*ppl;
1193                 }
1194               }
1195             }
1196           }
1197         }
1198         {
1199           const PetscInt rc3blocked[8] = {
1200             DMDALocalIndex3D(info,i+0,j+0,k+0),
1201             DMDALocalIndex3D(info,i+1,j+0,k+0),
1202             DMDALocalIndex3D(info,i+1,j+1,k+0),
1203             DMDALocalIndex3D(info,i+0,j+1,k+0),
1204             DMDALocalIndex3D(info,i+0,j+0,k+1),
1205             DMDALocalIndex3D(info,i+1,j+0,k+1),
1206             DMDALocalIndex3D(info,i+1,j+1,k+1),
1207             DMDALocalIndex3D(info,i+0,j+1,k+1)
1208           },col2blocked[PRMNODE_SIZE*4] = {
1209             DMDALocalIndex2D(info,i+0,j+0),
1210             DMDALocalIndex2D(info,i+1,j+0),
1211             DMDALocalIndex2D(info,i+1,j+1),
1212             DMDALocalIndex2D(info,i+0,j+1)
1213           };
1214 #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1215           for (l=0; l<8; l++) {
1216             for (ll=l+1; ll<8; ll++) {
1217               Ke[ll*2+0][l*2+0] = Ke[l*2+0][ll*2+0];
1218               Ke[ll*2+1][l*2+0] = Ke[l*2+0][ll*2+1];
1219               Ke[ll*2+0][l*2+1] = Ke[l*2+1][ll*2+0];
1220               Ke[ll*2+1][l*2+1] = Ke[l*2+1][ll*2+1];
1221             }
1222           }
1223 #endif
1224           PetscCall(MatSetValuesBlockedLocal(B,8,rc3blocked,8,rc3blocked,&Ke[0][0],ADD_VALUES)); /* velocity-velocity coupling can use blocked insertion */
1225           {                     /* The off-diagonal part cannot (yet) */
1226             PetscInt row3scalar[NODE_SIZE*8],col2scalar[PRMNODE_SIZE*4];
1227             for (l=0; l<8; l++) for (ll=0; ll<NODE_SIZE; ll++) row3scalar[l*NODE_SIZE+ll] = rc3blocked[l]*NODE_SIZE+ll;
1228             for (l=0; l<4; l++) for (ll=0; ll<PRMNODE_SIZE; ll++) col2scalar[l*PRMNODE_SIZE+ll] = col2blocked[l]*PRMNODE_SIZE+ll;
1229             PetscCall(MatSetValuesLocal(Bcpl,8*NODE_SIZE,row3scalar,4*PRMNODE_SIZE,col2scalar,&Kcpl[0][0],ADD_VALUES));
1230           }
1231         }
1232       }
1233     }
1234   }
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info,const Node ***x3,const PrmNode **x2,const PrmNode **xdot2,PetscReal a,Mat B22,Mat B21,THI thi)
1239 {
1240   PetscInt       xs,ys,xm,ym,zm,i,j,k;
1241 
1242   PetscFunctionBeginUser;
1243   xs = info->zs;
1244   ys = info->ys;
1245   xm = info->zm;
1246   ym = info->ym;
1247   zm = info->xm;
1248 
1249   PetscCheck(zm <= 1024,((PetscObject)info->da)->comm,PETSC_ERR_SUP,"Need to allocate more space");
1250   for (i=xs; i<xs+xm; i++) {
1251     for (j=ys; j<ys+ym; j++) {
1252       {                         /* Self-coupling */
1253         const PetscInt    row[]  = {DMDALocalIndex2D(info,i,j)};
1254         const PetscInt    col[]  = {DMDALocalIndex2D(info,i,j)};
1255         const PetscScalar vals[] = {
1256           a,0,0,
1257           0,a,0,
1258           0,0,a
1259         };
1260         PetscCall(MatSetValuesBlockedLocal(B22,1,row,1,col,vals,INSERT_VALUES));
1261       }
1262       for (k=0; k<zm; k++) {    /* Coupling to velocity problem */
1263         /* Use a cheaper quadrature than for residual evaluation, because it is much sparser */
1264         const PetscInt row[]  = {FieldIndex(PrmNode,DMDALocalIndex2D(info,i,j),h)};
1265         const PetscInt cols[] = {
1266           FieldIndex(Node,DMDALocalIndex3D(info,i-1,j,k),u),
1267           FieldIndex(Node,DMDALocalIndex3D(info,i  ,j,k),u),
1268           FieldIndex(Node,DMDALocalIndex3D(info,i+1,j,k),u),
1269           FieldIndex(Node,DMDALocalIndex3D(info,i,j-1,k),v),
1270           FieldIndex(Node,DMDALocalIndex3D(info,i,j  ,k),v),
1271           FieldIndex(Node,DMDALocalIndex3D(info,i,j+1,k),v)
1272         };
1273         const PetscScalar
1274           w  = (k && k<zm-1) ? 0.5 : 0.25,
1275           hW = w*(x2[i-1][j  ].h+x2[i  ][j  ].h)/(zm-1.),
1276           hE = w*(x2[i  ][j  ].h+x2[i+1][j  ].h)/(zm-1.),
1277           hS = w*(x2[i  ][j-1].h+x2[i  ][j  ].h)/(zm-1.),
1278           hN = w*(x2[i  ][j  ].h+x2[i  ][j+1].h)/(zm-1.);
1279         PetscScalar *vals,
1280                      vals_upwind[] = {((PetscRealPart(x3[i][j][k].u) > 0) ? -hW : 0),
1281                                       ((PetscRealPart(x3[i][j][k].u) > 0) ? +hE : -hW),
1282                                       ((PetscRealPart(x3[i][j][k].u) > 0) ?  0  : +hE),
1283                                       ((PetscRealPart(x3[i][j][k].v) > 0) ? -hS : 0),
1284                                       ((PetscRealPart(x3[i][j][k].v) > 0) ? +hN : -hS),
1285                                       ((PetscRealPart(x3[i][j][k].v) > 0) ?  0  : +hN)},
1286                      vals_centered[] = {-0.5*hW, 0.5*(-hW+hE), 0.5*hE,
1287                                         -0.5*hS, 0.5*(-hS+hN), 0.5*hN};
1288         vals = 1 ? vals_upwind : vals_centered;
1289         if (k == 0) {
1290           Node derate;
1291           THIErosion(thi,&x3[i][j][0],NULL,&derate);
1292           vals[1] -= derate.u;
1293           vals[4] -= derate.v;
1294         }
1295         PetscCall(MatSetValuesLocal(B21,1,row,6,cols,vals,INSERT_VALUES));
1296       }
1297     }
1298   }
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 static PetscErrorCode THIJacobian(TS ts,PetscReal t,Vec X,Vec Xdot,PetscReal a,Mat A,Mat B,void *ctx)
1303 {
1304   THI            thi = (THI)ctx;
1305   DM             pack,da3,da2;
1306   Vec            X3,X2,Xdot2;
1307   Mat            B11,B12,B21,B22;
1308   DMDALocalInfo  info3;
1309   IS             *isloc;
1310   const Node     ***x3;
1311   const PrmNode  **x2,**xdot2;
1312 
1313   PetscFunctionBeginUser;
1314   PetscCall(TSGetDM(ts,&pack));
1315   PetscCall(DMCompositeGetEntries(pack,&da3,&da2));
1316   PetscCall(DMDAGetLocalInfo(da3,&info3));
1317   PetscCall(DMCompositeGetLocalVectors(pack,&X3,&X2));
1318   PetscCall(DMCompositeGetLocalVectors(pack,NULL,&Xdot2));
1319   PetscCall(DMCompositeScatter(pack,X,X3,X2));
1320   PetscCall(THIFixGhosts(thi,da3,da2,X3,X2));
1321   PetscCall(DMCompositeScatter(pack,Xdot,NULL,Xdot2));
1322 
1323   PetscCall(MatZeroEntries(B));
1324 
1325   PetscCall(DMCompositeGetLocalISs(pack,&isloc));
1326   PetscCall(MatGetLocalSubMatrix(B,isloc[0],isloc[0],&B11));
1327   PetscCall(MatGetLocalSubMatrix(B,isloc[0],isloc[1],&B12));
1328   PetscCall(MatGetLocalSubMatrix(B,isloc[1],isloc[0],&B21));
1329   PetscCall(MatGetLocalSubMatrix(B,isloc[1],isloc[1],&B22));
1330 
1331   PetscCall(DMDAVecGetArray(da3,X3,&x3));
1332   PetscCall(DMDAVecGetArray(da2,X2,&x2));
1333   PetscCall(DMDAVecGetArray(da2,Xdot2,&xdot2));
1334 
1335   PetscCall(THIJacobianLocal_Momentum(&info3,x3,x2,B11,B12,thi));
1336 
1337   /* Need to switch from ADD_VALUES to INSERT_VALUES */
1338   PetscCall(MatAssemblyBegin(B,MAT_FLUSH_ASSEMBLY));
1339   PetscCall(MatAssemblyEnd(B,MAT_FLUSH_ASSEMBLY));
1340 
1341   PetscCall(THIJacobianLocal_2D(&info3,x3,x2,xdot2,a,B22,B21,thi));
1342 
1343   PetscCall(DMDAVecRestoreArray(da3,X3,&x3));
1344   PetscCall(DMDAVecRestoreArray(da2,X2,&x2));
1345   PetscCall(DMDAVecRestoreArray(da2,Xdot2,&xdot2));
1346 
1347   PetscCall(MatRestoreLocalSubMatrix(B,isloc[0],isloc[0],&B11));
1348   PetscCall(MatRestoreLocalSubMatrix(B,isloc[0],isloc[1],&B12));
1349   PetscCall(MatRestoreLocalSubMatrix(B,isloc[1],isloc[0],&B21));
1350   PetscCall(MatRestoreLocalSubMatrix(B,isloc[1],isloc[1],&B22));
1351   PetscCall(ISDestroy(&isloc[0]));
1352   PetscCall(ISDestroy(&isloc[1]));
1353   PetscCall(PetscFree(isloc));
1354 
1355   PetscCall(DMCompositeRestoreLocalVectors(pack,&X3,&X2));
1356   PetscCall(DMCompositeRestoreLocalVectors(pack,0,&Xdot2));
1357 
1358   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
1359   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
1360   if (A != B) {
1361     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
1362     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
1363   }
1364   if (thi->verbose) PetscCall(THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD));
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 /* VTK's XML formats are so brain-dead that they can't handle multiple grids in the same file.  Since the communication
1369  * can be shared between the two grids, we write two files at once, one for velocity (living on a 3D grid defined by
1370  * h=thickness and b=bed) and another for all properties living on the 2D grid.
1371  */
1372 static PetscErrorCode THIDAVecView_VTK_XML(THI thi,DM pack,Vec X,const char filename[],const char filename2[])
1373 {
1374   const PetscInt dof   = NODE_SIZE,dof2 = PRMNODE_SIZE;
1375   Units          units = thi->units;
1376   MPI_Comm       comm;
1377   PetscViewer    viewer3,viewer2;
1378   PetscMPIInt    rank,size,tag,nn,nmax,nn2,nmax2;
1379   PetscInt       mx,my,mz,r,range[6];
1380   PetscScalar    *x,*x2;
1381   DM             da3,da2;
1382   Vec            X3,X2;
1383 
1384   PetscFunctionBeginUser;
1385   PetscCall(PetscObjectGetComm((PetscObject)thi,&comm));
1386   PetscCall(DMCompositeGetEntries(pack,&da3,&da2));
1387   PetscCall(DMCompositeGetAccess(pack,X,&X3,&X2));
1388   PetscCall(DMDAGetInfo(da3,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0));
1389   PetscCallMPI(MPI_Comm_size(comm,&size));
1390   PetscCallMPI(MPI_Comm_rank(comm,&rank));
1391   PetscCall(PetscViewerASCIIOpen(comm,filename,&viewer3));
1392   PetscCall(PetscViewerASCIIOpen(comm,filename2,&viewer2));
1393   PetscCall(PetscViewerASCIIPrintf(viewer3,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n"));
1394   PetscCall(PetscViewerASCIIPrintf(viewer2,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n"));
1395   PetscCall(PetscViewerASCIIPrintf(viewer3,"  <StructuredGrid WholeExtent=\"%d %" PetscInt_FMT " %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n",0,mz-1,0,my-1,0,mx-1));
1396   PetscCall(PetscViewerASCIIPrintf(viewer2,"  <StructuredGrid WholeExtent=\"%d %d %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n",0,0,0,my-1,0,mx-1));
1397 
1398   PetscCall(DMDAGetCorners(da3,range,range+1,range+2,range+3,range+4,range+5));
1399   PetscCall(PetscMPIIntCast(range[3]*range[4]*range[5]*dof,&nn));
1400   PetscCallMPI(MPI_Reduce(&nn,&nmax,1,MPI_INT,MPI_MAX,0,comm));
1401   PetscCall(PetscMPIIntCast(range[4]*range[5]*dof2,&nn2));
1402   PetscCallMPI(MPI_Reduce(&nn2,&nmax2,1,MPI_INT,MPI_MAX,0,comm));
1403   tag  = ((PetscObject)viewer3)->tag;
1404   PetscCall(VecGetArrayRead(X3,(const PetscScalar**)&x));
1405   PetscCall(VecGetArrayRead(X2,(const PetscScalar**)&x2));
1406   if (rank == 0) {
1407     PetscScalar *array,*array2;
1408     PetscCall(PetscMalloc2(nmax,&array,nmax2,&array2));
1409     for (r=0; r<size; r++) {
1410       PetscInt    i,j,k,f,xs,xm,ys,ym,zs,zm;
1411       Node        *y3;
1412       PetscScalar (*y2)[PRMNODE_SIZE];
1413       MPI_Status status;
1414 
1415       if (r) {
1416         PetscCallMPI(MPI_Recv(range,6,MPIU_INT,r,tag,comm,MPI_STATUS_IGNORE));
1417       }
1418       zs = range[0];ys = range[1];xs = range[2];zm = range[3];ym = range[4];xm = range[5];
1419       PetscCheck(xm*ym*zm*dof <= nmax,PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen");
1420       if (r) {
1421         PetscCallMPI(MPI_Recv(array,nmax,MPIU_SCALAR,r,tag,comm,&status));
1422         PetscCallMPI(MPI_Get_count(&status,MPIU_SCALAR,&nn));
1423         PetscCheck(nn == xm*ym*zm*dof,PETSC_COMM_SELF,PETSC_ERR_PLIB,"corrupt da3 send");
1424         y3   = (Node*)array;
1425         PetscCallMPI(MPI_Recv(array2,nmax2,MPIU_SCALAR,r,tag,comm,&status));
1426         PetscCallMPI(MPI_Get_count(&status,MPIU_SCALAR,&nn2));
1427         PetscCheck(nn2 == xm*ym*dof2,PETSC_COMM_SELF,PETSC_ERR_PLIB,"corrupt da2 send");
1428         y2 = (PetscScalar(*)[PRMNODE_SIZE])array2;
1429       } else {
1430         y3 = (Node*)x;
1431         y2 = (PetscScalar(*)[PRMNODE_SIZE])x2;
1432       }
1433       PetscCall(PetscViewerASCIIPrintf(viewer3,"    <Piece Extent=\"%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n",zs,zs+zm-1,ys,ys+ym-1,xs,xs+xm-1));
1434       PetscCall(PetscViewerASCIIPrintf(viewer2,"    <Piece Extent=\"%d %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n",0,0,ys,ys+ym-1,xs,xs+xm-1));
1435 
1436       PetscCall(PetscViewerASCIIPrintf(viewer3,"      <Points>\n"));
1437       PetscCall(PetscViewerASCIIPrintf(viewer2,"      <Points>\n"));
1438       PetscCall(PetscViewerASCIIPrintf(viewer3,"        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1439       PetscCall(PetscViewerASCIIPrintf(viewer2,"        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1440       for (i=xs; i<xs+xm; i++) {
1441         for (j=ys; j<ys+ym; j++) {
1442           PetscReal
1443             xx = thi->Lx*i/mx,
1444             yy = thi->Ly*j/my,
1445             b  = PetscRealPart(y2[i*ym+j][FieldOffset(PrmNode,b)]),
1446             h  = PetscRealPart(y2[i*ym+j][FieldOffset(PrmNode,h)]);
1447           for (k=zs; k<zs+zm; k++) {
1448             PetscReal zz = b + h*k/(mz-1.);
1449             PetscCall(PetscViewerASCIIPrintf(viewer3,"%f %f %f\n",(double)xx,(double)yy,(double)zz));
1450           }
1451           PetscCall(PetscViewerASCIIPrintf(viewer2,"%f %f %f\n",(double)xx,(double)yy,(double)0.0));
1452         }
1453       }
1454       PetscCall(PetscViewerASCIIPrintf(viewer3,"        </DataArray>\n"));
1455       PetscCall(PetscViewerASCIIPrintf(viewer2,"        </DataArray>\n"));
1456       PetscCall(PetscViewerASCIIPrintf(viewer3,"      </Points>\n"));
1457       PetscCall(PetscViewerASCIIPrintf(viewer2,"      </Points>\n"));
1458 
1459       {                         /* Velocity and rank (3D) */
1460         PetscCall(PetscViewerASCIIPrintf(viewer3,"      <PointData>\n"));
1461         PetscCall(PetscViewerASCIIPrintf(viewer3,"        <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1462         for (i=0; i<nn/dof; i++) {
1463           PetscCall(PetscViewerASCIIPrintf(viewer3,"%f %f %f\n",(double)(PetscRealPart(y3[i].u)*units->year/units->meter),(double)(PetscRealPart(y3[i].v)*units->year/units->meter),0.0));
1464         }
1465         PetscCall(PetscViewerASCIIPrintf(viewer3,"        </DataArray>\n"));
1466 
1467         PetscCall(PetscViewerASCIIPrintf(viewer3,"        <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n"));
1468         for (i=0; i<nn; i+=dof) {
1469           PetscCall(PetscViewerASCIIPrintf(viewer3,"%" PetscInt_FMT "\n",r));
1470         }
1471         PetscCall(PetscViewerASCIIPrintf(viewer3,"        </DataArray>\n"));
1472         PetscCall(PetscViewerASCIIPrintf(viewer3,"      </PointData>\n"));
1473       }
1474 
1475       {                         /* 2D */
1476         PetscCall(PetscViewerASCIIPrintf(viewer2,"      <PointData>\n"));
1477         for (f=0; f<PRMNODE_SIZE; f++) {
1478           const char *fieldname;
1479           PetscCall(DMDAGetFieldName(da2,f,&fieldname));
1480           PetscCall(PetscViewerASCIIPrintf(viewer2,"        <DataArray type=\"Float32\" Name=\"%s\" format=\"ascii\">\n",fieldname));
1481           for (i=0; i<nn2/PRMNODE_SIZE; i++) {
1482             PetscCall(PetscViewerASCIIPrintf(viewer2,"%g\n",(double)y2[i][f]));
1483           }
1484           PetscCall(PetscViewerASCIIPrintf(viewer2,"        </DataArray>\n"));
1485         }
1486         PetscCall(PetscViewerASCIIPrintf(viewer2,"      </PointData>\n"));
1487       }
1488 
1489       PetscCall(PetscViewerASCIIPrintf(viewer3,"    </Piece>\n"));
1490       PetscCall(PetscViewerASCIIPrintf(viewer2,"    </Piece>\n"));
1491     }
1492     PetscCall(PetscFree2(array,array2));
1493   } else {
1494     PetscCallMPI(MPI_Send(range,6,MPIU_INT,0,tag,comm));
1495     PetscCallMPI(MPI_Send(x,nn,MPIU_SCALAR,0,tag,comm));
1496     PetscCallMPI(MPI_Send(x2,nn2,MPIU_SCALAR,0,tag,comm));
1497   }
1498   PetscCall(VecRestoreArrayRead(X3,(const PetscScalar**)&x));
1499   PetscCall(VecRestoreArrayRead(X2,(const PetscScalar**)&x2));
1500   PetscCall(PetscViewerASCIIPrintf(viewer3,"  </StructuredGrid>\n"));
1501   PetscCall(PetscViewerASCIIPrintf(viewer2,"  </StructuredGrid>\n"));
1502 
1503   PetscCall(DMCompositeRestoreAccess(pack,X,&X3,&X2));
1504   PetscCall(PetscViewerASCIIPrintf(viewer3,"</VTKFile>\n"));
1505   PetscCall(PetscViewerASCIIPrintf(viewer2,"</VTKFile>\n"));
1506   PetscCall(PetscViewerDestroy(&viewer3));
1507   PetscCall(PetscViewerDestroy(&viewer2));
1508   PetscFunctionReturn(0);
1509 }
1510 
1511 static PetscErrorCode THITSMonitor(TS ts,PetscInt step,PetscReal t,Vec X,void *ctx)
1512 {
1513   THI            thi = (THI)ctx;
1514   DM             pack;
1515   char           filename3[PETSC_MAX_PATH_LEN],filename2[PETSC_MAX_PATH_LEN];
1516 
1517   PetscFunctionBeginUser;
1518   if (step < 0) PetscFunctionReturn(0); /* negative one is used to indicate an interpolated solution */
1519   PetscCall(PetscPrintf(PetscObjectComm((PetscObject)ts),"%3" PetscInt_FMT ": t=%g\n",step,(double)t));
1520   if (thi->monitor_interval && step % thi->monitor_interval) PetscFunctionReturn(0);
1521   PetscCall(TSGetDM(ts,&pack));
1522   PetscCall(PetscSNPrintf(filename3,sizeof(filename3),"%s-3d-%03" PetscInt_FMT ".vts",thi->monitor_basename,step));
1523   PetscCall(PetscSNPrintf(filename2,sizeof(filename2),"%s-2d-%03" PetscInt_FMT ".vts",thi->monitor_basename,step));
1524   PetscCall(THIDAVecView_VTK_XML(thi,pack,X,filename3,filename2));
1525   PetscFunctionReturn(0);
1526 }
1527 
1528 static PetscErrorCode THICreateDM3d(THI thi,DM *dm3d)
1529 {
1530   MPI_Comm       comm;
1531   PetscInt       M    = 3,N = 3,P = 2;
1532   DM             da;
1533 
1534   PetscFunctionBeginUser;
1535   PetscCall(PetscObjectGetComm((PetscObject)thi,&comm));
1536   PetscOptionsBegin(comm,NULL,"Grid resolution options","");
1537   {
1538     PetscCall(PetscOptionsInt("-M","Number of elements in x-direction on coarse level","",M,&M,NULL));
1539     N    = M;
1540     PetscCall(PetscOptionsInt("-N","Number of elements in y-direction on coarse level (if different from M)","",N,&N,NULL));
1541     PetscCall(PetscOptionsInt("-P","Number of elements in z-direction on coarse level","",P,&P,NULL));
1542   }
1543   PetscOptionsEnd();
1544   PetscCall(DMDACreate3d(comm,DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_BOX,P,N,M,1,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,0,&da));
1545   PetscCall(DMSetFromOptions(da));
1546   PetscCall(DMSetUp(da));
1547   PetscCall(DMDASetFieldName(da,0,"x-velocity"));
1548   PetscCall(DMDASetFieldName(da,1,"y-velocity"));
1549   *dm3d = da;
1550   PetscFunctionReturn(0);
1551 }
1552 
1553 int main(int argc,char *argv[])
1554 {
1555   MPI_Comm       comm;
1556   DM             pack,da3,da2;
1557   TS             ts;
1558   THI            thi;
1559   Vec            X;
1560   Mat            B;
1561   PetscInt       i,steps;
1562   PetscReal      ftime;
1563 
1564   PetscCall(PetscInitialize(&argc,&argv,0,help));
1565   comm = PETSC_COMM_WORLD;
1566 
1567   PetscCall(THICreate(comm,&thi));
1568   PetscCall(THICreateDM3d(thi,&da3));
1569   {
1570     PetscInt        Mx,My,mx,my,s;
1571     DMDAStencilType st;
1572     PetscCall(DMDAGetInfo(da3,0, 0,&My,&Mx, 0,&my,&mx, 0,&s,0,0,0,&st));
1573     PetscCall(DMDACreate2d(PetscObjectComm((PetscObject)thi),DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,My,Mx,my,mx,sizeof(PrmNode)/sizeof(PetscScalar),s,0,0,&da2));
1574     PetscCall(DMSetUp(da2));
1575   }
1576 
1577   PetscCall(PetscObjectSetName((PetscObject)da3,"3D_Velocity"));
1578   PetscCall(DMSetOptionsPrefix(da3,"f3d_"));
1579   PetscCall(DMDASetFieldName(da3,0,"u"));
1580   PetscCall(DMDASetFieldName(da3,1,"v"));
1581   PetscCall(PetscObjectSetName((PetscObject)da2,"2D_Fields"));
1582   PetscCall(DMSetOptionsPrefix(da2,"f2d_"));
1583   PetscCall(DMDASetFieldName(da2,0,"b"));
1584   PetscCall(DMDASetFieldName(da2,1,"h"));
1585   PetscCall(DMDASetFieldName(da2,2,"beta2"));
1586   PetscCall(DMCompositeCreate(comm,&pack));
1587   PetscCall(DMCompositeAddDM(pack,da3));
1588   PetscCall(DMCompositeAddDM(pack,da2));
1589   PetscCall(DMDestroy(&da3));
1590   PetscCall(DMDestroy(&da2));
1591   PetscCall(DMSetUp(pack));
1592   PetscCall(DMCreateMatrix(pack,&B));
1593   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
1594   PetscCall(MatSetOptionsPrefix(B,"thi_"));
1595 
1596   for (i=0; i<thi->nlevels; i++) {
1597     PetscReal Lx = thi->Lx / thi->units->meter,Ly = thi->Ly / thi->units->meter,Lz = thi->Lz / thi->units->meter;
1598     PetscInt  Mx,My,Mz;
1599     PetscCall(DMCompositeGetEntries(pack,&da3,&da2));
1600     PetscCall(DMDAGetInfo(da3,0, &Mz,&My,&Mx, 0,0,0, 0,0,0,0,0,0));
1601     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g x %8.2g, num elements %3d x %3d x %3d (%8d), size (m) %g x %g x %g\n",i,Lx,Ly,Lz,Mx,My,Mz,Mx*My*Mz,Lx/Mx,Ly/My,1000./(Mz-1)));
1602   }
1603 
1604   PetscCall(DMCreateGlobalVector(pack,&X));
1605   PetscCall(THIInitial(thi,pack,X));
1606 
1607   PetscCall(TSCreate(comm,&ts));
1608   PetscCall(TSSetDM(ts,pack));
1609   PetscCall(TSSetProblemType(ts,TS_NONLINEAR));
1610   PetscCall(TSMonitorSet(ts,THITSMonitor,thi,NULL));
1611   PetscCall(TSSetType(ts,TSTHETA));
1612   PetscCall(TSSetIFunction(ts,NULL,THIFunction,thi));
1613   PetscCall(TSSetIJacobian(ts,B,B,THIJacobian,thi));
1614   PetscCall(TSSetMaxTime(ts,10.0));
1615   PetscCall(TSSetExactFinalTime(ts,TS_EXACTFINALTIME_STEPOVER));
1616   PetscCall(TSSetSolution(ts,X));
1617   PetscCall(TSSetTimeStep(ts,1e-3));
1618   PetscCall(TSSetFromOptions(ts));
1619 
1620   PetscCall(TSSolve(ts,X));
1621   PetscCall(TSGetSolveTime(ts,&ftime));
1622   PetscCall(TSGetStepNumber(ts,&steps));
1623   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Steps %" PetscInt_FMT "  final time %g\n",steps,(double)ftime));
1624 
1625   if (0) PetscCall(THISolveStatistics(thi,ts,0,"Full"));
1626 
1627   {
1628     PetscBool flg;
1629     char      filename[PETSC_MAX_PATH_LEN] = "";
1630     PetscCall(PetscOptionsGetString(NULL,NULL,"-o",filename,sizeof(filename),&flg));
1631     if (flg) PetscCall(THIDAVecView_VTK_XML(thi,pack,X,filename,NULL));
1632   }
1633 
1634   PetscCall(VecDestroy(&X));
1635   PetscCall(MatDestroy(&B));
1636   PetscCall(DMDestroy(&pack));
1637   PetscCall(TSDestroy(&ts));
1638   PetscCall(THIDestroy(&thi));
1639   PetscCall(PetscFinalize());
1640   return 0;
1641 }
1642