xref: /petsc/src/ts/tutorials/ex14.c (revision d5b43468fb8780a8feea140ccd6fa3e6a50411cc)
1 static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
2 \n\
3 Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
4 using multigrid.  The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
5 to p=4/3 in a p-Laplacian).  The focus is on ISMIP-HOM experiments which assume periodic\n\
6 boundary conditions in the x- and y-directions.\n\
7 \n\
8 Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
9 can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
10 \n\
11 A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
12 \n\n";
13 
14 /*
15 The equations for horizontal velocity (u,v) are
16 
17   - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
18   - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0
19 
20 where
21 
22   eta = B/2 (epsilon + gamma)^((p-2)/2)
23 
24 is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
25 written in terms of the second invariant
26 
27   gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2
28 
29 The surface boundary conditions are the natural conditions.  The basal boundary conditions
30 are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.
31 
32 In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).
33 
34 The discretization is Q1 finite elements, managed by a DMDA.  The grid is never distorted in the
35 map (x,y) plane, but the bed and surface may be bumpy.  This is handled as usual in FEM, through
36 the Jacobian of the coordinate transformation from a reference element to the physical element.
37 
38 Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
39 specially so that columns are never distributed, and are always contiguous in memory.
40 This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
41 and then indexing as vec[i][j][k].  The exotic coarse spaces require 2D DMDAs which are made to
42 use compatible domain decomposition relative to the 3D DMDAs.
43 
44 */
45 
46 #include <petscts.h>
47 #include <petscdm.h>
48 #include <petscdmda.h>
49 #include <petscdmcomposite.h>
50 #include <ctype.h> /* toupper() */
51 #include <petsc/private/petscimpl.h>
52 
53 #if defined __SSE2__
54   #include <emmintrin.h>
55 #endif
56 
57 /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
58 #define USE_SSE2_KERNELS (!defined NO_SSE2 && !defined PETSC_USE_COMPLEX && !defined PETSC_USE_REAL_SINGLE && defined __SSE2__)
59 
60 #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L
61   #if defined __cplusplus /* C++ restrict is nonstandard and compilers have inconsistent rules about where it can be used */
62     #define restrict
63   #else
64     #define restrict PETSC_RESTRICT
65   #endif
66 #endif
67 
68 static PetscClassId THI_CLASSID;
69 
70 typedef enum {
71   QUAD_GAUSS,
72   QUAD_LOBATTO
73 } QuadratureType;
74 static const char     *QuadratureTypes[] = {"gauss", "lobatto", "QuadratureType", "QUAD_", 0};
75 static const PetscReal HexQWeights[8]    = {1, 1, 1, 1, 1, 1, 1, 1};
76 static const PetscReal HexQNodes[]       = {-0.57735026918962573, 0.57735026918962573};
77 #define G 0.57735026918962573
78 #define H (0.5 * (1. + G))
79 #define L (0.5 * (1. - G))
80 #define M (-0.5)
81 #define P (0.5)
82 /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
83 static const PetscReal HexQInterp_Lobatto[8][8] = {
84   {H, 0, 0, 0, L, 0, 0, 0},
85   {0, H, 0, 0, 0, L, 0, 0},
86   {0, 0, H, 0, 0, 0, L, 0},
87   {0, 0, 0, H, 0, 0, 0, L},
88   {L, 0, 0, 0, H, 0, 0, 0},
89   {0, L, 0, 0, 0, H, 0, 0},
90   {0, 0, L, 0, 0, 0, H, 0},
91   {0, 0, 0, L, 0, 0, 0, H}
92 };
93 static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
94   {{M * H, M *H, M}, {P * H, 0, 0},    {0, 0, 0},        {0, P *H, 0},     {M * L, M *L, P}, {P * L, 0, 0},    {0, 0, 0},        {0, P *L, 0}    },
95   {{M * H, 0, 0},    {P * H, M *H, M}, {0, P *H, 0},     {0, 0, 0},        {M * L, 0, 0},    {P * L, M *L, P}, {0, P *L, 0},     {0, 0, 0}       },
96   {{0, 0, 0},        {0, M *H, 0},     {P * H, P *H, M}, {M * H, 0, 0},    {0, 0, 0},        {0, M *L, 0},     {P * L, P *L, P}, {M * L, 0, 0}   },
97   {{0, M *H, 0},     {0, 0, 0},        {P * H, 0, 0},    {M * H, P *H, M}, {0, M *L, 0},     {0, 0, 0},        {P * L, 0, 0},    {M * L, P *L, P}},
98   {{M * L, M *L, M}, {P * L, 0, 0},    {0, 0, 0},        {0, P *L, 0},     {M * H, M *H, P}, {P * H, 0, 0},    {0, 0, 0},        {0, P *H, 0}    },
99   {{M * L, 0, 0},    {P * L, M *L, M}, {0, P *L, 0},     {0, 0, 0},        {M * H, 0, 0},    {P * H, M *H, P}, {0, P *H, 0},     {0, 0, 0}       },
100   {{0, 0, 0},        {0, M *L, 0},     {P * L, P *L, M}, {M * L, 0, 0},    {0, 0, 0},        {0, M *H, 0},     {P * H, P *H, P}, {M * H, 0, 0}   },
101   {{0, M *L, 0},     {0, 0, 0},        {P * L, 0, 0},    {M * L, P *L, M}, {0, M *H, 0},     {0, 0, 0},        {P * H, 0, 0},    {M * H, P *H, P}}
102 };
103 /* Stanndard Gauss */
104 static const PetscReal HexQInterp_Gauss[8][8] = {
105   {H * H * H, L *H *H, L *L *H, H *L *H, H *H *L, L *H *L, L *L *L, H *L *L},
106   {L * H * H, H *H *H, H *L *H, L *L *H, L *H *L, H *H *L, H *L *L, L *L *L},
107   {L * L * H, H *L *H, H *H *H, L *H *H, L *L *L, H *L *L, H *H *L, L *H *L},
108   {H * L * H, L *L *H, L *H *H, H *H *H, H *L *L, L *L *L, L *H *L, H *H *L},
109   {H * H * L, L *H *L, L *L *L, H *L *L, H *H *H, L *H *H, L *L *H, H *L *H},
110   {L * H * L, H *H *L, H *L *L, L *L *L, L *H *H, H *H *H, H *L *H, L *L *H},
111   {L * L * L, H *L *L, H *H *L, L *H *L, L *L *H, H *L *H, H *H *H, L *H *H},
112   {H * L * L, L *L *L, L *H *L, H *H *L, H *L *H, L *L *H, L *H *H, H *H *H}
113 };
114 static const PetscReal HexQDeriv_Gauss[8][8][3] = {
115   {{M * H * H, H *M *H, H *H *M}, {P * H * H, L *M *H, L *H *M}, {P * L * H, L *P *H, L *L *M}, {M * L * H, H *P *H, H *L *M}, {M * H * L, H *M *L, H *H *P}, {P * H * L, L *M *L, L *H *P}, {P * L * L, L *P *L, L *L *P}, {M * L * L, H *P *L, H *L *P}},
116   {{M * H * H, L *M *H, L *H *M}, {P * H * H, H *M *H, H *H *M}, {P * L * H, H *P *H, H *L *M}, {M * L * H, L *P *H, L *L *M}, {M * H * L, L *M *L, L *H *P}, {P * H * L, H *M *L, H *H *P}, {P * L * L, H *P *L, H *L *P}, {M * L * L, L *P *L, L *L *P}},
117   {{M * L * H, L *M *H, L *L *M}, {P * L * H, H *M *H, H *L *M}, {P * H * H, H *P *H, H *H *M}, {M * H * H, L *P *H, L *H *M}, {M * L * L, L *M *L, L *L *P}, {P * L * L, H *M *L, H *L *P}, {P * H * L, H *P *L, H *H *P}, {M * H * L, L *P *L, L *H *P}},
118   {{M * L * H, H *M *H, H *L *M}, {P * L * H, L *M *H, L *L *M}, {P * H * H, L *P *H, L *H *M}, {M * H * H, H *P *H, H *H *M}, {M * L * L, H *M *L, H *L *P}, {P * L * L, L *M *L, L *L *P}, {P * H * L, L *P *L, L *H *P}, {M * H * L, H *P *L, H *H *P}},
119   {{M * H * L, H *M *L, H *H *M}, {P * H * L, L *M *L, L *H *M}, {P * L * L, L *P *L, L *L *M}, {M * L * L, H *P *L, H *L *M}, {M * H * H, H *M *H, H *H *P}, {P * H * H, L *M *H, L *H *P}, {P * L * H, L *P *H, L *L *P}, {M * L * H, H *P *H, H *L *P}},
120   {{M * H * L, L *M *L, L *H *M}, {P * H * L, H *M *L, H *H *M}, {P * L * L, H *P *L, H *L *M}, {M * L * L, L *P *L, L *L *M}, {M * H * H, L *M *H, L *H *P}, {P * H * H, H *M *H, H *H *P}, {P * L * H, H *P *H, H *L *P}, {M * L * H, L *P *H, L *L *P}},
121   {{M * L * L, L *M *L, L *L *M}, {P * L * L, H *M *L, H *L *M}, {P * H * L, H *P *L, H *H *M}, {M * H * L, L *P *L, L *H *M}, {M * L * H, L *M *H, L *L *P}, {P * L * H, H *M *H, H *L *P}, {P * H * H, H *P *H, H *H *P}, {M * H * H, L *P *H, L *H *P}},
122   {{M * L * L, H *M *L, H *L *M}, {P * L * L, L *M *L, L *L *M}, {P * H * L, L *P *L, L *H *M}, {M * H * L, H *P *L, H *H *M}, {M * L * H, H *M *H, H *L *P}, {P * L * H, L *M *H, L *L *P}, {P * H * H, L *P *H, L *H *P}, {M * H * H, H *P *H, H *H *P}}
123 };
124 static const PetscReal (*HexQInterp)[8], (*HexQDeriv)[8][3];
125 /* Standard 2x2 Gauss quadrature for the bottom layer. */
126 static const PetscReal QuadQInterp[4][4] = {
127   {H * H, L *H, L *L, H *L},
128   {L * H, H *H, H *L, L *L},
129   {L * L, H *L, H *H, L *H},
130   {H * L, L *L, L *H, H *H}
131 };
132 static const PetscReal QuadQDeriv[4][4][2] = {
133   {{M * H, M *H}, {P * H, M *L}, {P * L, P *L}, {M * L, P *H}},
134   {{M * H, M *L}, {P * H, M *H}, {P * L, P *H}, {M * L, P *L}},
135   {{M * L, M *L}, {P * L, M *H}, {P * H, P *H}, {M * H, P *L}},
136   {{M * L, M *H}, {P * L, M *L}, {P * H, P *L}, {M * H, P *H}}
137 };
138 #undef G
139 #undef H
140 #undef L
141 #undef M
142 #undef P
143 
144 #define HexExtract(x, i, j, k, n) \
145   do { \
146     (n)[0] = (x)[i][j][k]; \
147     (n)[1] = (x)[i + 1][j][k]; \
148     (n)[2] = (x)[i + 1][j + 1][k]; \
149     (n)[3] = (x)[i][j + 1][k]; \
150     (n)[4] = (x)[i][j][k + 1]; \
151     (n)[5] = (x)[i + 1][j][k + 1]; \
152     (n)[6] = (x)[i + 1][j + 1][k + 1]; \
153     (n)[7] = (x)[i][j + 1][k + 1]; \
154   } while (0)
155 
156 #define HexExtractRef(x, i, j, k, n) \
157   do { \
158     (n)[0] = &(x)[i][j][k]; \
159     (n)[1] = &(x)[i + 1][j][k]; \
160     (n)[2] = &(x)[i + 1][j + 1][k]; \
161     (n)[3] = &(x)[i][j + 1][k]; \
162     (n)[4] = &(x)[i][j][k + 1]; \
163     (n)[5] = &(x)[i + 1][j][k + 1]; \
164     (n)[6] = &(x)[i + 1][j + 1][k + 1]; \
165     (n)[7] = &(x)[i][j + 1][k + 1]; \
166   } while (0)
167 
168 #define QuadExtract(x, i, j, n) \
169   do { \
170     (n)[0] = (x)[i][j]; \
171     (n)[1] = (x)[i + 1][j]; \
172     (n)[2] = (x)[i + 1][j + 1]; \
173     (n)[3] = (x)[i][j + 1]; \
174   } while (0)
175 
176 static PetscScalar Sqr(PetscScalar a)
177 {
178   return a * a;
179 }
180 
181 static void HexGrad(const PetscReal dphi[][3], const PetscReal zn[], PetscReal dz[])
182 {
183   PetscInt i;
184   dz[0] = dz[1] = dz[2] = 0;
185   for (i = 0; i < 8; i++) {
186     dz[0] += dphi[i][0] * zn[i];
187     dz[1] += dphi[i][1] * zn[i];
188     dz[2] += dphi[i][2] * zn[i];
189   }
190 }
191 
192 static void HexComputeGeometry(PetscInt q, PetscReal hx, PetscReal hy, const PetscReal dz[restrict], PetscReal phi[restrict], PetscReal dphi[restrict][3], PetscReal *restrict jw)
193 {
194   const PetscReal jac[3][3] =
195     {
196       {hx / 2, 0,      0    },
197       {0,      hy / 2, 0    },
198       {dz[0],  dz[1],  dz[2]}
199   },
200                   ijac[3][3] = {{1 / jac[0][0], 0, 0}, {0, 1 / jac[1][1], 0}, {-jac[2][0] / (jac[0][0] * jac[2][2]), -jac[2][1] / (jac[1][1] * jac[2][2]), 1 / jac[2][2]}}, jdet = jac[0][0] * jac[1][1] * jac[2][2];
201   PetscInt i;
202 
203   for (i = 0; i < 8; i++) {
204     const PetscReal *dphir = HexQDeriv[q][i];
205     phi[i]                 = HexQInterp[q][i];
206     dphi[i][0]             = dphir[0] * ijac[0][0] + dphir[1] * ijac[1][0] + dphir[2] * ijac[2][0];
207     dphi[i][1]             = dphir[0] * ijac[0][1] + dphir[1] * ijac[1][1] + dphir[2] * ijac[2][1];
208     dphi[i][2]             = dphir[0] * ijac[0][2] + dphir[1] * ijac[1][2] + dphir[2] * ijac[2][2];
209   }
210   *jw = 1.0 * jdet;
211 }
212 
213 typedef struct _p_THI   *THI;
214 typedef struct _n_Units *Units;
215 
216 typedef struct {
217   PetscScalar u, v;
218 } Node;
219 
220 typedef struct {
221   PetscScalar b;     /* bed */
222   PetscScalar h;     /* thickness */
223   PetscScalar beta2; /* friction */
224 } PrmNode;
225 
226 #define FieldSize(ntype)             ((PetscInt)(sizeof(ntype) / sizeof(PetscScalar)))
227 #define FieldOffset(ntype, member)   ((PetscInt)(offsetof(ntype, member) / sizeof(PetscScalar)))
228 #define FieldIndex(ntype, i, member) ((PetscInt)((i)*FieldSize(ntype) + FieldOffset(ntype, member)))
229 #define NODE_SIZE                    FieldSize(Node)
230 #define PRMNODE_SIZE                 FieldSize(PrmNode)
231 
232 typedef struct {
233   PetscReal min, max, cmin, cmax;
234 } PRange;
235 
236 struct _p_THI {
237   PETSCHEADER(int);
238   void (*initialize)(THI, PetscReal x, PetscReal y, PrmNode *p);
239   PetscInt  nlevels;
240   PetscInt  zlevels;
241   PetscReal Lx, Ly, Lz; /* Model domain */
242   PetscReal alpha;      /* Bed angle */
243   Units     units;
244   PetscReal dirichlet_scale;
245   PetscReal ssa_friction_scale;
246   PetscReal inertia;
247   PRange    eta;
248   PRange    beta2;
249   struct {
250     PetscReal Bd2, eps, exponent, glen_n;
251   } viscosity;
252   struct {
253     PetscReal irefgam, eps2, exponent;
254   } friction;
255   struct {
256     PetscReal rate, exponent, refvel;
257   } erosion;
258   PetscReal rhog;
259   PetscBool no_slip;
260   PetscBool verbose;
261   char     *mattype;
262   char     *monitor_basename;
263   PetscInt  monitor_interval;
264 };
265 
266 struct _n_Units {
267   /* fundamental */
268   PetscReal meter;
269   PetscReal kilogram;
270   PetscReal second;
271   /* derived */
272   PetscReal Pascal;
273   PetscReal year;
274 };
275 
276 static void PrmHexGetZ(const PrmNode pn[], PetscInt k, PetscInt zm, PetscReal zn[])
277 {
278   const PetscScalar zm1 = zm - 1, znl[8] = {pn[0].b + pn[0].h * (PetscScalar)k / zm1,       pn[1].b + pn[1].h * (PetscScalar)k / zm1,       pn[2].b + pn[2].h * (PetscScalar)k / zm1,       pn[3].b + pn[3].h * (PetscScalar)k / zm1,
279                                             pn[0].b + pn[0].h * (PetscScalar)(k + 1) / zm1, pn[1].b + pn[1].h * (PetscScalar)(k + 1) / zm1, pn[2].b + pn[2].h * (PetscScalar)(k + 1) / zm1, pn[3].b + pn[3].h * (PetscScalar)(k + 1) / zm1};
280   PetscInt          i;
281   for (i = 0; i < 8; i++) zn[i] = PetscRealPart(znl[i]);
282 }
283 
284 /* Compute a gradient of all the 2D fields at four quadrature points.  Output for [quadrature_point][direction].field_name */
285 static PetscErrorCode QuadComputeGrad4(const PetscReal dphi[][4][2], PetscReal hx, PetscReal hy, const PrmNode pn[4], PrmNode dp[4][2])
286 {
287   PetscInt q, i, f;
288   const PetscScalar(*restrict pg)[PRMNODE_SIZE] = (const PetscScalar(*)[PRMNODE_SIZE])pn; /* Get generic array pointers to the node */
289   PetscScalar(*restrict dpg)[2][PRMNODE_SIZE]   = (PetscScalar(*)[2][PRMNODE_SIZE])dp;
290 
291   PetscFunctionBeginUser;
292   PetscCall(PetscArrayzero(dpg, 4));
293   for (q = 0; q < 4; q++) {
294     for (i = 0; i < 4; i++) {
295       for (f = 0; f < PRMNODE_SIZE; f++) {
296         dpg[q][0][f] += dphi[q][i][0] / hx * pg[i][f];
297         dpg[q][1][f] += dphi[q][i][1] / hy * pg[i][f];
298       }
299     }
300   }
301   PetscFunctionReturn(0);
302 }
303 
304 static inline PetscReal StaggeredMidpoint2D(PetscScalar a, PetscScalar b, PetscScalar c, PetscScalar d)
305 {
306   return 0.5 * PetscRealPart(0.75 * a + 0.75 * b + 0.25 * c + 0.25 * d);
307 }
308 static inline PetscReal UpwindFlux1D(PetscReal u, PetscReal hL, PetscReal hR)
309 {
310   return (u > 0) ? hL * u : hR * u;
311 }
312 
313 #define UpwindFluxXW(x3, x2, h, i, j, k, dj) \
314   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u, x3[i - 1][j][k].u, x3[i - 1][j + dj][k].u, x3[i][k + dj][k].u), PetscRealPart(0.75 * x2[i - 1][j].h + 0.25 * x2[i - 1][j + dj].h), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i][j + dj].h))
315 #define UpwindFluxXE(x3, x2, h, i, j, k, dj) \
316   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u, x3[i + 1][j][k].u, x3[i + 1][j + dj][k].u, x3[i][k + dj][k].u), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i][j + dj].h), PetscRealPart(0.75 * x2[i + 1][j].h + 0.25 * x2[i + 1][j + dj].h))
317 #define UpwindFluxYS(x3, x2, h, i, j, k, di) \
318   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v, x3[i][j - 1][k].v, x3[i + di][j - 1][k].v, x3[i + di][j][k].v), PetscRealPart(0.75 * x2[i][j - 1].h + 0.25 * x2[i + di][j - 1].h), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i + di][j].h))
319 #define UpwindFluxYN(x3, x2, h, i, j, k, di) \
320   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v, x3[i][j + 1][k].v, x3[i + di][j + 1][k].v, x3[i + di][j][k].v), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i + di][j].h), PetscRealPart(0.75 * x2[i][j + 1].h + 0.25 * x2[i + di][j + 1].h))
321 
322 static void PrmNodeGetFaceMeasure(const PrmNode **p, PetscInt i, PetscInt j, PetscScalar h[])
323 {
324   /* West */
325   h[0] = StaggeredMidpoint2D(p[i][j].h, p[i - 1][j].h, p[i - 1][j - 1].h, p[i][j - 1].h);
326   h[1] = StaggeredMidpoint2D(p[i][j].h, p[i - 1][j].h, p[i - 1][j + 1].h, p[i][j + 1].h);
327   /* East */
328   h[2] = StaggeredMidpoint2D(p[i][j].h, p[i + 1][j].h, p[i + 1][j + 1].h, p[i][j + 1].h);
329   h[3] = StaggeredMidpoint2D(p[i][j].h, p[i + 1][j].h, p[i + 1][j - 1].h, p[i][j - 1].h);
330   /* South */
331   h[4] = StaggeredMidpoint2D(p[i][j].h, p[i][j - 1].h, p[i + 1][j - 1].h, p[i + 1][j].h);
332   h[5] = StaggeredMidpoint2D(p[i][j].h, p[i][j - 1].h, p[i - 1][j - 1].h, p[i - 1][j].h);
333   /* North */
334   h[6] = StaggeredMidpoint2D(p[i][j].h, p[i][j + 1].h, p[i - 1][j + 1].h, p[i - 1][j].h);
335   h[7] = StaggeredMidpoint2D(p[i][j].h, p[i][j + 1].h, p[i + 1][j + 1].h, p[i + 1][j].h);
336 }
337 
338 /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
339 static void THIInitialize_HOM_A(THI thi, PetscReal x, PetscReal y, PrmNode *p)
340 {
341   Units     units = thi->units;
342   PetscReal s     = -x * PetscSinReal(thi->alpha);
343   p->b            = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly);
344   p->h            = s - p->b;
345   p->beta2        = -1e-10; /* This value is not used, but it should not be huge because that would change the finite difference step size  */
346 }
347 
348 static void THIInitialize_HOM_C(THI thi, PetscReal x, PetscReal y, PrmNode *p)
349 {
350   Units     units = thi->units;
351   PetscReal s     = -x * PetscSinReal(thi->alpha);
352   p->b            = s - 1000 * units->meter;
353   p->h            = s - p->b;
354   /* tau_b = beta2 v   is a stress (Pa).
355    * This is a big number in our units (it needs to balance the driving force from the surface), so we scale it by 1/rhog, just like the residual. */
356   p->beta2 = 1000 * (1 + PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly)) * units->Pascal * units->year / units->meter / thi->rhog;
357 }
358 
359 /* These are just toys */
360 
361 /* From Fred Herman */
362 static void THIInitialize_HOM_F(THI thi, PetscReal x, PetscReal y, PrmNode *p)
363 {
364   Units     units = thi->units;
365   PetscReal s     = -x * PetscSinReal(thi->alpha);
366   p->b            = s - 1000 * units->meter + 100 * units->meter * PetscSinReal(x * 2 * PETSC_PI / thi->Lx); /* * sin(y*2*PETSC_PI/thi->Ly); */
367   p->h            = s - p->b;
368   p->h            = (1 - (atan((x - thi->Lx / 2) / 1.) + PETSC_PI / 2.) / PETSC_PI) * 500 * units->meter + 1 * units->meter;
369   s               = PetscRealPart(p->b + p->h);
370   p->beta2        = -1e-10;
371   /*  p->beta2 = 1000 * units->Pascal * units->year / units->meter; */
372 }
373 
374 /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
375 static void THIInitialize_HOM_X(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
376 {
377   Units     units = thi->units;
378   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
379   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
380   p->b     = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
381   p->h     = s - p->b;
382   p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter / thi->rhog;
383 }
384 
385 /* Like Z, but with 200 meter cliffs */
386 static void THIInitialize_HOM_Y(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
387 {
388   Units     units = thi->units;
389   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
390   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
391   p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
392   if (PetscRealPart(p->b) > -700 * units->meter) p->b += 200 * units->meter;
393   p->h     = s - p->b;
394   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter / thi->rhog;
395 }
396 
397 /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
398 static void THIInitialize_HOM_Z(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
399 {
400   Units     units = thi->units;
401   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
402   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
403   p->b     = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
404   p->h     = s - p->b;
405   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter / thi->rhog;
406 }
407 
408 static void THIFriction(THI thi, PetscReal rbeta2, PetscReal gam, PetscReal *beta2, PetscReal *dbeta2)
409 {
410   if (thi->friction.irefgam == 0) {
411     Units units           = thi->units;
412     thi->friction.irefgam = 1. / (0.5 * PetscSqr(100 * units->meter / units->year));
413     thi->friction.eps2    = 0.5 * PetscSqr(1.e-4 / thi->friction.irefgam);
414   }
415   if (thi->friction.exponent == 0) {
416     *beta2  = rbeta2;
417     *dbeta2 = 0;
418   } else {
419     *beta2  = rbeta2 * PetscPowReal(thi->friction.eps2 + gam * thi->friction.irefgam, thi->friction.exponent);
420     *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam * thi->friction.irefgam) * thi->friction.irefgam;
421   }
422 }
423 
424 static void THIViscosity(THI thi, PetscReal gam, PetscReal *eta, PetscReal *deta)
425 {
426   PetscReal Bd2, eps, exponent;
427   if (thi->viscosity.Bd2 == 0) {
428     Units           units   = thi->units;
429     const PetscReal n       = thi->viscosity.glen_n,                                  /* Glen exponent */
430       p                     = 1. + 1. / n,                                            /* for Stokes */
431       A                     = 1.e-16 * PetscPowReal(units->Pascal, -n) / units->year, /* softness parameter (Pa^{-n}/s) */
432       B                     = PetscPowReal(A, -1. / n);                               /* hardness parameter */
433     thi->viscosity.Bd2      = B / 2;
434     thi->viscosity.exponent = (p - 2) / 2;
435     thi->viscosity.eps      = 0.5 * PetscSqr(1e-5 / units->year);
436   }
437   Bd2      = thi->viscosity.Bd2;
438   exponent = thi->viscosity.exponent;
439   eps      = thi->viscosity.eps;
440   *eta     = Bd2 * PetscPowReal(eps + gam, exponent);
441   *deta    = exponent * (*eta) / (eps + gam);
442 }
443 
444 static void THIErosion(THI thi, const Node *vel, PetscScalar *erate, Node *derate)
445 {
446   const PetscScalar magref2 = 1.e-10 + (PetscSqr(vel->u) + PetscSqr(vel->v)) / PetscSqr(thi->erosion.refvel), rate = -thi->erosion.rate * PetscPowScalar(magref2, 0.5 * thi->erosion.exponent);
447   if (erate) *erate = rate;
448   if (derate) {
449     if (thi->erosion.exponent == 1) {
450       derate->u = 0;
451       derate->v = 0;
452     } else {
453       derate->u = 0.5 * thi->erosion.exponent * rate / magref2 * 2. * vel->u / PetscSqr(thi->erosion.refvel);
454       derate->v = 0.5 * thi->erosion.exponent * rate / magref2 * 2. * vel->v / PetscSqr(thi->erosion.refvel);
455     }
456   }
457 }
458 
459 static void RangeUpdate(PetscReal *min, PetscReal *max, PetscReal x)
460 {
461   if (x < *min) *min = x;
462   if (x > *max) *max = x;
463 }
464 
465 static void PRangeClear(PRange *p)
466 {
467   p->cmin = p->min = 1e100;
468   p->cmax = p->max = -1e100;
469 }
470 
471 static PetscErrorCode PRangeMinMax(PRange *p, PetscReal min, PetscReal max)
472 {
473   PetscFunctionBeginUser;
474   p->cmin = min;
475   p->cmax = max;
476   if (min < p->min) p->min = min;
477   if (max > p->max) p->max = max;
478   PetscFunctionReturn(0);
479 }
480 
481 static PetscErrorCode THIDestroy(THI *thi)
482 {
483   PetscFunctionBeginUser;
484   if (--((PetscObject)(*thi))->refct > 0) PetscFunctionReturn(0);
485   PetscCall(PetscFree((*thi)->units));
486   PetscCall(PetscFree((*thi)->mattype));
487   PetscCall(PetscFree((*thi)->monitor_basename));
488   PetscCall(PetscHeaderDestroy(thi));
489   PetscFunctionReturn(0);
490 }
491 
492 static PetscErrorCode THICreate(MPI_Comm comm, THI *inthi)
493 {
494   static PetscBool registered = PETSC_FALSE;
495   THI              thi;
496   Units            units;
497   char             monitor_basename[PETSC_MAX_PATH_LEN] = "thi-";
498   PetscErrorCode   ierr;
499 
500   PetscFunctionBeginUser;
501   *inthi = 0;
502   if (!registered) {
503     PetscCall(PetscClassIdRegister("Toy Hydrostatic Ice", &THI_CLASSID));
504     registered = PETSC_TRUE;
505   }
506   PetscCall(PetscHeaderCreate(thi, THI_CLASSID, "THI", "Toy Hydrostatic Ice", "THI", comm, THIDestroy, 0));
507 
508   PetscCall(PetscNew(&thi->units));
509 
510   units           = thi->units;
511   units->meter    = 1e-2;
512   units->second   = 1e-7;
513   units->kilogram = 1e-12;
514 
515   PetscOptionsBegin(comm, NULL, "Scaled units options", "");
516   {
517     PetscCall(PetscOptionsReal("-units_meter", "1 meter in scaled length units", "", units->meter, &units->meter, NULL));
518     PetscCall(PetscOptionsReal("-units_second", "1 second in scaled time units", "", units->second, &units->second, NULL));
519     PetscCall(PetscOptionsReal("-units_kilogram", "1 kilogram in scaled mass units", "", units->kilogram, &units->kilogram, NULL));
520   }
521   PetscOptionsEnd();
522   units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
523   units->year   = 31556926. * units->second, /* seconds per year */
524 
525     thi->Lx            = 10.e3;
526   thi->Ly              = 10.e3;
527   thi->Lz              = 1000;
528   thi->nlevels         = 1;
529   thi->dirichlet_scale = 1;
530   thi->verbose         = PETSC_FALSE;
531 
532   thi->viscosity.glen_n = 3.;
533   thi->erosion.rate     = 1e-3; /* m/a */
534   thi->erosion.exponent = 1.;
535   thi->erosion.refvel   = 1.; /* m/a */
536 
537   PetscOptionsBegin(comm, NULL, "Toy Hydrostatic Ice options", "");
538   {
539     QuadratureType quad       = QUAD_GAUSS;
540     char           homexp[]   = "A";
541     char           mtype[256] = MATSBAIJ;
542     PetscReal      L, m = 1.0;
543     PetscBool      flg;
544     L = thi->Lx;
545     PetscCall(PetscOptionsReal("-thi_L", "Domain size (m)", "", L, &L, &flg));
546     if (flg) thi->Lx = thi->Ly = L;
547     PetscCall(PetscOptionsReal("-thi_Lx", "X Domain size (m)", "", thi->Lx, &thi->Lx, NULL));
548     PetscCall(PetscOptionsReal("-thi_Ly", "Y Domain size (m)", "", thi->Ly, &thi->Ly, NULL));
549     PetscCall(PetscOptionsReal("-thi_Lz", "Z Domain size (m)", "", thi->Lz, &thi->Lz, NULL));
550     PetscCall(PetscOptionsString("-thi_hom", "ISMIP-HOM experiment (A or C)", "", homexp, homexp, sizeof(homexp), NULL));
551     switch (homexp[0] = toupper(homexp[0])) {
552     case 'A':
553       thi->initialize = THIInitialize_HOM_A;
554       thi->no_slip    = PETSC_TRUE;
555       thi->alpha      = 0.5;
556       break;
557     case 'C':
558       thi->initialize = THIInitialize_HOM_C;
559       thi->no_slip    = PETSC_FALSE;
560       thi->alpha      = 0.1;
561       break;
562     case 'F':
563       thi->initialize = THIInitialize_HOM_F;
564       thi->no_slip    = PETSC_FALSE;
565       thi->alpha      = 0.5;
566       break;
567     case 'X':
568       thi->initialize = THIInitialize_HOM_X;
569       thi->no_slip    = PETSC_FALSE;
570       thi->alpha      = 0.3;
571       break;
572     case 'Y':
573       thi->initialize = THIInitialize_HOM_Y;
574       thi->no_slip    = PETSC_FALSE;
575       thi->alpha      = 0.5;
576       break;
577     case 'Z':
578       thi->initialize = THIInitialize_HOM_Z;
579       thi->no_slip    = PETSC_FALSE;
580       thi->alpha      = 0.5;
581       break;
582     default:
583       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "HOM experiment '%c' not implemented", homexp[0]);
584     }
585     PetscCall(PetscOptionsEnum("-thi_quadrature", "Quadrature to use for 3D elements", "", QuadratureTypes, (PetscEnum)quad, (PetscEnum *)&quad, NULL));
586     switch (quad) {
587     case QUAD_GAUSS:
588       HexQInterp = HexQInterp_Gauss;
589       HexQDeriv  = HexQDeriv_Gauss;
590       break;
591     case QUAD_LOBATTO:
592       HexQInterp = HexQInterp_Lobatto;
593       HexQDeriv  = HexQDeriv_Lobatto;
594       break;
595     }
596     PetscCall(PetscOptionsReal("-thi_alpha", "Bed angle (degrees)", "", thi->alpha, &thi->alpha, NULL));
597     PetscCall(PetscOptionsReal("-thi_viscosity_glen_n", "Exponent in Glen flow law, 1=linear, infty=ideal plastic", NULL, thi->viscosity.glen_n, &thi->viscosity.glen_n, NULL));
598     PetscCall(PetscOptionsReal("-thi_friction_m", "Friction exponent, 0=Coulomb, 1=Navier", "", m, &m, NULL));
599     thi->friction.exponent = (m - 1) / 2;
600     PetscCall(PetscOptionsReal("-thi_erosion_rate", "Rate of erosion relative to sliding velocity at reference velocity (m/a)", NULL, thi->erosion.rate, &thi->erosion.rate, NULL));
601     PetscCall(PetscOptionsReal("-thi_erosion_exponent", "Power of sliding velocity appearing in erosion relation", NULL, thi->erosion.exponent, &thi->erosion.exponent, NULL));
602     PetscCall(PetscOptionsReal("-thi_erosion_refvel", "Reference sliding velocity for erosion (m/a)", NULL, thi->erosion.refvel, &thi->erosion.refvel, NULL));
603     thi->erosion.rate *= units->meter / units->year;
604     thi->erosion.refvel *= units->meter / units->year;
605     PetscCall(PetscOptionsReal("-thi_dirichlet_scale", "Scale Dirichlet boundary conditions by this factor", "", thi->dirichlet_scale, &thi->dirichlet_scale, NULL));
606     PetscCall(PetscOptionsReal("-thi_ssa_friction_scale", "Scale slip boundary conditions by this factor in SSA (2D) assembly", "", thi->ssa_friction_scale, &thi->ssa_friction_scale, NULL));
607     PetscCall(PetscOptionsReal("-thi_inertia", "Coefficient of accelaration term in velocity system, physical is almost zero", NULL, thi->inertia, &thi->inertia, NULL));
608     PetscCall(PetscOptionsInt("-thi_nlevels", "Number of levels of refinement", "", thi->nlevels, &thi->nlevels, NULL));
609     PetscCall(PetscOptionsFList("-thi_mat_type", "Matrix type", "MatSetType", MatList, mtype, (char *)mtype, sizeof(mtype), NULL));
610     PetscCall(PetscStrallocpy(mtype, &thi->mattype));
611     PetscCall(PetscOptionsBool("-thi_verbose", "Enable verbose output (like matrix sizes and statistics)", "", thi->verbose, &thi->verbose, NULL));
612     PetscCall(PetscOptionsString("-thi_monitor", "Basename to write state files to", NULL, monitor_basename, monitor_basename, sizeof(monitor_basename), &flg));
613     if (flg) {
614       PetscCall(PetscStrallocpy(monitor_basename, &thi->monitor_basename));
615       thi->monitor_interval = 1;
616       PetscCall(PetscOptionsInt("-thi_monitor_interval", "Frequency at which to write state files", NULL, thi->monitor_interval, &thi->monitor_interval, NULL));
617     }
618   }
619   PetscOptionsEnd();
620 
621   /* dimensionalize */
622   thi->Lx *= units->meter;
623   thi->Ly *= units->meter;
624   thi->Lz *= units->meter;
625   thi->alpha *= PETSC_PI / 180;
626 
627   PRangeClear(&thi->eta);
628   PRangeClear(&thi->beta2);
629 
630   {
631     PetscReal u = 1000 * units->meter / (3e7 * units->second), gradu = u / (100 * units->meter), eta, deta, rho = 910 * units->kilogram / PetscPowRealInt(units->meter, 3), grav = 9.81 * units->meter / PetscSqr(units->second),
632               driving = rho * grav * PetscSinReal(thi->alpha) * 1000 * units->meter;
633     THIViscosity(thi, 0.5 * gradu * gradu, &eta, &deta);
634     thi->rhog = rho * grav;
635     if (thi->verbose) {
636       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Units: meter %8.2g  second %8.2g  kg %8.2g  Pa %8.2g\n", (double)units->meter, (double)units->second, (double)units->kilogram, (double)units->Pascal));
637       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n", (double)thi->Lx, (double)thi->Ly, (double)thi->Lz, (double)(rho * grav * 1e3 * units->meter), (double)driving));
638       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)u, (double)gradu, (double)eta, (double)(2 * eta * gradu, 2 * eta * gradu / driving)));
639       THIViscosity(thi, 0.5 * PetscSqr(1e-3 * gradu), &eta, &deta);
640       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Small velocity 1m/a  %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)(1e-3 * u), (double)(1e-3 * gradu), (double)eta, (double)(2 * eta * 1e-3 * gradu, 2 * eta * 1e-3 * gradu / driving)));
641     }
642   }
643 
644   *inthi = thi;
645   PetscFunctionReturn(0);
646 }
647 
648 /* Our problem is periodic, but the domain has a mean slope of alpha so the bed does not line up between the upstream
649  * and downstream ends of the domain.  This function fixes the ghost values so that the domain appears truly periodic in
650  * the horizontal. */
651 static PetscErrorCode THIFixGhosts(THI thi, DM da3, DM da2, Vec X3, Vec X2)
652 {
653   DMDALocalInfo info;
654   PrmNode     **x2;
655   PetscInt      i, j;
656 
657   PetscFunctionBeginUser;
658   PetscCall(DMDAGetLocalInfo(da3, &info));
659   /* PetscCall(VecView(X2,PETSC_VIEWER_STDOUT_WORLD)); */
660   PetscCall(DMDAVecGetArray(da2, X2, &x2));
661   for (i = info.gzs; i < info.gzs + info.gzm; i++) {
662     if (i > -1 && i < info.mz) continue;
663     for (j = info.gys; j < info.gys + info.gym; j++) x2[i][j].b += PetscSinReal(thi->alpha) * thi->Lx * (i < 0 ? 1.0 : -1.0);
664   }
665   PetscCall(DMDAVecRestoreArray(da2, X2, &x2));
666   /* PetscCall(VecView(X2,PETSC_VIEWER_STDOUT_WORLD)); */
667   PetscFunctionReturn(0);
668 }
669 
670 static PetscErrorCode THIInitializePrm(THI thi, DM da2prm, PrmNode **p)
671 {
672   PetscInt i, j, xs, xm, ys, ym, mx, my;
673 
674   PetscFunctionBeginUser;
675   PetscCall(DMDAGetGhostCorners(da2prm, &ys, &xs, 0, &ym, &xm, 0));
676   PetscCall(DMDAGetInfo(da2prm, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
677   for (i = xs; i < xs + xm; i++) {
678     for (j = ys; j < ys + ym; j++) {
679       PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my;
680       thi->initialize(thi, xx, yy, &p[i][j]);
681     }
682   }
683   PetscFunctionReturn(0);
684 }
685 
686 static PetscErrorCode THIInitial(THI thi, DM pack, Vec X)
687 {
688   DM        da3, da2;
689   PetscInt  i, j, k, xs, xm, ys, ym, zs, zm, mx, my;
690   PetscReal hx, hy;
691   PrmNode **prm;
692   Node   ***x;
693   Vec       X3g, X2g, X2;
694 
695   PetscFunctionBeginUser;
696   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
697   PetscCall(DMCompositeGetAccess(pack, X, &X3g, &X2g));
698   PetscCall(DMGetLocalVector(da2, &X2));
699 
700   PetscCall(DMDAGetInfo(da3, 0, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
701   PetscCall(DMDAGetCorners(da3, &zs, &ys, &xs, &zm, &ym, &xm));
702   PetscCall(DMDAVecGetArray(da3, X3g, &x));
703   PetscCall(DMDAVecGetArray(da2, X2, &prm));
704 
705   PetscCall(THIInitializePrm(thi, da2, prm));
706 
707   hx = thi->Lx / mx;
708   hy = thi->Ly / my;
709   for (i = xs; i < xs + xm; i++) {
710     for (j = ys; j < ys + ym; j++) {
711       for (k = zs; k < zs + zm; k++) {
712         const PetscScalar zm1 = zm - 1, drivingx = thi->rhog * (prm[i + 1][j].b + prm[i + 1][j].h - prm[i - 1][j].b - prm[i - 1][j].h) / (2 * hx), drivingy = thi->rhog * (prm[i][j + 1].b + prm[i][j + 1].h - prm[i][j - 1].b - prm[i][j - 1].h) / (2 * hy);
713         x[i][j][k].u = 0. * drivingx * prm[i][j].h * (PetscScalar)k / zm1;
714         x[i][j][k].v = 0. * drivingy * prm[i][j].h * (PetscScalar)k / zm1;
715       }
716     }
717   }
718 
719   PetscCall(DMDAVecRestoreArray(da3, X3g, &x));
720   PetscCall(DMDAVecRestoreArray(da2, X2, &prm));
721 
722   PetscCall(DMLocalToGlobalBegin(da2, X2, INSERT_VALUES, X2g));
723   PetscCall(DMLocalToGlobalEnd(da2, X2, INSERT_VALUES, X2g));
724   PetscCall(DMRestoreLocalVector(da2, &X2));
725 
726   PetscCall(DMCompositeRestoreAccess(pack, X, &X3g, &X2g));
727   PetscFunctionReturn(0);
728 }
729 
730 static void PointwiseNonlinearity(THI thi, const Node n[restrict 8], const PetscReal phi[restrict 3], PetscReal dphi[restrict 8][3], PetscScalar *restrict u, PetscScalar *restrict v, PetscScalar du[restrict 3], PetscScalar dv[restrict 3], PetscReal *eta, PetscReal *deta)
731 {
732   PetscInt    l, ll;
733   PetscScalar gam;
734 
735   du[0] = du[1] = du[2] = 0;
736   dv[0] = dv[1] = dv[2] = 0;
737   *u                    = 0;
738   *v                    = 0;
739   for (l = 0; l < 8; l++) {
740     *u += phi[l] * n[l].u;
741     *v += phi[l] * n[l].v;
742     for (ll = 0; ll < 3; ll++) {
743       du[ll] += dphi[l][ll] * n[l].u;
744       dv[ll] += dphi[l][ll] * n[l].v;
745     }
746   }
747   gam = Sqr(du[0]) + Sqr(dv[1]) + du[0] * dv[1] + 0.25 * Sqr(du[1] + dv[0]) + 0.25 * Sqr(du[2]) + 0.25 * Sqr(dv[2]);
748   THIViscosity(thi, PetscRealPart(gam), eta, deta);
749 }
750 
751 static PetscErrorCode THIFunctionLocal_3D(DMDALocalInfo *info, const Node ***x, const PrmNode **prm, const Node ***xdot, Node ***f, THI thi)
752 {
753   PetscInt  xs, ys, xm, ym, zm, i, j, k, q, l;
754   PetscReal hx, hy, etamin, etamax, beta2min, beta2max;
755 
756   PetscFunctionBeginUser;
757   xs = info->zs;
758   ys = info->ys;
759   xm = info->zm;
760   ym = info->ym;
761   zm = info->xm;
762   hx = thi->Lx / info->mz;
763   hy = thi->Ly / info->my;
764 
765   etamin   = 1e100;
766   etamax   = 0;
767   beta2min = 1e100;
768   beta2max = 0;
769 
770   for (i = xs; i < xs + xm; i++) {
771     for (j = ys; j < ys + ym; j++) {
772       PrmNode pn[4], dpn[4][2];
773       QuadExtract(prm, i, j, pn);
774       PetscCall(QuadComputeGrad4(QuadQDeriv, hx, hy, pn, dpn));
775       for (k = 0; k < zm - 1; k++) {
776         PetscInt  ls = 0;
777         Node      n[8], ndot[8], *fn[8];
778         PetscReal zn[8], etabase = 0;
779 
780         PrmHexGetZ(pn, k, zm, zn);
781         HexExtract(x, i, j, k, n);
782         HexExtract(xdot, i, j, k, ndot);
783         HexExtractRef(f, i, j, k, fn);
784         if (thi->no_slip && k == 0) {
785           for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
786           /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
787           ls = 4;
788         }
789         for (q = 0; q < 8; q++) {
790           PetscReal   dz[3], phi[8], dphi[8][3], jw, eta, deta;
791           PetscScalar du[3], dv[3], u, v, udot = 0, vdot = 0;
792           for (l = ls; l < 8; l++) {
793             udot += HexQInterp[q][l] * ndot[l].u;
794             vdot += HexQInterp[q][l] * ndot[l].v;
795           }
796           HexGrad(HexQDeriv[q], zn, dz);
797           HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
798           PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
799           jw /= thi->rhog; /* scales residuals to be O(1) */
800           if (q == 0) etabase = eta;
801           RangeUpdate(&etamin, &etamax, eta);
802           for (l = ls; l < 8; l++) { /* test functions */
803             const PetscScalar ds[2] = {dpn[q % 4][0].h + dpn[q % 4][0].b, dpn[q % 4][1].h + dpn[q % 4][1].b};
804             const PetscReal   pp = phi[l], *dp = dphi[l];
805             fn[l]->u += dp[0] * jw * eta * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * du[2] + pp * jw * thi->rhog * ds[0];
806             fn[l]->v += dp[1] * jw * eta * (2. * du[0] + 4. * dv[1]) + dp[0] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * dv[2] + pp * jw * thi->rhog * ds[1];
807             fn[l]->u += pp * jw * udot * thi->inertia * pp;
808             fn[l]->v += pp * jw * vdot * thi->inertia * pp;
809           }
810         }
811         if (k == 0) { /* we are on a bottom face */
812           if (thi->no_slip) {
813             /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
814             * conditions.  After shenanigans above, etabase contains the effective viscosity at the closest quadrature
815             * point to the bed.  We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
816             * diagonal entry corresponding to the adjacent node.  The fundamental scaling of the viscous part is in
817             * diagu, diagv below.  This scaling is easy to recognize by considering the finite difference operator after
818             * scaling by element size.  The no-slip Dirichlet condition is scaled by this factor, and also in the
819             * assembled matrix (see the similar block in THIJacobianLocal).
820             *
821             * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
822             * (nonlinearly in general) on the neighboring interior nodes through the local viscosity.  This will make
823             * a matrix-free Jacobian have extra entries in the corresponding row.  We assemble only the diagonal part,
824             * so the solution will exactly satisfy the boundary condition after the first linear iteration.
825             */
826             const PetscReal   hz    = PetscRealPart(pn[0].h) / (zm - 1.);
827             const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
828             fn[0]->u = thi->dirichlet_scale * diagu * x[i][j][k].u;
829             fn[0]->v = thi->dirichlet_scale * diagv * x[i][j][k].v;
830           } else {                    /* Integrate over bottom face to apply boundary condition */
831             for (q = 0; q < 4; q++) { /* We remove the explicit scaling of the residual by 1/rhog because beta2 already has that scaling to be O(1) */
832               const PetscReal jw = 0.25 * hx * hy, *phi = QuadQInterp[q];
833               PetscScalar     u = 0, v = 0, rbeta2 = 0;
834               PetscReal       beta2, dbeta2;
835               for (l = 0; l < 4; l++) {
836                 u += phi[l] * n[l].u;
837                 v += phi[l] * n[l].v;
838                 rbeta2 += phi[l] * pn[l].beta2;
839               }
840               THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
841               RangeUpdate(&beta2min, &beta2max, beta2);
842               for (l = 0; l < 4; l++) {
843                 const PetscReal pp = phi[l];
844                 fn[ls + l]->u += pp * jw * beta2 * u;
845                 fn[ls + l]->v += pp * jw * beta2 * v;
846               }
847             }
848           }
849         }
850       }
851     }
852   }
853 
854   PetscCall(PRangeMinMax(&thi->eta, etamin, etamax));
855   PetscCall(PRangeMinMax(&thi->beta2, beta2min, beta2max));
856   PetscFunctionReturn(0);
857 }
858 
859 static PetscErrorCode THIFunctionLocal_2D(DMDALocalInfo *info, const Node ***x, const PrmNode **prm, const PrmNode **prmdot, PrmNode **f, THI thi)
860 {
861   PetscInt xs, ys, xm, ym, zm, i, j, k;
862 
863   PetscFunctionBeginUser;
864   xs = info->zs;
865   ys = info->ys;
866   xm = info->zm;
867   ym = info->ym;
868   zm = info->xm;
869 
870   for (i = xs; i < xs + xm; i++) {
871     for (j = ys; j < ys + ym; j++) {
872       PetscScalar div = 0, erate, h[8];
873       PrmNodeGetFaceMeasure(prm, i, j, h);
874       for (k = 0; k < zm; k++) {
875         PetscScalar weight = (k == 0 || k == zm - 1) ? 0.5 / (zm - 1) : 1.0 / (zm - 1);
876         if (0) { /* centered flux */
877           div += (-weight * h[0] * StaggeredMidpoint2D(x[i][j][k].u, x[i - 1][j][k].u, x[i - 1][j - 1][k].u, x[i][j - 1][k].u) - weight * h[1] * StaggeredMidpoint2D(x[i][j][k].u, x[i - 1][j][k].u, x[i - 1][j + 1][k].u, x[i][j + 1][k].u) +
878                   weight * h[2] * StaggeredMidpoint2D(x[i][j][k].u, x[i + 1][j][k].u, x[i + 1][j + 1][k].u, x[i][j + 1][k].u) + weight * h[3] * StaggeredMidpoint2D(x[i][j][k].u, x[i + 1][j][k].u, x[i + 1][j - 1][k].u, x[i][j - 1][k].u) -
879                   weight * h[4] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j - 1][k].v, x[i + 1][j - 1][k].v, x[i + 1][j][k].v) - weight * h[5] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j - 1][k].v, x[i - 1][j - 1][k].v, x[i - 1][j][k].v) +
880                   weight * h[6] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j + 1][k].v, x[i - 1][j + 1][k].v, x[i - 1][j][k].v) + weight * h[7] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j + 1][k].v, x[i + 1][j + 1][k].v, x[i + 1][j][k].v));
881         } else { /* Upwind flux */
882           div += weight * (-UpwindFluxXW(x, prm, h, i, j, k, 1) - UpwindFluxXW(x, prm, h, i, j, k, -1) + UpwindFluxXE(x, prm, h, i, j, k, 1) + UpwindFluxXE(x, prm, h, i, j, k, -1) - UpwindFluxYS(x, prm, h, i, j, k, 1) - UpwindFluxYS(x, prm, h, i, j, k, -1) + UpwindFluxYN(x, prm, h, i, j, k, 1) + UpwindFluxYN(x, prm, h, i, j, k, -1));
883         }
884       }
885       /* printf("div[%d][%d] %g\n",i,j,div); */
886       THIErosion(thi, &x[i][j][0], &erate, NULL);
887       f[i][j].b     = prmdot[i][j].b - erate;
888       f[i][j].h     = prmdot[i][j].h + div;
889       f[i][j].beta2 = prmdot[i][j].beta2;
890     }
891   }
892   PetscFunctionReturn(0);
893 }
894 
895 static PetscErrorCode THIFunction(TS ts, PetscReal t, Vec X, Vec Xdot, Vec F, void *ctx)
896 {
897   THI             thi = (THI)ctx;
898   DM              pack, da3, da2;
899   Vec             X3, X2, Xdot3, Xdot2, F3, F2, F3g, F2g;
900   const Node   ***x3, ***xdot3;
901   const PrmNode **x2, **xdot2;
902   Node         ***f3;
903   PrmNode       **f2;
904   DMDALocalInfo   info3;
905 
906   PetscFunctionBeginUser;
907   PetscCall(TSGetDM(ts, &pack));
908   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
909   PetscCall(DMDAGetLocalInfo(da3, &info3));
910   PetscCall(DMCompositeGetLocalVectors(pack, &X3, &X2));
911   PetscCall(DMCompositeGetLocalVectors(pack, &Xdot3, &Xdot2));
912   PetscCall(DMCompositeScatter(pack, X, X3, X2));
913   PetscCall(THIFixGhosts(thi, da3, da2, X3, X2));
914   PetscCall(DMCompositeScatter(pack, Xdot, Xdot3, Xdot2));
915 
916   PetscCall(DMGetLocalVector(da3, &F3));
917   PetscCall(DMGetLocalVector(da2, &F2));
918   PetscCall(VecZeroEntries(F3));
919 
920   PetscCall(DMDAVecGetArray(da3, X3, &x3));
921   PetscCall(DMDAVecGetArray(da2, X2, &x2));
922   PetscCall(DMDAVecGetArray(da3, Xdot3, &xdot3));
923   PetscCall(DMDAVecGetArray(da2, Xdot2, &xdot2));
924   PetscCall(DMDAVecGetArray(da3, F3, &f3));
925   PetscCall(DMDAVecGetArray(da2, F2, &f2));
926 
927   PetscCall(THIFunctionLocal_3D(&info3, x3, x2, xdot3, f3, thi));
928   PetscCall(THIFunctionLocal_2D(&info3, x3, x2, xdot2, f2, thi));
929 
930   PetscCall(DMDAVecRestoreArray(da3, X3, &x3));
931   PetscCall(DMDAVecRestoreArray(da2, X2, &x2));
932   PetscCall(DMDAVecRestoreArray(da3, Xdot3, &xdot3));
933   PetscCall(DMDAVecRestoreArray(da2, Xdot2, &xdot2));
934   PetscCall(DMDAVecRestoreArray(da3, F3, &f3));
935   PetscCall(DMDAVecRestoreArray(da2, F2, &f2));
936 
937   PetscCall(DMCompositeRestoreLocalVectors(pack, &X3, &X2));
938   PetscCall(DMCompositeRestoreLocalVectors(pack, &Xdot3, &Xdot2));
939 
940   PetscCall(VecZeroEntries(F));
941   PetscCall(DMCompositeGetAccess(pack, F, &F3g, &F2g));
942   PetscCall(DMLocalToGlobalBegin(da3, F3, ADD_VALUES, F3g));
943   PetscCall(DMLocalToGlobalEnd(da3, F3, ADD_VALUES, F3g));
944   PetscCall(DMLocalToGlobalBegin(da2, F2, INSERT_VALUES, F2g));
945   PetscCall(DMLocalToGlobalEnd(da2, F2, INSERT_VALUES, F2g));
946 
947   if (thi->verbose) {
948     PetscViewer viewer;
949     PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)thi), &viewer));
950     PetscCall(PetscViewerASCIIPrintf(viewer, "3D_Velocity residual (bs=2):\n"));
951     PetscCall(PetscViewerASCIIPushTab(viewer));
952     PetscCall(VecView(F3, viewer));
953     PetscCall(PetscViewerASCIIPopTab(viewer));
954     PetscCall(PetscViewerASCIIPrintf(viewer, "2D_Fields residual (bs=3):\n"));
955     PetscCall(PetscViewerASCIIPushTab(viewer));
956     PetscCall(VecView(F2, viewer));
957     PetscCall(PetscViewerASCIIPopTab(viewer));
958   }
959 
960   PetscCall(DMCompositeRestoreAccess(pack, F, &F3g, &F2g));
961 
962   PetscCall(DMRestoreLocalVector(da3, &F3));
963   PetscCall(DMRestoreLocalVector(da2, &F2));
964   PetscFunctionReturn(0);
965 }
966 
967 static PetscErrorCode THIMatrixStatistics(THI thi, Mat B, PetscViewer viewer)
968 {
969   PetscReal   nrm;
970   PetscInt    m;
971   PetscMPIInt rank;
972 
973   PetscFunctionBeginUser;
974   PetscCall(MatNorm(B, NORM_FROBENIUS, &nrm));
975   PetscCall(MatGetSize(B, &m, 0));
976   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &rank));
977   if (rank == 0) {
978     PetscScalar val0, val2;
979     PetscCall(MatGetValue(B, 0, 0, &val0));
980     PetscCall(MatGetValue(B, 2, 2, &val2));
981     PetscCall(PetscViewerASCIIPrintf(viewer, "Matrix dim %8" PetscInt_FMT "  norm %8.2e, (0,0) %8.2e  (2,2) %8.2e, eta [%8.2e,%8.2e] beta2 [%8.2e,%8.2e]\n", m, (double)nrm, (double)PetscRealPart(val0), (double)PetscRealPart(val2), (double)thi->eta.cmin,
982                                      (double)thi->eta.cmax, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
983   }
984   PetscFunctionReturn(0);
985 }
986 
987 static PetscErrorCode THISurfaceStatistics(DM pack, Vec X, PetscReal *min, PetscReal *max, PetscReal *mean)
988 {
989   DM          da3, da2;
990   Vec         X3, X2;
991   Node     ***x;
992   PetscInt    i, j, xs, ys, zs, xm, ym, zm, mx, my, mz;
993   PetscReal   umin = 1e100, umax = -1e100;
994   PetscScalar usum = 0.0, gusum;
995 
996   PetscFunctionBeginUser;
997   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
998   PetscCall(DMCompositeGetAccess(pack, X, &X3, &X2));
999   *min = *max = *mean = 0;
1000   PetscCall(DMDAGetInfo(da3, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1001   PetscCall(DMDAGetCorners(da3, &zs, &ys, &xs, &zm, &ym, &xm));
1002   PetscCheck(zs == 0 && zm == mz, PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Unexpected decomposition");
1003   PetscCall(DMDAVecGetArray(da3, X3, &x));
1004   for (i = xs; i < xs + xm; i++) {
1005     for (j = ys; j < ys + ym; j++) {
1006       PetscReal u = PetscRealPart(x[i][j][zm - 1].u);
1007       RangeUpdate(&umin, &umax, u);
1008       usum += u;
1009     }
1010   }
1011   PetscCall(DMDAVecRestoreArray(da3, X3, &x));
1012   PetscCall(DMCompositeRestoreAccess(pack, X, &X3, &X2));
1013 
1014   PetscCallMPI(MPI_Allreduce(&umin, min, 1, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)da3)));
1015   PetscCallMPI(MPI_Allreduce(&umax, max, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)da3)));
1016   PetscCallMPI(MPI_Allreduce(&usum, &gusum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)da3)));
1017   *mean = PetscRealPart(gusum) / (mx * my);
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 static PetscErrorCode THISolveStatistics(THI thi, TS ts, PetscInt coarsened, const char name[])
1022 {
1023   MPI_Comm comm;
1024   DM       pack;
1025   Vec      X, X3, X2;
1026 
1027   PetscFunctionBeginUser;
1028   PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
1029   PetscCall(TSGetDM(ts, &pack));
1030   PetscCall(TSGetSolution(ts, &X));
1031   PetscCall(DMCompositeGetAccess(pack, X, &X3, &X2));
1032   PetscCall(PetscPrintf(comm, "Solution statistics after solve: %s\n", name));
1033   {
1034     PetscInt            its, lits;
1035     SNESConvergedReason reason;
1036     SNES                snes;
1037     PetscCall(TSGetSNES(ts, &snes));
1038     PetscCall(SNESGetIterationNumber(snes, &its));
1039     PetscCall(SNESGetConvergedReason(snes, &reason));
1040     PetscCall(SNESGetLinearSolveIterations(snes, &lits));
1041     PetscCall(PetscPrintf(comm, "%s: Number of SNES iterations = %" PetscInt_FMT ", total linear iterations = %" PetscInt_FMT "\n", SNESConvergedReasons[reason], its, lits));
1042   }
1043   {
1044     PetscReal    nrm2, tmin[3] = {1e100, 1e100, 1e100}, tmax[3] = {-1e100, -1e100, -1e100}, min[3], max[3];
1045     PetscInt     i, j, m;
1046     PetscScalar *x;
1047     PetscCall(VecNorm(X3, NORM_2, &nrm2));
1048     PetscCall(VecGetLocalSize(X3, &m));
1049     PetscCall(VecGetArray(X3, &x));
1050     for (i = 0; i < m; i += 2) {
1051       PetscReal u = PetscRealPart(x[i]), v = PetscRealPart(x[i + 1]), c = PetscSqrtReal(u * u + v * v);
1052       tmin[0] = PetscMin(u, tmin[0]);
1053       tmin[1] = PetscMin(v, tmin[1]);
1054       tmin[2] = PetscMin(c, tmin[2]);
1055       tmax[0] = PetscMax(u, tmax[0]);
1056       tmax[1] = PetscMax(v, tmax[1]);
1057       tmax[2] = PetscMax(c, tmax[2]);
1058     }
1059     PetscCall(VecRestoreArray(X, &x));
1060     PetscCallMPI(MPI_Allreduce(tmin, min, 3, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)thi)));
1061     PetscCallMPI(MPI_Allreduce(tmax, max, 3, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)thi)));
1062     /* Dimensionalize to meters/year */
1063     nrm2 *= thi->units->year / thi->units->meter;
1064     for (j = 0; j < 3; j++) {
1065       min[j] *= thi->units->year / thi->units->meter;
1066       max[j] *= thi->units->year / thi->units->meter;
1067     }
1068     PetscCall(PetscPrintf(comm, "|X|_2 %g   u in [%g, %g]   v in [%g, %g]   c in [%g, %g] \n", (double)nrm2, (double)min[0], (double)max[0], (double)min[1], (double)max[1], (double)min[2], (double)max[2]));
1069     {
1070       PetscReal umin, umax, umean;
1071       PetscCall(THISurfaceStatistics(pack, X, &umin, &umax, &umean));
1072       umin *= thi->units->year / thi->units->meter;
1073       umax *= thi->units->year / thi->units->meter;
1074       umean *= thi->units->year / thi->units->meter;
1075       PetscCall(PetscPrintf(comm, "Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n", (double)umin, (double)umax, (double)umean));
1076     }
1077     /* These values stay nondimensional */
1078     PetscCall(PetscPrintf(comm, "Global eta range   [%g, %g], converged range [%g, %g]\n", (double)thi->eta.min, (double)thi->eta.max, (double)thi->eta.cmin, (double)thi->eta.cmax));
1079     PetscCall(PetscPrintf(comm, "Global beta2 range [%g, %g], converged range [%g, %g]\n", (double)thi->beta2.min, (double)thi->beta2.max, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
1080   }
1081   PetscCall(PetscPrintf(comm, "\n"));
1082   PetscCall(DMCompositeRestoreAccess(pack, X, &X3, &X2));
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 static inline PetscInt DMDALocalIndex3D(DMDALocalInfo *info, PetscInt i, PetscInt j, PetscInt k)
1087 {
1088   return ((i - info->gzs) * info->gym + (j - info->gys)) * info->gxm + (k - info->gxs);
1089 }
1090 static inline PetscInt DMDALocalIndex2D(DMDALocalInfo *info, PetscInt i, PetscInt j)
1091 {
1092   return (i - info->gzs) * info->gym + (j - info->gys);
1093 }
1094 
1095 static PetscErrorCode THIJacobianLocal_Momentum(DMDALocalInfo *info, const Node ***x, const PrmNode **prm, Mat B, Mat Bcpl, THI thi)
1096 {
1097   PetscInt  xs, ys, xm, ym, zm, i, j, k, q, l, ll;
1098   PetscReal hx, hy;
1099 
1100   PetscFunctionBeginUser;
1101   xs = info->zs;
1102   ys = info->ys;
1103   xm = info->zm;
1104   ym = info->ym;
1105   zm = info->xm;
1106   hx = thi->Lx / info->mz;
1107   hy = thi->Ly / info->my;
1108 
1109   for (i = xs; i < xs + xm; i++) {
1110     for (j = ys; j < ys + ym; j++) {
1111       PrmNode pn[4], dpn[4][2];
1112       QuadExtract(prm, i, j, pn);
1113       PetscCall(QuadComputeGrad4(QuadQDeriv, hx, hy, pn, dpn));
1114       for (k = 0; k < zm - 1; k++) {
1115         Node        n[8];
1116         PetscReal   zn[8], etabase = 0;
1117         PetscScalar Ke[8 * NODE_SIZE][8 * NODE_SIZE], Kcpl[8 * NODE_SIZE][4 * PRMNODE_SIZE];
1118         PetscInt    ls = 0;
1119 
1120         PrmHexGetZ(pn, k, zm, zn);
1121         HexExtract(x, i, j, k, n);
1122         PetscCall(PetscMemzero(Ke, sizeof(Ke)));
1123         PetscCall(PetscMemzero(Kcpl, sizeof(Kcpl)));
1124         if (thi->no_slip && k == 0) {
1125           for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
1126           ls = 4;
1127         }
1128         for (q = 0; q < 8; q++) {
1129           PetscReal   dz[3], phi[8], dphi[8][3], jw, eta, deta;
1130           PetscScalar du[3], dv[3], u, v;
1131           HexGrad(HexQDeriv[q], zn, dz);
1132           HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
1133           PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
1134           jw /= thi->rhog; /* residuals are scaled by this factor */
1135           if (q == 0) etabase = eta;
1136           for (l = ls; l < 8; l++) { /* test functions */
1137             const PetscReal pp = phi[l], *restrict dp = dphi[l];
1138             for (ll = ls; ll < 8; ll++) { /* trial functions */
1139               const PetscReal *restrict dpl = dphi[ll];
1140               PetscScalar dgdu, dgdv;
1141               dgdu = 2. * du[0] * dpl[0] + dv[1] * dpl[0] + 0.5 * (du[1] + dv[0]) * dpl[1] + 0.5 * du[2] * dpl[2];
1142               dgdv = 2. * dv[1] * dpl[1] + du[0] * dpl[1] + 0.5 * (du[1] + dv[0]) * dpl[0] + 0.5 * dv[2] * dpl[2];
1143               /* Picard part */
1144               Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * eta * 4. * dpl[0] + dp[1] * jw * eta * dpl[1] + dp[2] * jw * eta * dpl[2];
1145               Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * eta * 2. * dpl[1] + dp[1] * jw * eta * dpl[0];
1146               Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * eta * 2. * dpl[0] + dp[0] * jw * eta * dpl[1];
1147               Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * eta * 4. * dpl[1] + dp[0] * jw * eta * dpl[0] + dp[2] * jw * eta * dpl[2];
1148               /* extra Newton terms */
1149               Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * deta * dgdu * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * du[2];
1150               Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * deta * dgdv * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * du[2];
1151               Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * deta * dgdu * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * dv[2];
1152               Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * deta * dgdv * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * dv[2];
1153               /* inertial part */
1154               Ke[l * 2 + 0][ll * 2 + 0] += pp * jw * thi->inertia * pp;
1155               Ke[l * 2 + 1][ll * 2 + 1] += pp * jw * thi->inertia * pp;
1156             }
1157             for (ll = 0; ll < 4; ll++) {                                                              /* Trial functions for surface/bed */
1158               const PetscReal dpl[] = {QuadQDeriv[q % 4][ll][0] / hx, QuadQDeriv[q % 4][ll][1] / hy}; /* surface = h + b */
1159               Kcpl[FieldIndex(Node, l, u)][FieldIndex(PrmNode, ll, h)] += pp * jw * thi->rhog * dpl[0];
1160               Kcpl[FieldIndex(Node, l, u)][FieldIndex(PrmNode, ll, b)] += pp * jw * thi->rhog * dpl[0];
1161               Kcpl[FieldIndex(Node, l, v)][FieldIndex(PrmNode, ll, h)] += pp * jw * thi->rhog * dpl[1];
1162               Kcpl[FieldIndex(Node, l, v)][FieldIndex(PrmNode, ll, b)] += pp * jw * thi->rhog * dpl[1];
1163             }
1164           }
1165         }
1166         if (k == 0) { /* on a bottom face */
1167           if (thi->no_slip) {
1168             const PetscReal   hz    = PetscRealPart(pn[0].h) / (zm - 1);
1169             const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
1170             Ke[0][0] = thi->dirichlet_scale * diagu;
1171             Ke[0][1] = 0;
1172             Ke[1][0] = 0;
1173             Ke[1][1] = thi->dirichlet_scale * diagv;
1174           } else {
1175             for (q = 0; q < 4; q++) { /* We remove the explicit scaling by 1/rhog because beta2 already has that scaling to be O(1) */
1176               const PetscReal jw = 0.25 * hx * hy, *phi = QuadQInterp[q];
1177               PetscScalar     u = 0, v = 0, rbeta2 = 0;
1178               PetscReal       beta2, dbeta2;
1179               for (l = 0; l < 4; l++) {
1180                 u += phi[l] * n[l].u;
1181                 v += phi[l] * n[l].v;
1182                 rbeta2 += phi[l] * pn[l].beta2;
1183               }
1184               THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
1185               for (l = 0; l < 4; l++) {
1186                 const PetscReal pp = phi[l];
1187                 for (ll = 0; ll < 4; ll++) {
1188                   const PetscReal ppl = phi[ll];
1189                   Ke[l * 2 + 0][ll * 2 + 0] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * u * u * ppl;
1190                   Ke[l * 2 + 0][ll * 2 + 1] += pp * jw * dbeta2 * u * v * ppl;
1191                   Ke[l * 2 + 1][ll * 2 + 0] += pp * jw * dbeta2 * v * u * ppl;
1192                   Ke[l * 2 + 1][ll * 2 + 1] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * v * v * ppl;
1193                 }
1194               }
1195             }
1196           }
1197         }
1198         {
1199           const PetscInt rc3blocked[8]                 = {DMDALocalIndex3D(info, i + 0, j + 0, k + 0), DMDALocalIndex3D(info, i + 1, j + 0, k + 0), DMDALocalIndex3D(info, i + 1, j + 1, k + 0), DMDALocalIndex3D(info, i + 0, j + 1, k + 0),
1200                                                           DMDALocalIndex3D(info, i + 0, j + 0, k + 1), DMDALocalIndex3D(info, i + 1, j + 0, k + 1), DMDALocalIndex3D(info, i + 1, j + 1, k + 1), DMDALocalIndex3D(info, i + 0, j + 1, k + 1)},
1201                          col2blocked[PRMNODE_SIZE * 4] = {DMDALocalIndex2D(info, i + 0, j + 0), DMDALocalIndex2D(info, i + 1, j + 0), DMDALocalIndex2D(info, i + 1, j + 1), DMDALocalIndex2D(info, i + 0, j + 1)};
1202 #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1203           for (l = 0; l < 8; l++) {
1204             for (ll = l + 1; ll < 8; ll++) {
1205               Ke[ll * 2 + 0][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 0];
1206               Ke[ll * 2 + 1][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 1];
1207               Ke[ll * 2 + 0][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 0];
1208               Ke[ll * 2 + 1][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 1];
1209             }
1210           }
1211 #endif
1212           PetscCall(MatSetValuesBlockedLocal(B, 8, rc3blocked, 8, rc3blocked, &Ke[0][0], ADD_VALUES)); /* velocity-velocity coupling can use blocked insertion */
1213           {                                                                                            /* The off-diagonal part cannot (yet) */
1214             PetscInt row3scalar[NODE_SIZE * 8], col2scalar[PRMNODE_SIZE * 4];
1215             for (l = 0; l < 8; l++)
1216               for (ll = 0; ll < NODE_SIZE; ll++) row3scalar[l * NODE_SIZE + ll] = rc3blocked[l] * NODE_SIZE + ll;
1217             for (l = 0; l < 4; l++)
1218               for (ll = 0; ll < PRMNODE_SIZE; ll++) col2scalar[l * PRMNODE_SIZE + ll] = col2blocked[l] * PRMNODE_SIZE + ll;
1219             PetscCall(MatSetValuesLocal(Bcpl, 8 * NODE_SIZE, row3scalar, 4 * PRMNODE_SIZE, col2scalar, &Kcpl[0][0], ADD_VALUES));
1220           }
1221         }
1222       }
1223     }
1224   }
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info, const Node ***x3, const PrmNode **x2, const PrmNode **xdot2, PetscReal a, Mat B22, Mat B21, THI thi)
1229 {
1230   PetscInt xs, ys, xm, ym, zm, i, j, k;
1231 
1232   PetscFunctionBeginUser;
1233   xs = info->zs;
1234   ys = info->ys;
1235   xm = info->zm;
1236   ym = info->ym;
1237   zm = info->xm;
1238 
1239   PetscCheck(zm <= 1024, ((PetscObject)info->da)->comm, PETSC_ERR_SUP, "Need to allocate more space");
1240   for (i = xs; i < xs + xm; i++) {
1241     for (j = ys; j < ys + ym; j++) {
1242       { /* Self-coupling */
1243         const PetscInt    row[]  = {DMDALocalIndex2D(info, i, j)};
1244         const PetscInt    col[]  = {DMDALocalIndex2D(info, i, j)};
1245         const PetscScalar vals[] = {a, 0, 0, 0, a, 0, 0, 0, a};
1246         PetscCall(MatSetValuesBlockedLocal(B22, 1, row, 1, col, vals, INSERT_VALUES));
1247       }
1248       for (k = 0; k < zm; k++) { /* Coupling to velocity problem */
1249         /* Use a cheaper quadrature than for residual evaluation, because it is much sparser */
1250         const PetscInt    row[]  = {FieldIndex(PrmNode, DMDALocalIndex2D(info, i, j), h)};
1251         const PetscInt    cols[] = {FieldIndex(Node, DMDALocalIndex3D(info, i - 1, j, k), u), FieldIndex(Node, DMDALocalIndex3D(info, i, j, k), u), FieldIndex(Node, DMDALocalIndex3D(info, i + 1, j, k), u),
1252                                     FieldIndex(Node, DMDALocalIndex3D(info, i, j - 1, k), v), FieldIndex(Node, DMDALocalIndex3D(info, i, j, k), v), FieldIndex(Node, DMDALocalIndex3D(info, i, j + 1, k), v)};
1253         const PetscScalar w = (k && k < zm - 1) ? 0.5 : 0.25, hW = w * (x2[i - 1][j].h + x2[i][j].h) / (zm - 1.), hE = w * (x2[i][j].h + x2[i + 1][j].h) / (zm - 1.), hS = w * (x2[i][j - 1].h + x2[i][j].h) / (zm - 1.),
1254                           hN = w * (x2[i][j].h + x2[i][j + 1].h) / (zm - 1.);
1255         PetscScalar *vals, vals_upwind[] = {((PetscRealPart(x3[i][j][k].u) > 0) ? -hW : 0), ((PetscRealPart(x3[i][j][k].u) > 0) ? +hE : -hW), ((PetscRealPart(x3[i][j][k].u) > 0) ? 0 : +hE),
1256                                             ((PetscRealPart(x3[i][j][k].v) > 0) ? -hS : 0), ((PetscRealPart(x3[i][j][k].v) > 0) ? +hN : -hS), ((PetscRealPart(x3[i][j][k].v) > 0) ? 0 : +hN)},
1257                            vals_centered[] = {-0.5 * hW, 0.5 * (-hW + hE), 0.5 * hE, -0.5 * hS, 0.5 * (-hS + hN), 0.5 * hN};
1258         vals                               = 1 ? vals_upwind : vals_centered;
1259         if (k == 0) {
1260           Node derate;
1261           THIErosion(thi, &x3[i][j][0], NULL, &derate);
1262           vals[1] -= derate.u;
1263           vals[4] -= derate.v;
1264         }
1265         PetscCall(MatSetValuesLocal(B21, 1, row, 6, cols, vals, INSERT_VALUES));
1266       }
1267     }
1268   }
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 static PetscErrorCode THIJacobian(TS ts, PetscReal t, Vec X, Vec Xdot, PetscReal a, Mat A, Mat B, void *ctx)
1273 {
1274   THI             thi = (THI)ctx;
1275   DM              pack, da3, da2;
1276   Vec             X3, X2, Xdot2;
1277   Mat             B11, B12, B21, B22;
1278   DMDALocalInfo   info3;
1279   IS             *isloc;
1280   const Node   ***x3;
1281   const PrmNode **x2, **xdot2;
1282 
1283   PetscFunctionBeginUser;
1284   PetscCall(TSGetDM(ts, &pack));
1285   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
1286   PetscCall(DMDAGetLocalInfo(da3, &info3));
1287   PetscCall(DMCompositeGetLocalVectors(pack, &X3, &X2));
1288   PetscCall(DMCompositeGetLocalVectors(pack, NULL, &Xdot2));
1289   PetscCall(DMCompositeScatter(pack, X, X3, X2));
1290   PetscCall(THIFixGhosts(thi, da3, da2, X3, X2));
1291   PetscCall(DMCompositeScatter(pack, Xdot, NULL, Xdot2));
1292 
1293   PetscCall(MatZeroEntries(B));
1294 
1295   PetscCall(DMCompositeGetLocalISs(pack, &isloc));
1296   PetscCall(MatGetLocalSubMatrix(B, isloc[0], isloc[0], &B11));
1297   PetscCall(MatGetLocalSubMatrix(B, isloc[0], isloc[1], &B12));
1298   PetscCall(MatGetLocalSubMatrix(B, isloc[1], isloc[0], &B21));
1299   PetscCall(MatGetLocalSubMatrix(B, isloc[1], isloc[1], &B22));
1300 
1301   PetscCall(DMDAVecGetArray(da3, X3, &x3));
1302   PetscCall(DMDAVecGetArray(da2, X2, &x2));
1303   PetscCall(DMDAVecGetArray(da2, Xdot2, &xdot2));
1304 
1305   PetscCall(THIJacobianLocal_Momentum(&info3, x3, x2, B11, B12, thi));
1306 
1307   /* Need to switch from ADD_VALUES to INSERT_VALUES */
1308   PetscCall(MatAssemblyBegin(B, MAT_FLUSH_ASSEMBLY));
1309   PetscCall(MatAssemblyEnd(B, MAT_FLUSH_ASSEMBLY));
1310 
1311   PetscCall(THIJacobianLocal_2D(&info3, x3, x2, xdot2, a, B22, B21, thi));
1312 
1313   PetscCall(DMDAVecRestoreArray(da3, X3, &x3));
1314   PetscCall(DMDAVecRestoreArray(da2, X2, &x2));
1315   PetscCall(DMDAVecRestoreArray(da2, Xdot2, &xdot2));
1316 
1317   PetscCall(MatRestoreLocalSubMatrix(B, isloc[0], isloc[0], &B11));
1318   PetscCall(MatRestoreLocalSubMatrix(B, isloc[0], isloc[1], &B12));
1319   PetscCall(MatRestoreLocalSubMatrix(B, isloc[1], isloc[0], &B21));
1320   PetscCall(MatRestoreLocalSubMatrix(B, isloc[1], isloc[1], &B22));
1321   PetscCall(ISDestroy(&isloc[0]));
1322   PetscCall(ISDestroy(&isloc[1]));
1323   PetscCall(PetscFree(isloc));
1324 
1325   PetscCall(DMCompositeRestoreLocalVectors(pack, &X3, &X2));
1326   PetscCall(DMCompositeRestoreLocalVectors(pack, 0, &Xdot2));
1327 
1328   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1329   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1330   if (A != B) {
1331     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1332     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1333   }
1334   if (thi->verbose) PetscCall(THIMatrixStatistics(thi, B, PETSC_VIEWER_STDOUT_WORLD));
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 /* VTK's XML formats are so brain-dead that they can't handle multiple grids in the same file.  Since the communication
1339  * can be shared between the two grids, we write two files at once, one for velocity (living on a 3D grid defined by
1340  * h=thickness and b=bed) and another for all properties living on the 2D grid.
1341  */
1342 static PetscErrorCode THIDAVecView_VTK_XML(THI thi, DM pack, Vec X, const char filename[], const char filename2[])
1343 {
1344   const PetscInt dof = NODE_SIZE, dof2 = PRMNODE_SIZE;
1345   Units          units = thi->units;
1346   MPI_Comm       comm;
1347   PetscViewer    viewer3, viewer2;
1348   PetscMPIInt    rank, size, tag, nn, nmax, nn2, nmax2;
1349   PetscInt       mx, my, mz, r, range[6];
1350   PetscScalar   *x, *x2;
1351   DM             da3, da2;
1352   Vec            X3, X2;
1353 
1354   PetscFunctionBeginUser;
1355   PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
1356   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
1357   PetscCall(DMCompositeGetAccess(pack, X, &X3, &X2));
1358   PetscCall(DMDAGetInfo(da3, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1359   PetscCallMPI(MPI_Comm_size(comm, &size));
1360   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1361   PetscCall(PetscViewerASCIIOpen(comm, filename, &viewer3));
1362   PetscCall(PetscViewerASCIIOpen(comm, filename2, &viewer2));
1363   PetscCall(PetscViewerASCIIPrintf(viewer3, "<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n"));
1364   PetscCall(PetscViewerASCIIPrintf(viewer2, "<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n"));
1365   PetscCall(PetscViewerASCIIPrintf(viewer3, "  <StructuredGrid WholeExtent=\"%d %" PetscInt_FMT " %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n", 0, mz - 1, 0, my - 1, 0, mx - 1));
1366   PetscCall(PetscViewerASCIIPrintf(viewer2, "  <StructuredGrid WholeExtent=\"%d %d %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n", 0, 0, 0, my - 1, 0, mx - 1));
1367 
1368   PetscCall(DMDAGetCorners(da3, range, range + 1, range + 2, range + 3, range + 4, range + 5));
1369   PetscCall(PetscMPIIntCast(range[3] * range[4] * range[5] * dof, &nn));
1370   PetscCallMPI(MPI_Reduce(&nn, &nmax, 1, MPI_INT, MPI_MAX, 0, comm));
1371   PetscCall(PetscMPIIntCast(range[4] * range[5] * dof2, &nn2));
1372   PetscCallMPI(MPI_Reduce(&nn2, &nmax2, 1, MPI_INT, MPI_MAX, 0, comm));
1373   tag = ((PetscObject)viewer3)->tag;
1374   PetscCall(VecGetArrayRead(X3, (const PetscScalar **)&x));
1375   PetscCall(VecGetArrayRead(X2, (const PetscScalar **)&x2));
1376   if (rank == 0) {
1377     PetscScalar *array, *array2;
1378     PetscCall(PetscMalloc2(nmax, &array, nmax2, &array2));
1379     for (r = 0; r < size; r++) {
1380       PetscInt i, j, k, f, xs, xm, ys, ym, zs, zm;
1381       Node    *y3;
1382       PetscScalar(*y2)[PRMNODE_SIZE];
1383       MPI_Status status;
1384 
1385       if (r) PetscCallMPI(MPI_Recv(range, 6, MPIU_INT, r, tag, comm, MPI_STATUS_IGNORE));
1386       zs = range[0];
1387       ys = range[1];
1388       xs = range[2];
1389       zm = range[3];
1390       ym = range[4];
1391       xm = range[5];
1392       PetscCheck(xm * ym * zm * dof <= nmax, PETSC_COMM_SELF, PETSC_ERR_PLIB, "should not happen");
1393       if (r) {
1394         PetscCallMPI(MPI_Recv(array, nmax, MPIU_SCALAR, r, tag, comm, &status));
1395         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &nn));
1396         PetscCheck(nn == xm * ym * zm * dof, PETSC_COMM_SELF, PETSC_ERR_PLIB, "corrupt da3 send");
1397         y3 = (Node *)array;
1398         PetscCallMPI(MPI_Recv(array2, nmax2, MPIU_SCALAR, r, tag, comm, &status));
1399         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &nn2));
1400         PetscCheck(nn2 == xm * ym * dof2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "corrupt da2 send");
1401         y2 = (PetscScalar(*)[PRMNODE_SIZE])array2;
1402       } else {
1403         y3 = (Node *)x;
1404         y2 = (PetscScalar(*)[PRMNODE_SIZE])x2;
1405       }
1406       PetscCall(PetscViewerASCIIPrintf(viewer3, "    <Piece Extent=\"%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n", zs, zs + zm - 1, ys, ys + ym - 1, xs, xs + xm - 1));
1407       PetscCall(PetscViewerASCIIPrintf(viewer2, "    <Piece Extent=\"%d %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n", 0, 0, ys, ys + ym - 1, xs, xs + xm - 1));
1408 
1409       PetscCall(PetscViewerASCIIPrintf(viewer3, "      <Points>\n"));
1410       PetscCall(PetscViewerASCIIPrintf(viewer2, "      <Points>\n"));
1411       PetscCall(PetscViewerASCIIPrintf(viewer3, "        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1412       PetscCall(PetscViewerASCIIPrintf(viewer2, "        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1413       for (i = xs; i < xs + xm; i++) {
1414         for (j = ys; j < ys + ym; j++) {
1415           PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my, b = PetscRealPart(y2[i * ym + j][FieldOffset(PrmNode, b)]), h = PetscRealPart(y2[i * ym + j][FieldOffset(PrmNode, h)]);
1416           for (k = zs; k < zs + zm; k++) {
1417             PetscReal zz = b + h * k / (mz - 1.);
1418             PetscCall(PetscViewerASCIIPrintf(viewer3, "%f %f %f\n", (double)xx, (double)yy, (double)zz));
1419           }
1420           PetscCall(PetscViewerASCIIPrintf(viewer2, "%f %f %f\n", (double)xx, (double)yy, (double)0.0));
1421         }
1422       }
1423       PetscCall(PetscViewerASCIIPrintf(viewer3, "        </DataArray>\n"));
1424       PetscCall(PetscViewerASCIIPrintf(viewer2, "        </DataArray>\n"));
1425       PetscCall(PetscViewerASCIIPrintf(viewer3, "      </Points>\n"));
1426       PetscCall(PetscViewerASCIIPrintf(viewer2, "      </Points>\n"));
1427 
1428       { /* Velocity and rank (3D) */
1429         PetscCall(PetscViewerASCIIPrintf(viewer3, "      <PointData>\n"));
1430         PetscCall(PetscViewerASCIIPrintf(viewer3, "        <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1431         for (i = 0; i < nn / dof; i++) PetscCall(PetscViewerASCIIPrintf(viewer3, "%f %f %f\n", (double)(PetscRealPart(y3[i].u) * units->year / units->meter), (double)(PetscRealPart(y3[i].v) * units->year / units->meter), 0.0));
1432         PetscCall(PetscViewerASCIIPrintf(viewer3, "        </DataArray>\n"));
1433 
1434         PetscCall(PetscViewerASCIIPrintf(viewer3, "        <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n"));
1435         for (i = 0; i < nn; i += dof) PetscCall(PetscViewerASCIIPrintf(viewer3, "%" PetscInt_FMT "\n", r));
1436         PetscCall(PetscViewerASCIIPrintf(viewer3, "        </DataArray>\n"));
1437         PetscCall(PetscViewerASCIIPrintf(viewer3, "      </PointData>\n"));
1438       }
1439 
1440       { /* 2D */
1441         PetscCall(PetscViewerASCIIPrintf(viewer2, "      <PointData>\n"));
1442         for (f = 0; f < PRMNODE_SIZE; f++) {
1443           const char *fieldname;
1444           PetscCall(DMDAGetFieldName(da2, f, &fieldname));
1445           PetscCall(PetscViewerASCIIPrintf(viewer2, "        <DataArray type=\"Float32\" Name=\"%s\" format=\"ascii\">\n", fieldname));
1446           for (i = 0; i < nn2 / PRMNODE_SIZE; i++) PetscCall(PetscViewerASCIIPrintf(viewer2, "%g\n", (double)y2[i][f]));
1447           PetscCall(PetscViewerASCIIPrintf(viewer2, "        </DataArray>\n"));
1448         }
1449         PetscCall(PetscViewerASCIIPrintf(viewer2, "      </PointData>\n"));
1450       }
1451 
1452       PetscCall(PetscViewerASCIIPrintf(viewer3, "    </Piece>\n"));
1453       PetscCall(PetscViewerASCIIPrintf(viewer2, "    </Piece>\n"));
1454     }
1455     PetscCall(PetscFree2(array, array2));
1456   } else {
1457     PetscCallMPI(MPI_Send(range, 6, MPIU_INT, 0, tag, comm));
1458     PetscCallMPI(MPI_Send(x, nn, MPIU_SCALAR, 0, tag, comm));
1459     PetscCallMPI(MPI_Send(x2, nn2, MPIU_SCALAR, 0, tag, comm));
1460   }
1461   PetscCall(VecRestoreArrayRead(X3, (const PetscScalar **)&x));
1462   PetscCall(VecRestoreArrayRead(X2, (const PetscScalar **)&x2));
1463   PetscCall(PetscViewerASCIIPrintf(viewer3, "  </StructuredGrid>\n"));
1464   PetscCall(PetscViewerASCIIPrintf(viewer2, "  </StructuredGrid>\n"));
1465 
1466   PetscCall(DMCompositeRestoreAccess(pack, X, &X3, &X2));
1467   PetscCall(PetscViewerASCIIPrintf(viewer3, "</VTKFile>\n"));
1468   PetscCall(PetscViewerASCIIPrintf(viewer2, "</VTKFile>\n"));
1469   PetscCall(PetscViewerDestroy(&viewer3));
1470   PetscCall(PetscViewerDestroy(&viewer2));
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 static PetscErrorCode THITSMonitor(TS ts, PetscInt step, PetscReal t, Vec X, void *ctx)
1475 {
1476   THI  thi = (THI)ctx;
1477   DM   pack;
1478   char filename3[PETSC_MAX_PATH_LEN], filename2[PETSC_MAX_PATH_LEN];
1479 
1480   PetscFunctionBeginUser;
1481   if (step < 0) PetscFunctionReturn(0); /* negative one is used to indicate an interpolated solution */
1482   PetscCall(PetscPrintf(PetscObjectComm((PetscObject)ts), "%3" PetscInt_FMT ": t=%g\n", step, (double)t));
1483   if (thi->monitor_interval && step % thi->monitor_interval) PetscFunctionReturn(0);
1484   PetscCall(TSGetDM(ts, &pack));
1485   PetscCall(PetscSNPrintf(filename3, sizeof(filename3), "%s-3d-%03" PetscInt_FMT ".vts", thi->monitor_basename, step));
1486   PetscCall(PetscSNPrintf(filename2, sizeof(filename2), "%s-2d-%03" PetscInt_FMT ".vts", thi->monitor_basename, step));
1487   PetscCall(THIDAVecView_VTK_XML(thi, pack, X, filename3, filename2));
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 static PetscErrorCode THICreateDM3d(THI thi, DM *dm3d)
1492 {
1493   MPI_Comm comm;
1494   PetscInt M = 3, N = 3, P = 2;
1495   DM       da;
1496 
1497   PetscFunctionBeginUser;
1498   PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
1499   PetscOptionsBegin(comm, NULL, "Grid resolution options", "");
1500   {
1501     PetscCall(PetscOptionsInt("-M", "Number of elements in x-direction on coarse level", "", M, &M, NULL));
1502     N = M;
1503     PetscCall(PetscOptionsInt("-N", "Number of elements in y-direction on coarse level (if different from M)", "", N, &N, NULL));
1504     PetscCall(PetscOptionsInt("-P", "Number of elements in z-direction on coarse level", "", P, &P, NULL));
1505   }
1506   PetscOptionsEnd();
1507   PetscCall(DMDACreate3d(comm, DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX, P, N, M, 1, PETSC_DETERMINE, PETSC_DETERMINE, sizeof(Node) / sizeof(PetscScalar), 1, 0, 0, 0, &da));
1508   PetscCall(DMSetFromOptions(da));
1509   PetscCall(DMSetUp(da));
1510   PetscCall(DMDASetFieldName(da, 0, "x-velocity"));
1511   PetscCall(DMDASetFieldName(da, 1, "y-velocity"));
1512   *dm3d = da;
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 int main(int argc, char *argv[])
1517 {
1518   MPI_Comm  comm;
1519   DM        pack, da3, da2;
1520   TS        ts;
1521   THI       thi;
1522   Vec       X;
1523   Mat       B;
1524   PetscInt  i, steps;
1525   PetscReal ftime;
1526 
1527   PetscFunctionBeginUser;
1528   PetscCall(PetscInitialize(&argc, &argv, 0, help));
1529   comm = PETSC_COMM_WORLD;
1530 
1531   PetscCall(THICreate(comm, &thi));
1532   PetscCall(THICreateDM3d(thi, &da3));
1533   {
1534     PetscInt        Mx, My, mx, my, s;
1535     DMDAStencilType st;
1536     PetscCall(DMDAGetInfo(da3, 0, 0, &My, &Mx, 0, &my, &mx, 0, &s, 0, 0, 0, &st));
1537     PetscCall(DMDACreate2d(PetscObjectComm((PetscObject)thi), DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, My, Mx, my, mx, sizeof(PrmNode) / sizeof(PetscScalar), s, 0, 0, &da2));
1538     PetscCall(DMSetUp(da2));
1539   }
1540 
1541   PetscCall(PetscObjectSetName((PetscObject)da3, "3D_Velocity"));
1542   PetscCall(DMSetOptionsPrefix(da3, "f3d_"));
1543   PetscCall(DMDASetFieldName(da3, 0, "u"));
1544   PetscCall(DMDASetFieldName(da3, 1, "v"));
1545   PetscCall(PetscObjectSetName((PetscObject)da2, "2D_Fields"));
1546   PetscCall(DMSetOptionsPrefix(da2, "f2d_"));
1547   PetscCall(DMDASetFieldName(da2, 0, "b"));
1548   PetscCall(DMDASetFieldName(da2, 1, "h"));
1549   PetscCall(DMDASetFieldName(da2, 2, "beta2"));
1550   PetscCall(DMCompositeCreate(comm, &pack));
1551   PetscCall(DMCompositeAddDM(pack, da3));
1552   PetscCall(DMCompositeAddDM(pack, da2));
1553   PetscCall(DMDestroy(&da3));
1554   PetscCall(DMDestroy(&da2));
1555   PetscCall(DMSetUp(pack));
1556   PetscCall(DMCreateMatrix(pack, &B));
1557   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
1558   PetscCall(MatSetOptionsPrefix(B, "thi_"));
1559 
1560   for (i = 0; i < thi->nlevels; i++) {
1561     PetscReal Lx = thi->Lx / thi->units->meter, Ly = thi->Ly / thi->units->meter, Lz = thi->Lz / thi->units->meter;
1562     PetscInt  Mx, My, Mz;
1563     PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
1564     PetscCall(DMDAGetInfo(da3, 0, &Mz, &My, &Mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1565     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g x %8.2g, num elements %3d x %3d x %3d (%8d), size (m) %g x %g x %g\n", i, Lx, Ly, Lz, Mx, My, Mz, Mx * My * Mz, Lx / Mx, Ly / My, 1000. / (Mz - 1)));
1566   }
1567 
1568   PetscCall(DMCreateGlobalVector(pack, &X));
1569   PetscCall(THIInitial(thi, pack, X));
1570 
1571   PetscCall(TSCreate(comm, &ts));
1572   PetscCall(TSSetDM(ts, pack));
1573   PetscCall(TSSetProblemType(ts, TS_NONLINEAR));
1574   PetscCall(TSMonitorSet(ts, THITSMonitor, thi, NULL));
1575   PetscCall(TSSetType(ts, TSTHETA));
1576   PetscCall(TSSetIFunction(ts, NULL, THIFunction, thi));
1577   PetscCall(TSSetIJacobian(ts, B, B, THIJacobian, thi));
1578   PetscCall(TSSetMaxTime(ts, 10.0));
1579   PetscCall(TSSetExactFinalTime(ts, TS_EXACTFINALTIME_STEPOVER));
1580   PetscCall(TSSetSolution(ts, X));
1581   PetscCall(TSSetTimeStep(ts, 1e-3));
1582   PetscCall(TSSetFromOptions(ts));
1583 
1584   PetscCall(TSSolve(ts, X));
1585   PetscCall(TSGetSolveTime(ts, &ftime));
1586   PetscCall(TSGetStepNumber(ts, &steps));
1587   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Steps %" PetscInt_FMT "  final time %g\n", steps, (double)ftime));
1588 
1589   if (0) PetscCall(THISolveStatistics(thi, ts, 0, "Full"));
1590 
1591   {
1592     PetscBool flg;
1593     char      filename[PETSC_MAX_PATH_LEN] = "";
1594     PetscCall(PetscOptionsGetString(NULL, NULL, "-o", filename, sizeof(filename), &flg));
1595     if (flg) PetscCall(THIDAVecView_VTK_XML(thi, pack, X, filename, NULL));
1596   }
1597 
1598   PetscCall(VecDestroy(&X));
1599   PetscCall(MatDestroy(&B));
1600   PetscCall(DMDestroy(&pack));
1601   PetscCall(TSDestroy(&ts));
1602   PetscCall(THIDestroy(&thi));
1603   PetscCall(PetscFinalize());
1604   return 0;
1605 }
1606