xref: /petsc/src/ts/tutorials/ex14.c (revision ccfb0f9f40a0131988d7995ed9679700dae2a75a)
1 static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
2 \n\
3 Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
4 using multigrid.  The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
5 to p=4/3 in a p-Laplacian).  The focus is on ISMIP-HOM experiments which assume periodic\n\
6 boundary conditions in the x- and y-directions.\n\
7 \n\
8 Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
9 can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
10 \n\
11 A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
12 \n\n";
13 
14 /*
15 The equations for horizontal velocity (u,v) are
16 
17   - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
18   - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0
19 
20 where
21 
22   eta = B/2 (epsilon + gamma)^((p-2)/2)
23 
24 is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
25 written in terms of the second invariant
26 
27   gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2
28 
29 The surface boundary conditions are the natural conditions.  The basal boundary conditions
30 are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.
31 
32 In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).
33 
34 The discretization is Q1 finite elements, managed by a DMDA.  The grid is never distorted in the
35 map (x,y) plane, but the bed and surface may be bumpy.  This is handled as usual in FEM, through
36 the Jacobian of the coordinate transformation from a reference element to the physical element.
37 
38 Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
39 specially so that columns are never distributed, and are always contiguous in memory.
40 This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
41 and then indexing as vec[i][j][k].  The exotic coarse spaces require 2D DMDAs which are made to
42 use compatible domain decomposition relative to the 3D DMDAs.
43 
44 */
45 
46 #include <petscts.h>
47 #include <petscdm.h>
48 #include <petscdmda.h>
49 #include <petscdmcomposite.h>
50 #include <ctype.h> /* toupper() */
51 #include <petsc/private/petscimpl.h>
52 
53 #if defined __SSE2__
54   #include <emmintrin.h>
55 #endif
56 
57 /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
58 #define USE_SSE2_KERNELS (!defined NO_SSE2 && !defined PETSC_USE_COMPLEX && !defined PETSC_USE_REAL_SINGLE && defined __SSE2__)
59 
60 #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L
61   #if defined __cplusplus /* C++ restrict is nonstandard and compilers have inconsistent rules about where it can be used */
62     #define restrict
63   #else
64     #define restrict PETSC_RESTRICT
65   #endif
66 #endif
67 
68 static PetscClassId THI_CLASSID;
69 
70 typedef enum {
71   QUAD_GAUSS,
72   QUAD_LOBATTO
73 } QuadratureType;
74 static const char     *QuadratureTypes[] = {"gauss", "lobatto", "QuadratureType", "QUAD_", 0};
75 static const PetscReal HexQWeights[8]    = {1, 1, 1, 1, 1, 1, 1, 1};
76 static const PetscReal HexQNodes[]       = {-0.57735026918962573, 0.57735026918962573};
77 #define G 0.57735026918962573
78 #define H (0.5 * (1. + G))
79 #define L (0.5 * (1. - G))
80 #define M (-0.5)
81 #define P (0.5)
82 /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
83 static const PetscReal HexQInterp_Lobatto[8][8] = {
84   {H, 0, 0, 0, L, 0, 0, 0},
85   {0, H, 0, 0, 0, L, 0, 0},
86   {0, 0, H, 0, 0, 0, L, 0},
87   {0, 0, 0, H, 0, 0, 0, L},
88   {L, 0, 0, 0, H, 0, 0, 0},
89   {0, L, 0, 0, 0, H, 0, 0},
90   {0, 0, L, 0, 0, 0, H, 0},
91   {0, 0, 0, L, 0, 0, 0, H}
92 };
93 static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
94   {{M * H, M *H, M}, {P * H, 0, 0},    {0, 0, 0},        {0, P *H, 0},     {M * L, M *L, P}, {P * L, 0, 0},    {0, 0, 0},        {0, P *L, 0}    },
95   {{M * H, 0, 0},    {P * H, M *H, M}, {0, P *H, 0},     {0, 0, 0},        {M * L, 0, 0},    {P * L, M *L, P}, {0, P *L, 0},     {0, 0, 0}       },
96   {{0, 0, 0},        {0, M *H, 0},     {P * H, P *H, M}, {M * H, 0, 0},    {0, 0, 0},        {0, M *L, 0},     {P * L, P *L, P}, {M * L, 0, 0}   },
97   {{0, M *H, 0},     {0, 0, 0},        {P * H, 0, 0},    {M * H, P *H, M}, {0, M *L, 0},     {0, 0, 0},        {P * L, 0, 0},    {M * L, P *L, P}},
98   {{M * L, M *L, M}, {P * L, 0, 0},    {0, 0, 0},        {0, P *L, 0},     {M * H, M *H, P}, {P * H, 0, 0},    {0, 0, 0},        {0, P *H, 0}    },
99   {{M * L, 0, 0},    {P * L, M *L, M}, {0, P *L, 0},     {0, 0, 0},        {M * H, 0, 0},    {P * H, M *H, P}, {0, P *H, 0},     {0, 0, 0}       },
100   {{0, 0, 0},        {0, M *L, 0},     {P * L, P *L, M}, {M * L, 0, 0},    {0, 0, 0},        {0, M *H, 0},     {P * H, P *H, P}, {M * H, 0, 0}   },
101   {{0, M *L, 0},     {0, 0, 0},        {P * L, 0, 0},    {M * L, P *L, M}, {0, M *H, 0},     {0, 0, 0},        {P * H, 0, 0},    {M * H, P *H, P}}
102 };
103 /* Stanndard Gauss */
104 static const PetscReal HexQInterp_Gauss[8][8] = {
105   {H * H * H, L * H * H, L * L * H, H * L * H, H * H * L, L * H * L, L * L * L, H * L * L},
106   {L * H * H, H * H * H, H * L * H, L * L * H, L * H * L, H * H * L, H * L * L, L * L * L},
107   {L * L * H, H * L * H, H * H * H, L * H * H, L * L * L, H * L * L, H * H * L, L * H * L},
108   {H * L * H, L * L * H, L * H * H, H * H * H, H * L * L, L * L * L, L * H * L, H * H * L},
109   {H * H * L, L * H * L, L * L * L, H * L * L, H * H * H, L * H * H, L * L * H, H * L * H},
110   {L * H * L, H * H * L, H * L * L, L * L * L, L * H * H, H * H * H, H * L * H, L * L * H},
111   {L * L * L, H * L * L, H * H * L, L * H * L, L * L * H, H * L * H, H * H * H, L * H * H},
112   {H * L * L, L * L * L, L * H * L, H * H * L, H * L * H, L * L * H, L * H * H, H * H * H}
113 };
114 static const PetscReal HexQDeriv_Gauss[8][8][3] = {
115   {{M * H * H, H * M * H, H * H * M}, {P * H * H, L * M * H, L * H * M}, {P * L * H, L * P * H, L * L * M}, {M * L * H, H * P * H, H * L * M}, {M * H * L, H * M * L, H * H * P}, {P * H * L, L * M * L, L * H * P}, {P * L * L, L * P * L, L * L * P}, {M * L * L, H * P * L, H * L * P}},
116   {{M * H * H, L * M * H, L * H * M}, {P * H * H, H * M * H, H * H * M}, {P * L * H, H * P * H, H * L * M}, {M * L * H, L * P * H, L * L * M}, {M * H * L, L * M * L, L * H * P}, {P * H * L, H * M * L, H * H * P}, {P * L * L, H * P * L, H * L * P}, {M * L * L, L * P * L, L * L * P}},
117   {{M * L * H, L * M * H, L * L * M}, {P * L * H, H * M * H, H * L * M}, {P * H * H, H * P * H, H * H * M}, {M * H * H, L * P * H, L * H * M}, {M * L * L, L * M * L, L * L * P}, {P * L * L, H * M * L, H * L * P}, {P * H * L, H * P * L, H * H * P}, {M * H * L, L * P * L, L * H * P}},
118   {{M * L * H, H * M * H, H * L * M}, {P * L * H, L * M * H, L * L * M}, {P * H * H, L * P * H, L * H * M}, {M * H * H, H * P * H, H * H * M}, {M * L * L, H * M * L, H * L * P}, {P * L * L, L * M * L, L * L * P}, {P * H * L, L * P * L, L * H * P}, {M * H * L, H * P * L, H * H * P}},
119   {{M * H * L, H * M * L, H * H * M}, {P * H * L, L * M * L, L * H * M}, {P * L * L, L * P * L, L * L * M}, {M * L * L, H * P * L, H * L * M}, {M * H * H, H * M * H, H * H * P}, {P * H * H, L * M * H, L * H * P}, {P * L * H, L * P * H, L * L * P}, {M * L * H, H * P * H, H * L * P}},
120   {{M * H * L, L * M * L, L * H * M}, {P * H * L, H * M * L, H * H * M}, {P * L * L, H * P * L, H * L * M}, {M * L * L, L * P * L, L * L * M}, {M * H * H, L * M * H, L * H * P}, {P * H * H, H * M * H, H * H * P}, {P * L * H, H * P * H, H * L * P}, {M * L * H, L * P * H, L * L * P}},
121   {{M * L * L, L * M * L, L * L * M}, {P * L * L, H * M * L, H * L * M}, {P * H * L, H * P * L, H * H * M}, {M * H * L, L * P * L, L * H * M}, {M * L * H, L * M * H, L * L * P}, {P * L * H, H * M * H, H * L * P}, {P * H * H, H * P * H, H * H * P}, {M * H * H, L * P * H, L * H * P}},
122   {{M * L * L, H * M * L, H * L * M}, {P * L * L, L * M * L, L * L * M}, {P * H * L, L * P * L, L * H * M}, {M * H * L, H * P * L, H * H * M}, {M * L * H, H * M * H, H * L * P}, {P * L * H, L * M * H, L * L * P}, {P * H * H, L * P * H, L * H * P}, {M * H * H, H * P * H, H * H * P}}
123 };
124 static const PetscReal (*HexQInterp)[8], (*HexQDeriv)[8][3];
125 /* Standard 2x2 Gauss quadrature for the bottom layer. */
126 static const PetscReal QuadQInterp[4][4] = {
127   {H * H, L *H, L *L, H *L},
128   {L * H, H *H, H *L, L *L},
129   {L * L, H *L, H *H, L *H},
130   {H * L, L *L, L *H, H *H}
131 };
132 static const PetscReal QuadQDeriv[4][4][2] = {
133   {{M * H, M *H}, {P * H, M *L}, {P * L, P *L}, {M * L, P *H}},
134   {{M * H, M *L}, {P * H, M *H}, {P * L, P *H}, {M * L, P *L}},
135   {{M * L, M *L}, {P * L, M *H}, {P * H, P *H}, {M * H, P *L}},
136   {{M * L, M *H}, {P * L, M *L}, {P * H, P *L}, {M * H, P *H}}
137 };
138 #undef G
139 #undef H
140 #undef L
141 #undef M
142 #undef P
143 
144 #define HexExtract(x, i, j, k, n) \
145   do { \
146     (n)[0] = (x)[i][j][k]; \
147     (n)[1] = (x)[i + 1][j][k]; \
148     (n)[2] = (x)[i + 1][j + 1][k]; \
149     (n)[3] = (x)[i][j + 1][k]; \
150     (n)[4] = (x)[i][j][k + 1]; \
151     (n)[5] = (x)[i + 1][j][k + 1]; \
152     (n)[6] = (x)[i + 1][j + 1][k + 1]; \
153     (n)[7] = (x)[i][j + 1][k + 1]; \
154   } while (0)
155 
156 #define HexExtractRef(x, i, j, k, n) \
157   do { \
158     (n)[0] = &(x)[i][j][k]; \
159     (n)[1] = &(x)[i + 1][j][k]; \
160     (n)[2] = &(x)[i + 1][j + 1][k]; \
161     (n)[3] = &(x)[i][j + 1][k]; \
162     (n)[4] = &(x)[i][j][k + 1]; \
163     (n)[5] = &(x)[i + 1][j][k + 1]; \
164     (n)[6] = &(x)[i + 1][j + 1][k + 1]; \
165     (n)[7] = &(x)[i][j + 1][k + 1]; \
166   } while (0)
167 
168 #define QuadExtract(x, i, j, n) \
169   do { \
170     (n)[0] = (x)[i][j]; \
171     (n)[1] = (x)[i + 1][j]; \
172     (n)[2] = (x)[i + 1][j + 1]; \
173     (n)[3] = (x)[i][j + 1]; \
174   } while (0)
175 
176 static PetscScalar Sqr(PetscScalar a)
177 {
178   return a * a;
179 }
180 
181 static void HexGrad(const PetscReal dphi[][3], const PetscReal zn[], PetscReal dz[])
182 {
183   PetscInt i;
184   dz[0] = dz[1] = dz[2] = 0;
185   for (i = 0; i < 8; i++) {
186     dz[0] += dphi[i][0] * zn[i];
187     dz[1] += dphi[i][1] * zn[i];
188     dz[2] += dphi[i][2] * zn[i];
189   }
190 }
191 
192 static void HexComputeGeometry(PetscInt q, PetscReal hx, PetscReal hy, const PetscReal dz[restrict], PetscReal phi[restrict], PetscReal dphi[restrict][3], PetscReal *restrict jw)
193 {
194   const PetscReal jac[3][3] =
195     {
196       {hx / 2, 0,      0    },
197       {0,      hy / 2, 0    },
198       {dz[0],  dz[1],  dz[2]}
199   },
200                   ijac[3][3] = {{1 / jac[0][0], 0, 0}, {0, 1 / jac[1][1], 0}, {-jac[2][0] / (jac[0][0] * jac[2][2]), -jac[2][1] / (jac[1][1] * jac[2][2]), 1 / jac[2][2]}}, jdet = jac[0][0] * jac[1][1] * jac[2][2];
201   PetscInt i;
202 
203   for (i = 0; i < 8; i++) {
204     const PetscReal *dphir = HexQDeriv[q][i];
205     phi[i]                 = HexQInterp[q][i];
206     dphi[i][0]             = dphir[0] * ijac[0][0] + dphir[1] * ijac[1][0] + dphir[2] * ijac[2][0];
207     dphi[i][1]             = dphir[0] * ijac[0][1] + dphir[1] * ijac[1][1] + dphir[2] * ijac[2][1];
208     dphi[i][2]             = dphir[0] * ijac[0][2] + dphir[1] * ijac[1][2] + dphir[2] * ijac[2][2];
209   }
210   *jw = 1.0 * jdet;
211 }
212 
213 typedef struct _p_THI   *THI;
214 typedef struct _n_Units *Units;
215 
216 typedef struct {
217   PetscScalar u, v;
218 } Node;
219 
220 typedef struct {
221   PetscScalar b;     /* bed */
222   PetscScalar h;     /* thickness */
223   PetscScalar beta2; /* friction */
224 } PrmNode;
225 
226 #define FieldSize(ntype)             ((PetscInt)(sizeof(ntype) / sizeof(PetscScalar)))
227 #define FieldOffset(ntype, member)   ((PetscInt)(offsetof(ntype, member) / sizeof(PetscScalar)))
228 #define FieldIndex(ntype, i, member) ((PetscInt)((i) * FieldSize(ntype) + FieldOffset(ntype, member)))
229 #define NODE_SIZE                    FieldSize(Node)
230 #define PRMNODE_SIZE                 FieldSize(PrmNode)
231 
232 typedef struct {
233   PetscReal min, max, cmin, cmax;
234 } PRange;
235 
236 struct _p_THI {
237   PETSCHEADER(int);
238   void (*initialize)(THI, PetscReal x, PetscReal y, PrmNode *p);
239   PetscInt  nlevels;
240   PetscInt  zlevels;
241   PetscReal Lx, Ly, Lz; /* Model domain */
242   PetscReal alpha;      /* Bed angle */
243   Units     units;
244   PetscReal dirichlet_scale;
245   PetscReal ssa_friction_scale;
246   PetscReal inertia;
247   PRange    eta;
248   PRange    beta2;
249   struct {
250     PetscReal Bd2, eps, exponent, glen_n;
251   } viscosity;
252   struct {
253     PetscReal irefgam, eps2, exponent;
254   } friction;
255   struct {
256     PetscReal rate, exponent, refvel;
257   } erosion;
258   PetscReal rhog;
259   PetscBool no_slip;
260   PetscBool verbose;
261   char     *mattype;
262   char     *monitor_basename;
263   PetscInt  monitor_interval;
264 };
265 
266 struct _n_Units {
267   /* fundamental */
268   PetscReal meter;
269   PetscReal kilogram;
270   PetscReal second;
271   /* derived */
272   PetscReal Pascal;
273   PetscReal year;
274 };
275 
276 static void PrmHexGetZ(const PrmNode pn[], PetscInt k, PetscInt zm, PetscReal zn[])
277 {
278   const PetscScalar zm1 = zm - 1, znl[8] = {pn[0].b + pn[0].h * (PetscScalar)k / zm1,       pn[1].b + pn[1].h * (PetscScalar)k / zm1,       pn[2].b + pn[2].h * (PetscScalar)k / zm1,       pn[3].b + pn[3].h * (PetscScalar)k / zm1,
279                                             pn[0].b + pn[0].h * (PetscScalar)(k + 1) / zm1, pn[1].b + pn[1].h * (PetscScalar)(k + 1) / zm1, pn[2].b + pn[2].h * (PetscScalar)(k + 1) / zm1, pn[3].b + pn[3].h * (PetscScalar)(k + 1) / zm1};
280   PetscInt          i;
281   for (i = 0; i < 8; i++) zn[i] = PetscRealPart(znl[i]);
282 }
283 
284 /* Compute a gradient of all the 2D fields at four quadrature points.  Output for [quadrature_point][direction].field_name */
285 static PetscErrorCode QuadComputeGrad4(const PetscReal dphi[][4][2], PetscReal hx, PetscReal hy, const PrmNode pn[4], PrmNode dp[4][2])
286 {
287   PetscInt q, i, f;
288   const PetscScalar (*restrict pg)[PRMNODE_SIZE] = (const PetscScalar (*)[PRMNODE_SIZE])pn; /* Get generic array pointers to the node */
289   PetscScalar (*restrict dpg)[2][PRMNODE_SIZE]   = (PetscScalar (*)[2][PRMNODE_SIZE])dp;
290 
291   PetscFunctionBeginUser;
292   PetscCall(PetscArrayzero(dpg, 4));
293   for (q = 0; q < 4; q++) {
294     for (i = 0; i < 4; i++) {
295       for (f = 0; f < PRMNODE_SIZE; f++) {
296         dpg[q][0][f] += dphi[q][i][0] / hx * pg[i][f];
297         dpg[q][1][f] += dphi[q][i][1] / hy * pg[i][f];
298       }
299     }
300   }
301   PetscFunctionReturn(PETSC_SUCCESS);
302 }
303 
304 static inline PetscReal StaggeredMidpoint2D(PetscScalar a, PetscScalar b, PetscScalar c, PetscScalar d)
305 {
306   return 0.5 * PetscRealPart(0.75 * a + 0.75 * b + 0.25 * c + 0.25 * d);
307 }
308 static inline PetscReal UpwindFlux1D(PetscReal u, PetscReal hL, PetscReal hR)
309 {
310   return (u > 0) ? hL * u : hR * u;
311 }
312 
313 #define UpwindFluxXW(x3, x2, h, i, j, k, dj) \
314   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u, x3[i - 1][j][k].u, x3[i - 1][j + dj][k].u, x3[i][k + dj][k].u), PetscRealPart(0.75 * x2[i - 1][j].h + 0.25 * x2[i - 1][j + dj].h), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i][j + dj].h))
315 #define UpwindFluxXE(x3, x2, h, i, j, k, dj) \
316   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u, x3[i + 1][j][k].u, x3[i + 1][j + dj][k].u, x3[i][k + dj][k].u), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i][j + dj].h), PetscRealPart(0.75 * x2[i + 1][j].h + 0.25 * x2[i + 1][j + dj].h))
317 #define UpwindFluxYS(x3, x2, h, i, j, k, di) \
318   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v, x3[i][j - 1][k].v, x3[i + di][j - 1][k].v, x3[i + di][j][k].v), PetscRealPart(0.75 * x2[i][j - 1].h + 0.25 * x2[i + di][j - 1].h), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i + di][j].h))
319 #define UpwindFluxYN(x3, x2, h, i, j, k, di) \
320   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v, x3[i][j + 1][k].v, x3[i + di][j + 1][k].v, x3[i + di][j][k].v), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i + di][j].h), PetscRealPart(0.75 * x2[i][j + 1].h + 0.25 * x2[i + di][j + 1].h))
321 
322 static void PrmNodeGetFaceMeasure(const PrmNode **p, PetscInt i, PetscInt j, PetscScalar h[])
323 {
324   /* West */
325   h[0] = StaggeredMidpoint2D(p[i][j].h, p[i - 1][j].h, p[i - 1][j - 1].h, p[i][j - 1].h);
326   h[1] = StaggeredMidpoint2D(p[i][j].h, p[i - 1][j].h, p[i - 1][j + 1].h, p[i][j + 1].h);
327   /* East */
328   h[2] = StaggeredMidpoint2D(p[i][j].h, p[i + 1][j].h, p[i + 1][j + 1].h, p[i][j + 1].h);
329   h[3] = StaggeredMidpoint2D(p[i][j].h, p[i + 1][j].h, p[i + 1][j - 1].h, p[i][j - 1].h);
330   /* South */
331   h[4] = StaggeredMidpoint2D(p[i][j].h, p[i][j - 1].h, p[i + 1][j - 1].h, p[i + 1][j].h);
332   h[5] = StaggeredMidpoint2D(p[i][j].h, p[i][j - 1].h, p[i - 1][j - 1].h, p[i - 1][j].h);
333   /* North */
334   h[6] = StaggeredMidpoint2D(p[i][j].h, p[i][j + 1].h, p[i - 1][j + 1].h, p[i - 1][j].h);
335   h[7] = StaggeredMidpoint2D(p[i][j].h, p[i][j + 1].h, p[i + 1][j + 1].h, p[i + 1][j].h);
336 }
337 
338 /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
339 static void THIInitialize_HOM_A(THI thi, PetscReal x, PetscReal y, PrmNode *p)
340 {
341   Units     units = thi->units;
342   PetscReal s     = -x * PetscSinReal(thi->alpha);
343   p->b            = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly);
344   p->h            = s - p->b;
345   p->beta2        = -1e-10; /* This value is not used, but it should not be huge because that would change the finite difference step size  */
346 }
347 
348 static void THIInitialize_HOM_C(THI thi, PetscReal x, PetscReal y, PrmNode *p)
349 {
350   Units     units = thi->units;
351   PetscReal s     = -x * PetscSinReal(thi->alpha);
352   p->b            = s - 1000 * units->meter;
353   p->h            = s - p->b;
354   /* tau_b = beta2 v   is a stress (Pa).
355    * This is a big number in our units (it needs to balance the driving force from the surface), so we scale it by 1/rhog, just like the residual. */
356   p->beta2 = 1000 * (1 + PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly)) * units->Pascal * units->year / units->meter / thi->rhog;
357 }
358 
359 /* These are just toys */
360 
361 /* From Fred Herman */
362 static void THIInitialize_HOM_F(THI thi, PetscReal x, PetscReal y, PrmNode *p)
363 {
364   Units     units = thi->units;
365   PetscReal s     = -x * PetscSinReal(thi->alpha);
366   p->b            = s - 1000 * units->meter + 100 * units->meter * PetscSinReal(x * 2 * PETSC_PI / thi->Lx); /* * sin(y*2*PETSC_PI/thi->Ly); */
367   p->h            = s - p->b;
368   p->h            = (1 - (atan((x - thi->Lx / 2) / 1.) + PETSC_PI / 2.) / PETSC_PI) * 500 * units->meter + 1 * units->meter;
369   s               = PetscRealPart(p->b + p->h);
370   p->beta2        = -1e-10;
371   /*  p->beta2 = 1000 * units->Pascal * units->year / units->meter; */
372 }
373 
374 /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
375 static void THIInitialize_HOM_X(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
376 {
377   Units     units = thi->units;
378   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
379   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
380   p->b     = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
381   p->h     = s - p->b;
382   p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter / thi->rhog;
383 }
384 
385 /* Like Z, but with 200 meter cliffs */
386 static void THIInitialize_HOM_Y(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
387 {
388   Units     units = thi->units;
389   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
390   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
391   p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
392   if (PetscRealPart(p->b) > -700 * units->meter) p->b += 200 * units->meter;
393   p->h     = s - p->b;
394   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter / thi->rhog;
395 }
396 
397 /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
398 static void THIInitialize_HOM_Z(THI thi, PetscReal xx, PetscReal yy, PrmNode *p)
399 {
400   Units     units = thi->units;
401   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
402   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
403   p->b     = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
404   p->h     = s - p->b;
405   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter / thi->rhog;
406 }
407 
408 static void THIFriction(THI thi, PetscReal rbeta2, PetscReal gam, PetscReal *beta2, PetscReal *dbeta2)
409 {
410   if (thi->friction.irefgam == 0) {
411     Units units           = thi->units;
412     thi->friction.irefgam = 1. / (0.5 * PetscSqr(100 * units->meter / units->year));
413     thi->friction.eps2    = 0.5 * PetscSqr(1.e-4 / thi->friction.irefgam);
414   }
415   if (thi->friction.exponent == 0) {
416     *beta2  = rbeta2;
417     *dbeta2 = 0;
418   } else {
419     *beta2  = rbeta2 * PetscPowReal(thi->friction.eps2 + gam * thi->friction.irefgam, thi->friction.exponent);
420     *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam * thi->friction.irefgam) * thi->friction.irefgam;
421   }
422 }
423 
424 static void THIViscosity(THI thi, PetscReal gam, PetscReal *eta, PetscReal *deta)
425 {
426   PetscReal Bd2, eps, exponent;
427   if (thi->viscosity.Bd2 == 0) {
428     Units           units   = thi->units;
429     const PetscReal n       = thi->viscosity.glen_n,                                  /* Glen exponent */
430       p                     = 1. + 1. / n,                                            /* for Stokes */
431       A                     = 1.e-16 * PetscPowReal(units->Pascal, -n) / units->year, /* softness parameter (Pa^{-n}/s) */
432       B                     = PetscPowReal(A, -1. / n);                               /* hardness parameter */
433     thi->viscosity.Bd2      = B / 2;
434     thi->viscosity.exponent = (p - 2) / 2;
435     thi->viscosity.eps      = 0.5 * PetscSqr(1e-5 / units->year);
436   }
437   Bd2      = thi->viscosity.Bd2;
438   exponent = thi->viscosity.exponent;
439   eps      = thi->viscosity.eps;
440   *eta     = Bd2 * PetscPowReal(eps + gam, exponent);
441   *deta    = exponent * (*eta) / (eps + gam);
442 }
443 
444 static void THIErosion(THI thi, const Node *vel, PetscScalar *erate, Node *derate)
445 {
446   const PetscScalar magref2 = 1.e-10 + (PetscSqr(vel->u) + PetscSqr(vel->v)) / PetscSqr(thi->erosion.refvel), rate = -thi->erosion.rate * PetscPowScalar(magref2, 0.5 * thi->erosion.exponent);
447   if (erate) *erate = rate;
448   if (derate) {
449     if (thi->erosion.exponent == 1) {
450       derate->u = 0;
451       derate->v = 0;
452     } else {
453       derate->u = 0.5 * thi->erosion.exponent * rate / magref2 * 2. * vel->u / PetscSqr(thi->erosion.refvel);
454       derate->v = 0.5 * thi->erosion.exponent * rate / magref2 * 2. * vel->v / PetscSqr(thi->erosion.refvel);
455     }
456   }
457 }
458 
459 static void RangeUpdate(PetscReal *min, PetscReal *max, PetscReal x)
460 {
461   if (x < *min) *min = x;
462   if (x > *max) *max = x;
463 }
464 
465 static void PRangeClear(PRange *p)
466 {
467   p->cmin = p->min = 1e100;
468   p->cmax = p->max = -1e100;
469 }
470 
471 static PetscErrorCode PRangeMinMax(PRange *p, PetscReal min, PetscReal max)
472 {
473   PetscFunctionBeginUser;
474   p->cmin = min;
475   p->cmax = max;
476   if (min < p->min) p->min = min;
477   if (max > p->max) p->max = max;
478   PetscFunctionReturn(PETSC_SUCCESS);
479 }
480 
481 static PetscErrorCode THIDestroy(THI *thi)
482 {
483   PetscFunctionBeginUser;
484   if (--((PetscObject)*thi)->refct > 0) PetscFunctionReturn(PETSC_SUCCESS);
485   PetscCall(PetscFree((*thi)->units));
486   PetscCall(PetscFree((*thi)->mattype));
487   PetscCall(PetscFree((*thi)->monitor_basename));
488   PetscCall(PetscHeaderDestroy(thi));
489   PetscFunctionReturn(PETSC_SUCCESS);
490 }
491 
492 static PetscErrorCode THICreate(MPI_Comm comm, THI *inthi)
493 {
494   static PetscBool registered = PETSC_FALSE;
495   THI              thi;
496   Units            units;
497   char             monitor_basename[PETSC_MAX_PATH_LEN] = "thi-";
498 
499   PetscFunctionBeginUser;
500   *inthi = 0;
501   if (!registered) {
502     PetscCall(PetscClassIdRegister("Toy Hydrostatic Ice", &THI_CLASSID));
503     registered = PETSC_TRUE;
504   }
505   PetscCall(PetscHeaderCreate(thi, THI_CLASSID, "THI", "Toy Hydrostatic Ice", "THI", comm, THIDestroy, 0));
506 
507   PetscCall(PetscNew(&thi->units));
508 
509   units           = thi->units;
510   units->meter    = 1e-2;
511   units->second   = 1e-7;
512   units->kilogram = 1e-12;
513 
514   PetscOptionsBegin(comm, NULL, "Scaled units options", "");
515   {
516     PetscCall(PetscOptionsReal("-units_meter", "1 meter in scaled length units", "", units->meter, &units->meter, NULL));
517     PetscCall(PetscOptionsReal("-units_second", "1 second in scaled time units", "", units->second, &units->second, NULL));
518     PetscCall(PetscOptionsReal("-units_kilogram", "1 kilogram in scaled mass units", "", units->kilogram, &units->kilogram, NULL));
519   }
520   PetscOptionsEnd();
521   units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
522   units->year   = 31556926. * units->second, /* seconds per year */
523 
524     thi->Lx            = 10.e3;
525   thi->Ly              = 10.e3;
526   thi->Lz              = 1000;
527   thi->nlevels         = 1;
528   thi->dirichlet_scale = 1;
529   thi->verbose         = PETSC_FALSE;
530 
531   thi->viscosity.glen_n = 3.;
532   thi->erosion.rate     = 1e-3; /* m/a */
533   thi->erosion.exponent = 1.;
534   thi->erosion.refvel   = 1.; /* m/a */
535 
536   PetscOptionsBegin(comm, NULL, "Toy Hydrostatic Ice options", "");
537   {
538     QuadratureType quad       = QUAD_GAUSS;
539     char           homexp[]   = "A";
540     char           mtype[256] = MATSBAIJ;
541     PetscReal      L, m = 1.0;
542     PetscBool      flg;
543     L = thi->Lx;
544     PetscCall(PetscOptionsReal("-thi_L", "Domain size (m)", "", L, &L, &flg));
545     if (flg) thi->Lx = thi->Ly = L;
546     PetscCall(PetscOptionsReal("-thi_Lx", "X Domain size (m)", "", thi->Lx, &thi->Lx, NULL));
547     PetscCall(PetscOptionsReal("-thi_Ly", "Y Domain size (m)", "", thi->Ly, &thi->Ly, NULL));
548     PetscCall(PetscOptionsReal("-thi_Lz", "Z Domain size (m)", "", thi->Lz, &thi->Lz, NULL));
549     PetscCall(PetscOptionsString("-thi_hom", "ISMIP-HOM experiment (A or C)", "", homexp, homexp, sizeof(homexp), NULL));
550     switch (homexp[0] = toupper(homexp[0])) {
551     case 'A':
552       thi->initialize = THIInitialize_HOM_A;
553       thi->no_slip    = PETSC_TRUE;
554       thi->alpha      = 0.5;
555       break;
556     case 'C':
557       thi->initialize = THIInitialize_HOM_C;
558       thi->no_slip    = PETSC_FALSE;
559       thi->alpha      = 0.1;
560       break;
561     case 'F':
562       thi->initialize = THIInitialize_HOM_F;
563       thi->no_slip    = PETSC_FALSE;
564       thi->alpha      = 0.5;
565       break;
566     case 'X':
567       thi->initialize = THIInitialize_HOM_X;
568       thi->no_slip    = PETSC_FALSE;
569       thi->alpha      = 0.3;
570       break;
571     case 'Y':
572       thi->initialize = THIInitialize_HOM_Y;
573       thi->no_slip    = PETSC_FALSE;
574       thi->alpha      = 0.5;
575       break;
576     case 'Z':
577       thi->initialize = THIInitialize_HOM_Z;
578       thi->no_slip    = PETSC_FALSE;
579       thi->alpha      = 0.5;
580       break;
581     default:
582       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "HOM experiment '%c' not implemented", homexp[0]);
583     }
584     PetscCall(PetscOptionsEnum("-thi_quadrature", "Quadrature to use for 3D elements", "", QuadratureTypes, (PetscEnum)quad, (PetscEnum *)&quad, NULL));
585     switch (quad) {
586     case QUAD_GAUSS:
587       HexQInterp = HexQInterp_Gauss;
588       HexQDeriv  = HexQDeriv_Gauss;
589       break;
590     case QUAD_LOBATTO:
591       HexQInterp = HexQInterp_Lobatto;
592       HexQDeriv  = HexQDeriv_Lobatto;
593       break;
594     }
595     PetscCall(PetscOptionsReal("-thi_alpha", "Bed angle (degrees)", "", thi->alpha, &thi->alpha, NULL));
596     PetscCall(PetscOptionsReal("-thi_viscosity_glen_n", "Exponent in Glen flow law, 1=linear, infty=ideal plastic", NULL, thi->viscosity.glen_n, &thi->viscosity.glen_n, NULL));
597     PetscCall(PetscOptionsReal("-thi_friction_m", "Friction exponent, 0=Coulomb, 1=Navier", "", m, &m, NULL));
598     thi->friction.exponent = (m - 1) / 2;
599     PetscCall(PetscOptionsReal("-thi_erosion_rate", "Rate of erosion relative to sliding velocity at reference velocity (m/a)", NULL, thi->erosion.rate, &thi->erosion.rate, NULL));
600     PetscCall(PetscOptionsReal("-thi_erosion_exponent", "Power of sliding velocity appearing in erosion relation", NULL, thi->erosion.exponent, &thi->erosion.exponent, NULL));
601     PetscCall(PetscOptionsReal("-thi_erosion_refvel", "Reference sliding velocity for erosion (m/a)", NULL, thi->erosion.refvel, &thi->erosion.refvel, NULL));
602     thi->erosion.rate *= units->meter / units->year;
603     thi->erosion.refvel *= units->meter / units->year;
604     PetscCall(PetscOptionsReal("-thi_dirichlet_scale", "Scale Dirichlet boundary conditions by this factor", "", thi->dirichlet_scale, &thi->dirichlet_scale, NULL));
605     PetscCall(PetscOptionsReal("-thi_ssa_friction_scale", "Scale slip boundary conditions by this factor in SSA (2D) assembly", "", thi->ssa_friction_scale, &thi->ssa_friction_scale, NULL));
606     PetscCall(PetscOptionsReal("-thi_inertia", "Coefficient of acceleration term in velocity system, physical is almost zero", NULL, thi->inertia, &thi->inertia, NULL));
607     PetscCall(PetscOptionsInt("-thi_nlevels", "Number of levels of refinement", "", thi->nlevels, &thi->nlevels, NULL));
608     PetscCall(PetscOptionsFList("-thi_mat_type", "Matrix type", "MatSetType", MatList, mtype, (char *)mtype, sizeof(mtype), NULL));
609     PetscCall(PetscStrallocpy(mtype, &thi->mattype));
610     PetscCall(PetscOptionsBool("-thi_verbose", "Enable verbose output (like matrix sizes and statistics)", "", thi->verbose, &thi->verbose, NULL));
611     PetscCall(PetscOptionsString("-thi_monitor", "Basename to write state files to", NULL, monitor_basename, monitor_basename, sizeof(monitor_basename), &flg));
612     if (flg) {
613       PetscCall(PetscStrallocpy(monitor_basename, &thi->monitor_basename));
614       thi->monitor_interval = 1;
615       PetscCall(PetscOptionsInt("-thi_monitor_interval", "Frequency at which to write state files", NULL, thi->monitor_interval, &thi->monitor_interval, NULL));
616     }
617   }
618   PetscOptionsEnd();
619 
620   /* dimensionalize */
621   thi->Lx *= units->meter;
622   thi->Ly *= units->meter;
623   thi->Lz *= units->meter;
624   thi->alpha *= PETSC_PI / 180;
625 
626   PRangeClear(&thi->eta);
627   PRangeClear(&thi->beta2);
628 
629   {
630     PetscReal u = 1000 * units->meter / (3e7 * units->second), gradu = u / (100 * units->meter), eta, deta, rho = 910 * units->kilogram / PetscPowRealInt(units->meter, 3), grav = 9.81 * units->meter / PetscSqr(units->second),
631               driving = rho * grav * PetscSinReal(thi->alpha) * 1000 * units->meter;
632     THIViscosity(thi, 0.5 * gradu * gradu, &eta, &deta);
633     thi->rhog = rho * grav;
634     if (thi->verbose) {
635       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Units: meter %8.2g  second %8.2g  kg %8.2g  Pa %8.2g\n", (double)units->meter, (double)units->second, (double)units->kilogram, (double)units->Pascal));
636       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n", (double)thi->Lx, (double)thi->Ly, (double)thi->Lz, (double)(rho * grav * 1e3 * units->meter), (double)driving));
637       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)u, (double)gradu, (double)eta, (double)(2 * eta * gradu, 2 * eta * gradu / driving)));
638       THIViscosity(thi, 0.5 * PetscSqr(1e-3 * gradu), &eta, &deta);
639       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Small velocity 1m/a  %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)(1e-3 * u), (double)(1e-3 * gradu), (double)eta, (double)(2 * eta * 1e-3 * gradu, 2 * eta * 1e-3 * gradu / driving)));
640     }
641   }
642 
643   *inthi = thi;
644   PetscFunctionReturn(PETSC_SUCCESS);
645 }
646 
647 /* Our problem is periodic, but the domain has a mean slope of alpha so the bed does not line up between the upstream
648  * and downstream ends of the domain.  This function fixes the ghost values so that the domain appears truly periodic in
649  * the horizontal. */
650 static PetscErrorCode THIFixGhosts(THI thi, DM da3, DM da2, Vec X3, Vec X2)
651 {
652   DMDALocalInfo info;
653   PrmNode     **x2;
654   PetscInt      i, j;
655 
656   PetscFunctionBeginUser;
657   PetscCall(DMDAGetLocalInfo(da3, &info));
658   /* PetscCall(VecView(X2,PETSC_VIEWER_STDOUT_WORLD)); */
659   PetscCall(DMDAVecGetArray(da2, X2, &x2));
660   for (i = info.gzs; i < info.gzs + info.gzm; i++) {
661     if (i > -1 && i < info.mz) continue;
662     for (j = info.gys; j < info.gys + info.gym; j++) x2[i][j].b += PetscSinReal(thi->alpha) * thi->Lx * (i < 0 ? 1.0 : -1.0);
663   }
664   PetscCall(DMDAVecRestoreArray(da2, X2, &x2));
665   /* PetscCall(VecView(X2,PETSC_VIEWER_STDOUT_WORLD)); */
666   PetscFunctionReturn(PETSC_SUCCESS);
667 }
668 
669 static PetscErrorCode THIInitializePrm(THI thi, DM da2prm, PrmNode **p)
670 {
671   PetscInt i, j, xs, xm, ys, ym, mx, my;
672 
673   PetscFunctionBeginUser;
674   PetscCall(DMDAGetGhostCorners(da2prm, &ys, &xs, 0, &ym, &xm, 0));
675   PetscCall(DMDAGetInfo(da2prm, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
676   for (i = xs; i < xs + xm; i++) {
677     for (j = ys; j < ys + ym; j++) {
678       PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my;
679       thi->initialize(thi, xx, yy, &p[i][j]);
680     }
681   }
682   PetscFunctionReturn(PETSC_SUCCESS);
683 }
684 
685 static PetscErrorCode THIInitial(THI thi, DM pack, Vec X)
686 {
687   DM        da3, da2;
688   PetscInt  i, j, k, xs, xm, ys, ym, zs, zm, mx, my;
689   PetscReal hx, hy;
690   PrmNode **prm;
691   Node   ***x;
692   Vec       X3g, X2g, X2;
693 
694   PetscFunctionBeginUser;
695   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
696   PetscCall(DMCompositeGetAccess(pack, X, &X3g, &X2g));
697   PetscCall(DMGetLocalVector(da2, &X2));
698 
699   PetscCall(DMDAGetInfo(da3, 0, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
700   PetscCall(DMDAGetCorners(da3, &zs, &ys, &xs, &zm, &ym, &xm));
701   PetscCall(DMDAVecGetArray(da3, X3g, &x));
702   PetscCall(DMDAVecGetArray(da2, X2, &prm));
703 
704   PetscCall(THIInitializePrm(thi, da2, prm));
705 
706   hx = thi->Lx / mx;
707   hy = thi->Ly / my;
708   for (i = xs; i < xs + xm; i++) {
709     for (j = ys; j < ys + ym; j++) {
710       for (k = zs; k < zs + zm; k++) {
711         const PetscScalar zm1 = zm - 1, drivingx = thi->rhog * (prm[i + 1][j].b + prm[i + 1][j].h - prm[i - 1][j].b - prm[i - 1][j].h) / (2 * hx), drivingy = thi->rhog * (prm[i][j + 1].b + prm[i][j + 1].h - prm[i][j - 1].b - prm[i][j - 1].h) / (2 * hy);
712         x[i][j][k].u = 0. * drivingx * prm[i][j].h * (PetscScalar)k / zm1;
713         x[i][j][k].v = 0. * drivingy * prm[i][j].h * (PetscScalar)k / zm1;
714       }
715     }
716   }
717 
718   PetscCall(DMDAVecRestoreArray(da3, X3g, &x));
719   PetscCall(DMDAVecRestoreArray(da2, X2, &prm));
720 
721   PetscCall(DMLocalToGlobalBegin(da2, X2, INSERT_VALUES, X2g));
722   PetscCall(DMLocalToGlobalEnd(da2, X2, INSERT_VALUES, X2g));
723   PetscCall(DMRestoreLocalVector(da2, &X2));
724 
725   PetscCall(DMCompositeRestoreAccess(pack, X, &X3g, &X2g));
726   PetscFunctionReturn(PETSC_SUCCESS);
727 }
728 
729 static void PointwiseNonlinearity(THI thi, const Node n[restrict 8], const PetscReal phi[restrict 3], PetscReal dphi[restrict 8][3], PetscScalar *restrict u, PetscScalar *restrict v, PetscScalar du[restrict 3], PetscScalar dv[restrict 3], PetscReal *eta, PetscReal *deta)
730 {
731   PetscInt    l, ll;
732   PetscScalar gam;
733 
734   du[0] = du[1] = du[2] = 0;
735   dv[0] = dv[1] = dv[2] = 0;
736   *u                    = 0;
737   *v                    = 0;
738   for (l = 0; l < 8; l++) {
739     *u += phi[l] * n[l].u;
740     *v += phi[l] * n[l].v;
741     for (ll = 0; ll < 3; ll++) {
742       du[ll] += dphi[l][ll] * n[l].u;
743       dv[ll] += dphi[l][ll] * n[l].v;
744     }
745   }
746   gam = Sqr(du[0]) + Sqr(dv[1]) + du[0] * dv[1] + 0.25 * Sqr(du[1] + dv[0]) + 0.25 * Sqr(du[2]) + 0.25 * Sqr(dv[2]);
747   THIViscosity(thi, PetscRealPart(gam), eta, deta);
748 }
749 
750 static PetscErrorCode THIFunctionLocal_3D(DMDALocalInfo *info, const Node ***x, const PrmNode **prm, const Node ***xdot, Node ***f, THI thi)
751 {
752   PetscInt  xs, ys, xm, ym, zm, i, j, k, q, l;
753   PetscReal hx, hy, etamin, etamax, beta2min, beta2max;
754 
755   PetscFunctionBeginUser;
756   xs = info->zs;
757   ys = info->ys;
758   xm = info->zm;
759   ym = info->ym;
760   zm = info->xm;
761   hx = thi->Lx / info->mz;
762   hy = thi->Ly / info->my;
763 
764   etamin   = 1e100;
765   etamax   = 0;
766   beta2min = 1e100;
767   beta2max = 0;
768 
769   for (i = xs; i < xs + xm; i++) {
770     for (j = ys; j < ys + ym; j++) {
771       PrmNode pn[4], dpn[4][2];
772       QuadExtract(prm, i, j, pn);
773       PetscCall(QuadComputeGrad4(QuadQDeriv, hx, hy, pn, dpn));
774       for (k = 0; k < zm - 1; k++) {
775         PetscInt  ls = 0;
776         Node      n[8], ndot[8], *fn[8];
777         PetscReal zn[8], etabase = 0;
778 
779         PrmHexGetZ(pn, k, zm, zn);
780         HexExtract(x, i, j, k, n);
781         HexExtract(xdot, i, j, k, ndot);
782         HexExtractRef(f, i, j, k, fn);
783         if (thi->no_slip && k == 0) {
784           for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
785           /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
786           ls = 4;
787         }
788         for (q = 0; q < 8; q++) {
789           PetscReal   dz[3], phi[8], dphi[8][3], jw, eta, deta;
790           PetscScalar du[3], dv[3], u, v, udot = 0, vdot = 0;
791           for (l = ls; l < 8; l++) {
792             udot += HexQInterp[q][l] * ndot[l].u;
793             vdot += HexQInterp[q][l] * ndot[l].v;
794           }
795           HexGrad(HexQDeriv[q], zn, dz);
796           HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
797           PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
798           jw /= thi->rhog; /* scales residuals to be O(1) */
799           if (q == 0) etabase = eta;
800           RangeUpdate(&etamin, &etamax, eta);
801           for (l = ls; l < 8; l++) { /* test functions */
802             const PetscScalar ds[2] = {dpn[q % 4][0].h + dpn[q % 4][0].b, dpn[q % 4][1].h + dpn[q % 4][1].b};
803             const PetscReal   pp = phi[l], *dp = dphi[l];
804             fn[l]->u += dp[0] * jw * eta * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * du[2] + pp * jw * thi->rhog * ds[0];
805             fn[l]->v += dp[1] * jw * eta * (2. * du[0] + 4. * dv[1]) + dp[0] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * dv[2] + pp * jw * thi->rhog * ds[1];
806             fn[l]->u += pp * jw * udot * thi->inertia * pp;
807             fn[l]->v += pp * jw * vdot * thi->inertia * pp;
808           }
809         }
810         if (k == 0) { /* we are on a bottom face */
811           if (thi->no_slip) {
812             /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
813             * conditions.  After shenanigans above, etabase contains the effective viscosity at the closest quadrature
814             * point to the bed.  We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
815             * diagonal entry corresponding to the adjacent node.  The fundamental scaling of the viscous part is in
816             * diagu, diagv below.  This scaling is easy to recognize by considering the finite difference operator after
817             * scaling by element size.  The no-slip Dirichlet condition is scaled by this factor, and also in the
818             * assembled matrix (see the similar block in THIJacobianLocal).
819             *
820             * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
821             * (nonlinearly in general) on the neighboring interior nodes through the local viscosity.  This will make
822             * a matrix-free Jacobian have extra entries in the corresponding row.  We assemble only the diagonal part,
823             * so the solution will exactly satisfy the boundary condition after the first linear iteration.
824             */
825             const PetscReal   hz    = PetscRealPart(pn[0].h) / (zm - 1.);
826             const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
827             fn[0]->u = thi->dirichlet_scale * diagu * x[i][j][k].u;
828             fn[0]->v = thi->dirichlet_scale * diagv * x[i][j][k].v;
829           } else {                    /* Integrate over bottom face to apply boundary condition */
830             for (q = 0; q < 4; q++) { /* We remove the explicit scaling of the residual by 1/rhog because beta2 already has that scaling to be O(1) */
831               const PetscReal jw = 0.25 * hx * hy, *phi = QuadQInterp[q];
832               PetscScalar     u = 0, v = 0, rbeta2 = 0;
833               PetscReal       beta2, dbeta2;
834               for (l = 0; l < 4; l++) {
835                 u += phi[l] * n[l].u;
836                 v += phi[l] * n[l].v;
837                 rbeta2 += phi[l] * pn[l].beta2;
838               }
839               THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
840               RangeUpdate(&beta2min, &beta2max, beta2);
841               for (l = 0; l < 4; l++) {
842                 const PetscReal pp = phi[l];
843                 fn[ls + l]->u += pp * jw * beta2 * u;
844                 fn[ls + l]->v += pp * jw * beta2 * v;
845               }
846             }
847           }
848         }
849       }
850     }
851   }
852 
853   PetscCall(PRangeMinMax(&thi->eta, etamin, etamax));
854   PetscCall(PRangeMinMax(&thi->beta2, beta2min, beta2max));
855   PetscFunctionReturn(PETSC_SUCCESS);
856 }
857 
858 static PetscErrorCode THIFunctionLocal_2D(DMDALocalInfo *info, const Node ***x, const PrmNode **prm, const PrmNode **prmdot, PrmNode **f, THI thi)
859 {
860   PetscInt xs, ys, xm, ym, zm, i, j, k;
861 
862   PetscFunctionBeginUser;
863   xs = info->zs;
864   ys = info->ys;
865   xm = info->zm;
866   ym = info->ym;
867   zm = info->xm;
868 
869   for (i = xs; i < xs + xm; i++) {
870     for (j = ys; j < ys + ym; j++) {
871       PetscScalar div = 0, erate, h[8];
872       PrmNodeGetFaceMeasure(prm, i, j, h);
873       for (k = 0; k < zm; k++) {
874         PetscScalar weight = (k == 0 || k == zm - 1) ? 0.5 / (zm - 1) : 1.0 / (zm - 1);
875         if (0) { /* centered flux */
876           div += (-weight * h[0] * StaggeredMidpoint2D(x[i][j][k].u, x[i - 1][j][k].u, x[i - 1][j - 1][k].u, x[i][j - 1][k].u) - weight * h[1] * StaggeredMidpoint2D(x[i][j][k].u, x[i - 1][j][k].u, x[i - 1][j + 1][k].u, x[i][j + 1][k].u) +
877                   weight * h[2] * StaggeredMidpoint2D(x[i][j][k].u, x[i + 1][j][k].u, x[i + 1][j + 1][k].u, x[i][j + 1][k].u) + weight * h[3] * StaggeredMidpoint2D(x[i][j][k].u, x[i + 1][j][k].u, x[i + 1][j - 1][k].u, x[i][j - 1][k].u) -
878                   weight * h[4] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j - 1][k].v, x[i + 1][j - 1][k].v, x[i + 1][j][k].v) - weight * h[5] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j - 1][k].v, x[i - 1][j - 1][k].v, x[i - 1][j][k].v) +
879                   weight * h[6] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j + 1][k].v, x[i - 1][j + 1][k].v, x[i - 1][j][k].v) + weight * h[7] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j + 1][k].v, x[i + 1][j + 1][k].v, x[i + 1][j][k].v));
880         } else { /* Upwind flux */
881           div += weight * (-UpwindFluxXW(x, prm, h, i, j, k, 1) - UpwindFluxXW(x, prm, h, i, j, k, -1) + UpwindFluxXE(x, prm, h, i, j, k, 1) + UpwindFluxXE(x, prm, h, i, j, k, -1) - UpwindFluxYS(x, prm, h, i, j, k, 1) - UpwindFluxYS(x, prm, h, i, j, k, -1) + UpwindFluxYN(x, prm, h, i, j, k, 1) + UpwindFluxYN(x, prm, h, i, j, k, -1));
882         }
883       }
884       /* printf("div[%d][%d] %g\n",i,j,div); */
885       THIErosion(thi, &x[i][j][0], &erate, NULL);
886       f[i][j].b     = prmdot[i][j].b - erate;
887       f[i][j].h     = prmdot[i][j].h + div;
888       f[i][j].beta2 = prmdot[i][j].beta2;
889     }
890   }
891   PetscFunctionReturn(PETSC_SUCCESS);
892 }
893 
894 static PetscErrorCode THIFunction(TS ts, PetscReal t, Vec X, Vec Xdot, Vec F, void *ctx)
895 {
896   THI             thi = (THI)ctx;
897   DM              pack, da3, da2;
898   Vec             X3, X2, Xdot3, Xdot2, F3, F2, F3g, F2g;
899   const Node   ***x3, ***xdot3;
900   const PrmNode **x2, **xdot2;
901   Node         ***f3;
902   PrmNode       **f2;
903   DMDALocalInfo   info3;
904 
905   PetscFunctionBeginUser;
906   PetscCall(TSGetDM(ts, &pack));
907   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
908   PetscCall(DMDAGetLocalInfo(da3, &info3));
909   PetscCall(DMCompositeGetLocalVectors(pack, &X3, &X2));
910   PetscCall(DMCompositeGetLocalVectors(pack, &Xdot3, &Xdot2));
911   PetscCall(DMCompositeScatter(pack, X, X3, X2));
912   PetscCall(THIFixGhosts(thi, da3, da2, X3, X2));
913   PetscCall(DMCompositeScatter(pack, Xdot, Xdot3, Xdot2));
914 
915   PetscCall(DMGetLocalVector(da3, &F3));
916   PetscCall(DMGetLocalVector(da2, &F2));
917   PetscCall(VecZeroEntries(F3));
918 
919   PetscCall(DMDAVecGetArray(da3, X3, &x3));
920   PetscCall(DMDAVecGetArray(da2, X2, &x2));
921   PetscCall(DMDAVecGetArray(da3, Xdot3, &xdot3));
922   PetscCall(DMDAVecGetArray(da2, Xdot2, &xdot2));
923   PetscCall(DMDAVecGetArray(da3, F3, &f3));
924   PetscCall(DMDAVecGetArray(da2, F2, &f2));
925 
926   PetscCall(THIFunctionLocal_3D(&info3, x3, x2, xdot3, f3, thi));
927   PetscCall(THIFunctionLocal_2D(&info3, x3, x2, xdot2, f2, thi));
928 
929   PetscCall(DMDAVecRestoreArray(da3, X3, &x3));
930   PetscCall(DMDAVecRestoreArray(da2, X2, &x2));
931   PetscCall(DMDAVecRestoreArray(da3, Xdot3, &xdot3));
932   PetscCall(DMDAVecRestoreArray(da2, Xdot2, &xdot2));
933   PetscCall(DMDAVecRestoreArray(da3, F3, &f3));
934   PetscCall(DMDAVecRestoreArray(da2, F2, &f2));
935 
936   PetscCall(DMCompositeRestoreLocalVectors(pack, &X3, &X2));
937   PetscCall(DMCompositeRestoreLocalVectors(pack, &Xdot3, &Xdot2));
938 
939   PetscCall(VecZeroEntries(F));
940   PetscCall(DMCompositeGetAccess(pack, F, &F3g, &F2g));
941   PetscCall(DMLocalToGlobalBegin(da3, F3, ADD_VALUES, F3g));
942   PetscCall(DMLocalToGlobalEnd(da3, F3, ADD_VALUES, F3g));
943   PetscCall(DMLocalToGlobalBegin(da2, F2, INSERT_VALUES, F2g));
944   PetscCall(DMLocalToGlobalEnd(da2, F2, INSERT_VALUES, F2g));
945 
946   if (thi->verbose) {
947     PetscViewer viewer;
948     PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)thi), &viewer));
949     PetscCall(PetscViewerASCIIPrintf(viewer, "3D_Velocity residual (bs=2):\n"));
950     PetscCall(PetscViewerASCIIPushTab(viewer));
951     PetscCall(VecView(F3, viewer));
952     PetscCall(PetscViewerASCIIPopTab(viewer));
953     PetscCall(PetscViewerASCIIPrintf(viewer, "2D_Fields residual (bs=3):\n"));
954     PetscCall(PetscViewerASCIIPushTab(viewer));
955     PetscCall(VecView(F2, viewer));
956     PetscCall(PetscViewerASCIIPopTab(viewer));
957   }
958 
959   PetscCall(DMCompositeRestoreAccess(pack, F, &F3g, &F2g));
960 
961   PetscCall(DMRestoreLocalVector(da3, &F3));
962   PetscCall(DMRestoreLocalVector(da2, &F2));
963   PetscFunctionReturn(PETSC_SUCCESS);
964 }
965 
966 static PetscErrorCode THIMatrixStatistics(THI thi, Mat B, PetscViewer viewer)
967 {
968   PetscReal   nrm;
969   PetscInt    m;
970   PetscMPIInt rank;
971 
972   PetscFunctionBeginUser;
973   PetscCall(MatNorm(B, NORM_FROBENIUS, &nrm));
974   PetscCall(MatGetSize(B, &m, 0));
975   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &rank));
976   if (rank == 0) {
977     PetscScalar val0, val2;
978     PetscCall(MatGetValue(B, 0, 0, &val0));
979     PetscCall(MatGetValue(B, 2, 2, &val2));
980     PetscCall(PetscViewerASCIIPrintf(viewer, "Matrix dim %8" PetscInt_FMT "  norm %8.2e, (0,0) %8.2e  (2,2) %8.2e, eta [%8.2e,%8.2e] beta2 [%8.2e,%8.2e]\n", m, (double)nrm, (double)PetscRealPart(val0), (double)PetscRealPart(val2), (double)thi->eta.cmin,
981                                      (double)thi->eta.cmax, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
982   }
983   PetscFunctionReturn(PETSC_SUCCESS);
984 }
985 
986 static PetscErrorCode THISurfaceStatistics(DM pack, Vec X, PetscReal *min, PetscReal *max, PetscReal *mean)
987 {
988   DM          da3, da2;
989   Vec         X3, X2;
990   Node     ***x;
991   PetscInt    i, j, xs, ys, zs, xm, ym, zm, mx, my, mz;
992   PetscReal   umin = 1e100, umax = -1e100;
993   PetscScalar usum = 0.0, gusum;
994 
995   PetscFunctionBeginUser;
996   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
997   PetscCall(DMCompositeGetAccess(pack, X, &X3, &X2));
998   *min = *max = *mean = 0;
999   PetscCall(DMDAGetInfo(da3, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1000   PetscCall(DMDAGetCorners(da3, &zs, &ys, &xs, &zm, &ym, &xm));
1001   PetscCheck(zs == 0 && zm == mz, PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Unexpected decomposition");
1002   PetscCall(DMDAVecGetArray(da3, X3, &x));
1003   for (i = xs; i < xs + xm; i++) {
1004     for (j = ys; j < ys + ym; j++) {
1005       PetscReal u = PetscRealPart(x[i][j][zm - 1].u);
1006       RangeUpdate(&umin, &umax, u);
1007       usum += u;
1008     }
1009   }
1010   PetscCall(DMDAVecRestoreArray(da3, X3, &x));
1011   PetscCall(DMCompositeRestoreAccess(pack, X, &X3, &X2));
1012 
1013   PetscCallMPI(MPIU_Allreduce(&umin, min, 1, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)da3)));
1014   PetscCallMPI(MPIU_Allreduce(&umax, max, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)da3)));
1015   PetscCallMPI(MPIU_Allreduce(&usum, &gusum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)da3)));
1016   *mean = PetscRealPart(gusum) / (mx * my);
1017   PetscFunctionReturn(PETSC_SUCCESS);
1018 }
1019 
1020 static PetscErrorCode THISolveStatistics(THI thi, TS ts, PetscInt coarsened, const char name[])
1021 {
1022   MPI_Comm comm;
1023   DM       pack;
1024   Vec      X, X3, X2;
1025 
1026   PetscFunctionBeginUser;
1027   PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
1028   PetscCall(TSGetDM(ts, &pack));
1029   PetscCall(TSGetSolution(ts, &X));
1030   PetscCall(DMCompositeGetAccess(pack, X, &X3, &X2));
1031   PetscCall(PetscPrintf(comm, "Solution statistics after solve: %s\n", name));
1032   {
1033     PetscInt            its, lits;
1034     SNESConvergedReason reason;
1035     SNES                snes;
1036     PetscCall(TSGetSNES(ts, &snes));
1037     PetscCall(SNESGetIterationNumber(snes, &its));
1038     PetscCall(SNESGetConvergedReason(snes, &reason));
1039     PetscCall(SNESGetLinearSolveIterations(snes, &lits));
1040     PetscCall(PetscPrintf(comm, "%s: Number of SNES iterations = %" PetscInt_FMT ", total linear iterations = %" PetscInt_FMT "\n", SNESConvergedReasons[reason], its, lits));
1041   }
1042   {
1043     PetscReal    nrm2, tmin[3] = {1e100, 1e100, 1e100}, tmax[3] = {-1e100, -1e100, -1e100}, min[3], max[3];
1044     PetscInt     i, j, m;
1045     PetscScalar *x;
1046     PetscCall(VecNorm(X3, NORM_2, &nrm2));
1047     PetscCall(VecGetLocalSize(X3, &m));
1048     PetscCall(VecGetArray(X3, &x));
1049     for (i = 0; i < m; i += 2) {
1050       PetscReal u = PetscRealPart(x[i]), v = PetscRealPart(x[i + 1]), c = PetscSqrtReal(u * u + v * v);
1051       tmin[0] = PetscMin(u, tmin[0]);
1052       tmin[1] = PetscMin(v, tmin[1]);
1053       tmin[2] = PetscMin(c, tmin[2]);
1054       tmax[0] = PetscMax(u, tmax[0]);
1055       tmax[1] = PetscMax(v, tmax[1]);
1056       tmax[2] = PetscMax(c, tmax[2]);
1057     }
1058     PetscCall(VecRestoreArray(X, &x));
1059     PetscCallMPI(MPIU_Allreduce(tmin, min, 3, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)thi)));
1060     PetscCallMPI(MPIU_Allreduce(tmax, max, 3, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)thi)));
1061     /* Dimensionalize to meters/year */
1062     nrm2 *= thi->units->year / thi->units->meter;
1063     for (j = 0; j < 3; j++) {
1064       min[j] *= thi->units->year / thi->units->meter;
1065       max[j] *= thi->units->year / thi->units->meter;
1066     }
1067     PetscCall(PetscPrintf(comm, "|X|_2 %g   u in [%g, %g]   v in [%g, %g]   c in [%g, %g] \n", (double)nrm2, (double)min[0], (double)max[0], (double)min[1], (double)max[1], (double)min[2], (double)max[2]));
1068     {
1069       PetscReal umin, umax, umean;
1070       PetscCall(THISurfaceStatistics(pack, X, &umin, &umax, &umean));
1071       umin *= thi->units->year / thi->units->meter;
1072       umax *= thi->units->year / thi->units->meter;
1073       umean *= thi->units->year / thi->units->meter;
1074       PetscCall(PetscPrintf(comm, "Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n", (double)umin, (double)umax, (double)umean));
1075     }
1076     /* These values stay nondimensional */
1077     PetscCall(PetscPrintf(comm, "Global eta range   [%g, %g], converged range [%g, %g]\n", (double)thi->eta.min, (double)thi->eta.max, (double)thi->eta.cmin, (double)thi->eta.cmax));
1078     PetscCall(PetscPrintf(comm, "Global beta2 range [%g, %g], converged range [%g, %g]\n", (double)thi->beta2.min, (double)thi->beta2.max, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
1079   }
1080   PetscCall(PetscPrintf(comm, "\n"));
1081   PetscCall(DMCompositeRestoreAccess(pack, X, &X3, &X2));
1082   PetscFunctionReturn(PETSC_SUCCESS);
1083 }
1084 
1085 static inline PetscInt DMDALocalIndex3D(DMDALocalInfo *info, PetscInt i, PetscInt j, PetscInt k)
1086 {
1087   return ((i - info->gzs) * info->gym + (j - info->gys)) * info->gxm + (k - info->gxs);
1088 }
1089 static inline PetscInt DMDALocalIndex2D(DMDALocalInfo *info, PetscInt i, PetscInt j)
1090 {
1091   return (i - info->gzs) * info->gym + (j - info->gys);
1092 }
1093 
1094 static PetscErrorCode THIJacobianLocal_Momentum(DMDALocalInfo *info, const Node ***x, const PrmNode **prm, Mat B, Mat Bcpl, THI thi)
1095 {
1096   PetscInt  xs, ys, xm, ym, zm, i, j, k, q, l, ll;
1097   PetscReal hx, hy;
1098 
1099   PetscFunctionBeginUser;
1100   xs = info->zs;
1101   ys = info->ys;
1102   xm = info->zm;
1103   ym = info->ym;
1104   zm = info->xm;
1105   hx = thi->Lx / info->mz;
1106   hy = thi->Ly / info->my;
1107 
1108   for (i = xs; i < xs + xm; i++) {
1109     for (j = ys; j < ys + ym; j++) {
1110       PrmNode pn[4], dpn[4][2];
1111       QuadExtract(prm, i, j, pn);
1112       PetscCall(QuadComputeGrad4(QuadQDeriv, hx, hy, pn, dpn));
1113       for (k = 0; k < zm - 1; k++) {
1114         Node        n[8];
1115         PetscReal   zn[8], etabase = 0;
1116         PetscScalar Ke[8 * NODE_SIZE][8 * NODE_SIZE], Kcpl[8 * NODE_SIZE][4 * PRMNODE_SIZE];
1117         PetscInt    ls = 0;
1118 
1119         PrmHexGetZ(pn, k, zm, zn);
1120         HexExtract(x, i, j, k, n);
1121         PetscCall(PetscMemzero(Ke, sizeof(Ke)));
1122         PetscCall(PetscMemzero(Kcpl, sizeof(Kcpl)));
1123         if (thi->no_slip && k == 0) {
1124           for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
1125           ls = 4;
1126         }
1127         for (q = 0; q < 8; q++) {
1128           PetscReal   dz[3], phi[8], dphi[8][3], jw, eta, deta;
1129           PetscScalar du[3], dv[3], u, v;
1130           HexGrad(HexQDeriv[q], zn, dz);
1131           HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
1132           PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
1133           jw /= thi->rhog; /* residuals are scaled by this factor */
1134           if (q == 0) etabase = eta;
1135           for (l = ls; l < 8; l++) { /* test functions */
1136             const PetscReal pp = phi[l], *restrict dp = dphi[l];
1137             for (ll = ls; ll < 8; ll++) { /* trial functions */
1138               const PetscReal *restrict dpl = dphi[ll];
1139               PetscScalar dgdu, dgdv;
1140               dgdu = 2. * du[0] * dpl[0] + dv[1] * dpl[0] + 0.5 * (du[1] + dv[0]) * dpl[1] + 0.5 * du[2] * dpl[2];
1141               dgdv = 2. * dv[1] * dpl[1] + du[0] * dpl[1] + 0.5 * (du[1] + dv[0]) * dpl[0] + 0.5 * dv[2] * dpl[2];
1142               /* Picard part */
1143               Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * eta * 4. * dpl[0] + dp[1] * jw * eta * dpl[1] + dp[2] * jw * eta * dpl[2];
1144               Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * eta * 2. * dpl[1] + dp[1] * jw * eta * dpl[0];
1145               Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * eta * 2. * dpl[0] + dp[0] * jw * eta * dpl[1];
1146               Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * eta * 4. * dpl[1] + dp[0] * jw * eta * dpl[0] + dp[2] * jw * eta * dpl[2];
1147               /* extra Newton terms */
1148               Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * deta * dgdu * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * du[2];
1149               Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * deta * dgdv * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * du[2];
1150               Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * deta * dgdu * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * dv[2];
1151               Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * deta * dgdv * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * dv[2];
1152               /* inertial part */
1153               Ke[l * 2 + 0][ll * 2 + 0] += pp * jw * thi->inertia * pp;
1154               Ke[l * 2 + 1][ll * 2 + 1] += pp * jw * thi->inertia * pp;
1155             }
1156             for (ll = 0; ll < 4; ll++) {                                                              /* Trial functions for surface/bed */
1157               const PetscReal dpl[] = {QuadQDeriv[q % 4][ll][0] / hx, QuadQDeriv[q % 4][ll][1] / hy}; /* surface = h + b */
1158               Kcpl[FieldIndex(Node, l, u)][FieldIndex(PrmNode, ll, h)] += pp * jw * thi->rhog * dpl[0];
1159               Kcpl[FieldIndex(Node, l, u)][FieldIndex(PrmNode, ll, b)] += pp * jw * thi->rhog * dpl[0];
1160               Kcpl[FieldIndex(Node, l, v)][FieldIndex(PrmNode, ll, h)] += pp * jw * thi->rhog * dpl[1];
1161               Kcpl[FieldIndex(Node, l, v)][FieldIndex(PrmNode, ll, b)] += pp * jw * thi->rhog * dpl[1];
1162             }
1163           }
1164         }
1165         if (k == 0) { /* on a bottom face */
1166           if (thi->no_slip) {
1167             const PetscReal   hz    = PetscRealPart(pn[0].h) / (zm - 1);
1168             const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
1169             Ke[0][0] = thi->dirichlet_scale * diagu;
1170             Ke[0][1] = 0;
1171             Ke[1][0] = 0;
1172             Ke[1][1] = thi->dirichlet_scale * diagv;
1173           } else {
1174             for (q = 0; q < 4; q++) { /* We remove the explicit scaling by 1/rhog because beta2 already has that scaling to be O(1) */
1175               const PetscReal jw = 0.25 * hx * hy, *phi = QuadQInterp[q];
1176               PetscScalar     u = 0, v = 0, rbeta2 = 0;
1177               PetscReal       beta2, dbeta2;
1178               for (l = 0; l < 4; l++) {
1179                 u += phi[l] * n[l].u;
1180                 v += phi[l] * n[l].v;
1181                 rbeta2 += phi[l] * pn[l].beta2;
1182               }
1183               THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
1184               for (l = 0; l < 4; l++) {
1185                 const PetscReal pp = phi[l];
1186                 for (ll = 0; ll < 4; ll++) {
1187                   const PetscReal ppl = phi[ll];
1188                   Ke[l * 2 + 0][ll * 2 + 0] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * u * u * ppl;
1189                   Ke[l * 2 + 0][ll * 2 + 1] += pp * jw * dbeta2 * u * v * ppl;
1190                   Ke[l * 2 + 1][ll * 2 + 0] += pp * jw * dbeta2 * v * u * ppl;
1191                   Ke[l * 2 + 1][ll * 2 + 1] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * v * v * ppl;
1192                 }
1193               }
1194             }
1195           }
1196         }
1197         {
1198           const PetscInt rc3blocked[8]                 = {DMDALocalIndex3D(info, i + 0, j + 0, k + 0), DMDALocalIndex3D(info, i + 1, j + 0, k + 0), DMDALocalIndex3D(info, i + 1, j + 1, k + 0), DMDALocalIndex3D(info, i + 0, j + 1, k + 0),
1199                                                           DMDALocalIndex3D(info, i + 0, j + 0, k + 1), DMDALocalIndex3D(info, i + 1, j + 0, k + 1), DMDALocalIndex3D(info, i + 1, j + 1, k + 1), DMDALocalIndex3D(info, i + 0, j + 1, k + 1)},
1200                          col2blocked[PRMNODE_SIZE * 4] = {DMDALocalIndex2D(info, i + 0, j + 0), DMDALocalIndex2D(info, i + 1, j + 0), DMDALocalIndex2D(info, i + 1, j + 1), DMDALocalIndex2D(info, i + 0, j + 1)};
1201 #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1202           for (l = 0; l < 8; l++) {
1203             for (ll = l + 1; ll < 8; ll++) {
1204               Ke[ll * 2 + 0][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 0];
1205               Ke[ll * 2 + 1][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 1];
1206               Ke[ll * 2 + 0][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 0];
1207               Ke[ll * 2 + 1][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 1];
1208             }
1209           }
1210 #endif
1211           PetscCall(MatSetValuesBlockedLocal(B, 8, rc3blocked, 8, rc3blocked, &Ke[0][0], ADD_VALUES)); /* velocity-velocity coupling can use blocked insertion */
1212           {                                                                                            /* The off-diagonal part cannot (yet) */
1213             PetscInt row3scalar[NODE_SIZE * 8], col2scalar[PRMNODE_SIZE * 4];
1214             for (l = 0; l < 8; l++)
1215               for (ll = 0; ll < NODE_SIZE; ll++) row3scalar[l * NODE_SIZE + ll] = rc3blocked[l] * NODE_SIZE + ll;
1216             for (l = 0; l < 4; l++)
1217               for (ll = 0; ll < PRMNODE_SIZE; ll++) col2scalar[l * PRMNODE_SIZE + ll] = col2blocked[l] * PRMNODE_SIZE + ll;
1218             PetscCall(MatSetValuesLocal(Bcpl, 8 * NODE_SIZE, row3scalar, 4 * PRMNODE_SIZE, col2scalar, &Kcpl[0][0], ADD_VALUES));
1219           }
1220         }
1221       }
1222     }
1223   }
1224   PetscFunctionReturn(PETSC_SUCCESS);
1225 }
1226 
1227 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info, const Node ***x3, const PrmNode **x2, const PrmNode **xdot2, PetscReal a, Mat B22, Mat B21, THI thi)
1228 {
1229   PetscInt xs, ys, xm, ym, zm, i, j, k;
1230 
1231   PetscFunctionBeginUser;
1232   xs = info->zs;
1233   ys = info->ys;
1234   xm = info->zm;
1235   ym = info->ym;
1236   zm = info->xm;
1237 
1238   PetscCheck(zm <= 1024, ((PetscObject)info->da)->comm, PETSC_ERR_SUP, "Need to allocate more space");
1239   for (i = xs; i < xs + xm; i++) {
1240     for (j = ys; j < ys + ym; j++) {
1241       { /* Self-coupling */
1242         const PetscInt    row[]  = {DMDALocalIndex2D(info, i, j)};
1243         const PetscInt    col[]  = {DMDALocalIndex2D(info, i, j)};
1244         const PetscScalar vals[] = {a, 0, 0, 0, a, 0, 0, 0, a};
1245         PetscCall(MatSetValuesBlockedLocal(B22, 1, row, 1, col, vals, INSERT_VALUES));
1246       }
1247       for (k = 0; k < zm; k++) { /* Coupling to velocity problem */
1248         /* Use a cheaper quadrature than for residual evaluation, because it is much sparser */
1249         const PetscInt    row[]  = {FieldIndex(PrmNode, DMDALocalIndex2D(info, i, j), h)};
1250         const PetscInt    cols[] = {FieldIndex(Node, DMDALocalIndex3D(info, i - 1, j, k), u), FieldIndex(Node, DMDALocalIndex3D(info, i, j, k), u), FieldIndex(Node, DMDALocalIndex3D(info, i + 1, j, k), u),
1251                                     FieldIndex(Node, DMDALocalIndex3D(info, i, j - 1, k), v), FieldIndex(Node, DMDALocalIndex3D(info, i, j, k), v), FieldIndex(Node, DMDALocalIndex3D(info, i, j + 1, k), v)};
1252         const PetscScalar w = (k && k < zm - 1) ? 0.5 : 0.25, hW = w * (x2[i - 1][j].h + x2[i][j].h) / (zm - 1.), hE = w * (x2[i][j].h + x2[i + 1][j].h) / (zm - 1.), hS = w * (x2[i][j - 1].h + x2[i][j].h) / (zm - 1.),
1253                           hN = w * (x2[i][j].h + x2[i][j + 1].h) / (zm - 1.);
1254         PetscScalar *vals, vals_upwind[] = {((PetscRealPart(x3[i][j][k].u) > 0) ? -hW : 0), (PetscRealPart(x3[i][j][k].u) > 0) ? +hE : -hW, (PetscRealPart(x3[i][j][k].u) > 0) ? 0 : +hE,
1255                                             (PetscRealPart(x3[i][j][k].v) > 0) ? -hS : 0,   (PetscRealPart(x3[i][j][k].v) > 0) ? +hN : -hS, ((PetscRealPart(x3[i][j][k].v) > 0) ? 0 : +hN)},
1256                            vals_centered[] = {-0.5 * hW, 0.5 * (-hW + hE), 0.5 * hE, -0.5 * hS, 0.5 * (-hS + hN), 0.5 * hN};
1257         vals                               = 1 ? vals_upwind : vals_centered;
1258         if (k == 0) {
1259           Node derate;
1260           THIErosion(thi, &x3[i][j][0], NULL, &derate);
1261           vals[1] -= derate.u;
1262           vals[4] -= derate.v;
1263         }
1264         PetscCall(MatSetValuesLocal(B21, 1, row, 6, cols, vals, INSERT_VALUES));
1265       }
1266     }
1267   }
1268   PetscFunctionReturn(PETSC_SUCCESS);
1269 }
1270 
1271 static PetscErrorCode THIJacobian(TS ts, PetscReal t, Vec X, Vec Xdot, PetscReal a, Mat A, Mat B, void *ctx)
1272 {
1273   THI             thi = (THI)ctx;
1274   DM              pack, da3, da2;
1275   Vec             X3, X2, Xdot2;
1276   Mat             B11, B12, B21, B22;
1277   DMDALocalInfo   info3;
1278   IS             *isloc;
1279   const Node   ***x3;
1280   const PrmNode **x2, **xdot2;
1281 
1282   PetscFunctionBeginUser;
1283   PetscCall(TSGetDM(ts, &pack));
1284   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
1285   PetscCall(DMDAGetLocalInfo(da3, &info3));
1286   PetscCall(DMCompositeGetLocalVectors(pack, &X3, &X2));
1287   PetscCall(DMCompositeGetLocalVectors(pack, NULL, &Xdot2));
1288   PetscCall(DMCompositeScatter(pack, X, X3, X2));
1289   PetscCall(THIFixGhosts(thi, da3, da2, X3, X2));
1290   PetscCall(DMCompositeScatter(pack, Xdot, NULL, Xdot2));
1291 
1292   PetscCall(MatZeroEntries(B));
1293 
1294   PetscCall(DMCompositeGetLocalISs(pack, &isloc));
1295   PetscCall(MatGetLocalSubMatrix(B, isloc[0], isloc[0], &B11));
1296   PetscCall(MatGetLocalSubMatrix(B, isloc[0], isloc[1], &B12));
1297   PetscCall(MatGetLocalSubMatrix(B, isloc[1], isloc[0], &B21));
1298   PetscCall(MatGetLocalSubMatrix(B, isloc[1], isloc[1], &B22));
1299 
1300   PetscCall(DMDAVecGetArray(da3, X3, &x3));
1301   PetscCall(DMDAVecGetArray(da2, X2, &x2));
1302   PetscCall(DMDAVecGetArray(da2, Xdot2, &xdot2));
1303 
1304   PetscCall(THIJacobianLocal_Momentum(&info3, x3, x2, B11, B12, thi));
1305 
1306   /* Need to switch from ADD_VALUES to INSERT_VALUES */
1307   PetscCall(MatAssemblyBegin(B, MAT_FLUSH_ASSEMBLY));
1308   PetscCall(MatAssemblyEnd(B, MAT_FLUSH_ASSEMBLY));
1309 
1310   PetscCall(THIJacobianLocal_2D(&info3, x3, x2, xdot2, a, B22, B21, thi));
1311 
1312   PetscCall(DMDAVecRestoreArray(da3, X3, &x3));
1313   PetscCall(DMDAVecRestoreArray(da2, X2, &x2));
1314   PetscCall(DMDAVecRestoreArray(da2, Xdot2, &xdot2));
1315 
1316   PetscCall(MatRestoreLocalSubMatrix(B, isloc[0], isloc[0], &B11));
1317   PetscCall(MatRestoreLocalSubMatrix(B, isloc[0], isloc[1], &B12));
1318   PetscCall(MatRestoreLocalSubMatrix(B, isloc[1], isloc[0], &B21));
1319   PetscCall(MatRestoreLocalSubMatrix(B, isloc[1], isloc[1], &B22));
1320   PetscCall(ISDestroy(&isloc[0]));
1321   PetscCall(ISDestroy(&isloc[1]));
1322   PetscCall(PetscFree(isloc));
1323 
1324   PetscCall(DMCompositeRestoreLocalVectors(pack, &X3, &X2));
1325   PetscCall(DMCompositeRestoreLocalVectors(pack, 0, &Xdot2));
1326 
1327   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1328   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1329   if (A != B) {
1330     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1331     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1332   }
1333   if (thi->verbose) PetscCall(THIMatrixStatistics(thi, B, PETSC_VIEWER_STDOUT_WORLD));
1334   PetscFunctionReturn(PETSC_SUCCESS);
1335 }
1336 
1337 /* VTK's XML formats are so brain-dead that they can't handle multiple grids in the same file.  Since the communication
1338  * can be shared between the two grids, we write two files at once, one for velocity (living on a 3D grid defined by
1339  * h=thickness and b=bed) and another for all properties living on the 2D grid.
1340  */
1341 static PetscErrorCode THIDAVecView_VTK_XML(THI thi, DM pack, Vec X, const char filename[], const char filename2[])
1342 {
1343   const PetscInt dof = NODE_SIZE, dof2 = PRMNODE_SIZE;
1344   Units          units = thi->units;
1345   MPI_Comm       comm;
1346   PetscViewer    viewer3, viewer2;
1347   PetscMPIInt    rank, size, tag, nn, nmax, nn2, nmax2;
1348   PetscInt       mx, my, mz, r, range[6];
1349   PetscScalar   *x, *x2;
1350   DM             da3, da2;
1351   Vec            X3, X2;
1352 
1353   PetscFunctionBeginUser;
1354   PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
1355   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
1356   PetscCall(DMCompositeGetAccess(pack, X, &X3, &X2));
1357   PetscCall(DMDAGetInfo(da3, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1358   PetscCallMPI(MPI_Comm_size(comm, &size));
1359   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1360   PetscCall(PetscViewerASCIIOpen(comm, filename, &viewer3));
1361   PetscCall(PetscViewerASCIIOpen(comm, filename2, &viewer2));
1362   PetscCall(PetscViewerASCIIPrintf(viewer3, "<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n"));
1363   PetscCall(PetscViewerASCIIPrintf(viewer2, "<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n"));
1364   PetscCall(PetscViewerASCIIPrintf(viewer3, "  <StructuredGrid WholeExtent=\"%d %" PetscInt_FMT " %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n", 0, mz - 1, 0, my - 1, 0, mx - 1));
1365   PetscCall(PetscViewerASCIIPrintf(viewer2, "  <StructuredGrid WholeExtent=\"%d %d %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n", 0, 0, 0, my - 1, 0, mx - 1));
1366 
1367   PetscCall(DMDAGetCorners(da3, range, range + 1, range + 2, range + 3, range + 4, range + 5));
1368   PetscCall(PetscMPIIntCast(range[3] * range[4] * range[5] * dof, &nn));
1369   PetscCallMPI(MPI_Reduce(&nn, &nmax, 1, MPI_INT, MPI_MAX, 0, comm));
1370   PetscCall(PetscMPIIntCast(range[4] * range[5] * dof2, &nn2));
1371   PetscCallMPI(MPI_Reduce(&nn2, &nmax2, 1, MPI_INT, MPI_MAX, 0, comm));
1372   tag = ((PetscObject)viewer3)->tag;
1373   PetscCall(VecGetArrayRead(X3, (const PetscScalar **)&x));
1374   PetscCall(VecGetArrayRead(X2, (const PetscScalar **)&x2));
1375   if (rank == 0) {
1376     PetscScalar *array, *array2;
1377     PetscCall(PetscMalloc2(nmax, &array, nmax2, &array2));
1378     for (r = 0; r < size; r++) {
1379       PetscInt i, j, k, f, xs, xm, ys, ym, zs, zm;
1380       Node    *y3;
1381       PetscScalar (*y2)[PRMNODE_SIZE];
1382       MPI_Status status;
1383 
1384       if (r) PetscCallMPI(MPI_Recv(range, 6, MPIU_INT, r, tag, comm, MPI_STATUS_IGNORE));
1385       zs = range[0];
1386       ys = range[1];
1387       xs = range[2];
1388       zm = range[3];
1389       ym = range[4];
1390       xm = range[5];
1391       PetscCheck(xm * ym * zm * dof <= nmax, PETSC_COMM_SELF, PETSC_ERR_PLIB, "should not happen");
1392       if (r) {
1393         PetscCallMPI(MPI_Recv(array, nmax, MPIU_SCALAR, r, tag, comm, &status));
1394         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &nn));
1395         PetscCheck(nn == xm * ym * zm * dof, PETSC_COMM_SELF, PETSC_ERR_PLIB, "corrupt da3 send");
1396         y3 = (Node *)array;
1397         PetscCallMPI(MPI_Recv(array2, nmax2, MPIU_SCALAR, r, tag, comm, &status));
1398         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &nn2));
1399         PetscCheck(nn2 == xm * ym * dof2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "corrupt da2 send");
1400         y2 = (PetscScalar (*)[PRMNODE_SIZE])array2;
1401       } else {
1402         y3 = (Node *)x;
1403         y2 = (PetscScalar (*)[PRMNODE_SIZE])x2;
1404       }
1405       PetscCall(PetscViewerASCIIPrintf(viewer3, "    <Piece Extent=\"%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n", zs, zs + zm - 1, ys, ys + ym - 1, xs, xs + xm - 1));
1406       PetscCall(PetscViewerASCIIPrintf(viewer2, "    <Piece Extent=\"%d %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n", 0, 0, ys, ys + ym - 1, xs, xs + xm - 1));
1407 
1408       PetscCall(PetscViewerASCIIPrintf(viewer3, "      <Points>\n"));
1409       PetscCall(PetscViewerASCIIPrintf(viewer2, "      <Points>\n"));
1410       PetscCall(PetscViewerASCIIPrintf(viewer3, "        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1411       PetscCall(PetscViewerASCIIPrintf(viewer2, "        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1412       for (i = xs; i < xs + xm; i++) {
1413         for (j = ys; j < ys + ym; j++) {
1414           PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my, b = PetscRealPart(y2[i * ym + j][FieldOffset(PrmNode, b)]), h = PetscRealPart(y2[i * ym + j][FieldOffset(PrmNode, h)]);
1415           for (k = zs; k < zs + zm; k++) {
1416             PetscReal zz = b + h * k / (mz - 1.);
1417             PetscCall(PetscViewerASCIIPrintf(viewer3, "%f %f %f\n", (double)xx, (double)yy, (double)zz));
1418           }
1419           PetscCall(PetscViewerASCIIPrintf(viewer2, "%f %f %f\n", (double)xx, (double)yy, (double)0.0));
1420         }
1421       }
1422       PetscCall(PetscViewerASCIIPrintf(viewer3, "        </DataArray>\n"));
1423       PetscCall(PetscViewerASCIIPrintf(viewer2, "        </DataArray>\n"));
1424       PetscCall(PetscViewerASCIIPrintf(viewer3, "      </Points>\n"));
1425       PetscCall(PetscViewerASCIIPrintf(viewer2, "      </Points>\n"));
1426 
1427       { /* Velocity and rank (3D) */
1428         PetscCall(PetscViewerASCIIPrintf(viewer3, "      <PointData>\n"));
1429         PetscCall(PetscViewerASCIIPrintf(viewer3, "        <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1430         for (i = 0; i < nn / dof; i++) PetscCall(PetscViewerASCIIPrintf(viewer3, "%f %f %f\n", (double)(PetscRealPart(y3[i].u) * units->year / units->meter), (double)(PetscRealPart(y3[i].v) * units->year / units->meter), 0.0));
1431         PetscCall(PetscViewerASCIIPrintf(viewer3, "        </DataArray>\n"));
1432 
1433         PetscCall(PetscViewerASCIIPrintf(viewer3, "        <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n"));
1434         for (i = 0; i < nn; i += dof) PetscCall(PetscViewerASCIIPrintf(viewer3, "%" PetscInt_FMT "\n", r));
1435         PetscCall(PetscViewerASCIIPrintf(viewer3, "        </DataArray>\n"));
1436         PetscCall(PetscViewerASCIIPrintf(viewer3, "      </PointData>\n"));
1437       }
1438 
1439       { /* 2D */
1440         PetscCall(PetscViewerASCIIPrintf(viewer2, "      <PointData>\n"));
1441         for (f = 0; f < PRMNODE_SIZE; f++) {
1442           const char *fieldname;
1443           PetscCall(DMDAGetFieldName(da2, f, &fieldname));
1444           PetscCall(PetscViewerASCIIPrintf(viewer2, "        <DataArray type=\"Float32\" Name=\"%s\" format=\"ascii\">\n", fieldname));
1445           for (i = 0; i < nn2 / PRMNODE_SIZE; i++) PetscCall(PetscViewerASCIIPrintf(viewer2, "%g\n", (double)y2[i][f]));
1446           PetscCall(PetscViewerASCIIPrintf(viewer2, "        </DataArray>\n"));
1447         }
1448         PetscCall(PetscViewerASCIIPrintf(viewer2, "      </PointData>\n"));
1449       }
1450 
1451       PetscCall(PetscViewerASCIIPrintf(viewer3, "    </Piece>\n"));
1452       PetscCall(PetscViewerASCIIPrintf(viewer2, "    </Piece>\n"));
1453     }
1454     PetscCall(PetscFree2(array, array2));
1455   } else {
1456     PetscCallMPI(MPI_Send(range, 6, MPIU_INT, 0, tag, comm));
1457     PetscCallMPI(MPI_Send(x, nn, MPIU_SCALAR, 0, tag, comm));
1458     PetscCallMPI(MPI_Send(x2, nn2, MPIU_SCALAR, 0, tag, comm));
1459   }
1460   PetscCall(VecRestoreArrayRead(X3, (const PetscScalar **)&x));
1461   PetscCall(VecRestoreArrayRead(X2, (const PetscScalar **)&x2));
1462   PetscCall(PetscViewerASCIIPrintf(viewer3, "  </StructuredGrid>\n"));
1463   PetscCall(PetscViewerASCIIPrintf(viewer2, "  </StructuredGrid>\n"));
1464 
1465   PetscCall(DMCompositeRestoreAccess(pack, X, &X3, &X2));
1466   PetscCall(PetscViewerASCIIPrintf(viewer3, "</VTKFile>\n"));
1467   PetscCall(PetscViewerASCIIPrintf(viewer2, "</VTKFile>\n"));
1468   PetscCall(PetscViewerDestroy(&viewer3));
1469   PetscCall(PetscViewerDestroy(&viewer2));
1470   PetscFunctionReturn(PETSC_SUCCESS);
1471 }
1472 
1473 static PetscErrorCode THITSMonitor(TS ts, PetscInt step, PetscReal t, Vec X, void *ctx)
1474 {
1475   THI  thi = (THI)ctx;
1476   DM   pack;
1477   char filename3[PETSC_MAX_PATH_LEN], filename2[PETSC_MAX_PATH_LEN];
1478 
1479   PetscFunctionBeginUser;
1480   if (step < 0) PetscFunctionReturn(PETSC_SUCCESS); /* negative one is used to indicate an interpolated solution */
1481   PetscCall(PetscPrintf(PetscObjectComm((PetscObject)ts), "%3" PetscInt_FMT ": t=%g\n", step, (double)t));
1482   if (thi->monitor_interval && step % thi->monitor_interval) PetscFunctionReturn(PETSC_SUCCESS);
1483   PetscCall(TSGetDM(ts, &pack));
1484   PetscCall(PetscSNPrintf(filename3, sizeof(filename3), "%s-3d-%03" PetscInt_FMT ".vts", thi->monitor_basename, step));
1485   PetscCall(PetscSNPrintf(filename2, sizeof(filename2), "%s-2d-%03" PetscInt_FMT ".vts", thi->monitor_basename, step));
1486   PetscCall(THIDAVecView_VTK_XML(thi, pack, X, filename3, filename2));
1487   PetscFunctionReturn(PETSC_SUCCESS);
1488 }
1489 
1490 static PetscErrorCode THICreateDM3d(THI thi, DM *dm3d)
1491 {
1492   MPI_Comm comm;
1493   PetscInt M = 3, N = 3, P = 2;
1494   DM       da;
1495 
1496   PetscFunctionBeginUser;
1497   PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
1498   PetscOptionsBegin(comm, NULL, "Grid resolution options", "");
1499   {
1500     PetscCall(PetscOptionsInt("-M", "Number of elements in x-direction on coarse level", "", M, &M, NULL));
1501     N = M;
1502     PetscCall(PetscOptionsInt("-N", "Number of elements in y-direction on coarse level (if different from M)", "", N, &N, NULL));
1503     PetscCall(PetscOptionsInt("-P", "Number of elements in z-direction on coarse level", "", P, &P, NULL));
1504   }
1505   PetscOptionsEnd();
1506   PetscCall(DMDACreate3d(comm, DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX, P, N, M, 1, PETSC_DETERMINE, PETSC_DETERMINE, sizeof(Node) / sizeof(PetscScalar), 1, 0, 0, 0, &da));
1507   PetscCall(DMSetFromOptions(da));
1508   PetscCall(DMSetUp(da));
1509   PetscCall(DMDASetFieldName(da, 0, "x-velocity"));
1510   PetscCall(DMDASetFieldName(da, 1, "y-velocity"));
1511   *dm3d = da;
1512   PetscFunctionReturn(PETSC_SUCCESS);
1513 }
1514 
1515 int main(int argc, char *argv[])
1516 {
1517   MPI_Comm  comm;
1518   DM        pack, da3, da2;
1519   TS        ts;
1520   THI       thi;
1521   Vec       X;
1522   Mat       B;
1523   PetscInt  i, steps;
1524   PetscReal ftime;
1525 
1526   PetscFunctionBeginUser;
1527   PetscCall(PetscInitialize(&argc, &argv, 0, help));
1528   comm = PETSC_COMM_WORLD;
1529 
1530   PetscCall(THICreate(comm, &thi));
1531   PetscCall(THICreateDM3d(thi, &da3));
1532   {
1533     PetscInt        Mx, My, mx, my, s;
1534     DMDAStencilType st;
1535     PetscCall(DMDAGetInfo(da3, 0, 0, &My, &Mx, 0, &my, &mx, 0, &s, 0, 0, 0, &st));
1536     PetscCall(DMDACreate2d(PetscObjectComm((PetscObject)thi), DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, My, Mx, my, mx, sizeof(PrmNode) / sizeof(PetscScalar), s, 0, 0, &da2));
1537     PetscCall(DMSetUp(da2));
1538   }
1539 
1540   PetscCall(PetscObjectSetName((PetscObject)da3, "3D_Velocity"));
1541   PetscCall(DMSetOptionsPrefix(da3, "f3d_"));
1542   PetscCall(DMDASetFieldName(da3, 0, "u"));
1543   PetscCall(DMDASetFieldName(da3, 1, "v"));
1544   PetscCall(PetscObjectSetName((PetscObject)da2, "2D_Fields"));
1545   PetscCall(DMSetOptionsPrefix(da2, "f2d_"));
1546   PetscCall(DMDASetFieldName(da2, 0, "b"));
1547   PetscCall(DMDASetFieldName(da2, 1, "h"));
1548   PetscCall(DMDASetFieldName(da2, 2, "beta2"));
1549   PetscCall(DMCompositeCreate(comm, &pack));
1550   PetscCall(DMCompositeAddDM(pack, da3));
1551   PetscCall(DMCompositeAddDM(pack, da2));
1552   PetscCall(DMDestroy(&da3));
1553   PetscCall(DMDestroy(&da2));
1554   PetscCall(DMSetUp(pack));
1555   PetscCall(DMCreateMatrix(pack, &B));
1556   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
1557   PetscCall(MatSetOptionsPrefix(B, "thi_"));
1558 
1559   for (i = 0; i < thi->nlevels; i++) {
1560     PetscReal Lx = thi->Lx / thi->units->meter, Ly = thi->Ly / thi->units->meter, Lz = thi->Lz / thi->units->meter;
1561     PetscInt  Mx, My, Mz;
1562     PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
1563     PetscCall(DMDAGetInfo(da3, 0, &Mz, &My, &Mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1564     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g x %8.2g, num elements %3d x %3d x %3d (%8d), size (m) %g x %g x %g\n", i, Lx, Ly, Lz, Mx, My, Mz, Mx * My * Mz, Lx / Mx, Ly / My, 1000. / (Mz - 1)));
1565   }
1566 
1567   PetscCall(DMCreateGlobalVector(pack, &X));
1568   PetscCall(THIInitial(thi, pack, X));
1569 
1570   PetscCall(TSCreate(comm, &ts));
1571   PetscCall(TSSetDM(ts, pack));
1572   PetscCall(TSSetProblemType(ts, TS_NONLINEAR));
1573   PetscCall(TSMonitorSet(ts, THITSMonitor, thi, NULL));
1574   PetscCall(TSSetType(ts, TSTHETA));
1575   PetscCall(TSSetIFunction(ts, NULL, THIFunction, thi));
1576   PetscCall(TSSetIJacobian(ts, B, B, THIJacobian, thi));
1577   PetscCall(TSSetMaxTime(ts, 10.0));
1578   PetscCall(TSSetExactFinalTime(ts, TS_EXACTFINALTIME_STEPOVER));
1579   PetscCall(TSSetSolution(ts, X));
1580   PetscCall(TSSetTimeStep(ts, 1e-3));
1581   PetscCall(TSSetFromOptions(ts));
1582 
1583   PetscCall(TSSolve(ts, X));
1584   PetscCall(TSGetSolveTime(ts, &ftime));
1585   PetscCall(TSGetStepNumber(ts, &steps));
1586   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Steps %" PetscInt_FMT "  final time %g\n", steps, (double)ftime));
1587 
1588   if (0) PetscCall(THISolveStatistics(thi, ts, 0, "Full"));
1589 
1590   {
1591     PetscBool flg;
1592     char      filename[PETSC_MAX_PATH_LEN] = "";
1593     PetscCall(PetscOptionsGetString(NULL, NULL, "-o", filename, sizeof(filename), &flg));
1594     if (flg) PetscCall(THIDAVecView_VTK_XML(thi, pack, X, filename, NULL));
1595   }
1596 
1597   PetscCall(VecDestroy(&X));
1598   PetscCall(MatDestroy(&B));
1599   PetscCall(DMDestroy(&pack));
1600   PetscCall(TSDestroy(&ts));
1601   PetscCall(THIDestroy(&thi));
1602   PetscCall(PetscFinalize());
1603   return 0;
1604 }
1605