xref: /petsc/src/ts/tutorials/ex14.c (revision a69119a591a03a9d906b29c0a4e9802e4d7c9795)
1 static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\
2 \n\
3 Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
4 using multigrid.  The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
5 to p=4/3 in a p-Laplacian).  The focus is on ISMIP-HOM experiments which assume periodic\n\
6 boundary conditions in the x- and y-directions.\n\
7 \n\
8 Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
9 can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
10 \n\
11 A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
12 \n\n";
13 
14 /*
15 The equations for horizontal velocity (u,v) are
16 
17   - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
18   - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0
19 
20 where
21 
22   eta = B/2 (epsilon + gamma)^((p-2)/2)
23 
24 is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
25 written in terms of the second invariant
26 
27   gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2
28 
29 The surface boundary conditions are the natural conditions.  The basal boundary conditions
30 are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.
31 
32 In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).
33 
34 The discretization is Q1 finite elements, managed by a DMDA.  The grid is never distorted in the
35 map (x,y) plane, but the bed and surface may be bumpy.  This is handled as usual in FEM, through
36 the Jacobian of the coordinate transformation from a reference element to the physical element.
37 
38 Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed
39 specially so that columns are never distributed, and are always contiguous in memory.
40 This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation,
41 and then indexing as vec[i][j][k].  The exotic coarse spaces require 2D DMDAs which are made to
42 use compatible domain decomposition relative to the 3D DMDAs.
43 
44 */
45 
46 #include <petscts.h>
47 #include <petscdm.h>
48 #include <petscdmda.h>
49 #include <petscdmcomposite.h>
50 #include <ctype.h> /* toupper() */
51 #include <petsc/private/petscimpl.h>
52 
53 #if defined __SSE2__
54 #include <emmintrin.h>
55 #endif
56 
57 /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
58 #define USE_SSE2_KERNELS (!defined NO_SSE2 && !defined PETSC_USE_COMPLEX && !defined PETSC_USE_REAL_SINGLE && defined __SSE2__)
59 
60 #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L
61 #if defined  __cplusplus /* C++ restrict is nonstandard and compilers have inconsistent rules about where it can be used */
62 #define restrict
63 #else
64 #define restrict PETSC_RESTRICT
65 #endif
66 #endif
67 
68 static PetscClassId THI_CLASSID;
69 
70 typedef enum {
71   QUAD_GAUSS,
72   QUAD_LOBATTO
73 } QuadratureType;
74 static const char     *QuadratureTypes[] = {"gauss", "lobatto", "QuadratureType", "QUAD_", 0};
75 static const PetscReal HexQWeights[8]    = {1, 1, 1, 1, 1, 1, 1, 1};
76 static const PetscReal HexQNodes[]       = {-0.57735026918962573, 0.57735026918962573};
77 #define G 0.57735026918962573
78 #define H (0.5 * (1. + G))
79 #define L (0.5 * (1. - G))
80 #define M (-0.5)
81 #define P (0.5)
82 /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
83 static const PetscReal HexQInterp_Lobatto[8][8] = {
84   {H, 0, 0, 0, L, 0, 0, 0},
85   {0, H, 0, 0, 0, L, 0, 0},
86   {0, 0, H, 0, 0, 0, L, 0},
87   {0, 0, 0, H, 0, 0, 0, L},
88   {L, 0, 0, 0, H, 0, 0, 0},
89   {0, L, 0, 0, 0, H, 0, 0},
90   {0, 0, L, 0, 0, 0, H, 0},
91   {0, 0, 0, L, 0, 0, 0, H}
92 };
93 static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
94   {{M * H, M *H, M}, {P * H, 0, 0},    {0, 0, 0},        {0, P *H, 0},     {M * L, M *L, P}, {P * L, 0, 0},    {0, 0, 0},        {0, P *L, 0}    },
95   {{M * H, 0, 0},    {P * H, M *H, M}, {0, P *H, 0},     {0, 0, 0},        {M * L, 0, 0},    {P * L, M *L, P}, {0, P *L, 0},     {0, 0, 0}       },
96   {{0, 0, 0},        {0, M *H, 0},     {P * H, P *H, M}, {M * H, 0, 0},    {0, 0, 0},        {0, M *L, 0},     {P * L, P *L, P}, {M * L, 0, 0}   },
97   {{0, M *H, 0},     {0, 0, 0},        {P * H, 0, 0},    {M * H, P *H, M}, {0, M *L, 0},     {0, 0, 0},        {P * L, 0, 0},    {M * L, P *L, P}},
98   {{M * L, M *L, M}, {P * L, 0, 0},    {0, 0, 0},        {0, P *L, 0},     {M * H, M *H, P}, {P * H, 0, 0},    {0, 0, 0},        {0, P *H, 0}    },
99   {{M * L, 0, 0},    {P * L, M *L, M}, {0, P *L, 0},     {0, 0, 0},        {M * H, 0, 0},    {P * H, M *H, P}, {0, P *H, 0},     {0, 0, 0}       },
100   {{0, 0, 0},        {0, M *L, 0},     {P * L, P *L, M}, {M * L, 0, 0},    {0, 0, 0},        {0, M *H, 0},     {P * H, P *H, P}, {M * H, 0, 0}   },
101   {{0, M *L, 0},     {0, 0, 0},        {P * L, 0, 0},    {M * L, P *L, M}, {0, M *H, 0},     {0, 0, 0},        {P * H, 0, 0},    {M * H, P *H, P}}
102 };
103 /* Stanndard Gauss */
104 static const PetscReal HexQInterp_Gauss[8][8] = {
105   {H * H * H, L *H *H, L *L *H, H *L *H, H *H *L, L *H *L, L *L *L, H *L *L},
106   {L * H * H, H *H *H, H *L *H, L *L *H, L *H *L, H *H *L, H *L *L, L *L *L},
107   {L * L * H, H *L *H, H *H *H, L *H *H, L *L *L, H *L *L, H *H *L, L *H *L},
108   {H * L * H, L *L *H, L *H *H, H *H *H, H *L *L, L *L *L, L *H *L, H *H *L},
109   {H * H * L, L *H *L, L *L *L, H *L *L, H *H *H, L *H *H, L *L *H, H *L *H},
110   {L * H * L, H *H *L, H *L *L, L *L *L, L *H *H, H *H *H, H *L *H, L *L *H},
111   {L * L * L, H *L *L, H *H *L, L *H *L, L *L *H, H *L *H, H *H *H, L *H *H},
112   {H * L * L, L *L *L, L *H *L, H *H *L, H *L *H, L *L *H, L *H *H, H *H *H}
113 };
114 static const PetscReal HexQDeriv_Gauss[8][8][3] = {
115   {{M * H * H, H *M *H, H *H *M}, {P * H * H, L *M *H, L *H *M}, {P * L * H, L *P *H, L *L *M}, {M * L * H, H *P *H, H *L *M}, {M * H * L, H *M *L, H *H *P}, {P * H * L, L *M *L, L *H *P}, {P * L * L, L *P *L, L *L *P}, {M * L * L, H *P *L, H *L *P}},
116   {{M * H * H, L *M *H, L *H *M}, {P * H * H, H *M *H, H *H *M}, {P * L * H, H *P *H, H *L *M}, {M * L * H, L *P *H, L *L *M}, {M * H * L, L *M *L, L *H *P}, {P * H * L, H *M *L, H *H *P}, {P * L * L, H *P *L, H *L *P}, {M * L * L, L *P *L, L *L *P}},
117   {{M * L * H, L *M *H, L *L *M}, {P * L * H, H *M *H, H *L *M}, {P * H * H, H *P *H, H *H *M}, {M * H * H, L *P *H, L *H *M}, {M * L * L, L *M *L, L *L *P}, {P * L * L, H *M *L, H *L *P}, {P * H * L, H *P *L, H *H *P}, {M * H * L, L *P *L, L *H *P}},
118   {{M * L * H, H *M *H, H *L *M}, {P * L * H, L *M *H, L *L *M}, {P * H * H, L *P *H, L *H *M}, {M * H * H, H *P *H, H *H *M}, {M * L * L, H *M *L, H *L *P}, {P * L * L, L *M *L, L *L *P}, {P * H * L, L *P *L, L *H *P}, {M * H * L, H *P *L, H *H *P}},
119   {{M * H * L, H *M *L, H *H *M}, {P * H * L, L *M *L, L *H *M}, {P * L * L, L *P *L, L *L *M}, {M * L * L, H *P *L, H *L *M}, {M * H * H, H *M *H, H *H *P}, {P * H * H, L *M *H, L *H *P}, {P * L * H, L *P *H, L *L *P}, {M * L * H, H *P *H, H *L *P}},
120   {{M * H * L, L *M *L, L *H *M}, {P * H * L, H *M *L, H *H *M}, {P * L * L, H *P *L, H *L *M}, {M * L * L, L *P *L, L *L *M}, {M * H * H, L *M *H, L *H *P}, {P * H * H, H *M *H, H *H *P}, {P * L * H, H *P *H, H *L *P}, {M * L * H, L *P *H, L *L *P}},
121   {{M * L * L, L *M *L, L *L *M}, {P * L * L, H *M *L, H *L *M}, {P * H * L, H *P *L, H *H *M}, {M * H * L, L *P *L, L *H *M}, {M * L * H, L *M *H, L *L *P}, {P * L * H, H *M *H, H *L *P}, {P * H * H, H *P *H, H *H *P}, {M * H * H, L *P *H, L *H *P}},
122   {{M * L * L, H *M *L, H *L *M}, {P * L * L, L *M *L, L *L *M}, {P * H * L, L *P *L, L *H *M}, {M * H * L, H *P *L, H *H *M}, {M * L * H, H *M *H, H *L *P}, {P * L * H, L *M *H, L *L *P}, {P * H * H, L *P *H, L *H *P}, {M * H * H, H *P *H, H *H *P}}
123 };
124 static const PetscReal (*HexQInterp)[8], (*HexQDeriv)[8][3];
125 /* Standard 2x2 Gauss quadrature for the bottom layer. */
126 static const PetscReal QuadQInterp[4][4] = {
127   {H * H, L *H, L *L, H *L},
128   {L * H, H *H, H *L, L *L},
129   {L * L, H *L, H *H, L *H},
130   {H * L, L *L, L *H, H *H}
131 };
132 static const PetscReal QuadQDeriv[4][4][2] = {
133   {{M * H, M *H}, {P * H, M *L}, {P * L, P *L}, {M * L, P *H}},
134   {{M * H, M *L}, {P * H, M *H}, {P * L, P *H}, {M * L, P *L}},
135   {{M * L, M *L}, {P * L, M *H}, {P * H, P *H}, {M * H, P *L}},
136   {{M * L, M *H}, {P * L, M *L}, {P * H, P *L}, {M * H, P *H}}
137 };
138 #undef G
139 #undef H
140 #undef L
141 #undef M
142 #undef P
143 
144 #define HexExtract(x, i, j, k, n) \
145   do { \
146     (n)[0] = (x)[i][j][k]; \
147     (n)[1] = (x)[i + 1][j][k]; \
148     (n)[2] = (x)[i + 1][j + 1][k]; \
149     (n)[3] = (x)[i][j + 1][k]; \
150     (n)[4] = (x)[i][j][k + 1]; \
151     (n)[5] = (x)[i + 1][j][k + 1]; \
152     (n)[6] = (x)[i + 1][j + 1][k + 1]; \
153     (n)[7] = (x)[i][j + 1][k + 1]; \
154   } while (0)
155 
156 #define HexExtractRef(x, i, j, k, n) \
157   do { \
158     (n)[0] = &(x)[i][j][k]; \
159     (n)[1] = &(x)[i + 1][j][k]; \
160     (n)[2] = &(x)[i + 1][j + 1][k]; \
161     (n)[3] = &(x)[i][j + 1][k]; \
162     (n)[4] = &(x)[i][j][k + 1]; \
163     (n)[5] = &(x)[i + 1][j][k + 1]; \
164     (n)[6] = &(x)[i + 1][j + 1][k + 1]; \
165     (n)[7] = &(x)[i][j + 1][k + 1]; \
166   } while (0)
167 
168 #define QuadExtract(x, i, j, n) \
169   do { \
170     (n)[0] = (x)[i][j]; \
171     (n)[1] = (x)[i + 1][j]; \
172     (n)[2] = (x)[i + 1][j + 1]; \
173     (n)[3] = (x)[i][j + 1]; \
174   } while (0)
175 
176 static PetscScalar Sqr(PetscScalar a) {
177   return a * a;
178 }
179 
180 static void HexGrad(const PetscReal dphi[][3], const PetscReal zn[], PetscReal dz[]) {
181   PetscInt i;
182   dz[0] = dz[1] = dz[2] = 0;
183   for (i = 0; i < 8; i++) {
184     dz[0] += dphi[i][0] * zn[i];
185     dz[1] += dphi[i][1] * zn[i];
186     dz[2] += dphi[i][2] * zn[i];
187   }
188 }
189 
190 static void HexComputeGeometry(PetscInt q, PetscReal hx, PetscReal hy, const PetscReal dz[restrict], PetscReal phi[restrict], PetscReal dphi[restrict][3], PetscReal *restrict jw) {
191   const PetscReal jac[3][3] =
192     {
193       {hx / 2, 0,      0    },
194       {0,      hy / 2, 0    },
195       {dz[0],  dz[1],  dz[2]}
196   },
197                   ijac[3][3] = {{1 / jac[0][0], 0, 0}, {0, 1 / jac[1][1], 0}, {-jac[2][0] / (jac[0][0] * jac[2][2]), -jac[2][1] / (jac[1][1] * jac[2][2]), 1 / jac[2][2]}}, jdet = jac[0][0] * jac[1][1] * jac[2][2];
198   PetscInt i;
199 
200   for (i = 0; i < 8; i++) {
201     const PetscReal *dphir = HexQDeriv[q][i];
202     phi[i]                 = HexQInterp[q][i];
203     dphi[i][0]             = dphir[0] * ijac[0][0] + dphir[1] * ijac[1][0] + dphir[2] * ijac[2][0];
204     dphi[i][1]             = dphir[0] * ijac[0][1] + dphir[1] * ijac[1][1] + dphir[2] * ijac[2][1];
205     dphi[i][2]             = dphir[0] * ijac[0][2] + dphir[1] * ijac[1][2] + dphir[2] * ijac[2][2];
206   }
207   *jw = 1.0 * jdet;
208 }
209 
210 typedef struct _p_THI   *THI;
211 typedef struct _n_Units *Units;
212 
213 typedef struct {
214   PetscScalar u, v;
215 } Node;
216 
217 typedef struct {
218   PetscScalar b;     /* bed */
219   PetscScalar h;     /* thickness */
220   PetscScalar beta2; /* friction */
221 } PrmNode;
222 
223 #define FieldSize(ntype)             ((PetscInt)(sizeof(ntype) / sizeof(PetscScalar)))
224 #define FieldOffset(ntype, member)   ((PetscInt)(offsetof(ntype, member) / sizeof(PetscScalar)))
225 #define FieldIndex(ntype, i, member) ((PetscInt)((i)*FieldSize(ntype) + FieldOffset(ntype, member)))
226 #define NODE_SIZE                    FieldSize(Node)
227 #define PRMNODE_SIZE                 FieldSize(PrmNode)
228 
229 typedef struct {
230   PetscReal min, max, cmin, cmax;
231 } PRange;
232 
233 struct _p_THI {
234   PETSCHEADER(int);
235   void (*initialize)(THI, PetscReal x, PetscReal y, PrmNode *p);
236   PetscInt  nlevels;
237   PetscInt  zlevels;
238   PetscReal Lx, Ly, Lz; /* Model domain */
239   PetscReal alpha;      /* Bed angle */
240   Units     units;
241   PetscReal dirichlet_scale;
242   PetscReal ssa_friction_scale;
243   PetscReal inertia;
244   PRange    eta;
245   PRange    beta2;
246   struct {
247     PetscReal Bd2, eps, exponent, glen_n;
248   } viscosity;
249   struct {
250     PetscReal irefgam, eps2, exponent;
251   } friction;
252   struct {
253     PetscReal rate, exponent, refvel;
254   } erosion;
255   PetscReal rhog;
256   PetscBool no_slip;
257   PetscBool verbose;
258   char     *mattype;
259   char     *monitor_basename;
260   PetscInt  monitor_interval;
261 };
262 
263 struct _n_Units {
264   /* fundamental */
265   PetscReal meter;
266   PetscReal kilogram;
267   PetscReal second;
268   /* derived */
269   PetscReal Pascal;
270   PetscReal year;
271 };
272 
273 static void PrmHexGetZ(const PrmNode pn[], PetscInt k, PetscInt zm, PetscReal zn[]) {
274   const PetscScalar zm1 = zm - 1, znl[8] = {pn[0].b + pn[0].h * (PetscScalar)k / zm1,       pn[1].b + pn[1].h * (PetscScalar)k / zm1,       pn[2].b + pn[2].h * (PetscScalar)k / zm1,       pn[3].b + pn[3].h * (PetscScalar)k / zm1,
275                                             pn[0].b + pn[0].h * (PetscScalar)(k + 1) / zm1, pn[1].b + pn[1].h * (PetscScalar)(k + 1) / zm1, pn[2].b + pn[2].h * (PetscScalar)(k + 1) / zm1, pn[3].b + pn[3].h * (PetscScalar)(k + 1) / zm1};
276   PetscInt          i;
277   for (i = 0; i < 8; i++) zn[i] = PetscRealPart(znl[i]);
278 }
279 
280 /* Compute a gradient of all the 2D fields at four quadrature points.  Output for [quadrature_point][direction].field_name */
281 static PetscErrorCode QuadComputeGrad4(const PetscReal dphi[][4][2], PetscReal hx, PetscReal hy, const PrmNode pn[4], PrmNode dp[4][2]) {
282   PetscInt q, i, f;
283   const PetscScalar(*restrict pg)[PRMNODE_SIZE] = (const PetscScalar(*)[PRMNODE_SIZE])pn; /* Get generic array pointers to the node */
284   PetscScalar(*restrict dpg)[2][PRMNODE_SIZE]   = (PetscScalar(*)[2][PRMNODE_SIZE])dp;
285 
286   PetscFunctionBeginUser;
287   PetscCall(PetscArrayzero(dpg, 4));
288   for (q = 0; q < 4; q++) {
289     for (i = 0; i < 4; i++) {
290       for (f = 0; f < PRMNODE_SIZE; f++) {
291         dpg[q][0][f] += dphi[q][i][0] / hx * pg[i][f];
292         dpg[q][1][f] += dphi[q][i][1] / hy * pg[i][f];
293       }
294     }
295   }
296   PetscFunctionReturn(0);
297 }
298 
299 static inline PetscReal StaggeredMidpoint2D(PetscScalar a, PetscScalar b, PetscScalar c, PetscScalar d) {
300   return 0.5 * PetscRealPart(0.75 * a + 0.75 * b + 0.25 * c + 0.25 * d);
301 }
302 static inline PetscReal UpwindFlux1D(PetscReal u, PetscReal hL, PetscReal hR) {
303   return (u > 0) ? hL * u : hR * u;
304 }
305 
306 #define UpwindFluxXW(x3, x2, h, i, j, k, dj) \
307   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u, x3[i - 1][j][k].u, x3[i - 1][j + dj][k].u, x3[i][k + dj][k].u), PetscRealPart(0.75 * x2[i - 1][j].h + 0.25 * x2[i - 1][j + dj].h), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i][j + dj].h))
308 #define UpwindFluxXE(x3, x2, h, i, j, k, dj) \
309   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].u, x3[i + 1][j][k].u, x3[i + 1][j + dj][k].u, x3[i][k + dj][k].u), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i][j + dj].h), PetscRealPart(0.75 * x2[i + 1][j].h + 0.25 * x2[i + 1][j + dj].h))
310 #define UpwindFluxYS(x3, x2, h, i, j, k, di) \
311   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v, x3[i][j - 1][k].v, x3[i + di][j - 1][k].v, x3[i + di][j][k].v), PetscRealPart(0.75 * x2[i][j - 1].h + 0.25 * x2[i + di][j - 1].h), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i + di][j].h))
312 #define UpwindFluxYN(x3, x2, h, i, j, k, di) \
313   UpwindFlux1D(StaggeredMidpoint2D(x3[i][j][k].v, x3[i][j + 1][k].v, x3[i + di][j + 1][k].v, x3[i + di][j][k].v), PetscRealPart(0.75 * x2[i][j].h + 0.25 * x2[i + di][j].h), PetscRealPart(0.75 * x2[i][j + 1].h + 0.25 * x2[i + di][j + 1].h))
314 
315 static void PrmNodeGetFaceMeasure(const PrmNode **p, PetscInt i, PetscInt j, PetscScalar h[]) {
316   /* West */
317   h[0] = StaggeredMidpoint2D(p[i][j].h, p[i - 1][j].h, p[i - 1][j - 1].h, p[i][j - 1].h);
318   h[1] = StaggeredMidpoint2D(p[i][j].h, p[i - 1][j].h, p[i - 1][j + 1].h, p[i][j + 1].h);
319   /* East */
320   h[2] = StaggeredMidpoint2D(p[i][j].h, p[i + 1][j].h, p[i + 1][j + 1].h, p[i][j + 1].h);
321   h[3] = StaggeredMidpoint2D(p[i][j].h, p[i + 1][j].h, p[i + 1][j - 1].h, p[i][j - 1].h);
322   /* South */
323   h[4] = StaggeredMidpoint2D(p[i][j].h, p[i][j - 1].h, p[i + 1][j - 1].h, p[i + 1][j].h);
324   h[5] = StaggeredMidpoint2D(p[i][j].h, p[i][j - 1].h, p[i - 1][j - 1].h, p[i - 1][j].h);
325   /* North */
326   h[6] = StaggeredMidpoint2D(p[i][j].h, p[i][j + 1].h, p[i - 1][j + 1].h, p[i - 1][j].h);
327   h[7] = StaggeredMidpoint2D(p[i][j].h, p[i][j + 1].h, p[i + 1][j + 1].h, p[i + 1][j].h);
328 }
329 
330 /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
331 static void THIInitialize_HOM_A(THI thi, PetscReal x, PetscReal y, PrmNode *p) {
332   Units     units = thi->units;
333   PetscReal s     = -x * PetscSinReal(thi->alpha);
334   p->b            = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly);
335   p->h            = s - p->b;
336   p->beta2        = -1e-10; /* This value is not used, but it should not be huge because that would change the finite difference step size  */
337 }
338 
339 static void THIInitialize_HOM_C(THI thi, PetscReal x, PetscReal y, PrmNode *p) {
340   Units     units = thi->units;
341   PetscReal s     = -x * PetscSinReal(thi->alpha);
342   p->b            = s - 1000 * units->meter;
343   p->h            = s - p->b;
344   /* tau_b = beta2 v   is a stress (Pa).
345    * This is a big number in our units (it needs to balance the driving force from the surface), so we scale it by 1/rhog, just like the residual. */
346   p->beta2        = 1000 * (1 + PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly)) * units->Pascal * units->year / units->meter / thi->rhog;
347 }
348 
349 /* These are just toys */
350 
351 /* From Fred Herman */
352 static void THIInitialize_HOM_F(THI thi, PetscReal x, PetscReal y, PrmNode *p) {
353   Units     units = thi->units;
354   PetscReal s     = -x * PetscSinReal(thi->alpha);
355   p->b            = s - 1000 * units->meter + 100 * units->meter * PetscSinReal(x * 2 * PETSC_PI / thi->Lx); /* * sin(y*2*PETSC_PI/thi->Ly); */
356   p->h            = s - p->b;
357   p->h            = (1 - (atan((x - thi->Lx / 2) / 1.) + PETSC_PI / 2.) / PETSC_PI) * 500 * units->meter + 1 * units->meter;
358   s               = PetscRealPart(p->b + p->h);
359   p->beta2        = -1e-10;
360   /*  p->beta2 = 1000 * units->Pascal * units->year / units->meter; */
361 }
362 
363 /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
364 static void THIInitialize_HOM_X(THI thi, PetscReal xx, PetscReal yy, PrmNode *p) {
365   Units     units = thi->units;
366   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
367   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
368   p->b     = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
369   p->h     = s - p->b;
370   p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter / thi->rhog;
371 }
372 
373 /* Like Z, but with 200 meter cliffs */
374 static void THIInitialize_HOM_Y(THI thi, PetscReal xx, PetscReal yy, PrmNode *p) {
375   Units     units = thi->units;
376   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
377   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
378   p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
379   if (PetscRealPart(p->b) > -700 * units->meter) p->b += 200 * units->meter;
380   p->h     = s - p->b;
381   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter / thi->rhog;
382 }
383 
384 /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
385 static void THIInitialize_HOM_Z(THI thi, PetscReal xx, PetscReal yy, PrmNode *p) {
386   Units     units = thi->units;
387   PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
388   PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
389   p->b     = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
390   p->h     = s - p->b;
391   p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter / thi->rhog;
392 }
393 
394 static void THIFriction(THI thi, PetscReal rbeta2, PetscReal gam, PetscReal *beta2, PetscReal *dbeta2) {
395   if (thi->friction.irefgam == 0) {
396     Units units           = thi->units;
397     thi->friction.irefgam = 1. / (0.5 * PetscSqr(100 * units->meter / units->year));
398     thi->friction.eps2    = 0.5 * PetscSqr(1.e-4 / thi->friction.irefgam);
399   }
400   if (thi->friction.exponent == 0) {
401     *beta2  = rbeta2;
402     *dbeta2 = 0;
403   } else {
404     *beta2  = rbeta2 * PetscPowReal(thi->friction.eps2 + gam * thi->friction.irefgam, thi->friction.exponent);
405     *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam * thi->friction.irefgam) * thi->friction.irefgam;
406   }
407 }
408 
409 static void THIViscosity(THI thi, PetscReal gam, PetscReal *eta, PetscReal *deta) {
410   PetscReal Bd2, eps, exponent;
411   if (thi->viscosity.Bd2 == 0) {
412     Units           units   = thi->units;
413     const PetscReal n       = thi->viscosity.glen_n,                                  /* Glen exponent */
414       p                     = 1. + 1. / n,                                            /* for Stokes */
415       A                     = 1.e-16 * PetscPowReal(units->Pascal, -n) / units->year, /* softness parameter (Pa^{-n}/s) */
416       B                     = PetscPowReal(A, -1. / n);                               /* hardness parameter */
417     thi->viscosity.Bd2      = B / 2;
418     thi->viscosity.exponent = (p - 2) / 2;
419     thi->viscosity.eps      = 0.5 * PetscSqr(1e-5 / units->year);
420   }
421   Bd2      = thi->viscosity.Bd2;
422   exponent = thi->viscosity.exponent;
423   eps      = thi->viscosity.eps;
424   *eta     = Bd2 * PetscPowReal(eps + gam, exponent);
425   *deta    = exponent * (*eta) / (eps + gam);
426 }
427 
428 static void THIErosion(THI thi, const Node *vel, PetscScalar *erate, Node *derate) {
429   const PetscScalar magref2 = 1.e-10 + (PetscSqr(vel->u) + PetscSqr(vel->v)) / PetscSqr(thi->erosion.refvel), rate = -thi->erosion.rate * PetscPowScalar(magref2, 0.5 * thi->erosion.exponent);
430   if (erate) *erate = rate;
431   if (derate) {
432     if (thi->erosion.exponent == 1) {
433       derate->u = 0;
434       derate->v = 0;
435     } else {
436       derate->u = 0.5 * thi->erosion.exponent * rate / magref2 * 2. * vel->u / PetscSqr(thi->erosion.refvel);
437       derate->v = 0.5 * thi->erosion.exponent * rate / magref2 * 2. * vel->v / PetscSqr(thi->erosion.refvel);
438     }
439   }
440 }
441 
442 static void RangeUpdate(PetscReal *min, PetscReal *max, PetscReal x) {
443   if (x < *min) *min = x;
444   if (x > *max) *max = x;
445 }
446 
447 static void PRangeClear(PRange *p) {
448   p->cmin = p->min = 1e100;
449   p->cmax = p->max = -1e100;
450 }
451 
452 static PetscErrorCode PRangeMinMax(PRange *p, PetscReal min, PetscReal max) {
453   PetscFunctionBeginUser;
454   p->cmin = min;
455   p->cmax = max;
456   if (min < p->min) p->min = min;
457   if (max > p->max) p->max = max;
458   PetscFunctionReturn(0);
459 }
460 
461 static PetscErrorCode THIDestroy(THI *thi) {
462   PetscFunctionBeginUser;
463   if (--((PetscObject)(*thi))->refct > 0) PetscFunctionReturn(0);
464   PetscCall(PetscFree((*thi)->units));
465   PetscCall(PetscFree((*thi)->mattype));
466   PetscCall(PetscFree((*thi)->monitor_basename));
467   PetscCall(PetscHeaderDestroy(thi));
468   PetscFunctionReturn(0);
469 }
470 
471 static PetscErrorCode THICreate(MPI_Comm comm, THI *inthi) {
472   static PetscBool registered = PETSC_FALSE;
473   THI              thi;
474   Units            units;
475   char             monitor_basename[PETSC_MAX_PATH_LEN] = "thi-";
476   PetscErrorCode   ierr;
477 
478   PetscFunctionBeginUser;
479   *inthi = 0;
480   if (!registered) {
481     PetscCall(PetscClassIdRegister("Toy Hydrostatic Ice", &THI_CLASSID));
482     registered = PETSC_TRUE;
483   }
484   PetscCall(PetscHeaderCreate(thi, THI_CLASSID, "THI", "Toy Hydrostatic Ice", "THI", comm, THIDestroy, 0));
485 
486   PetscCall(PetscNew(&thi->units));
487 
488   units           = thi->units;
489   units->meter    = 1e-2;
490   units->second   = 1e-7;
491   units->kilogram = 1e-12;
492 
493   PetscOptionsBegin(comm, NULL, "Scaled units options", "");
494   {
495     PetscCall(PetscOptionsReal("-units_meter", "1 meter in scaled length units", "", units->meter, &units->meter, NULL));
496     PetscCall(PetscOptionsReal("-units_second", "1 second in scaled time units", "", units->second, &units->second, NULL));
497     PetscCall(PetscOptionsReal("-units_kilogram", "1 kilogram in scaled mass units", "", units->kilogram, &units->kilogram, NULL));
498   }
499   PetscOptionsEnd();
500   units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
501   units->year   = 31556926. * units->second, /* seconds per year */
502 
503     thi->Lx            = 10.e3;
504   thi->Ly              = 10.e3;
505   thi->Lz              = 1000;
506   thi->nlevels         = 1;
507   thi->dirichlet_scale = 1;
508   thi->verbose         = PETSC_FALSE;
509 
510   thi->viscosity.glen_n = 3.;
511   thi->erosion.rate     = 1e-3; /* m/a */
512   thi->erosion.exponent = 1.;
513   thi->erosion.refvel   = 1.; /* m/a */
514 
515   PetscOptionsBegin(comm, NULL, "Toy Hydrostatic Ice options", "");
516   {
517     QuadratureType quad       = QUAD_GAUSS;
518     char           homexp[]   = "A";
519     char           mtype[256] = MATSBAIJ;
520     PetscReal      L, m = 1.0;
521     PetscBool      flg;
522     L = thi->Lx;
523     PetscCall(PetscOptionsReal("-thi_L", "Domain size (m)", "", L, &L, &flg));
524     if (flg) thi->Lx = thi->Ly = L;
525     PetscCall(PetscOptionsReal("-thi_Lx", "X Domain size (m)", "", thi->Lx, &thi->Lx, NULL));
526     PetscCall(PetscOptionsReal("-thi_Ly", "Y Domain size (m)", "", thi->Ly, &thi->Ly, NULL));
527     PetscCall(PetscOptionsReal("-thi_Lz", "Z Domain size (m)", "", thi->Lz, &thi->Lz, NULL));
528     PetscCall(PetscOptionsString("-thi_hom", "ISMIP-HOM experiment (A or C)", "", homexp, homexp, sizeof(homexp), NULL));
529     switch (homexp[0] = toupper(homexp[0])) {
530     case 'A':
531       thi->initialize = THIInitialize_HOM_A;
532       thi->no_slip    = PETSC_TRUE;
533       thi->alpha      = 0.5;
534       break;
535     case 'C':
536       thi->initialize = THIInitialize_HOM_C;
537       thi->no_slip    = PETSC_FALSE;
538       thi->alpha      = 0.1;
539       break;
540     case 'F':
541       thi->initialize = THIInitialize_HOM_F;
542       thi->no_slip    = PETSC_FALSE;
543       thi->alpha      = 0.5;
544       break;
545     case 'X':
546       thi->initialize = THIInitialize_HOM_X;
547       thi->no_slip    = PETSC_FALSE;
548       thi->alpha      = 0.3;
549       break;
550     case 'Y':
551       thi->initialize = THIInitialize_HOM_Y;
552       thi->no_slip    = PETSC_FALSE;
553       thi->alpha      = 0.5;
554       break;
555     case 'Z':
556       thi->initialize = THIInitialize_HOM_Z;
557       thi->no_slip    = PETSC_FALSE;
558       thi->alpha      = 0.5;
559       break;
560     default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "HOM experiment '%c' not implemented", homexp[0]);
561     }
562     PetscCall(PetscOptionsEnum("-thi_quadrature", "Quadrature to use for 3D elements", "", QuadratureTypes, (PetscEnum)quad, (PetscEnum *)&quad, NULL));
563     switch (quad) {
564     case QUAD_GAUSS:
565       HexQInterp = HexQInterp_Gauss;
566       HexQDeriv  = HexQDeriv_Gauss;
567       break;
568     case QUAD_LOBATTO:
569       HexQInterp = HexQInterp_Lobatto;
570       HexQDeriv  = HexQDeriv_Lobatto;
571       break;
572     }
573     PetscCall(PetscOptionsReal("-thi_alpha", "Bed angle (degrees)", "", thi->alpha, &thi->alpha, NULL));
574     PetscCall(PetscOptionsReal("-thi_viscosity_glen_n", "Exponent in Glen flow law, 1=linear, infty=ideal plastic", NULL, thi->viscosity.glen_n, &thi->viscosity.glen_n, NULL));
575     PetscCall(PetscOptionsReal("-thi_friction_m", "Friction exponent, 0=Coulomb, 1=Navier", "", m, &m, NULL));
576     thi->friction.exponent = (m - 1) / 2;
577     PetscCall(PetscOptionsReal("-thi_erosion_rate", "Rate of erosion relative to sliding velocity at reference velocity (m/a)", NULL, thi->erosion.rate, &thi->erosion.rate, NULL));
578     PetscCall(PetscOptionsReal("-thi_erosion_exponent", "Power of sliding velocity appearing in erosion relation", NULL, thi->erosion.exponent, &thi->erosion.exponent, NULL));
579     PetscCall(PetscOptionsReal("-thi_erosion_refvel", "Reference sliding velocity for erosion (m/a)", NULL, thi->erosion.refvel, &thi->erosion.refvel, NULL));
580     thi->erosion.rate *= units->meter / units->year;
581     thi->erosion.refvel *= units->meter / units->year;
582     PetscCall(PetscOptionsReal("-thi_dirichlet_scale", "Scale Dirichlet boundary conditions by this factor", "", thi->dirichlet_scale, &thi->dirichlet_scale, NULL));
583     PetscCall(PetscOptionsReal("-thi_ssa_friction_scale", "Scale slip boundary conditions by this factor in SSA (2D) assembly", "", thi->ssa_friction_scale, &thi->ssa_friction_scale, NULL));
584     PetscCall(PetscOptionsReal("-thi_inertia", "Coefficient of accelaration term in velocity system, physical is almost zero", NULL, thi->inertia, &thi->inertia, NULL));
585     PetscCall(PetscOptionsInt("-thi_nlevels", "Number of levels of refinement", "", thi->nlevels, &thi->nlevels, NULL));
586     PetscCall(PetscOptionsFList("-thi_mat_type", "Matrix type", "MatSetType", MatList, mtype, (char *)mtype, sizeof(mtype), NULL));
587     PetscCall(PetscStrallocpy(mtype, &thi->mattype));
588     PetscCall(PetscOptionsBool("-thi_verbose", "Enable verbose output (like matrix sizes and statistics)", "", thi->verbose, &thi->verbose, NULL));
589     PetscCall(PetscOptionsString("-thi_monitor", "Basename to write state files to", NULL, monitor_basename, monitor_basename, sizeof(monitor_basename), &flg));
590     if (flg) {
591       PetscCall(PetscStrallocpy(monitor_basename, &thi->monitor_basename));
592       thi->monitor_interval = 1;
593       PetscCall(PetscOptionsInt("-thi_monitor_interval", "Frequency at which to write state files", NULL, thi->monitor_interval, &thi->monitor_interval, NULL));
594     }
595   }
596   PetscOptionsEnd();
597 
598   /* dimensionalize */
599   thi->Lx *= units->meter;
600   thi->Ly *= units->meter;
601   thi->Lz *= units->meter;
602   thi->alpha *= PETSC_PI / 180;
603 
604   PRangeClear(&thi->eta);
605   PRangeClear(&thi->beta2);
606 
607   {
608     PetscReal u = 1000 * units->meter / (3e7 * units->second), gradu = u / (100 * units->meter), eta, deta, rho = 910 * units->kilogram / PetscPowRealInt(units->meter, 3), grav = 9.81 * units->meter / PetscSqr(units->second),
609               driving = rho * grav * PetscSinReal(thi->alpha) * 1000 * units->meter;
610     THIViscosity(thi, 0.5 * gradu * gradu, &eta, &deta);
611     thi->rhog = rho * grav;
612     if (thi->verbose) {
613       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Units: meter %8.2g  second %8.2g  kg %8.2g  Pa %8.2g\n", (double)units->meter, (double)units->second, (double)units->kilogram, (double)units->Pascal));
614       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n", (double)thi->Lx, (double)thi->Ly, (double)thi->Lz, (double)(rho * grav * 1e3 * units->meter), (double)driving));
615       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)u, (double)gradu, (double)eta, (double)(2 * eta * gradu, 2 * eta * gradu / driving)));
616       THIViscosity(thi, 0.5 * PetscSqr(1e-3 * gradu), &eta, &deta);
617       PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Small velocity 1m/a  %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)(1e-3 * u), (double)(1e-3 * gradu), (double)eta, (double)(2 * eta * 1e-3 * gradu, 2 * eta * 1e-3 * gradu / driving)));
618     }
619   }
620 
621   *inthi = thi;
622   PetscFunctionReturn(0);
623 }
624 
625 /* Our problem is periodic, but the domain has a mean slope of alpha so the bed does not line up between the upstream
626  * and downstream ends of the domain.  This function fixes the ghost values so that the domain appears truly periodic in
627  * the horizontal. */
628 static PetscErrorCode THIFixGhosts(THI thi, DM da3, DM da2, Vec X3, Vec X2) {
629   DMDALocalInfo info;
630   PrmNode     **x2;
631   PetscInt      i, j;
632 
633   PetscFunctionBeginUser;
634   PetscCall(DMDAGetLocalInfo(da3, &info));
635   /* PetscCall(VecView(X2,PETSC_VIEWER_STDOUT_WORLD)); */
636   PetscCall(DMDAVecGetArray(da2, X2, &x2));
637   for (i = info.gzs; i < info.gzs + info.gzm; i++) {
638     if (i > -1 && i < info.mz) continue;
639     for (j = info.gys; j < info.gys + info.gym; j++) x2[i][j].b += PetscSinReal(thi->alpha) * thi->Lx * (i < 0 ? 1.0 : -1.0);
640   }
641   PetscCall(DMDAVecRestoreArray(da2, X2, &x2));
642   /* PetscCall(VecView(X2,PETSC_VIEWER_STDOUT_WORLD)); */
643   PetscFunctionReturn(0);
644 }
645 
646 static PetscErrorCode THIInitializePrm(THI thi, DM da2prm, PrmNode **p) {
647   PetscInt i, j, xs, xm, ys, ym, mx, my;
648 
649   PetscFunctionBeginUser;
650   PetscCall(DMDAGetGhostCorners(da2prm, &ys, &xs, 0, &ym, &xm, 0));
651   PetscCall(DMDAGetInfo(da2prm, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
652   for (i = xs; i < xs + xm; i++) {
653     for (j = ys; j < ys + ym; j++) {
654       PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my;
655       thi->initialize(thi, xx, yy, &p[i][j]);
656     }
657   }
658   PetscFunctionReturn(0);
659 }
660 
661 static PetscErrorCode THIInitial(THI thi, DM pack, Vec X) {
662   DM        da3, da2;
663   PetscInt  i, j, k, xs, xm, ys, ym, zs, zm, mx, my;
664   PetscReal hx, hy;
665   PrmNode **prm;
666   Node   ***x;
667   Vec       X3g, X2g, X2;
668 
669   PetscFunctionBeginUser;
670   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
671   PetscCall(DMCompositeGetAccess(pack, X, &X3g, &X2g));
672   PetscCall(DMGetLocalVector(da2, &X2));
673 
674   PetscCall(DMDAGetInfo(da3, 0, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
675   PetscCall(DMDAGetCorners(da3, &zs, &ys, &xs, &zm, &ym, &xm));
676   PetscCall(DMDAVecGetArray(da3, X3g, &x));
677   PetscCall(DMDAVecGetArray(da2, X2, &prm));
678 
679   PetscCall(THIInitializePrm(thi, da2, prm));
680 
681   hx = thi->Lx / mx;
682   hy = thi->Ly / my;
683   for (i = xs; i < xs + xm; i++) {
684     for (j = ys; j < ys + ym; j++) {
685       for (k = zs; k < zs + zm; k++) {
686         const PetscScalar zm1 = zm - 1, drivingx = thi->rhog * (prm[i + 1][j].b + prm[i + 1][j].h - prm[i - 1][j].b - prm[i - 1][j].h) / (2 * hx), drivingy = thi->rhog * (prm[i][j + 1].b + prm[i][j + 1].h - prm[i][j - 1].b - prm[i][j - 1].h) / (2 * hy);
687         x[i][j][k].u = 0. * drivingx * prm[i][j].h * (PetscScalar)k / zm1;
688         x[i][j][k].v = 0. * drivingy * prm[i][j].h * (PetscScalar)k / zm1;
689       }
690     }
691   }
692 
693   PetscCall(DMDAVecRestoreArray(da3, X3g, &x));
694   PetscCall(DMDAVecRestoreArray(da2, X2, &prm));
695 
696   PetscCall(DMLocalToGlobalBegin(da2, X2, INSERT_VALUES, X2g));
697   PetscCall(DMLocalToGlobalEnd(da2, X2, INSERT_VALUES, X2g));
698   PetscCall(DMRestoreLocalVector(da2, &X2));
699 
700   PetscCall(DMCompositeRestoreAccess(pack, X, &X3g, &X2g));
701   PetscFunctionReturn(0);
702 }
703 
704 static void PointwiseNonlinearity(THI thi, const Node n[restrict 8], const PetscReal phi[restrict 3], PetscReal dphi[restrict 8][3], PetscScalar *restrict u, PetscScalar *restrict v, PetscScalar du[restrict 3], PetscScalar dv[restrict 3], PetscReal *eta, PetscReal *deta) {
705   PetscInt    l, ll;
706   PetscScalar gam;
707 
708   du[0] = du[1] = du[2] = 0;
709   dv[0] = dv[1] = dv[2] = 0;
710   *u                    = 0;
711   *v                    = 0;
712   for (l = 0; l < 8; l++) {
713     *u += phi[l] * n[l].u;
714     *v += phi[l] * n[l].v;
715     for (ll = 0; ll < 3; ll++) {
716       du[ll] += dphi[l][ll] * n[l].u;
717       dv[ll] += dphi[l][ll] * n[l].v;
718     }
719   }
720   gam = Sqr(du[0]) + Sqr(dv[1]) + du[0] * dv[1] + 0.25 * Sqr(du[1] + dv[0]) + 0.25 * Sqr(du[2]) + 0.25 * Sqr(dv[2]);
721   THIViscosity(thi, PetscRealPart(gam), eta, deta);
722 }
723 
724 static PetscErrorCode THIFunctionLocal_3D(DMDALocalInfo *info, const Node ***x, const PrmNode **prm, const Node ***xdot, Node ***f, THI thi) {
725   PetscInt  xs, ys, xm, ym, zm, i, j, k, q, l;
726   PetscReal hx, hy, etamin, etamax, beta2min, beta2max;
727 
728   PetscFunctionBeginUser;
729   xs = info->zs;
730   ys = info->ys;
731   xm = info->zm;
732   ym = info->ym;
733   zm = info->xm;
734   hx = thi->Lx / info->mz;
735   hy = thi->Ly / info->my;
736 
737   etamin   = 1e100;
738   etamax   = 0;
739   beta2min = 1e100;
740   beta2max = 0;
741 
742   for (i = xs; i < xs + xm; i++) {
743     for (j = ys; j < ys + ym; j++) {
744       PrmNode pn[4], dpn[4][2];
745       QuadExtract(prm, i, j, pn);
746       PetscCall(QuadComputeGrad4(QuadQDeriv, hx, hy, pn, dpn));
747       for (k = 0; k < zm - 1; k++) {
748         PetscInt  ls = 0;
749         Node      n[8], ndot[8], *fn[8];
750         PetscReal zn[8], etabase = 0;
751 
752         PrmHexGetZ(pn, k, zm, zn);
753         HexExtract(x, i, j, k, n);
754         HexExtract(xdot, i, j, k, ndot);
755         HexExtractRef(f, i, j, k, fn);
756         if (thi->no_slip && k == 0) {
757           for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
758           /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
759           ls = 4;
760         }
761         for (q = 0; q < 8; q++) {
762           PetscReal   dz[3], phi[8], dphi[8][3], jw, eta, deta;
763           PetscScalar du[3], dv[3], u, v, udot = 0, vdot = 0;
764           for (l = ls; l < 8; l++) {
765             udot += HexQInterp[q][l] * ndot[l].u;
766             vdot += HexQInterp[q][l] * ndot[l].v;
767           }
768           HexGrad(HexQDeriv[q], zn, dz);
769           HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
770           PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
771           jw /= thi->rhog; /* scales residuals to be O(1) */
772           if (q == 0) etabase = eta;
773           RangeUpdate(&etamin, &etamax, eta);
774           for (l = ls; l < 8; l++) { /* test functions */
775             const PetscScalar ds[2] = {dpn[q % 4][0].h + dpn[q % 4][0].b, dpn[q % 4][1].h + dpn[q % 4][1].b};
776             const PetscReal   pp = phi[l], *dp = dphi[l];
777             fn[l]->u += dp[0] * jw * eta * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * du[2] + pp * jw * thi->rhog * ds[0];
778             fn[l]->v += dp[1] * jw * eta * (2. * du[0] + 4. * dv[1]) + dp[0] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * dv[2] + pp * jw * thi->rhog * ds[1];
779             fn[l]->u += pp * jw * udot * thi->inertia * pp;
780             fn[l]->v += pp * jw * vdot * thi->inertia * pp;
781           }
782         }
783         if (k == 0) { /* we are on a bottom face */
784           if (thi->no_slip) {
785             /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
786             * conditions.  After shenanigans above, etabase contains the effective viscosity at the closest quadrature
787             * point to the bed.  We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
788             * diagonal entry corresponding to the adjacent node.  The fundamental scaling of the viscous part is in
789             * diagu, diagv below.  This scaling is easy to recognize by considering the finite difference operator after
790             * scaling by element size.  The no-slip Dirichlet condition is scaled by this factor, and also in the
791             * assembled matrix (see the similar block in THIJacobianLocal).
792             *
793             * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends
794             * (nonlinearly in general) on the neighboring interior nodes through the local viscosity.  This will make
795             * a matrix-free Jacobian have extra entries in the corresponding row.  We assemble only the diagonal part,
796             * so the solution will exactly satisfy the boundary condition after the first linear iteration.
797             */
798             const PetscReal   hz    = PetscRealPart(pn[0].h) / (zm - 1.);
799             const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
800             fn[0]->u = thi->dirichlet_scale * diagu * x[i][j][k].u;
801             fn[0]->v = thi->dirichlet_scale * diagv * x[i][j][k].v;
802           } else {                    /* Integrate over bottom face to apply boundary condition */
803             for (q = 0; q < 4; q++) { /* We remove the explicit scaling of the residual by 1/rhog because beta2 already has that scaling to be O(1) */
804               const PetscReal jw = 0.25 * hx * hy, *phi = QuadQInterp[q];
805               PetscScalar     u = 0, v = 0, rbeta2 = 0;
806               PetscReal       beta2, dbeta2;
807               for (l = 0; l < 4; l++) {
808                 u += phi[l] * n[l].u;
809                 v += phi[l] * n[l].v;
810                 rbeta2 += phi[l] * pn[l].beta2;
811               }
812               THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
813               RangeUpdate(&beta2min, &beta2max, beta2);
814               for (l = 0; l < 4; l++) {
815                 const PetscReal pp = phi[l];
816                 fn[ls + l]->u += pp * jw * beta2 * u;
817                 fn[ls + l]->v += pp * jw * beta2 * v;
818               }
819             }
820           }
821         }
822       }
823     }
824   }
825 
826   PetscCall(PRangeMinMax(&thi->eta, etamin, etamax));
827   PetscCall(PRangeMinMax(&thi->beta2, beta2min, beta2max));
828   PetscFunctionReturn(0);
829 }
830 
831 static PetscErrorCode THIFunctionLocal_2D(DMDALocalInfo *info, const Node ***x, const PrmNode **prm, const PrmNode **prmdot, PrmNode **f, THI thi) {
832   PetscInt xs, ys, xm, ym, zm, i, j, k;
833 
834   PetscFunctionBeginUser;
835   xs = info->zs;
836   ys = info->ys;
837   xm = info->zm;
838   ym = info->ym;
839   zm = info->xm;
840 
841   for (i = xs; i < xs + xm; i++) {
842     for (j = ys; j < ys + ym; j++) {
843       PetscScalar div = 0, erate, h[8];
844       PrmNodeGetFaceMeasure(prm, i, j, h);
845       for (k = 0; k < zm; k++) {
846         PetscScalar weight = (k == 0 || k == zm - 1) ? 0.5 / (zm - 1) : 1.0 / (zm - 1);
847         if (0) { /* centered flux */
848           div += (-weight * h[0] * StaggeredMidpoint2D(x[i][j][k].u, x[i - 1][j][k].u, x[i - 1][j - 1][k].u, x[i][j - 1][k].u) - weight * h[1] * StaggeredMidpoint2D(x[i][j][k].u, x[i - 1][j][k].u, x[i - 1][j + 1][k].u, x[i][j + 1][k].u) +
849                   weight * h[2] * StaggeredMidpoint2D(x[i][j][k].u, x[i + 1][j][k].u, x[i + 1][j + 1][k].u, x[i][j + 1][k].u) + weight * h[3] * StaggeredMidpoint2D(x[i][j][k].u, x[i + 1][j][k].u, x[i + 1][j - 1][k].u, x[i][j - 1][k].u) -
850                   weight * h[4] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j - 1][k].v, x[i + 1][j - 1][k].v, x[i + 1][j][k].v) - weight * h[5] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j - 1][k].v, x[i - 1][j - 1][k].v, x[i - 1][j][k].v) +
851                   weight * h[6] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j + 1][k].v, x[i - 1][j + 1][k].v, x[i - 1][j][k].v) + weight * h[7] * StaggeredMidpoint2D(x[i][j][k].v, x[i][j + 1][k].v, x[i + 1][j + 1][k].v, x[i + 1][j][k].v));
852         } else { /* Upwind flux */
853           div += weight * (-UpwindFluxXW(x, prm, h, i, j, k, 1) - UpwindFluxXW(x, prm, h, i, j, k, -1) + UpwindFluxXE(x, prm, h, i, j, k, 1) + UpwindFluxXE(x, prm, h, i, j, k, -1) - UpwindFluxYS(x, prm, h, i, j, k, 1) - UpwindFluxYS(x, prm, h, i, j, k, -1) + UpwindFluxYN(x, prm, h, i, j, k, 1) + UpwindFluxYN(x, prm, h, i, j, k, -1));
854         }
855       }
856       /* printf("div[%d][%d] %g\n",i,j,div); */
857       THIErosion(thi, &x[i][j][0], &erate, NULL);
858       f[i][j].b     = prmdot[i][j].b - erate;
859       f[i][j].h     = prmdot[i][j].h + div;
860       f[i][j].beta2 = prmdot[i][j].beta2;
861     }
862   }
863   PetscFunctionReturn(0);
864 }
865 
866 static PetscErrorCode THIFunction(TS ts, PetscReal t, Vec X, Vec Xdot, Vec F, void *ctx) {
867   THI             thi = (THI)ctx;
868   DM              pack, da3, da2;
869   Vec             X3, X2, Xdot3, Xdot2, F3, F2, F3g, F2g;
870   const Node   ***x3, ***xdot3;
871   const PrmNode **x2, **xdot2;
872   Node         ***f3;
873   PrmNode       **f2;
874   DMDALocalInfo   info3;
875 
876   PetscFunctionBeginUser;
877   PetscCall(TSGetDM(ts, &pack));
878   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
879   PetscCall(DMDAGetLocalInfo(da3, &info3));
880   PetscCall(DMCompositeGetLocalVectors(pack, &X3, &X2));
881   PetscCall(DMCompositeGetLocalVectors(pack, &Xdot3, &Xdot2));
882   PetscCall(DMCompositeScatter(pack, X, X3, X2));
883   PetscCall(THIFixGhosts(thi, da3, da2, X3, X2));
884   PetscCall(DMCompositeScatter(pack, Xdot, Xdot3, Xdot2));
885 
886   PetscCall(DMGetLocalVector(da3, &F3));
887   PetscCall(DMGetLocalVector(da2, &F2));
888   PetscCall(VecZeroEntries(F3));
889 
890   PetscCall(DMDAVecGetArray(da3, X3, &x3));
891   PetscCall(DMDAVecGetArray(da2, X2, &x2));
892   PetscCall(DMDAVecGetArray(da3, Xdot3, &xdot3));
893   PetscCall(DMDAVecGetArray(da2, Xdot2, &xdot2));
894   PetscCall(DMDAVecGetArray(da3, F3, &f3));
895   PetscCall(DMDAVecGetArray(da2, F2, &f2));
896 
897   PetscCall(THIFunctionLocal_3D(&info3, x3, x2, xdot3, f3, thi));
898   PetscCall(THIFunctionLocal_2D(&info3, x3, x2, xdot2, f2, thi));
899 
900   PetscCall(DMDAVecRestoreArray(da3, X3, &x3));
901   PetscCall(DMDAVecRestoreArray(da2, X2, &x2));
902   PetscCall(DMDAVecRestoreArray(da3, Xdot3, &xdot3));
903   PetscCall(DMDAVecRestoreArray(da2, Xdot2, &xdot2));
904   PetscCall(DMDAVecRestoreArray(da3, F3, &f3));
905   PetscCall(DMDAVecRestoreArray(da2, F2, &f2));
906 
907   PetscCall(DMCompositeRestoreLocalVectors(pack, &X3, &X2));
908   PetscCall(DMCompositeRestoreLocalVectors(pack, &Xdot3, &Xdot2));
909 
910   PetscCall(VecZeroEntries(F));
911   PetscCall(DMCompositeGetAccess(pack, F, &F3g, &F2g));
912   PetscCall(DMLocalToGlobalBegin(da3, F3, ADD_VALUES, F3g));
913   PetscCall(DMLocalToGlobalEnd(da3, F3, ADD_VALUES, F3g));
914   PetscCall(DMLocalToGlobalBegin(da2, F2, INSERT_VALUES, F2g));
915   PetscCall(DMLocalToGlobalEnd(da2, F2, INSERT_VALUES, F2g));
916 
917   if (thi->verbose) {
918     PetscViewer viewer;
919     PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)thi), &viewer));
920     PetscCall(PetscViewerASCIIPrintf(viewer, "3D_Velocity residual (bs=2):\n"));
921     PetscCall(PetscViewerASCIIPushTab(viewer));
922     PetscCall(VecView(F3, viewer));
923     PetscCall(PetscViewerASCIIPopTab(viewer));
924     PetscCall(PetscViewerASCIIPrintf(viewer, "2D_Fields residual (bs=3):\n"));
925     PetscCall(PetscViewerASCIIPushTab(viewer));
926     PetscCall(VecView(F2, viewer));
927     PetscCall(PetscViewerASCIIPopTab(viewer));
928   }
929 
930   PetscCall(DMCompositeRestoreAccess(pack, F, &F3g, &F2g));
931 
932   PetscCall(DMRestoreLocalVector(da3, &F3));
933   PetscCall(DMRestoreLocalVector(da2, &F2));
934   PetscFunctionReturn(0);
935 }
936 
937 static PetscErrorCode THIMatrixStatistics(THI thi, Mat B, PetscViewer viewer) {
938   PetscReal   nrm;
939   PetscInt    m;
940   PetscMPIInt rank;
941 
942   PetscFunctionBeginUser;
943   PetscCall(MatNorm(B, NORM_FROBENIUS, &nrm));
944   PetscCall(MatGetSize(B, &m, 0));
945   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &rank));
946   if (rank == 0) {
947     PetscScalar val0, val2;
948     PetscCall(MatGetValue(B, 0, 0, &val0));
949     PetscCall(MatGetValue(B, 2, 2, &val2));
950     PetscCall(PetscViewerASCIIPrintf(viewer, "Matrix dim %8" PetscInt_FMT "  norm %8.2e, (0,0) %8.2e  (2,2) %8.2e, eta [%8.2e,%8.2e] beta2 [%8.2e,%8.2e]\n", m, (double)nrm, (double)PetscRealPart(val0), (double)PetscRealPart(val2), (double)thi->eta.cmin,
951                                      (double)thi->eta.cmax, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
952   }
953   PetscFunctionReturn(0);
954 }
955 
956 static PetscErrorCode THISurfaceStatistics(DM pack, Vec X, PetscReal *min, PetscReal *max, PetscReal *mean) {
957   DM          da3, da2;
958   Vec         X3, X2;
959   Node     ***x;
960   PetscInt    i, j, xs, ys, zs, xm, ym, zm, mx, my, mz;
961   PetscReal   umin = 1e100, umax = -1e100;
962   PetscScalar usum = 0.0, gusum;
963 
964   PetscFunctionBeginUser;
965   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
966   PetscCall(DMCompositeGetAccess(pack, X, &X3, &X2));
967   *min = *max = *mean = 0;
968   PetscCall(DMDAGetInfo(da3, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
969   PetscCall(DMDAGetCorners(da3, &zs, &ys, &xs, &zm, &ym, &xm));
970   PetscCheck(zs == 0 && zm == mz, PETSC_COMM_WORLD, PETSC_ERR_PLIB, "Unexpected decomposition");
971   PetscCall(DMDAVecGetArray(da3, X3, &x));
972   for (i = xs; i < xs + xm; i++) {
973     for (j = ys; j < ys + ym; j++) {
974       PetscReal u = PetscRealPart(x[i][j][zm - 1].u);
975       RangeUpdate(&umin, &umax, u);
976       usum += u;
977     }
978   }
979   PetscCall(DMDAVecRestoreArray(da3, X3, &x));
980   PetscCall(DMCompositeRestoreAccess(pack, X, &X3, &X2));
981 
982   PetscCallMPI(MPI_Allreduce(&umin, min, 1, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)da3)));
983   PetscCallMPI(MPI_Allreduce(&umax, max, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)da3)));
984   PetscCallMPI(MPI_Allreduce(&usum, &gusum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)da3)));
985   *mean = PetscRealPart(gusum) / (mx * my);
986   PetscFunctionReturn(0);
987 }
988 
989 static PetscErrorCode THISolveStatistics(THI thi, TS ts, PetscInt coarsened, const char name[]) {
990   MPI_Comm comm;
991   DM       pack;
992   Vec      X, X3, X2;
993 
994   PetscFunctionBeginUser;
995   PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
996   PetscCall(TSGetDM(ts, &pack));
997   PetscCall(TSGetSolution(ts, &X));
998   PetscCall(DMCompositeGetAccess(pack, X, &X3, &X2));
999   PetscCall(PetscPrintf(comm, "Solution statistics after solve: %s\n", name));
1000   {
1001     PetscInt            its, lits;
1002     SNESConvergedReason reason;
1003     SNES                snes;
1004     PetscCall(TSGetSNES(ts, &snes));
1005     PetscCall(SNESGetIterationNumber(snes, &its));
1006     PetscCall(SNESGetConvergedReason(snes, &reason));
1007     PetscCall(SNESGetLinearSolveIterations(snes, &lits));
1008     PetscCall(PetscPrintf(comm, "%s: Number of SNES iterations = %" PetscInt_FMT ", total linear iterations = %" PetscInt_FMT "\n", SNESConvergedReasons[reason], its, lits));
1009   }
1010   {
1011     PetscReal    nrm2, tmin[3] = {1e100, 1e100, 1e100}, tmax[3] = {-1e100, -1e100, -1e100}, min[3], max[3];
1012     PetscInt     i, j, m;
1013     PetscScalar *x;
1014     PetscCall(VecNorm(X3, NORM_2, &nrm2));
1015     PetscCall(VecGetLocalSize(X3, &m));
1016     PetscCall(VecGetArray(X3, &x));
1017     for (i = 0; i < m; i += 2) {
1018       PetscReal u = PetscRealPart(x[i]), v = PetscRealPart(x[i + 1]), c = PetscSqrtReal(u * u + v * v);
1019       tmin[0] = PetscMin(u, tmin[0]);
1020       tmin[1] = PetscMin(v, tmin[1]);
1021       tmin[2] = PetscMin(c, tmin[2]);
1022       tmax[0] = PetscMax(u, tmax[0]);
1023       tmax[1] = PetscMax(v, tmax[1]);
1024       tmax[2] = PetscMax(c, tmax[2]);
1025     }
1026     PetscCall(VecRestoreArray(X, &x));
1027     PetscCallMPI(MPI_Allreduce(tmin, min, 3, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)thi)));
1028     PetscCallMPI(MPI_Allreduce(tmax, max, 3, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)thi)));
1029     /* Dimensionalize to meters/year */
1030     nrm2 *= thi->units->year / thi->units->meter;
1031     for (j = 0; j < 3; j++) {
1032       min[j] *= thi->units->year / thi->units->meter;
1033       max[j] *= thi->units->year / thi->units->meter;
1034     }
1035     PetscCall(PetscPrintf(comm, "|X|_2 %g   u in [%g, %g]   v in [%g, %g]   c in [%g, %g] \n", (double)nrm2, (double)min[0], (double)max[0], (double)min[1], (double)max[1], (double)min[2], (double)max[2]));
1036     {
1037       PetscReal umin, umax, umean;
1038       PetscCall(THISurfaceStatistics(pack, X, &umin, &umax, &umean));
1039       umin *= thi->units->year / thi->units->meter;
1040       umax *= thi->units->year / thi->units->meter;
1041       umean *= thi->units->year / thi->units->meter;
1042       PetscCall(PetscPrintf(comm, "Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n", (double)umin, (double)umax, (double)umean));
1043     }
1044     /* These values stay nondimensional */
1045     PetscCall(PetscPrintf(comm, "Global eta range   [%g, %g], converged range [%g, %g]\n", (double)thi->eta.min, (double)thi->eta.max, (double)thi->eta.cmin, (double)thi->eta.cmax));
1046     PetscCall(PetscPrintf(comm, "Global beta2 range [%g, %g], converged range [%g, %g]\n", (double)thi->beta2.min, (double)thi->beta2.max, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
1047   }
1048   PetscCall(PetscPrintf(comm, "\n"));
1049   PetscCall(DMCompositeRestoreAccess(pack, X, &X3, &X2));
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 static inline PetscInt DMDALocalIndex3D(DMDALocalInfo *info, PetscInt i, PetscInt j, PetscInt k) {
1054   return ((i - info->gzs) * info->gym + (j - info->gys)) * info->gxm + (k - info->gxs);
1055 }
1056 static inline PetscInt DMDALocalIndex2D(DMDALocalInfo *info, PetscInt i, PetscInt j) {
1057   return (i - info->gzs) * info->gym + (j - info->gys);
1058 }
1059 
1060 static PetscErrorCode THIJacobianLocal_Momentum(DMDALocalInfo *info, const Node ***x, const PrmNode **prm, Mat B, Mat Bcpl, THI thi) {
1061   PetscInt  xs, ys, xm, ym, zm, i, j, k, q, l, ll;
1062   PetscReal hx, hy;
1063 
1064   PetscFunctionBeginUser;
1065   xs = info->zs;
1066   ys = info->ys;
1067   xm = info->zm;
1068   ym = info->ym;
1069   zm = info->xm;
1070   hx = thi->Lx / info->mz;
1071   hy = thi->Ly / info->my;
1072 
1073   for (i = xs; i < xs + xm; i++) {
1074     for (j = ys; j < ys + ym; j++) {
1075       PrmNode pn[4], dpn[4][2];
1076       QuadExtract(prm, i, j, pn);
1077       PetscCall(QuadComputeGrad4(QuadQDeriv, hx, hy, pn, dpn));
1078       for (k = 0; k < zm - 1; k++) {
1079         Node        n[8];
1080         PetscReal   zn[8], etabase = 0;
1081         PetscScalar Ke[8 * NODE_SIZE][8 * NODE_SIZE], Kcpl[8 * NODE_SIZE][4 * PRMNODE_SIZE];
1082         PetscInt    ls = 0;
1083 
1084         PrmHexGetZ(pn, k, zm, zn);
1085         HexExtract(x, i, j, k, n);
1086         PetscCall(PetscMemzero(Ke, sizeof(Ke)));
1087         PetscCall(PetscMemzero(Kcpl, sizeof(Kcpl)));
1088         if (thi->no_slip && k == 0) {
1089           for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
1090           ls = 4;
1091         }
1092         for (q = 0; q < 8; q++) {
1093           PetscReal   dz[3], phi[8], dphi[8][3], jw, eta, deta;
1094           PetscScalar du[3], dv[3], u, v;
1095           HexGrad(HexQDeriv[q], zn, dz);
1096           HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
1097           PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
1098           jw /= thi->rhog; /* residuals are scaled by this factor */
1099           if (q == 0) etabase = eta;
1100           for (l = ls; l < 8; l++) { /* test functions */
1101             const PetscReal pp = phi[l], *restrict dp = dphi[l];
1102             for (ll = ls; ll < 8; ll++) { /* trial functions */
1103               const PetscReal *restrict dpl = dphi[ll];
1104               PetscScalar dgdu, dgdv;
1105               dgdu = 2. * du[0] * dpl[0] + dv[1] * dpl[0] + 0.5 * (du[1] + dv[0]) * dpl[1] + 0.5 * du[2] * dpl[2];
1106               dgdv = 2. * dv[1] * dpl[1] + du[0] * dpl[1] + 0.5 * (du[1] + dv[0]) * dpl[0] + 0.5 * dv[2] * dpl[2];
1107               /* Picard part */
1108               Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * eta * 4. * dpl[0] + dp[1] * jw * eta * dpl[1] + dp[2] * jw * eta * dpl[2];
1109               Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * eta * 2. * dpl[1] + dp[1] * jw * eta * dpl[0];
1110               Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * eta * 2. * dpl[0] + dp[0] * jw * eta * dpl[1];
1111               Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * eta * 4. * dpl[1] + dp[0] * jw * eta * dpl[0] + dp[2] * jw * eta * dpl[2];
1112               /* extra Newton terms */
1113               Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * deta * dgdu * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * du[2];
1114               Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * deta * dgdv * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * du[2];
1115               Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * deta * dgdu * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * dv[2];
1116               Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * deta * dgdv * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * dv[2];
1117               /* inertial part */
1118               Ke[l * 2 + 0][ll * 2 + 0] += pp * jw * thi->inertia * pp;
1119               Ke[l * 2 + 1][ll * 2 + 1] += pp * jw * thi->inertia * pp;
1120             }
1121             for (ll = 0; ll < 4; ll++) {                                                              /* Trial functions for surface/bed */
1122               const PetscReal dpl[] = {QuadQDeriv[q % 4][ll][0] / hx, QuadQDeriv[q % 4][ll][1] / hy}; /* surface = h + b */
1123               Kcpl[FieldIndex(Node, l, u)][FieldIndex(PrmNode, ll, h)] += pp * jw * thi->rhog * dpl[0];
1124               Kcpl[FieldIndex(Node, l, u)][FieldIndex(PrmNode, ll, b)] += pp * jw * thi->rhog * dpl[0];
1125               Kcpl[FieldIndex(Node, l, v)][FieldIndex(PrmNode, ll, h)] += pp * jw * thi->rhog * dpl[1];
1126               Kcpl[FieldIndex(Node, l, v)][FieldIndex(PrmNode, ll, b)] += pp * jw * thi->rhog * dpl[1];
1127             }
1128           }
1129         }
1130         if (k == 0) { /* on a bottom face */
1131           if (thi->no_slip) {
1132             const PetscReal   hz    = PetscRealPart(pn[0].h) / (zm - 1);
1133             const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
1134             Ke[0][0] = thi->dirichlet_scale * diagu;
1135             Ke[0][1] = 0;
1136             Ke[1][0] = 0;
1137             Ke[1][1] = thi->dirichlet_scale * diagv;
1138           } else {
1139             for (q = 0; q < 4; q++) { /* We remove the explicit scaling by 1/rhog because beta2 already has that scaling to be O(1) */
1140               const PetscReal jw = 0.25 * hx * hy, *phi = QuadQInterp[q];
1141               PetscScalar     u = 0, v = 0, rbeta2 = 0;
1142               PetscReal       beta2, dbeta2;
1143               for (l = 0; l < 4; l++) {
1144                 u += phi[l] * n[l].u;
1145                 v += phi[l] * n[l].v;
1146                 rbeta2 += phi[l] * pn[l].beta2;
1147               }
1148               THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
1149               for (l = 0; l < 4; l++) {
1150                 const PetscReal pp = phi[l];
1151                 for (ll = 0; ll < 4; ll++) {
1152                   const PetscReal ppl = phi[ll];
1153                   Ke[l * 2 + 0][ll * 2 + 0] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * u * u * ppl;
1154                   Ke[l * 2 + 0][ll * 2 + 1] += pp * jw * dbeta2 * u * v * ppl;
1155                   Ke[l * 2 + 1][ll * 2 + 0] += pp * jw * dbeta2 * v * u * ppl;
1156                   Ke[l * 2 + 1][ll * 2 + 1] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * v * v * ppl;
1157                 }
1158               }
1159             }
1160           }
1161         }
1162         {
1163           const PetscInt rc3blocked[8]                 = {DMDALocalIndex3D(info, i + 0, j + 0, k + 0), DMDALocalIndex3D(info, i + 1, j + 0, k + 0), DMDALocalIndex3D(info, i + 1, j + 1, k + 0), DMDALocalIndex3D(info, i + 0, j + 1, k + 0),
1164                                                           DMDALocalIndex3D(info, i + 0, j + 0, k + 1), DMDALocalIndex3D(info, i + 1, j + 0, k + 1), DMDALocalIndex3D(info, i + 1, j + 1, k + 1), DMDALocalIndex3D(info, i + 0, j + 1, k + 1)},
1165                          col2blocked[PRMNODE_SIZE * 4] = {DMDALocalIndex2D(info, i + 0, j + 0), DMDALocalIndex2D(info, i + 1, j + 0), DMDALocalIndex2D(info, i + 1, j + 1), DMDALocalIndex2D(info, i + 0, j + 1)};
1166 #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1167           for (l = 0; l < 8; l++) {
1168             for (ll = l + 1; ll < 8; ll++) {
1169               Ke[ll * 2 + 0][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 0];
1170               Ke[ll * 2 + 1][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 1];
1171               Ke[ll * 2 + 0][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 0];
1172               Ke[ll * 2 + 1][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 1];
1173             }
1174           }
1175 #endif
1176           PetscCall(MatSetValuesBlockedLocal(B, 8, rc3blocked, 8, rc3blocked, &Ke[0][0], ADD_VALUES)); /* velocity-velocity coupling can use blocked insertion */
1177           {                                                                                            /* The off-diagonal part cannot (yet) */
1178             PetscInt row3scalar[NODE_SIZE * 8], col2scalar[PRMNODE_SIZE * 4];
1179             for (l = 0; l < 8; l++)
1180               for (ll = 0; ll < NODE_SIZE; ll++) row3scalar[l * NODE_SIZE + ll] = rc3blocked[l] * NODE_SIZE + ll;
1181             for (l = 0; l < 4; l++)
1182               for (ll = 0; ll < PRMNODE_SIZE; ll++) col2scalar[l * PRMNODE_SIZE + ll] = col2blocked[l] * PRMNODE_SIZE + ll;
1183             PetscCall(MatSetValuesLocal(Bcpl, 8 * NODE_SIZE, row3scalar, 4 * PRMNODE_SIZE, col2scalar, &Kcpl[0][0], ADD_VALUES));
1184           }
1185         }
1186       }
1187     }
1188   }
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info, const Node ***x3, const PrmNode **x2, const PrmNode **xdot2, PetscReal a, Mat B22, Mat B21, THI thi) {
1193   PetscInt xs, ys, xm, ym, zm, i, j, k;
1194 
1195   PetscFunctionBeginUser;
1196   xs = info->zs;
1197   ys = info->ys;
1198   xm = info->zm;
1199   ym = info->ym;
1200   zm = info->xm;
1201 
1202   PetscCheck(zm <= 1024, ((PetscObject)info->da)->comm, PETSC_ERR_SUP, "Need to allocate more space");
1203   for (i = xs; i < xs + xm; i++) {
1204     for (j = ys; j < ys + ym; j++) {
1205       { /* Self-coupling */
1206         const PetscInt    row[]  = {DMDALocalIndex2D(info, i, j)};
1207         const PetscInt    col[]  = {DMDALocalIndex2D(info, i, j)};
1208         const PetscScalar vals[] = {a, 0, 0, 0, a, 0, 0, 0, a};
1209         PetscCall(MatSetValuesBlockedLocal(B22, 1, row, 1, col, vals, INSERT_VALUES));
1210       }
1211       for (k = 0; k < zm; k++) { /* Coupling to velocity problem */
1212         /* Use a cheaper quadrature than for residual evaluation, because it is much sparser */
1213         const PetscInt    row[]  = {FieldIndex(PrmNode, DMDALocalIndex2D(info, i, j), h)};
1214         const PetscInt    cols[] = {FieldIndex(Node, DMDALocalIndex3D(info, i - 1, j, k), u), FieldIndex(Node, DMDALocalIndex3D(info, i, j, k), u), FieldIndex(Node, DMDALocalIndex3D(info, i + 1, j, k), u),
1215                                     FieldIndex(Node, DMDALocalIndex3D(info, i, j - 1, k), v), FieldIndex(Node, DMDALocalIndex3D(info, i, j, k), v), FieldIndex(Node, DMDALocalIndex3D(info, i, j + 1, k), v)};
1216         const PetscScalar w = (k && k < zm - 1) ? 0.5 : 0.25, hW = w * (x2[i - 1][j].h + x2[i][j].h) / (zm - 1.), hE = w * (x2[i][j].h + x2[i + 1][j].h) / (zm - 1.), hS = w * (x2[i][j - 1].h + x2[i][j].h) / (zm - 1.),
1217                           hN = w * (x2[i][j].h + x2[i][j + 1].h) / (zm - 1.);
1218         PetscScalar *vals, vals_upwind[] = {((PetscRealPart(x3[i][j][k].u) > 0) ? -hW : 0), ((PetscRealPart(x3[i][j][k].u) > 0) ? +hE : -hW), ((PetscRealPart(x3[i][j][k].u) > 0) ? 0 : +hE),
1219                                             ((PetscRealPart(x3[i][j][k].v) > 0) ? -hS : 0), ((PetscRealPart(x3[i][j][k].v) > 0) ? +hN : -hS), ((PetscRealPart(x3[i][j][k].v) > 0) ? 0 : +hN)},
1220                            vals_centered[] = {-0.5 * hW, 0.5 * (-hW + hE), 0.5 * hE, -0.5 * hS, 0.5 * (-hS + hN), 0.5 * hN};
1221         vals                               = 1 ? vals_upwind : vals_centered;
1222         if (k == 0) {
1223           Node derate;
1224           THIErosion(thi, &x3[i][j][0], NULL, &derate);
1225           vals[1] -= derate.u;
1226           vals[4] -= derate.v;
1227         }
1228         PetscCall(MatSetValuesLocal(B21, 1, row, 6, cols, vals, INSERT_VALUES));
1229       }
1230     }
1231   }
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 static PetscErrorCode THIJacobian(TS ts, PetscReal t, Vec X, Vec Xdot, PetscReal a, Mat A, Mat B, void *ctx) {
1236   THI             thi = (THI)ctx;
1237   DM              pack, da3, da2;
1238   Vec             X3, X2, Xdot2;
1239   Mat             B11, B12, B21, B22;
1240   DMDALocalInfo   info3;
1241   IS             *isloc;
1242   const Node   ***x3;
1243   const PrmNode **x2, **xdot2;
1244 
1245   PetscFunctionBeginUser;
1246   PetscCall(TSGetDM(ts, &pack));
1247   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
1248   PetscCall(DMDAGetLocalInfo(da3, &info3));
1249   PetscCall(DMCompositeGetLocalVectors(pack, &X3, &X2));
1250   PetscCall(DMCompositeGetLocalVectors(pack, NULL, &Xdot2));
1251   PetscCall(DMCompositeScatter(pack, X, X3, X2));
1252   PetscCall(THIFixGhosts(thi, da3, da2, X3, X2));
1253   PetscCall(DMCompositeScatter(pack, Xdot, NULL, Xdot2));
1254 
1255   PetscCall(MatZeroEntries(B));
1256 
1257   PetscCall(DMCompositeGetLocalISs(pack, &isloc));
1258   PetscCall(MatGetLocalSubMatrix(B, isloc[0], isloc[0], &B11));
1259   PetscCall(MatGetLocalSubMatrix(B, isloc[0], isloc[1], &B12));
1260   PetscCall(MatGetLocalSubMatrix(B, isloc[1], isloc[0], &B21));
1261   PetscCall(MatGetLocalSubMatrix(B, isloc[1], isloc[1], &B22));
1262 
1263   PetscCall(DMDAVecGetArray(da3, X3, &x3));
1264   PetscCall(DMDAVecGetArray(da2, X2, &x2));
1265   PetscCall(DMDAVecGetArray(da2, Xdot2, &xdot2));
1266 
1267   PetscCall(THIJacobianLocal_Momentum(&info3, x3, x2, B11, B12, thi));
1268 
1269   /* Need to switch from ADD_VALUES to INSERT_VALUES */
1270   PetscCall(MatAssemblyBegin(B, MAT_FLUSH_ASSEMBLY));
1271   PetscCall(MatAssemblyEnd(B, MAT_FLUSH_ASSEMBLY));
1272 
1273   PetscCall(THIJacobianLocal_2D(&info3, x3, x2, xdot2, a, B22, B21, thi));
1274 
1275   PetscCall(DMDAVecRestoreArray(da3, X3, &x3));
1276   PetscCall(DMDAVecRestoreArray(da2, X2, &x2));
1277   PetscCall(DMDAVecRestoreArray(da2, Xdot2, &xdot2));
1278 
1279   PetscCall(MatRestoreLocalSubMatrix(B, isloc[0], isloc[0], &B11));
1280   PetscCall(MatRestoreLocalSubMatrix(B, isloc[0], isloc[1], &B12));
1281   PetscCall(MatRestoreLocalSubMatrix(B, isloc[1], isloc[0], &B21));
1282   PetscCall(MatRestoreLocalSubMatrix(B, isloc[1], isloc[1], &B22));
1283   PetscCall(ISDestroy(&isloc[0]));
1284   PetscCall(ISDestroy(&isloc[1]));
1285   PetscCall(PetscFree(isloc));
1286 
1287   PetscCall(DMCompositeRestoreLocalVectors(pack, &X3, &X2));
1288   PetscCall(DMCompositeRestoreLocalVectors(pack, 0, &Xdot2));
1289 
1290   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1291   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1292   if (A != B) {
1293     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1294     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1295   }
1296   if (thi->verbose) PetscCall(THIMatrixStatistics(thi, B, PETSC_VIEWER_STDOUT_WORLD));
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 /* VTK's XML formats are so brain-dead that they can't handle multiple grids in the same file.  Since the communication
1301  * can be shared between the two grids, we write two files at once, one for velocity (living on a 3D grid defined by
1302  * h=thickness and b=bed) and another for all properties living on the 2D grid.
1303  */
1304 static PetscErrorCode THIDAVecView_VTK_XML(THI thi, DM pack, Vec X, const char filename[], const char filename2[]) {
1305   const PetscInt dof = NODE_SIZE, dof2 = PRMNODE_SIZE;
1306   Units          units = thi->units;
1307   MPI_Comm       comm;
1308   PetscViewer    viewer3, viewer2;
1309   PetscMPIInt    rank, size, tag, nn, nmax, nn2, nmax2;
1310   PetscInt       mx, my, mz, r, range[6];
1311   PetscScalar   *x, *x2;
1312   DM             da3, da2;
1313   Vec            X3, X2;
1314 
1315   PetscFunctionBeginUser;
1316   PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
1317   PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
1318   PetscCall(DMCompositeGetAccess(pack, X, &X3, &X2));
1319   PetscCall(DMDAGetInfo(da3, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1320   PetscCallMPI(MPI_Comm_size(comm, &size));
1321   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1322   PetscCall(PetscViewerASCIIOpen(comm, filename, &viewer3));
1323   PetscCall(PetscViewerASCIIOpen(comm, filename2, &viewer2));
1324   PetscCall(PetscViewerASCIIPrintf(viewer3, "<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n"));
1325   PetscCall(PetscViewerASCIIPrintf(viewer2, "<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n"));
1326   PetscCall(PetscViewerASCIIPrintf(viewer3, "  <StructuredGrid WholeExtent=\"%d %" PetscInt_FMT " %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n", 0, mz - 1, 0, my - 1, 0, mx - 1));
1327   PetscCall(PetscViewerASCIIPrintf(viewer2, "  <StructuredGrid WholeExtent=\"%d %d %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n", 0, 0, 0, my - 1, 0, mx - 1));
1328 
1329   PetscCall(DMDAGetCorners(da3, range, range + 1, range + 2, range + 3, range + 4, range + 5));
1330   PetscCall(PetscMPIIntCast(range[3] * range[4] * range[5] * dof, &nn));
1331   PetscCallMPI(MPI_Reduce(&nn, &nmax, 1, MPI_INT, MPI_MAX, 0, comm));
1332   PetscCall(PetscMPIIntCast(range[4] * range[5] * dof2, &nn2));
1333   PetscCallMPI(MPI_Reduce(&nn2, &nmax2, 1, MPI_INT, MPI_MAX, 0, comm));
1334   tag = ((PetscObject)viewer3)->tag;
1335   PetscCall(VecGetArrayRead(X3, (const PetscScalar **)&x));
1336   PetscCall(VecGetArrayRead(X2, (const PetscScalar **)&x2));
1337   if (rank == 0) {
1338     PetscScalar *array, *array2;
1339     PetscCall(PetscMalloc2(nmax, &array, nmax2, &array2));
1340     for (r = 0; r < size; r++) {
1341       PetscInt i, j, k, f, xs, xm, ys, ym, zs, zm;
1342       Node    *y3;
1343       PetscScalar(*y2)[PRMNODE_SIZE];
1344       MPI_Status status;
1345 
1346       if (r) PetscCallMPI(MPI_Recv(range, 6, MPIU_INT, r, tag, comm, MPI_STATUS_IGNORE));
1347       zs = range[0];
1348       ys = range[1];
1349       xs = range[2];
1350       zm = range[3];
1351       ym = range[4];
1352       xm = range[5];
1353       PetscCheck(xm * ym * zm * dof <= nmax, PETSC_COMM_SELF, PETSC_ERR_PLIB, "should not happen");
1354       if (r) {
1355         PetscCallMPI(MPI_Recv(array, nmax, MPIU_SCALAR, r, tag, comm, &status));
1356         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &nn));
1357         PetscCheck(nn == xm * ym * zm * dof, PETSC_COMM_SELF, PETSC_ERR_PLIB, "corrupt da3 send");
1358         y3 = (Node *)array;
1359         PetscCallMPI(MPI_Recv(array2, nmax2, MPIU_SCALAR, r, tag, comm, &status));
1360         PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &nn2));
1361         PetscCheck(nn2 == xm * ym * dof2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "corrupt da2 send");
1362         y2 = (PetscScalar(*)[PRMNODE_SIZE])array2;
1363       } else {
1364         y3 = (Node *)x;
1365         y2 = (PetscScalar(*)[PRMNODE_SIZE])x2;
1366       }
1367       PetscCall(PetscViewerASCIIPrintf(viewer3, "    <Piece Extent=\"%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n", zs, zs + zm - 1, ys, ys + ym - 1, xs, xs + xm - 1));
1368       PetscCall(PetscViewerASCIIPrintf(viewer2, "    <Piece Extent=\"%d %d %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n", 0, 0, ys, ys + ym - 1, xs, xs + xm - 1));
1369 
1370       PetscCall(PetscViewerASCIIPrintf(viewer3, "      <Points>\n"));
1371       PetscCall(PetscViewerASCIIPrintf(viewer2, "      <Points>\n"));
1372       PetscCall(PetscViewerASCIIPrintf(viewer3, "        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1373       PetscCall(PetscViewerASCIIPrintf(viewer2, "        <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1374       for (i = xs; i < xs + xm; i++) {
1375         for (j = ys; j < ys + ym; j++) {
1376           PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my, b = PetscRealPart(y2[i * ym + j][FieldOffset(PrmNode, b)]), h = PetscRealPart(y2[i * ym + j][FieldOffset(PrmNode, h)]);
1377           for (k = zs; k < zs + zm; k++) {
1378             PetscReal zz = b + h * k / (mz - 1.);
1379             PetscCall(PetscViewerASCIIPrintf(viewer3, "%f %f %f\n", (double)xx, (double)yy, (double)zz));
1380           }
1381           PetscCall(PetscViewerASCIIPrintf(viewer2, "%f %f %f\n", (double)xx, (double)yy, (double)0.0));
1382         }
1383       }
1384       PetscCall(PetscViewerASCIIPrintf(viewer3, "        </DataArray>\n"));
1385       PetscCall(PetscViewerASCIIPrintf(viewer2, "        </DataArray>\n"));
1386       PetscCall(PetscViewerASCIIPrintf(viewer3, "      </Points>\n"));
1387       PetscCall(PetscViewerASCIIPrintf(viewer2, "      </Points>\n"));
1388 
1389       { /* Velocity and rank (3D) */
1390         PetscCall(PetscViewerASCIIPrintf(viewer3, "      <PointData>\n"));
1391         PetscCall(PetscViewerASCIIPrintf(viewer3, "        <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n"));
1392         for (i = 0; i < nn / dof; i++) PetscCall(PetscViewerASCIIPrintf(viewer3, "%f %f %f\n", (double)(PetscRealPart(y3[i].u) * units->year / units->meter), (double)(PetscRealPart(y3[i].v) * units->year / units->meter), 0.0));
1393         PetscCall(PetscViewerASCIIPrintf(viewer3, "        </DataArray>\n"));
1394 
1395         PetscCall(PetscViewerASCIIPrintf(viewer3, "        <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n"));
1396         for (i = 0; i < nn; i += dof) PetscCall(PetscViewerASCIIPrintf(viewer3, "%" PetscInt_FMT "\n", r));
1397         PetscCall(PetscViewerASCIIPrintf(viewer3, "        </DataArray>\n"));
1398         PetscCall(PetscViewerASCIIPrintf(viewer3, "      </PointData>\n"));
1399       }
1400 
1401       { /* 2D */
1402         PetscCall(PetscViewerASCIIPrintf(viewer2, "      <PointData>\n"));
1403         for (f = 0; f < PRMNODE_SIZE; f++) {
1404           const char *fieldname;
1405           PetscCall(DMDAGetFieldName(da2, f, &fieldname));
1406           PetscCall(PetscViewerASCIIPrintf(viewer2, "        <DataArray type=\"Float32\" Name=\"%s\" format=\"ascii\">\n", fieldname));
1407           for (i = 0; i < nn2 / PRMNODE_SIZE; i++) PetscCall(PetscViewerASCIIPrintf(viewer2, "%g\n", (double)y2[i][f]));
1408           PetscCall(PetscViewerASCIIPrintf(viewer2, "        </DataArray>\n"));
1409         }
1410         PetscCall(PetscViewerASCIIPrintf(viewer2, "      </PointData>\n"));
1411       }
1412 
1413       PetscCall(PetscViewerASCIIPrintf(viewer3, "    </Piece>\n"));
1414       PetscCall(PetscViewerASCIIPrintf(viewer2, "    </Piece>\n"));
1415     }
1416     PetscCall(PetscFree2(array, array2));
1417   } else {
1418     PetscCallMPI(MPI_Send(range, 6, MPIU_INT, 0, tag, comm));
1419     PetscCallMPI(MPI_Send(x, nn, MPIU_SCALAR, 0, tag, comm));
1420     PetscCallMPI(MPI_Send(x2, nn2, MPIU_SCALAR, 0, tag, comm));
1421   }
1422   PetscCall(VecRestoreArrayRead(X3, (const PetscScalar **)&x));
1423   PetscCall(VecRestoreArrayRead(X2, (const PetscScalar **)&x2));
1424   PetscCall(PetscViewerASCIIPrintf(viewer3, "  </StructuredGrid>\n"));
1425   PetscCall(PetscViewerASCIIPrintf(viewer2, "  </StructuredGrid>\n"));
1426 
1427   PetscCall(DMCompositeRestoreAccess(pack, X, &X3, &X2));
1428   PetscCall(PetscViewerASCIIPrintf(viewer3, "</VTKFile>\n"));
1429   PetscCall(PetscViewerASCIIPrintf(viewer2, "</VTKFile>\n"));
1430   PetscCall(PetscViewerDestroy(&viewer3));
1431   PetscCall(PetscViewerDestroy(&viewer2));
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 static PetscErrorCode THITSMonitor(TS ts, PetscInt step, PetscReal t, Vec X, void *ctx) {
1436   THI  thi = (THI)ctx;
1437   DM   pack;
1438   char filename3[PETSC_MAX_PATH_LEN], filename2[PETSC_MAX_PATH_LEN];
1439 
1440   PetscFunctionBeginUser;
1441   if (step < 0) PetscFunctionReturn(0); /* negative one is used to indicate an interpolated solution */
1442   PetscCall(PetscPrintf(PetscObjectComm((PetscObject)ts), "%3" PetscInt_FMT ": t=%g\n", step, (double)t));
1443   if (thi->monitor_interval && step % thi->monitor_interval) PetscFunctionReturn(0);
1444   PetscCall(TSGetDM(ts, &pack));
1445   PetscCall(PetscSNPrintf(filename3, sizeof(filename3), "%s-3d-%03" PetscInt_FMT ".vts", thi->monitor_basename, step));
1446   PetscCall(PetscSNPrintf(filename2, sizeof(filename2), "%s-2d-%03" PetscInt_FMT ".vts", thi->monitor_basename, step));
1447   PetscCall(THIDAVecView_VTK_XML(thi, pack, X, filename3, filename2));
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 static PetscErrorCode THICreateDM3d(THI thi, DM *dm3d) {
1452   MPI_Comm comm;
1453   PetscInt M = 3, N = 3, P = 2;
1454   DM       da;
1455 
1456   PetscFunctionBeginUser;
1457   PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
1458   PetscOptionsBegin(comm, NULL, "Grid resolution options", "");
1459   {
1460     PetscCall(PetscOptionsInt("-M", "Number of elements in x-direction on coarse level", "", M, &M, NULL));
1461     N = M;
1462     PetscCall(PetscOptionsInt("-N", "Number of elements in y-direction on coarse level (if different from M)", "", N, &N, NULL));
1463     PetscCall(PetscOptionsInt("-P", "Number of elements in z-direction on coarse level", "", P, &P, NULL));
1464   }
1465   PetscOptionsEnd();
1466   PetscCall(DMDACreate3d(comm, DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX, P, N, M, 1, PETSC_DETERMINE, PETSC_DETERMINE, sizeof(Node) / sizeof(PetscScalar), 1, 0, 0, 0, &da));
1467   PetscCall(DMSetFromOptions(da));
1468   PetscCall(DMSetUp(da));
1469   PetscCall(DMDASetFieldName(da, 0, "x-velocity"));
1470   PetscCall(DMDASetFieldName(da, 1, "y-velocity"));
1471   *dm3d = da;
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 int main(int argc, char *argv[]) {
1476   MPI_Comm  comm;
1477   DM        pack, da3, da2;
1478   TS        ts;
1479   THI       thi;
1480   Vec       X;
1481   Mat       B;
1482   PetscInt  i, steps;
1483   PetscReal ftime;
1484 
1485   PetscFunctionBeginUser;
1486   PetscCall(PetscInitialize(&argc, &argv, 0, help));
1487   comm = PETSC_COMM_WORLD;
1488 
1489   PetscCall(THICreate(comm, &thi));
1490   PetscCall(THICreateDM3d(thi, &da3));
1491   {
1492     PetscInt        Mx, My, mx, my, s;
1493     DMDAStencilType st;
1494     PetscCall(DMDAGetInfo(da3, 0, 0, &My, &Mx, 0, &my, &mx, 0, &s, 0, 0, 0, &st));
1495     PetscCall(DMDACreate2d(PetscObjectComm((PetscObject)thi), DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, My, Mx, my, mx, sizeof(PrmNode) / sizeof(PetscScalar), s, 0, 0, &da2));
1496     PetscCall(DMSetUp(da2));
1497   }
1498 
1499   PetscCall(PetscObjectSetName((PetscObject)da3, "3D_Velocity"));
1500   PetscCall(DMSetOptionsPrefix(da3, "f3d_"));
1501   PetscCall(DMDASetFieldName(da3, 0, "u"));
1502   PetscCall(DMDASetFieldName(da3, 1, "v"));
1503   PetscCall(PetscObjectSetName((PetscObject)da2, "2D_Fields"));
1504   PetscCall(DMSetOptionsPrefix(da2, "f2d_"));
1505   PetscCall(DMDASetFieldName(da2, 0, "b"));
1506   PetscCall(DMDASetFieldName(da2, 1, "h"));
1507   PetscCall(DMDASetFieldName(da2, 2, "beta2"));
1508   PetscCall(DMCompositeCreate(comm, &pack));
1509   PetscCall(DMCompositeAddDM(pack, da3));
1510   PetscCall(DMCompositeAddDM(pack, da2));
1511   PetscCall(DMDestroy(&da3));
1512   PetscCall(DMDestroy(&da2));
1513   PetscCall(DMSetUp(pack));
1514   PetscCall(DMCreateMatrix(pack, &B));
1515   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
1516   PetscCall(MatSetOptionsPrefix(B, "thi_"));
1517 
1518   for (i = 0; i < thi->nlevels; i++) {
1519     PetscReal Lx = thi->Lx / thi->units->meter, Ly = thi->Ly / thi->units->meter, Lz = thi->Lz / thi->units->meter;
1520     PetscInt  Mx, My, Mz;
1521     PetscCall(DMCompositeGetEntries(pack, &da3, &da2));
1522     PetscCall(DMDAGetInfo(da3, 0, &Mz, &My, &Mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
1523     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g x %8.2g, num elements %3d x %3d x %3d (%8d), size (m) %g x %g x %g\n", i, Lx, Ly, Lz, Mx, My, Mz, Mx * My * Mz, Lx / Mx, Ly / My, 1000. / (Mz - 1)));
1524   }
1525 
1526   PetscCall(DMCreateGlobalVector(pack, &X));
1527   PetscCall(THIInitial(thi, pack, X));
1528 
1529   PetscCall(TSCreate(comm, &ts));
1530   PetscCall(TSSetDM(ts, pack));
1531   PetscCall(TSSetProblemType(ts, TS_NONLINEAR));
1532   PetscCall(TSMonitorSet(ts, THITSMonitor, thi, NULL));
1533   PetscCall(TSSetType(ts, TSTHETA));
1534   PetscCall(TSSetIFunction(ts, NULL, THIFunction, thi));
1535   PetscCall(TSSetIJacobian(ts, B, B, THIJacobian, thi));
1536   PetscCall(TSSetMaxTime(ts, 10.0));
1537   PetscCall(TSSetExactFinalTime(ts, TS_EXACTFINALTIME_STEPOVER));
1538   PetscCall(TSSetSolution(ts, X));
1539   PetscCall(TSSetTimeStep(ts, 1e-3));
1540   PetscCall(TSSetFromOptions(ts));
1541 
1542   PetscCall(TSSolve(ts, X));
1543   PetscCall(TSGetSolveTime(ts, &ftime));
1544   PetscCall(TSGetStepNumber(ts, &steps));
1545   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Steps %" PetscInt_FMT "  final time %g\n", steps, (double)ftime));
1546 
1547   if (0) PetscCall(THISolveStatistics(thi, ts, 0, "Full"));
1548 
1549   {
1550     PetscBool flg;
1551     char      filename[PETSC_MAX_PATH_LEN] = "";
1552     PetscCall(PetscOptionsGetString(NULL, NULL, "-o", filename, sizeof(filename), &flg));
1553     if (flg) PetscCall(THIDAVecView_VTK_XML(thi, pack, X, filename, NULL));
1554   }
1555 
1556   PetscCall(VecDestroy(&X));
1557   PetscCall(MatDestroy(&B));
1558   PetscCall(DMDestroy(&pack));
1559   PetscCall(TSDestroy(&ts));
1560   PetscCall(THIDestroy(&thi));
1561   PetscCall(PetscFinalize());
1562   return 0;
1563 }
1564