1 static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\ 2 \n\ 3 Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\ 4 using multigrid. The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\ 5 to p=4/3 in a p-Laplacian). The focus is on ISMIP-HOM experiments which assume periodic\n\ 6 boundary conditions in the x- and y-directions.\n\ 7 \n\ 8 Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\ 9 can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\ 10 \n\ 11 A VTK StructuredGrid output file can be written using the option -o filename.vts\n\ 12 \n\n"; 13 14 /* 15 The equations for horizontal velocity (u,v) are 16 17 - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0 18 - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0 19 20 where 21 22 eta = B/2 (epsilon + gamma)^((p-2)/2) 23 24 is the nonlinear effective viscosity with regularization epsilon and hardness parameter B, 25 written in terms of the second invariant 26 27 gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2 28 29 The surface boundary conditions are the natural conditions. The basal boundary conditions 30 are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2. 31 32 In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1). 33 34 The discretization is Q1 finite elements, managed by a DMDA. The grid is never distorted in the 35 map (x,y) plane, but the bed and surface may be bumpy. This is handled as usual in FEM, through 36 the Jacobian of the coordinate transformation from a reference element to the physical element. 
37 38 Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed 39 specially so that columns are never distributed, and are always contiguous in memory. 40 This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation, 41 and then indexing as vec[i][j][k]. The exotic coarse spaces require 2D DMDAs which are made to 42 use compatible domain decomposition relative to the 3D DMDAs. 43 44 There are two compile-time options: 45 46 NO_SSE2: 47 If the host supports SSE2, we use integration code that has been vectorized with SSE2 48 intrinsics, unless this macro is defined. The intrinsics speed up integration by about 49 30% on my architecture (P8700, gcc-4.5 snapshot). 50 51 COMPUTE_LOWER_TRIANGULAR: 52 The element matrices we assemble are lower-triangular so it is not necessary to compute 53 all entries explicitly. If this macro is defined, the lower-triangular entries are 54 computed explicitly. 55 56 */ 57 58 #if defined(PETSC_APPLE_FRAMEWORK) 59 #import <PETSc/petscsnes.h> 60 #import <PETSc/petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */ 61 #else 62 63 #include <petscsnes.h> 64 #include <petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */ 65 #endif 66 #include <ctype.h> /* toupper() */ 67 68 #if defined(__cplusplus) || defined(PETSC_HAVE_WINDOWS_COMPILERS) || defined(__PGI) 69 /* c++ cannot handle [_restrict_] notation like C does */ 70 #undef PETSC_RESTRICT 71 #define PETSC_RESTRICT 72 #endif 73 74 #if defined __SSE2__ 75 #include <emmintrin.h> 76 #endif 77 78 /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */ 79 #if !defined NO_SSE2 && !defined PETSC_USE_COMPLEX && !defined PETSC_USE_REAL_SINGLE && !defined PETSC_USE_REAL___FLOAT128 && !defined PETSC_USE_REAL___FP16 && defined __SSE2__ 80 #define USE_SSE2_KERNELS 1 81 #else 82 #define USE_SSE2_KERNELS 0 83 #endif 84 85 static 
/* PETSc class id for the THI object, registered lazily in THICreate() */
PetscClassId THI_CLASSID;

typedef enum {
  QUAD_GAUSS,
  QUAD_LOBATTO
} QuadratureType;
/* Option strings for -thi_quadrature; trailing entries are the enum name and prefix required by PetscOptionsEnum */
static const char *QuadratureTypes[] = {"gauss", "lobatto", "QuadratureType", "QUAD_", 0};
/* All eight 2x2x2 tensor-product Gauss points have unit weight on the reference hex */
PETSC_UNUSED static const PetscReal HexQWeights[8] = {1, 1, 1, 1, 1, 1, 1, 1};
/* 1D Gauss points +-1/sqrt(3) on [-1,1] */
PETSC_UNUSED static const PetscReal HexQNodes[]   = {-0.57735026918962573, 0.57735026918962573};
/* Shorthands used to build the interpolation/derivative tables below:
 *   G = 1/sqrt(3) (Gauss abscissa), H/L = value of the 1D linear basis at the
 *   near/far Gauss point, M/P = -+1/2 = derivative of the 1D linear basis. */
#define G 0.57735026918962573
#define H (0.5 * (1. + G))
#define L (0.5 * (1. - G))
#define M (-0.5)
#define P (0.5)
/* Special quadrature: Lobatto in horizontal, Gauss in vertical */
static const PetscReal HexQInterp_Lobatto[8][8] = {
  {H, 0, 0, 0, L, 0, 0, 0},
  {0, H, 0, 0, 0, L, 0, 0},
  {0, 0, H, 0, 0, 0, L, 0},
  {0, 0, 0, H, 0, 0, 0, L},
  {L, 0, 0, 0, H, 0, 0, 0},
  {0, L, 0, 0, 0, H, 0, 0},
  {0, 0, L, 0, 0, 0, H, 0},
  {0, 0, 0, L, 0, 0, 0, H}
};
/* Reference derivatives [quadrature point][basis function][xi,eta,zeta] for the Lobatto rule */
static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
  {{M * H, M * H, M}, {P * H, 0, 0}, {0, 0, 0}, {0, P * H, 0}, {M * L, M * L, P}, {P * L, 0, 0}, {0, 0, 0}, {0, P * L, 0}},
  {{M * H, 0, 0}, {P * H, M * H, M}, {0, P * H, 0}, {0, 0, 0}, {M * L, 0, 0}, {P * L, M * L, P}, {0, P * L, 0}, {0, 0, 0}},
  {{0, 0, 0}, {0, M * H, 0}, {P * H, P * H, M}, {M * H, 0, 0}, {0, 0, 0}, {0, M * L, 0}, {P * L, P * L, P}, {M * L, 0, 0}},
  {{0, M * H, 0}, {0, 0, 0}, {P * H, 0, 0}, {M * H, P * H, M}, {0, M * L, 0}, {0, 0, 0}, {P * L, 0, 0}, {M * L, P * L, P}},
  {{M * L, M * L, M}, {P * L, 0, 0}, {0, 0, 0}, {0, P * L, 0}, {M * H, M * H, P}, {P * H, 0, 0}, {0, 0, 0}, {0, P * H, 0}},
  {{M * L, 0, 0}, {P * L, M * L, M}, {0, P * L, 0}, {0, 0, 0}, {M * H, 0, 0}, {P * H, M * H, P}, {0, P * H, 0}, {0, 0, 0}},
  {{0, 0, 0}, {0, M * L, 0}, {P * L, P * L, M}, {M * L, 0, 0}, {0, 0, 0}, {0, M * H, 0}, {P * H, P * H, P}, {M * H, 0, 0}},
  {{0, M * L, 0}, {0, 0, 0}, {P * L, 0, 0}, {M * L, P * L, M}, {0, M * H, 0}, {0, 0, 0}, {P * H, 0, 0}, {M * H, P * H, P}}
};
/* Standard Gauss */
static const PetscReal HexQInterp_Gauss[8][8]
= {
  {H * H * H, L * H * H, L * L * H, H * L * H, H * H * L, L * H * L, L * L * L, H * L * L},
  {L * H * H, H * H * H, H * L * H, L * L * H, L * H * L, H * H * L, H * L * L, L * L * L},
  {L * L * H, H * L * H, H * H * H, L * H * H, L * L * L, H * L * L, H * H * L, L * H * L},
  {H * L * H, L * L * H, L * H * H, H * H * H, H * L * L, L * L * L, L * H * L, H * H * L},
  {H * H * L, L * H * L, L * L * L, H * L * L, H * H * H, L * H * H, L * L * H, H * L * H},
  {L * H * L, H * H * L, H * L * L, L * L * L, L * H * H, H * H * H, H * L * H, L * L * H},
  {L * L * L, H * L * L, H * H * L, L * H * L, L * L * H, H * L * H, H * H * H, L * H * H},
  {H * L * L, L * L * L, L * H * L, H * H * L, H * L * H, L * L * H, L * H * H, H * H * H}
};
/* Reference derivatives [quadrature point][basis function][xi,eta,zeta] for the standard Gauss rule */
static const PetscReal HexQDeriv_Gauss[8][8][3] = {
  {{M * H * H, H * M * H, H * H * M}, {P * H * H, L * M * H, L * H * M}, {P * L * H, L * P * H, L * L * M}, {M * L * H, H * P * H, H * L * M}, {M * H * L, H * M * L, H * H * P}, {P * H * L, L * M * L, L * H * P}, {P * L * L, L * P * L, L * L * P}, {M * L * L, H * P * L, H * L * P}},
  {{M * H * H, L * M * H, L * H * M}, {P * H * H, H * M * H, H * H * M}, {P * L * H, H * P * H, H * L * M}, {M * L * H, L * P * H, L * L * M}, {M * H * L, L * M * L, L * H * P}, {P * H * L, H * M * L, H * H * P}, {P * L * L, H * P * L, H * L * P}, {M * L * L, L * P * L, L * L * P}},
  {{M * L * H, L * M * H, L * L * M}, {P * L * H, H * M * H, H * L * M}, {P * H * H, H * P * H, H * H * M}, {M * H * H, L * P * H, L * H * M}, {M * L * L, L * M * L, L * L * P}, {P * L * L, H * M * L, H * L * P}, {P * H * L, H * P * L, H * H * P}, {M * H * L, L * P * L, L * H * P}},
  {{M * L * H, H * M * H, H * L * M}, {P * L * H, L * M * H, L * L * M}, {P * H * H, L * P * H, L * H * M}, {M * H * H, H * P * H, H * H * M}, {M * L * L, H * M * L, H * L * P}, {P * L * L, L * M * L, L * L * P}, {P * H * L, L * P * L, L * H * P}, {M * H * L, H * P * L, H * H * P}},
  {{M * H * L, H * M * L, H * H * M}, {P * H * L, L * M * L, L * H * M}, {P * L * L, L * P * L, L * L * M}, {M * L * L, H * P * L, H * L * M}, {M * H * H, H * M * H, H * H * P}, {P * H * H, L * M * H, L * H * P}, {P * L * H, L * P * H, L * L * P}, {M * L * H, H * P * H, H * L * P}},
  {{M * H * L, L * M * L, L * H * M}, {P * H * L, H * M * L, H * H * M}, {P * L * L, H * P * L, H * L * M}, {M * L * L, L * P * L, L * L * M}, {M * H * H, L * M * H, L * H * P}, {P * H * H, H * M * H, H * H * P}, {P * L * H, H * P * H, H * L * P}, {M * L * H, L * P * H, L * L * P}},
  {{M * L * L, L * M * L, L * L * M}, {P * L * L, H * M * L, H * L * M}, {P * H * L, H * P * L, H * H * M}, {M * H * L, L * P * L, L * H * M}, {M * L * H, L * M * H, L * L * P}, {P * L * H, H * M * H, H * L * P}, {P * H * H, H * P * H, H * H * P}, {M * H * H, L * P * H, L * H * P}},
  {{M * L * L, H * M * L, H * L * M}, {P * L * L, L * M * L, L * L * M}, {P * H * L, L * P * L, L * H * M}, {M * H * L, H * P * L, H * H * M}, {M * L * H, H * M * H, H * L * P}, {P * L * H, L * M * H, L * L * P}, {P * H * H, L * P * H, L * H * P}, {M * H * H, H * P * H, H * H * P}}
};
/* Active quadrature tables; pointed at the Gauss or Lobatto tables above once -thi_quadrature is parsed */
static const PetscReal (*HexQInterp)[8], (*HexQDeriv)[8][3];
/* Standard 2x2 Gauss quadrature for the bottom layer. */
static const PetscReal QuadQInterp[4][4] = {
  {H * H, L * H, L * L, H * L},
  {L * H, H * H, H * L, L * L},
  {L * L, H * L, H * H, L * H},
  {H * L, L * L, L * H, H * H}
};
static const PetscReal QuadQDeriv[4][4][2] = {
  {{M * H, M * H}, {P * H, M * L}, {P * L, P * L}, {M * L, P * H}},
  {{M * H, M * L}, {P * H, M * H}, {P * L, P * H}, {M * L, P * L}},
  {{M * L, M * L}, {P * L, M * H}, {P * H, P * H}, {M * H, P * L}},
  {{M * L, M * H}, {P * L, M * L}, {P * H, P * L}, {M * H, P * H}}
};
#undef G
#undef H
#undef L
#undef M
#undef P

/* Gather the 8 nodal values of hex element (i,j,k) from 3D array x into n, in the element's local node ordering */
#define HexExtract(x, i, j, k, n) \
  do { \
    (n)[0] = (x)[i][j][k]; \
    (n)[1] = (x)[i + 1][j][k]; \
    (n)[2] = (x)[i + 1][j + 1][k]; \
    (n)[3] = (x)[i][j + 1][k]; \
    (n)[4] = (x)[i][j][k + 1]; \
    (n)[5] = (x)[i + 1][j][k + 1]; \
    (n)[6] = (x)[i + 1][j + 1][k + 1]; \
    (n)[7] = (x)[i][j + 1][k + 1]; \
  } while (0)

/* Same ordering as HexExtract, but gathers pointers so element residuals can be summed in place */
#define HexExtractRef(x, i, j, k, n) \
  do { \
    (n)[0] = &(x)[i][j][k]; \
    (n)[1] = &(x)[i + 1][j][k]; \
    (n)[2] = &(x)[i + 1][j + 1][k]; \
    (n)[3] = &(x)[i][j + 1][k]; \
    (n)[4] =
&(x)[i][j][k + 1]; \
    (n)[5] = &(x)[i + 1][j][k + 1]; \
    (n)[6] = &(x)[i + 1][j + 1][k + 1]; \
    (n)[7] = &(x)[i][j + 1][k + 1]; \
  } while (0)

/* Gather the 4 nodal values of 2D quad element (i,j) from array x into n */
#define QuadExtract(x, i, j, n) \
  do { \
    (n)[0] = (x)[i][j]; \
    (n)[1] = (x)[i + 1][j]; \
    (n)[2] = (x)[i + 1][j + 1]; \
    (n)[3] = (x)[i][j + 1]; \
  } while (0)

/* Gradient of the nodal field zn at a quadrature point: dz[c] = sum_l dphi[l][c] * zn[l] */
static void HexGrad(const PetscReal dphi[][3], const PetscReal zn[], PetscReal dz[]) {
  PetscInt i;
  dz[0] = dz[1] = dz[2] = 0;
  for (i = 0; i < 8; i++) {
    dz[0] += dphi[i][0] * zn[i];
    dz[1] += dphi[i][1] * zn[i];
    dz[2] += dphi[i][2] * zn[i];
  }
}

/* Evaluate basis values phi, physical-space derivatives dphi, and the quadrature
 * weight times Jacobian determinant jw at quadrature point q of a hex element.
 * The map is affine in x,y (uniform spacing hx,hy) but the z-coordinate varies,
 * so the Jacobian is lower triangular with last row dz (gradient of z at q). */
static void HexComputeGeometry(PetscInt q, PetscReal hx, PetscReal hy, const PetscReal dz[PETSC_RESTRICT], PetscReal phi[PETSC_RESTRICT], PetscReal dphi[PETSC_RESTRICT][3], PetscReal *PETSC_RESTRICT jw) {
  const PetscReal jac[3][3] = {
    {hx / 2, 0,      0    },
    {0,      hy / 2, 0    },
    {dz[0],  dz[1],  dz[2]}
  };
  /* Closed-form inverse of the lower-triangular Jacobian */
  const PetscReal ijac[3][3] = {
    {1 / jac[0][0],                         0,                                     0            },
    {0,                                     1 / jac[1][1],                         0            },
    {-jac[2][0] / (jac[0][0] * jac[2][2]), -jac[2][1] / (jac[1][1] * jac[2][2]),   1 / jac[2][2]}
  };
  const PetscReal jdet = jac[0][0] * jac[1][1] * jac[2][2];
  PetscInt i;

  for (i = 0; i < 8; i++) {
    const PetscReal *dphir = HexQDeriv[q][i];
    phi[i] = HexQInterp[q][i];
    /* Chain rule: physical derivative = reference derivative times inverse Jacobian */
    dphi[i][0] = dphir[0] * ijac[0][0] + dphir[1] * ijac[1][0] + dphir[2] * ijac[2][0];
    dphi[i][1] = dphir[0] * ijac[0][1] + dphir[1] * ijac[1][1] + dphir[2] * ijac[2][1];
    dphi[i][2] = dphir[0] * ijac[0][2] + dphir[1] * ijac[1][2] + dphir[2] * ijac[2][2];
  }
  /* Quadrature weight is 1 for this rule (see HexQWeights), so jw is just the determinant */
  *jw = 1.0 * jdet;
}

typedef struct _p_THI *THI;
typedef struct _n_Units *Units;

/* Horizontal velocity unknowns at a grid node */
typedef struct {
  PetscScalar u, v;
} Node;

/* Per-column (2D) parameters describing the ice geometry and basal friction */
typedef struct {
  PetscScalar b;     /* bed */
  PetscScalar h;     /* thickness */
  PetscScalar beta2; /* friction */
} PrmNode;

/* Running min/max over all iterations (min,max) and the current iteration (cmin,cmax) */
typedef struct {
  PetscReal min, max, cmin, cmax;
} PRange;

typedef enum
{
  THIASSEMBLY_TRIDIAGONAL,
  THIASSEMBLY_FULL
} THIAssemblyMode;

/* Main application context: problem geometry, rheology, friction law, and solver options */
struct _p_THI {
  PETSCHEADER(int);
  void (*initialize)(THI, PetscReal x, PetscReal y, PrmNode *p); /* fills bed/thickness/friction at (x,y); chosen by -thi_hom */
  PetscInt  zlevels;
  PetscReal Lx, Ly, Lz; /* Model domain */
  PetscReal alpha;      /* Bed angle */
  Units     units;
  PetscReal dirichlet_scale;    /* scaling of Dirichlet (no-slip) rows to balance the operator diagonal */
  PetscReal ssa_friction_scale; /* scaling of slip conditions in the 2D (SSA) coarse assembly */
  PRange    eta;   /* observed range of effective viscosity */
  PRange    beta2; /* observed range of basal friction */
  struct {
    PetscReal Bd2, eps, exponent; /* Glen-law viscosity parameters; lazily initialized in THIViscosity */
  } viscosity;
  struct {
    PetscReal irefgam, eps2, exponent, refvel, epsvel; /* sliding-law parameters; lazily initialized in THIFriction */
  } friction;
  PetscReal rhog; /* rho * g, used to scale residuals to O(1) */
  PetscBool no_slip;
  PetscBool tridiagonal;
  PetscBool coarse2d;
  PetscBool verbose;
  MatType   mattype;
};

/* Scaling of SI units so that domain size and solution are O(1); see -units_* options */
struct _n_Units {
  /* fundamental */
  PetscReal meter;
  PetscReal kilogram;
  PetscReal second;
  /* derived */
  PetscReal Pascal;
  PetscReal year;
};

static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *, Node ***, Mat, Mat, THI);
static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *, Node ***, Mat, Mat, THI);
static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *, Node **, Mat, Mat, THI);

/* Compute the 8 nodal z-coordinates zn of hex element at vertical index k (of zm levels),
 * by linearly distributing each column's thickness h above the bed b. */
static void PrmHexGetZ(const PrmNode pn[], PetscInt k, PetscInt zm, PetscReal zn[]) {
  const PetscScalar zm1 = zm - 1, znl[8] = {pn[0].b + pn[0].h * (PetscScalar)k / zm1, pn[1].b + pn[1].h * (PetscScalar)k / zm1, pn[2].b + pn[2].h * (PetscScalar)k / zm1, pn[3].b + pn[3].h * (PetscScalar)k / zm1,
                                            pn[0].b + pn[0].h * (PetscScalar)(k + 1) / zm1, pn[1].b + pn[1].h * (PetscScalar)(k + 1) / zm1, pn[2].b + pn[2].h * (PetscScalar)(k + 1) / zm1, pn[3].b + pn[3].h * (PetscScalar)(k + 1) / zm1};
  PetscInt i;
  for (i = 0; i < 8; i++) zn[i] = PetscRealPart(znl[i]);
}

/* Tests A and C are from the ISMIP-HOM paper (Pattyn et al.
2008) */

/* ISMIP-HOM A: inclined slab with sinusoidal bed bumps; the bed is frozen
 * (no slip), expressed here by an effectively infinite friction coefficient. */
static void THIInitialize_HOM_A(THI thi, PetscReal x, PetscReal y, PrmNode *p) {
  Units units = thi->units;
  PetscReal s = -x * PetscSinReal(thi->alpha); /* surface elevation of the tilted slab */

  p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly);
  p->h = s - p->b;
  p->beta2 = 1e30;
}

/* ISMIP-HOM C: flat-bottomed slab of uniform thickness with sinusoidally varying basal friction */
static void THIInitialize_HOM_C(THI thi, PetscReal x, PetscReal y, PrmNode *p) {
  Units units = thi->units;
  PetscReal s = -x * PetscSinReal(thi->alpha);

  p->b = s - 1000 * units->meter;
  p->h = s - p->b;
  /* tau_b = beta2 v is a stress (Pa) */
  p->beta2 = 1000 * (1 + PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly)) * units->Pascal * units->year / units->meter;
}

/* These are just toys */

/* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
static void THIInitialize_HOM_X(THI thi, PetscReal xx, PetscReal yy, PrmNode *p) {
  Units units = thi->units;
  PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
  PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);
  p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
  p->h = s - p->b;
  /* Sticky disk of radius 1 (in scaled coordinates), frictionless outside */
  p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter;
}

/* Like Z, but with 200 meter cliffs */
static void THIInitialize_HOM_Y(THI thi, PetscReal xx, PetscReal yy, PrmNode *p) {
  Units units = thi->units;
  PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
  PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);

  p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
  /* Raise high parts of the bed by 200 m, creating discontinuous cliffs */
  if (PetscRealPart(p->b) > -700 * units->meter) p->b += 200 * units->meter;
  p->h = s - p->b;
  p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter;
}

/* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */
static void THIInitialize_HOM_Z(THI thi, PetscReal xx, PetscReal yy, PrmNode *p) {
  Units units = thi->units;
  PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */
  PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha);

  p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI);
  p->h = s - p->b;
  p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter;
}

/* Evaluate the sliding law: basal friction *beta2 and its derivative *dbeta2
 * with respect to the sliding invariant gam, given the reference coefficient
 * rbeta2 from the parameter field. Regularization constants are computed
 * lazily on first call from -thi_friction_refvel / -thi_friction_epsvel. */
static void THIFriction(THI thi, PetscReal rbeta2, PetscReal gam, PetscReal *beta2, PetscReal *dbeta2) {
  if (thi->friction.irefgam == 0) {
    Units units = thi->units;
    thi->friction.irefgam = 1.
/ (0.5 * PetscSqr(thi->friction.refvel * units->meter / units->year));
    thi->friction.eps2 = 0.5 * PetscSqr(thi->friction.epsvel * units->meter / units->year) * thi->friction.irefgam;
  }
  if (thi->friction.exponent == 0) {
    /* Linear (Navier) slip: friction independent of sliding speed */
    *beta2 = rbeta2;
    *dbeta2 = 0;
  } else {
    /* Power-law slip with regularized invariant */
    *beta2 = rbeta2 * PetscPowReal(thi->friction.eps2 + gam * thi->friction.irefgam, thi->friction.exponent);
    *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam * thi->friction.irefgam) * thi->friction.irefgam;
  }
}

/* Evaluate the Glen-law effective viscosity *eta and its derivative *deta with
 * respect to the second invariant gam. Hardness and regularization are computed
 * lazily on first call (Glen exponent n=3, i.e. p=4/3 in p-Laplacian form). */
static void THIViscosity(THI thi, PetscReal gam, PetscReal *eta, PetscReal *deta) {
  PetscReal Bd2, eps, exponent;
  if (thi->viscosity.Bd2 == 0) {
    Units units = thi->units;
    const PetscReal n = 3.,                                         /* Glen exponent */
      p = 1. + 1. / n,                                              /* for Stokes */
      A = 1.e-16 * PetscPowReal(units->Pascal, -n) / units->year,   /* softness parameter (Pa^{-n}/s) */
      B = PetscPowReal(A, -1. / n);                                 /* hardness parameter */
    thi->viscosity.Bd2 = B / 2;
    thi->viscosity.exponent = (p - 2) / 2;
    thi->viscosity.eps = 0.5 * PetscSqr(1e-5 / units->year); /* regularization keeps eta finite at zero strain rate */
  }
  Bd2 = thi->viscosity.Bd2;
  exponent = thi->viscosity.exponent;
  eps = thi->viscosity.eps;
  *eta = Bd2 * PetscPowReal(eps + gam, exponent);
  *deta = exponent * (*eta) / (eps + gam);
}

/* Extend the running interval [*min,*max] to include x */
static void RangeUpdate(PetscReal *min, PetscReal *max, PetscReal x) {
  if (x < *min) *min = x;
  if (x > *max) *max = x;
}

/* Reset both the cumulative and current ranges to an empty interval */
static void PRangeClear(PRange *p) {
  p->cmin = p->min = 1e100;
  p->cmax = p->max = -1e100;
}

/* Record the current iteration's range (cmin,cmax) and fold it into the cumulative range */
static PetscErrorCode PRangeMinMax(PRange *p, PetscReal min, PetscReal max) {
  PetscFunctionBeginUser;
  p->cmin = min;
  p->cmax = max;
  if (min < p->min) p->min = min;
  if (max > p->max) p->max = max;
  PetscFunctionReturn(0);
}

/* Reference-counted destructor; frees owned resources only when the last reference is dropped */
static PetscErrorCode THIDestroy(THI *thi) {
  PetscFunctionBeginUser;
  if (!*thi) PetscFunctionReturn(0);
  if (--((PetscObject)(*thi))->refct > 0) {
    *thi =
0;
    PetscFunctionReturn(0);
  }
  PetscCall(PetscFree((*thi)->units));
  PetscCall(PetscFree((*thi)->mattype));
  PetscCall(PetscHeaderDestroy(thi));
  PetscFunctionReturn(0);
}

/* Create and configure a THI context on comm: registers the class id (once),
 * sets up unit scaling, parses all -thi_* options, selects the ISMIP-HOM
 * experiment and quadrature rule, and converts inputs to scaled units. */
static PetscErrorCode THICreate(MPI_Comm comm, THI *inthi) {
  static PetscBool registered = PETSC_FALSE;
  THI thi;
  Units units;

  PetscFunctionBeginUser;
  *inthi = 0;
  if (!registered) {
    PetscCall(PetscClassIdRegister("Toy Hydrostatic Ice", &THI_CLASSID));
    registered = PETSC_TRUE;
  }
  PetscCall(PetscHeaderCreate(thi, THI_CLASSID, "THI", "Toy Hydrostatic Ice", "", comm, THIDestroy, 0));

  PetscCall(PetscNew(&thi->units));
  units = thi->units;
  /* Default scalings chosen so the rescaled problem is O(1) */
  units->meter = 1e-2;
  units->second = 1e-7;
  units->kilogram = 1e-12;

  PetscOptionsBegin(comm, NULL, "Scaled units options", "");
  {
    PetscCall(PetscOptionsReal("-units_meter", "1 meter in scaled length units", "", units->meter, &units->meter, NULL));
    PetscCall(PetscOptionsReal("-units_second", "1 second in scaled time units", "", units->second, &units->second, NULL));
    PetscCall(PetscOptionsReal("-units_kilogram", "1 kilogram in scaled mass units", "", units->kilogram, &units->kilogram, NULL));
  }
  PetscOptionsEnd();
  units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
  units->year = 31556926. * units->second; /* seconds per year */

  /* Defaults (in meters/degrees) before option parsing */
  thi->Lx = 10.e3;
  thi->Ly = 10.e3;
  thi->Lz = 1000;
  thi->dirichlet_scale = 1;
  thi->verbose = PETSC_FALSE;

  PetscOptionsBegin(comm, NULL, "Toy Hydrostatic Ice options", "");
  {
    QuadratureType quad = QUAD_GAUSS;
    char homexp[] = "A";
    char mtype[256] = MATSBAIJ;
    PetscReal L, m = 1.0;
    PetscBool flg;
    L = thi->Lx;
    PetscCall(PetscOptionsReal("-thi_L", "Domain size (m)", "", L, &L, &flg));
    if (flg) thi->Lx = thi->Ly = L;
    PetscCall(PetscOptionsReal("-thi_Lx", "X Domain size (m)", "", thi->Lx, &thi->Lx, NULL));
    PetscCall(PetscOptionsReal("-thi_Ly", "Y Domain size (m)", "", thi->Ly, &thi->Ly, NULL));
    PetscCall(PetscOptionsReal("-thi_Lz", "Z Domain size (m)", "", thi->Lz, &thi->Lz, NULL));
    PetscCall(PetscOptionsString("-thi_hom", "ISMIP-HOM experiment (A or C)", "", homexp, homexp, sizeof(homexp), NULL));
    /* Each experiment selects an initializer, a basal boundary condition, and a default bed angle */
    switch (homexp[0] = toupper(homexp[0])) {
    case 'A':
      thi->initialize = THIInitialize_HOM_A;
      thi->no_slip = PETSC_TRUE;
      thi->alpha = 0.5;
      break;
    case 'C':
      thi->initialize = THIInitialize_HOM_C;
      thi->no_slip = PETSC_FALSE;
      thi->alpha = 0.1;
      break;
    case 'X':
      thi->initialize = THIInitialize_HOM_X;
      thi->no_slip = PETSC_FALSE;
      thi->alpha = 0.3;
      break;
    case 'Y':
      thi->initialize = THIInitialize_HOM_Y;
      thi->no_slip = PETSC_FALSE;
      thi->alpha = 0.5;
      break;
    case 'Z':
      thi->initialize = THIInitialize_HOM_Z;
      thi->no_slip = PETSC_FALSE;
      thi->alpha = 0.5;
      break;
    default:
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "HOM experiment '%c' not implemented", homexp[0]);
    }
    PetscCall(PetscOptionsEnum("-thi_quadrature", "Quadrature to use for 3D elements", "", QuadratureTypes, (PetscEnum)quad, (PetscEnum *)&quad, NULL));
    switch (quad) {
    case QUAD_GAUSS:
      HexQInterp = HexQInterp_Gauss;
      HexQDeriv = HexQDeriv_Gauss;
      break;
    case QUAD_LOBATTO:
      HexQInterp = HexQInterp_Lobatto;
      HexQDeriv = HexQDeriv_Lobatto;
      break;
    }
    PetscCall(PetscOptionsReal("-thi_alpha", "Bed angle (degrees)", "", thi->alpha, &thi->alpha, NULL));

    thi->friction.refvel = 100.;
    thi->friction.epsvel = 1.;

    PetscCall(PetscOptionsReal("-thi_friction_refvel", "Reference velocity for sliding", "", thi->friction.refvel, &thi->friction.refvel, NULL));
    PetscCall(PetscOptionsReal("-thi_friction_epsvel", "Regularization velocity for sliding", "", thi->friction.epsvel, &thi->friction.epsvel, NULL));
    PetscCall(PetscOptionsReal("-thi_friction_m", "Friction exponent, 0=Coulomb, 1=Navier", "", m, &m, NULL));

    /* Stored as exponent of the sliding invariant gamma = |v|^2/2, hence (m-1)/2 */
    thi->friction.exponent = (m - 1) / 2;

    PetscCall(PetscOptionsReal("-thi_dirichlet_scale", "Scale Dirichlet boundary conditions by this factor", "", thi->dirichlet_scale, &thi->dirichlet_scale, NULL));
    PetscCall(PetscOptionsReal("-thi_ssa_friction_scale", "Scale slip boundary conditions by this factor in SSA (2D) assembly", "", thi->ssa_friction_scale, &thi->ssa_friction_scale, NULL));
    PetscCall(PetscOptionsBool("-thi_coarse2d", "Use a 2D coarse space corresponding to SSA", "", thi->coarse2d, &thi->coarse2d, NULL));
    PetscCall(PetscOptionsBool("-thi_tridiagonal", "Assemble a tridiagonal system (column coupling only) on the finest level", "", thi->tridiagonal, &thi->tridiagonal, NULL));
    PetscCall(PetscOptionsFList("-thi_mat_type", "Matrix type", "MatSetType", MatList, mtype, (char *)mtype, sizeof(mtype), NULL));
    PetscCall(PetscStrallocpy(mtype, (char **)&thi->mattype));
    PetscCall(PetscOptionsBool("-thi_verbose", "Enable verbose output (like matrix sizes and statistics)", "", thi->verbose, &thi->verbose, NULL));
  }
  PetscOptionsEnd();

  /* dimensionalize */
  thi->Lx *= units->meter;
  thi->Ly *= units->meter;
  thi->Lz *= units->meter;
  thi->alpha *= PETSC_PI / 180;

  PRangeClear(&thi->eta);
  PRangeClear(&thi->beta2);

  {
    /* Characteristic scales used only for the diagnostic output below */
    PetscReal u = 1000 * units->meter / (3e7 * units->second), gradu = u / (100 * units->meter), eta, deta, rho = 910 * units->kilogram / PetscPowReal(units->meter, 3), grav = 9.81 * units->meter / PetscSqr(units->second),
              driving = rho * grav * PetscSinReal(thi->alpha) * 1000 * units->meter;
    THIViscosity(thi, 0.5 * gradu * gradu, &eta, &deta);
    thi->rhog = rho * grav;
    if (thi->verbose) {
      PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Units: meter %8.2g second %8.2g kg %8.2g Pa %8.2g\n", (double)units->meter, (double)units->second, (double)units->kilogram, (double)units->Pascal));
      PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n", (double)thi->Lx, (double)thi->Ly, (double)thi->Lz, (double)(rho * grav * 1e3 * units->meter), (double)driving));
      PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)u, (double)gradu, (double)eta, (double)(2 * eta * gradu), (double)(2 * eta * gradu / driving)));
      THIViscosity(thi, 0.5 * PetscSqr(1e-3 * gradu), &eta, &deta);
      PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Small velocity 1m/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)(1e-3 * u), (double)(1e-3 * gradu), (double)eta, (double)(2 * eta * 1e-3 * gradu), (double)(2 * eta * 1e-3 * gradu / driving)));
    }
  }

  *inthi = thi;
  PetscFunctionReturn(0);
}

/* Fill the local parameter vector prm (bed, thickness, friction) by evaluating
 * the selected experiment's initializer at each node of the 2D parameter DMDA.
 * Note the swapped index order (y,x) matching the column-contiguous layout. */
static PetscErrorCode THIInitializePrm(THI thi, DM da2prm, Vec prm) {
  PrmNode **p;
  PetscInt i, j, xs, xm, ys, ym, mx, my;

  PetscFunctionBeginUser;
  PetscCall(DMDAGetGhostCorners(da2prm, &ys, &xs, 0, &ym, &xm, 0));
  PetscCall(DMDAGetInfo(da2prm, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
  PetscCall(DMDAVecGetArray(da2prm, prm, &p));
  for (i = xs; i < xs + xm; i++) {
    for (j = ys; j < ys + ym; j++) {
      PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my;
      thi->initialize(thi, xx, yy, &p[i][j]);
    }
  }
  PetscCall(DMDAVecRestoreArray(da2prm, prm, &p));
  PetscFunctionReturn(0);
}

/* Attach a compatible 2D parameter DMDA (and its initialized local vector) to
 * dm, and install the appropriate Jacobian routine for this level. Works for
 * both 3D fine DMs and 2D (SSA) coarse DMs. */
static PetscErrorCode THISetUpDM(THI thi, DM dm) {
  PetscInt refinelevel, coarsenlevel, level, dim, Mx, My, Mz, mx, my, s;
  DMDAStencilType st;
  DM da2prm;
  Vec X;

  PetscFunctionBeginUser;
  PetscCall(DMDAGetInfo(dm, &dim, &Mz, &My, &Mx, 0, &my, &mx, 0, &s, 0, 0, 0, &st));
  /* For a 2D DM the argument positions shift; re-query with the 2D layout */
  if (dim == 2) PetscCall(DMDAGetInfo(dm, &dim, &My, &Mx, 0, &my, &mx, 0, 0, &s, 0, 0, 0, &st));
  PetscCall(DMGetRefineLevel(dm, &refinelevel));
  PetscCall(DMGetCoarsenLevel(dm, &coarsenlevel));
  level = refinelevel - coarsenlevel;
  /* 2D parameter DMDA uses the same (y,x) decomposition as dm so columns line up */
  PetscCall(DMDACreate2d(PetscObjectComm((PetscObject)thi), DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, My, Mx, my, mx, sizeof(PrmNode) / sizeof(PetscScalar), s, 0, 0, &da2prm));
  PetscCall(DMSetUp(da2prm));
  PetscCall(DMCreateLocalVector(da2prm, &X));
  {
    PetscReal Lx = thi->Lx / thi->units->meter, Ly = thi->Ly / thi->units->meter, Lz = thi->Lz / thi->units->meter;
    if (dim == 2) {
      PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g, num elements %" PetscInt_FMT " x %" PetscInt_FMT " (%" PetscInt_FMT "), size (m) %g x %g\n", level, (double)Lx, (double)Ly, Mx, My, Mx * My, (double)(Lx / Mx), (double)(Ly / My)));
    } else {
      PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g x %8.2g, num elements %" PetscInt_FMT " x %" PetscInt_FMT " x %" PetscInt_FMT " (%" PetscInt_FMT "), size (m) %g x %g x %g\n", level, (double)Lx, (double)Ly, (double)Lz, Mx, My, Mz, Mx * My * Mz, (double)(Lx / Mx), (double)(Ly / My), (double)(1000.
/ (Mz - 1))));
    }
  }
  PetscCall(THIInitializePrm(thi, da2prm, X));
  if (thi->tridiagonal) { /* Reset coarse Jacobian evaluation */
    PetscCall(DMDASNESSetJacobianLocal(dm, (DMDASNESJacobian)THIJacobianLocal_3D_Full, thi));
  }
  if (thi->coarse2d) PetscCall(DMDASNESSetJacobianLocal(dm, (DMDASNESJacobian)THIJacobianLocal_2D, thi));
  /* Compose the parameter DMDA and vector with dm; composition takes a reference, so drop ours */
  PetscCall(PetscObjectCompose((PetscObject)dm, "DMDA2Prm", (PetscObject)da2prm));
  PetscCall(PetscObjectCompose((PetscObject)dm, "DMDA2Prm_Vec", (PetscObject)X));
  PetscCall(DMDestroy(&da2prm));
  PetscCall(VecDestroy(&X));
  PetscFunctionReturn(0);
}

/* Coarsen hook: set up the newly created coarse DM and re-register the hook so
 * further coarsening is handled too; the coarsest level uses AIJ for direct solves. */
static PetscErrorCode DMCoarsenHook_THI(DM dmf, DM dmc, void *ctx) {
  THI thi = (THI)ctx;
  PetscInt rlevel, clevel;

  PetscFunctionBeginUser;
  PetscCall(THISetUpDM(thi, dmc));
  PetscCall(DMGetRefineLevel(dmc, &rlevel));
  PetscCall(DMGetCoarsenLevel(dmc, &clevel));
  if (rlevel - clevel == 0) PetscCall(DMSetMatType(dmc, MATAIJ));
  PetscCall(DMCoarsenHookAdd(dmc, DMCoarsenHook_THI, NULL, thi));
  PetscFunctionReturn(0);
}

/* Refine hook: set up the refined DM and propagate both hooks to it */
static PetscErrorCode DMRefineHook_THI(DM dmc, DM dmf, void *ctx) {
  THI thi = (THI)ctx;

  PetscFunctionBeginUser;
  PetscCall(THISetUpDM(thi, dmf));
  PetscCall(DMSetMatType(dmf, thi->mattype));
  PetscCall(DMRefineHookAdd(dmf, DMRefineHook_THI, NULL, thi));
  /* With grid sequencing, a formerly-refined DM will later be coarsened by PCSetUp_MG */
  PetscCall(DMCoarsenHookAdd(dmf, DMCoarsenHook_THI, NULL, thi));
  PetscFunctionReturn(0);
}

/* Get array access to the parameter field composed with da by THISetUpDM;
 * must be paired with THIDARestorePrm. */
static PetscErrorCode THIDAGetPrm(DM da, PrmNode ***prm) {
  DM da2prm;
  Vec X;

  PetscFunctionBeginUser;
  PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm", (PetscObject *)&da2prm));
  PetscCheck(da2prm, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm composed with given DMDA");
  PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm_Vec", (PetscObject *)&X));
  PetscCheck(X, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm_Vec composed with given DMDA");
  PetscCall(DMDAVecGetArray(da2prm, X, prm));
  PetscFunctionReturn(0);
}

/* Release array access acquired by THIDAGetPrm */
static PetscErrorCode THIDARestorePrm(DM da, PrmNode ***prm) {
  DM da2prm;
  Vec X;

  PetscFunctionBeginUser;
  PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm", (PetscObject *)&da2prm));
  PetscCheck(da2prm, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm composed with given DMDA");
  PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm_Vec", (PetscObject *)&X));
  PetscCheck(X, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm_Vec composed with given DMDA");
  PetscCall(DMDAVecRestoreArray(da2prm, X, prm));
  PetscFunctionReturn(0);
}

/* Compute the initial guess for the SNES solve. Note the index order: the DMDA
 * is set up so that vec[i][j][k] indexes (x,y,z) with contiguous columns. */
static PetscErrorCode THIInitial(SNES snes, Vec X, void *ctx) {
  THI thi;
  PetscInt i, j, k, xs, xm, ys, ym, zs, zm, mx, my;
  PetscReal hx, hy;
  PrmNode **prm;
  Node ***x;
  DM da;

  PetscFunctionBeginUser;
  PetscCall(SNESGetDM(snes, &da));
  PetscCall(DMGetApplicationContext(da, &thi));
  PetscCall(DMDAGetInfo(da, 0, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
  PetscCall(DMDAGetCorners(da, &zs, &ys, &xs, &zm, &ym, &xm));
  PetscCall(DMDAVecGetArray(da, X, &x));
  PetscCall(THIDAGetPrm(da, &prm));
  hx = thi->Lx / mx;
  hy = thi->Ly / my;
  for (i = xs; i < xs + xm; i++) {
    for (j = ys; j < ys + ym; j++) {
      for (k = zs; k < zs + zm; k++) {
        /* Centered-difference surface slope gives the driving stress direction;
         * the leading 0. factor deliberately zeroes the initial guess while
         * keeping the expression available for experimentation. */
        const PetscScalar zm1 = zm - 1, drivingx = thi->rhog * (prm[i + 1][j].b + prm[i + 1][j].h - prm[i - 1][j].b - prm[i - 1][j].h) / (2 * hx), drivingy = thi->rhog * (prm[i][j + 1].b + prm[i][j + 1].h - prm[i][j - 1].b - prm[i][j - 1].h) / (2 * hy);
        x[i][j][k].u = 0. * drivingx * prm[i][j].h * (PetscScalar)k / zm1;
        x[i][j][k].v = 0.
* drivingy * prm[i][j].h * (PetscScalar)k / zm1;
      }
    }
  }
  PetscCall(DMDAVecRestoreArray(da, X, &x));
  PetscCall(THIDARestorePrm(da, &prm));
  PetscFunctionReturn(0);
}

/* At one 3D quadrature point: interpolate velocity (*u,*v) and its gradient
 * (du,dv) from the element nodal values n, form the second invariant gam, and
 * evaluate the effective viscosity *eta and its derivative *deta. */
static void PointwiseNonlinearity(THI thi, const Node n[PETSC_RESTRICT], const PetscReal phi[PETSC_RESTRICT], PetscReal dphi[PETSC_RESTRICT][3], PetscScalar *PETSC_RESTRICT u, PetscScalar *PETSC_RESTRICT v, PetscScalar du[PETSC_RESTRICT], PetscScalar dv[PETSC_RESTRICT], PetscReal *eta, PetscReal *deta) {
  PetscInt l, ll;
  PetscScalar gam;

  du[0] = du[1] = du[2] = 0;
  dv[0] = dv[1] = dv[2] = 0;
  *u = 0;
  *v = 0;
  for (l = 0; l < 8; l++) {
    *u += phi[l] * n[l].u;
    *v += phi[l] * n[l].v;
    for (ll = 0; ll < 3; ll++) {
      du[ll] += dphi[l][ll] * n[l].u;
      dv[ll] += dphi[l][ll] * n[l].v;
    }
  }
  /* Second invariant gamma (see header comment): includes vertical shear terms */
  gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0] * dv[1] + 0.25 * PetscSqr(du[1] + dv[0]) + 0.25 * PetscSqr(du[2]) + 0.25 * PetscSqr(dv[2]);
  THIViscosity(thi, PetscRealPart(gam), eta, deta);
}

/* 2D (SSA) analogue of PointwiseNonlinearity: same invariant without the
 * vertical shear contributions, on a 4-node quad element. */
static void PointwiseNonlinearity2D(THI thi, Node n[], PetscReal phi[], PetscReal dphi[4][2], PetscScalar *u, PetscScalar *v, PetscScalar du[], PetscScalar dv[], PetscReal *eta, PetscReal *deta) {
  PetscInt l, ll;
  PetscScalar gam;

  du[0] = du[1] = 0;
  dv[0] = dv[1] = 0;
  *u = 0;
  *v = 0;
  for (l = 0; l < 4; l++) {
    *u += phi[l] * n[l].u;
    *v += phi[l] * n[l].v;
    for (ll = 0; ll < 2; ll++) {
      du[ll] += dphi[l][ll] * n[l].u;
      dv[ll] += dphi[l][ll] * n[l].v;
    }
  }
  gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0] * dv[1] + 0.25 * PetscSqr(du[1] + dv[0]);
  THIViscosity(thi, PetscRealPart(gam), eta, deta);
}

/* Residual evaluation on the 3D DMDA. Remember the reversed axis convention:
 * info->z* holds the x-direction, info->x* holds the (contiguous) z-direction. */
static PetscErrorCode THIFunctionLocal(DMDALocalInfo *info, Node ***x, Node ***f, THI thi) {
  PetscInt xs, ys, xm, ym, zm, i, j, k, q, l;
  PetscReal hx, hy, etamin, etamax, beta2min, beta2max;
  PrmNode **prm;

  PetscFunctionBeginUser;
  xs = info->zs;
  ys
= info->ys; 735 xm = info->zm; 736 ym = info->ym; 737 zm = info->xm; 738 hx = thi->Lx / info->mz; 739 hy = thi->Ly / info->my; 740 741 etamin = 1e100; 742 etamax = 0; 743 beta2min = 1e100; 744 beta2max = 0; 745 746 PetscCall(THIDAGetPrm(info->da, &prm)); 747 748 for (i = xs; i < xs + xm; i++) { 749 for (j = ys; j < ys + ym; j++) { 750 PrmNode pn[4]; 751 QuadExtract(prm, i, j, pn); 752 for (k = 0; k < zm - 1; k++) { 753 PetscInt ls = 0; 754 Node n[8], *fn[8]; 755 PetscReal zn[8], etabase = 0; 756 PrmHexGetZ(pn, k, zm, zn); 757 HexExtract(x, i, j, k, n); 758 HexExtractRef(f, i, j, k, fn); 759 if (thi->no_slip && k == 0) { 760 for (l = 0; l < 4; l++) n[l].u = n[l].v = 0; 761 /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */ 762 ls = 4; 763 } 764 for (q = 0; q < 8; q++) { 765 PetscReal dz[3], phi[8], dphi[8][3], jw, eta, deta; 766 PetscScalar du[3], dv[3], u, v; 767 HexGrad(HexQDeriv[q], zn, dz); 768 HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw); 769 PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta); 770 jw /= thi->rhog; /* scales residuals to be O(1) */ 771 if (q == 0) etabase = eta; 772 RangeUpdate(&etamin, &etamax, eta); 773 for (l = ls; l < 8; l++) { /* test functions */ 774 const PetscReal ds[2] = {-PetscSinReal(thi->alpha), 0}; 775 const PetscReal pp = phi[l], *dp = dphi[l]; 776 fn[l]->u += dp[0] * jw * eta * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * du[2] + pp * jw * thi->rhog * ds[0]; 777 fn[l]->v += dp[1] * jw * eta * (2. * du[0] + 4. * dv[1]) + dp[0] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * dv[2] + pp * jw * thi->rhog * ds[1]; 778 } 779 } 780 if (k == 0) { /* we are on a bottom face */ 781 if (thi->no_slip) { 782 /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary 783 * conditions. 
After shenanigans above, etabase contains the effective viscosity at the closest quadrature 784 * point to the bed. We want the diagonal entry in the Dirichlet condition to have similar magnitude to the 785 * diagonal entry corresponding to the adjacent node. The fundamental scaling of the viscous part is in 786 * diagu, diagv below. This scaling is easy to recognize by considering the finite difference operator after 787 * scaling by element size. The no-slip Dirichlet condition is scaled by this factor, and also in the 788 * assembled matrix (see the similar block in THIJacobianLocal). 789 * 790 * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends 791 * (nonlinearly in general) on the neighboring interior nodes through the local viscosity. This will make 792 * a matrix-free Jacobian have extra entries in the corresponding row. We assemble only the diagonal part, 793 * so the solution will exactly satisfy the boundary condition after the first linear iteration. 
794 */ 795 const PetscReal hz = PetscRealPart(pn[0].h) / (zm - 1.); 796 const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx); 797 fn[0]->u = thi->dirichlet_scale * diagu * x[i][j][k].u; 798 fn[0]->v = thi->dirichlet_scale * diagv * x[i][j][k].v; 799 } else { /* Integrate over bottom face to apply boundary condition */ 800 for (q = 0; q < 4; q++) { 801 const PetscReal jw = 0.25 * hx * hy / thi->rhog, *phi = QuadQInterp[q]; 802 PetscScalar u = 0, v = 0, rbeta2 = 0; 803 PetscReal beta2, dbeta2; 804 for (l = 0; l < 4; l++) { 805 u += phi[l] * n[l].u; 806 v += phi[l] * n[l].v; 807 rbeta2 += phi[l] * pn[l].beta2; 808 } 809 THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2); 810 RangeUpdate(&beta2min, &beta2max, beta2); 811 for (l = 0; l < 4; l++) { 812 const PetscReal pp = phi[l]; 813 fn[ls + l]->u += pp * jw * beta2 * u; 814 fn[ls + l]->v += pp * jw * beta2 * v; 815 } 816 } 817 } 818 } 819 } 820 } 821 } 822 823 PetscCall(THIDARestorePrm(info->da, &prm)); 824 825 PetscCall(PRangeMinMax(&thi->eta, etamin, etamax)); 826 PetscCall(PRangeMinMax(&thi->beta2, beta2min, beta2max)); 827 PetscFunctionReturn(0); 828 } 829 830 static PetscErrorCode THIMatrixStatistics(THI thi, Mat B, PetscViewer viewer) { 831 PetscReal nrm; 832 PetscInt m; 833 PetscMPIInt rank; 834 835 PetscFunctionBeginUser; 836 PetscCall(MatNorm(B, NORM_FROBENIUS, &nrm)); 837 PetscCall(MatGetSize(B, &m, 0)); 838 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &rank)); 839 if (rank == 0) { 840 PetscScalar val0, val2; 841 PetscCall(MatGetValue(B, 0, 0, &val0)); 842 PetscCall(MatGetValue(B, 2, 2, &val2)); 843 PetscCall(PetscViewerASCIIPrintf(viewer, "Matrix dim %" PetscInt_FMT " norm %8.2e (0,0) %8.2e (2,2) %8.2e %8.2e <= eta <= %8.2e %8.2e <= beta2 <= %8.2e\n", m, (double)nrm, (double)PetscRealPart(val0), (double)PetscRealPart(val2), 
/* Compute min/max/mean of the x-velocity on the top (surface) layer of the 3D
 * solution X. Outputs are nondimensional; callers dimensionalize. Relies on the
 * column-contiguous decomposition (checked below): every process owns complete
 * vertical columns, so the surface value is the local node at vertical index zm-1. */
static PetscErrorCode THISurfaceStatistics(DM da, Vec X, PetscReal *min, PetscReal *max, PetscReal *mean) {
  Node    ***x;
  PetscInt   i, j, xs, ys, zs, xm, ym, zm, mx, my, mz;
  PetscReal  umin = 1e100, umax = -1e100;
  PetscScalar usum = 0.0, gusum;

  PetscFunctionBeginUser;
  *min = *max = *mean = 0;
  /* Reversed axes: the DMDA's first dimension is the physical z (column) direction */
  PetscCall(DMDAGetInfo(da, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0));
  PetscCall(DMDAGetCorners(da, &zs, &ys, &xs, &zm, &ym, &xm));
  /* Columns must never be distributed (see file-head comment) */
  PetscCheck(zs == 0 && zm == mz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected decomposition");
  PetscCall(DMDAVecGetArray(da, X, &x));
  for (i = xs; i < xs + xm; i++) {
    for (j = ys; j < ys + ym; j++) {
      PetscReal u = PetscRealPart(x[i][j][zm - 1].u); /* surface node of this column */
      RangeUpdate(&umin, &umax, u);
      usum += u;
    }
  }
  PetscCall(DMDAVecRestoreArray(da, X, &x));
  PetscCallMPI(MPI_Allreduce(&umin, min, 1, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)da)));
  PetscCallMPI(MPI_Allreduce(&umax, max, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)da)));
  PetscCallMPI(MPI_Allreduce(&usum, &gusum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)da)));
  *mean = PetscRealPart(gusum) / (mx * my); /* mx*my surface nodes in total */
  PetscFunctionReturn(0);
}

/* Print convergence information and solution statistics (norm, componentwise and
 * speed ranges, surface velocity stats) after a SNES solve. Velocities are
 * dimensionalized to meters/year for output; eta and beta2 ranges stay
 * nondimensional. The `coarsened` parameter is not referenced in this body
 * (kept for call-site compatibility). */
static PetscErrorCode THISolveStatistics(THI thi, SNES snes, PetscInt coarsened, const char name[]) {
  MPI_Comm comm;
  Vec      X;
  DM       dm;

  PetscFunctionBeginUser;
  PetscCall(PetscObjectGetComm((PetscObject)thi, &comm));
  PetscCall(SNESGetSolution(snes, &X));
  PetscCall(SNESGetDM(snes, &dm));
  PetscCall(PetscPrintf(comm, "Solution statistics after solve: %s\n", name));
  {
    PetscInt            its, lits;
    SNESConvergedReason reason;
    PetscCall(SNESGetIterationNumber(snes, &its));
    PetscCall(SNESGetConvergedReason(snes, &reason));
    PetscCall(SNESGetLinearSolveIterations(snes, &lits));
    PetscCall(PetscPrintf(comm, "%s: Number of SNES iterations = %" PetscInt_FMT ", total linear iterations = %" PetscInt_FMT "\n", SNESConvergedReasons[reason], its, lits));
  }
  {
    PetscReal          nrm2, tmin[3] = {1e100, 1e100, 1e100}, tmax[3] = {-1e100, -1e100, -1e100}, min[3], max[3];
    PetscInt           i, j, m;
    const PetscScalar *x;
    PetscCall(VecNorm(X, NORM_2, &nrm2));
    PetscCall(VecGetLocalSize(X, &m));
    PetscCall(VecGetArrayRead(X, &x));
    /* Interlaced dof=2 layout: x[i] is u, x[i+1] is v; c is the speed */
    for (i = 0; i < m; i += 2) {
      PetscReal u = PetscRealPart(x[i]), v = PetscRealPart(x[i + 1]), c = PetscSqrtReal(u * u + v * v);
      tmin[0] = PetscMin(u, tmin[0]);
      tmin[1] = PetscMin(v, tmin[1]);
      tmin[2] = PetscMin(c, tmin[2]);
      tmax[0] = PetscMax(u, tmax[0]);
      tmax[1] = PetscMax(v, tmax[1]);
      tmax[2] = PetscMax(c, tmax[2]);
    }
    PetscCall(VecRestoreArrayRead(X, &x));
    PetscCallMPI(MPI_Allreduce(tmin, min, 3, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)thi)));
    PetscCallMPI(MPI_Allreduce(tmax, max, 3, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)thi)));
    /* Dimensionalize to meters/year */
    nrm2 *= thi->units->year / thi->units->meter;
    for (j = 0; j < 3; j++) {
      min[j] *= thi->units->year / thi->units->meter;
      max[j] *= thi->units->year / thi->units->meter;
    }
    /* Normalize IEEE negative zero (-0.0 == 0.0 is true) to +0.0 so output is consistent */
    if (min[0] == 0.0) min[0] = 0.0;
    PetscCall(PetscPrintf(comm, "|X|_2 %g %g <= u <= %g %g <= v <= %g %g <= c <= %g \n", (double)nrm2, (double)min[0], (double)max[0], (double)min[1], (double)max[1], (double)min[2], (double)max[2]));
    {
      PetscReal umin, umax, umean;
      PetscCall(THISurfaceStatistics(dm, X, &umin, &umax, &umean));
      umin *= thi->units->year / thi->units->meter;
      umax *= thi->units->year / thi->units->meter;
      umean *= thi->units->year / thi->units->meter;
      PetscCall(PetscPrintf(comm, "Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n", (double)umin, (double)umax, (double)umean));
    }
    /* These values stay nondimensional */
    PetscCall(PetscPrintf(comm, "Global eta range %g to %g converged range %g to %g\n", (double)thi->eta.min, (double)thi->eta.max, (double)thi->eta.cmin, (double)thi->eta.cmax));
    PetscCall(PetscPrintf(comm, "Global beta2 range %g to %g converged range %g to %g\n", (double)thi->beta2.min, (double)thi->beta2.max, (double)thi->beta2.cmin, (double)thi->beta2.cmax));
  }
  PetscFunctionReturn(0);
}
/* Assemble the Jacobian of the 2D (vertically-integrated, SSA-like) coarse problem
 * into B. Q1 elements on quadrilaterals; each 2x2 block couples the (u,v) dofs of a
 * node pair. Both the Picard (frozen-viscosity) part and the extra Newton terms
 * (through deta and dbeta2) are assembled. The basal friction contributes beta2/h
 * because the 2D problem is depth-averaged over thickness h, scaled by
 * thi->ssa_friction_scale. */
static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info, Node **x, Mat J, Mat B, THI thi) {
  PetscInt  xs, ys, xm, ym, i, j, q, l, ll;
  PetscReal hx, hy;
  PrmNode **prm;

  PetscFunctionBeginUser;
  /* Reversed axes: the 2D DMDA's x-axis is the physical y direction */
  xs = info->ys;
  ys = info->xs;
  xm = info->ym;
  ym = info->xm;
  hx = thi->Lx / info->my;
  hy = thi->Ly / info->mx;

  PetscCall(MatZeroEntries(B));
  PetscCall(THIDAGetPrm(info->da, &prm));

  for (i = xs; i < xs + xm; i++) {
    for (j = ys; j < ys + ym; j++) {
      Node        n[4];
      PrmNode     pn[4];
      PetscScalar Ke[4 * 2][4 * 2]; /* element stiffness: 4 nodes x 2 dofs */
      QuadExtract(prm, i, j, pn);
      QuadExtract(x, i, j, n);
      PetscCall(PetscMemzero(Ke, sizeof(Ke)));
      for (q = 0; q < 4; q++) { /* 2x2 quadrature */
        PetscReal   phi[4], dphi[4][2], jw, eta, deta, beta2, dbeta2;
        PetscScalar u, v, du[2], dv[2], h = 0, rbeta2 = 0;
        for (l = 0; l < 4; l++) {
          phi[l]     = QuadQInterp[q][l];
          /* map reference gradients to physical space (affine element, factor 2/h per direction) */
          dphi[l][0] = QuadQDeriv[q][l][0] * 2. / hx;
          dphi[l][1] = QuadQDeriv[q][l][1] * 2. / hy;
          h += phi[l] * pn[l].h;           /* interpolated ice thickness */
          rbeta2 += phi[l] * pn[l].beta2;  /* interpolated friction parameter */
        }
        jw = 0.25 * hx * hy / thi->rhog; /* rhog is only scaling */
        PointwiseNonlinearity2D(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
        THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
        for (l = 0; l < 4; l++) { /* test functions */
          const PetscReal pp = phi[l], *dp = dphi[l];
          for (ll = 0; ll < 4; ll++) { /* trial functions */
            const PetscReal ppl = phi[ll], *dpl = dphi[ll];
            PetscScalar     dgdu, dgdv;
            /* derivatives of the second invariant gamma w.r.t. the trial dof */
            dgdu = 2. * du[0] * dpl[0] + dv[1] * dpl[0] + 0.5 * (du[1] + dv[0]) * dpl[1];
            dgdv = 2. * dv[1] * dpl[1] + du[0] * dpl[1] + 0.5 * (du[1] + dv[0]) * dpl[0];
            /* Picard part */
            Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * eta * 4. * dpl[0] + dp[1] * jw * eta * dpl[1] + pp * jw * (beta2 / h) * ppl * thi->ssa_friction_scale;
            Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * eta * 2. * dpl[1] + dp[1] * jw * eta * dpl[0];
            Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * eta * 2. * dpl[0] + dp[0] * jw * eta * dpl[1];
            Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * eta * 4. * dpl[1] + dp[0] * jw * eta * dpl[0] + pp * jw * (beta2 / h) * ppl * thi->ssa_friction_scale;
            /* extra Newton terms */
            Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * deta * dgdu * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdu * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * u * u * ppl * thi->ssa_friction_scale;
            Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * deta * dgdv * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdv * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * u * v * ppl * thi->ssa_friction_scale;
            Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * deta * dgdu * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdu * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * v * u * ppl * thi->ssa_friction_scale;
            Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * deta * dgdv * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdv * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * v * v * ppl * thi->ssa_friction_scale;
          }
        }
      }
      {
        /* MatStencil fields are declared in the order {k,j,i,c}, so these braces set
         * .k=0, .j=i, .i=j — consistent with the reversed-axis DMDA convention */
        const MatStencil rc[4] = {
          {0, i,     j,     0},
          {0, i + 1, j,     0},
          {0, i + 1, j + 1, 0},
          {0, i,     j + 1, 0}
        };
        PetscCall(MatSetValuesBlockedStencil(B, 4, rc, 4, rc, &Ke[0][0], ADD_VALUES));
      }
    }
  }
  PetscCall(THIDARestorePrm(info->da, &prm));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE));
  if (thi->verbose) PetscCall(THIMatrixStatistics(thi, B, PETSC_VIEWER_STDOUT_WORLD));
  PetscFunctionReturn(0);
}
/* Assemble the 3D hydrostatic Jacobian into B. Two assembly modes:
 *   THIASSEMBLY_FULL        - full 16x16 element matrices (8 nodes x 2 dofs);
 *   THIASSEMBLY_TRIDIAGONAL - only vertical (within-column) coupling is inserted,
 *                             giving a block-tridiagonal matrix per column.
 * When USE_SSE2_KERNELS is set, the inner kernel uses SSE2 intrinsics that are an
 * exact replica of the scalar code. Unless COMPUTE_LOWER_TRIANGULAR is defined,
 * only the upper triangle of the (symmetric) element matrix is computed and the
 * lower triangle is mirrored afterwards. */
static PetscErrorCode THIJacobianLocal_3D(DMDALocalInfo *info, Node ***x, Mat B, THI thi, THIAssemblyMode amode) {
  PetscInt  xs, ys, xm, ym, zm, i, j, k, q, l, ll;
  PetscReal hx, hy;
  PrmNode **prm;

  PetscFunctionBeginUser;
  /* Reversed axes, as in THIFunctionLocal */
  xs = info->zs;
  ys = info->ys;
  xm = info->zm;
  ym = info->ym;
  zm = info->xm;
  hx = thi->Lx / info->mz;
  hy = thi->Ly / info->my;

  PetscCall(MatZeroEntries(B));
  /* The off-process insertion pattern is identical every assembly, so let PETSc reuse it */
  PetscCall(MatSetOption(B, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(THIDAGetPrm(info->da, &prm));

  for (i = xs; i < xs + xm; i++) {
    for (j = ys; j < ys + ym; j++) {
      PrmNode pn[4];
      QuadExtract(prm, i, j, pn);
      for (k = 0; k < zm - 1; k++) { /* elements in the column */
        Node        n[8];
        PetscReal   zn[8], etabase = 0;
        PetscScalar Ke[8 * 2][8 * 2];
        PetscInt    ls = 0;

        PrmHexGetZ(pn, k, zm, zn);
        HexExtract(x, i, j, k, n);
        PetscCall(PetscMemzero(Ke, sizeof(Ke)));
        if (thi->no_slip && k == 0) {
          /* Bottom-layer dofs are Dirichlet; zero them and skip their test functions (cf. THIFunctionLocal) */
          for (l = 0; l < 4; l++) n[l].u = n[l].v = 0;
          ls = 4;
        }
        for (q = 0; q < 8; q++) { /* 2x2x2 quadrature */
          PetscReal   dz[3], phi[8], dphi[8][3], jw, eta, deta;
          PetscScalar du[3], dv[3], u, v;
          HexGrad(HexQDeriv[q], zn, dz);
          HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw);
          PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta);
          jw /= thi->rhog; /* residuals are scaled by this factor */
          if (q == 0) etabase = eta;
          for (l = ls; l < 8; l++) { /* test functions */
            const PetscReal *PETSC_RESTRICT dp = dphi[l];
#if USE_SSE2_KERNELS
            /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */
            __m128d p4 = _mm_set1_pd(4), p2 = _mm_set1_pd(2), p05 = _mm_set1_pd(0.5), p42 = _mm_setr_pd(4, 2), p24 = _mm_shuffle_pd(p42, p42, _MM_SHUFFLE2(0, 1)), du0 = _mm_set1_pd(du[0]), du1 = _mm_set1_pd(du[1]), du2 = _mm_set1_pd(du[2]), dv0 = _mm_set1_pd(dv[0]), dv1 = _mm_set1_pd(dv[1]), dv2 = _mm_set1_pd(dv[2]), jweta = _mm_set1_pd(jw * eta), jwdeta = _mm_set1_pd(jw * deta), dp0 = _mm_set1_pd(dp[0]), dp1 = _mm_set1_pd(dp[1]), dp2 = _mm_set1_pd(dp[2]), dp0jweta = _mm_mul_pd(dp0, jweta), dp1jweta = _mm_mul_pd(dp1, jweta), dp2jweta = _mm_mul_pd(dp2, jweta),
              p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4, du0), _mm_mul_pd(p2, dv1)), /* 4 du0 + 2 dv1 */
              p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4, dv1), _mm_mul_pd(p2, du0)), /* 4 dv1 + 2 du0 */
              pdu2dv2 = _mm_unpacklo_pd(du2, dv2),                               /* [du2, dv2] */
              du1pdv0 = _mm_add_pd(du1, dv0),                                    /* du1 + dv0 */
              t1 = _mm_mul_pd(dp0, p4du0p2dv1),                                  /* dp0 (4 du0 + 2 dv1) */
              t2 = _mm_mul_pd(dp1, p4dv1p2du0);                                  /* dp1 (4 dv1 + 2 du0) */

#endif
#if defined COMPUTE_LOWER_TRIANGULAR /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */
            for (ll = ls; ll < 8; ll++) { /* trial functions */
#else
            for (ll = l; ll < 8; ll++) { /* upper triangle only; mirrored after the quadrature loop */
#endif
              const PetscReal *PETSC_RESTRICT dpl = dphi[ll];
              /* In tridiagonal mode, only same-column pairs (l and ll differ by 0 or 4) are kept */
              if (amode == THIASSEMBLY_TRIDIAGONAL && (l - ll) % 4) continue; /* these entries would not be inserted */
#if !USE_SSE2_KERNELS
              /* The analytic Jacobian in nice, easy-to-read form */
              {
                PetscScalar dgdu, dgdv;
                /* derivatives of the second invariant gamma w.r.t. the trial dof */
                dgdu = 2. * du[0] * dpl[0] + dv[1] * dpl[0] + 0.5 * (du[1] + dv[0]) * dpl[1] + 0.5 * du[2] * dpl[2];
                dgdv = 2. * dv[1] * dpl[1] + du[0] * dpl[1] + 0.5 * (du[1] + dv[0]) * dpl[0] + 0.5 * dv[2] * dpl[2];
                /* Picard part */
                Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * eta * 4. * dpl[0] + dp[1] * jw * eta * dpl[1] + dp[2] * jw * eta * dpl[2];
                Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * eta * 2. * dpl[1] + dp[1] * jw * eta * dpl[0];
                Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * eta * 2. * dpl[0] + dp[0] * jw * eta * dpl[1];
                Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * eta * 4. * dpl[1] + dp[0] * jw * eta * dpl[0] + dp[2] * jw * eta * dpl[2];
                /* extra Newton terms */
                Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * deta * dgdu * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * du[2];
                Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * deta * dgdv * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * du[2];
                Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * deta * dgdu * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * dv[2];
                Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * deta * dgdv * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * dv[2];
              }
#else
              /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed
               * benefit. On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost
               * by 25 to 30 percent. */
              {
                __m128d keu = _mm_loadu_pd(&Ke[l * 2 + 0][ll * 2 + 0]), kev = _mm_loadu_pd(&Ke[l * 2 + 1][ll * 2 + 0]), dpl01 = _mm_loadu_pd(&dpl[0]), dpl10 = _mm_shuffle_pd(dpl01, dpl01, _MM_SHUFFLE2(0, 1)), dpl2 = _mm_set_sd(dpl[2]), t0, t3, pdgduv;
                keu    = _mm_add_pd(keu, _mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta, p42), dpl01), _mm_add_pd(_mm_mul_pd(dp1jweta, dpl10), _mm_mul_pd(dp2jweta, dpl2))));
                kev    = _mm_add_pd(kev, _mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta, p24), dpl01), _mm_add_pd(_mm_mul_pd(dp0jweta, dpl10), _mm_mul_pd(dp2jweta, _mm_shuffle_pd(dpl2, dpl2, _MM_SHUFFLE2(0, 1))))));
                pdgduv = _mm_mul_pd(p05, _mm_add_pd(_mm_add_pd(_mm_mul_pd(p42, _mm_mul_pd(du0, dpl01)), _mm_mul_pd(p24, _mm_mul_pd(dv1, dpl01))), _mm_add_pd(_mm_mul_pd(du1pdv0, dpl10), _mm_mul_pd(pdu2dv2, _mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */
                t0     = _mm_mul_pd(jwdeta, pdgduv); /* jw deta [dgdu, dgdv] */
                t3     = _mm_mul_pd(t0, du1pdv0);    /* t0 (du1 + dv0) */
                _mm_storeu_pd(&Ke[l * 2 + 0][ll * 2 + 0], _mm_add_pd(keu, _mm_add_pd(_mm_mul_pd(t1, t0), _mm_add_pd(_mm_mul_pd(dp1, t3), _mm_mul_pd(t0, _mm_mul_pd(dp2, du2))))));
                _mm_storeu_pd(&Ke[l * 2 + 1][ll * 2 + 0], _mm_add_pd(kev, _mm_add_pd(_mm_mul_pd(t2, t0), _mm_add_pd(_mm_mul_pd(dp0, t3), _mm_mul_pd(t0, _mm_mul_pd(dp2, dv2))))));
              }
#endif
            }
          }
        }
        if (k == 0) { /* on a bottom face */
          if (thi->no_slip) {
            /* Scaled Dirichlet diagonal; same scaling as in THIFunctionLocal */
            const PetscReal   hz    = PetscRealPart(pn[0].h) / (zm - 1);
            const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx);
            Ke[0][0] = thi->dirichlet_scale * diagu;
            Ke[1][1] = thi->dirichlet_scale * diagv;
          } else {
            /* Jacobian of the basal (Navier slip) boundary integral, including dbeta2 Newton terms */
            for (q = 0; q < 4; q++) {
              const PetscReal jw = 0.25 * hx * hy / thi->rhog, *phi = QuadQInterp[q];
              PetscScalar     u = 0, v = 0, rbeta2 = 0;
              PetscReal       beta2, dbeta2;
              for (l = 0; l < 4; l++) {
                u += phi[l] * n[l].u;
                v += phi[l] * n[l].v;
                rbeta2 += phi[l] * pn[l].beta2;
              }
              THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2);
              for (l = 0; l < 4; l++) {
                const PetscReal pp = phi[l];
                for (ll = 0; ll < 4; ll++) {
                  const PetscReal ppl = phi[ll];
                  Ke[l * 2 + 0][ll * 2 + 0] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * u * u * ppl;
                  Ke[l * 2 + 0][ll * 2 + 1] += pp * jw * dbeta2 * u * v * ppl;
                  Ke[l * 2 + 1][ll * 2 + 0] += pp * jw * dbeta2 * v * u * ppl;
                  Ke[l * 2 + 1][ll * 2 + 1] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * v * v * ppl;
                }
              }
            }
          }
        }
        {
          /* MatStencil fields are declared {k,j,i,c}: braces {i,j,k,0} set .k=i, .j=j,
           * .i=k, matching the reversed-axis DMDA convention */
          const MatStencil rc[8] = {
            {i,     j,     k,     0},
            {i + 1, j,     k,     0},
            {i + 1, j + 1, k,     0},
            {i,     j + 1, k,     0},
            {i,     j,     k + 1, 0},
            {i + 1, j,     k + 1, 0},
            {i + 1, j + 1, k + 1, 0},
            {i,     j + 1, k + 1, 0}
          };
          if (amode == THIASSEMBLY_TRIDIAGONAL) {
            for (l = 0; l < 4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */
              const PetscInt   l4     = l + 4;
              /* {rc[l].k, rc[l].j, rc[l].i, 0} re-lists the fields in declaration order, i.e. an exact copy of rc[l] */
              const MatStencil rcl[2] = {
                {rc[l].k,  rc[l].j,  rc[l].i,  0},
                {rc[l4].k, rc[l4].j, rc[l4].i, 0}
              };
#if defined COMPUTE_LOWER_TRIANGULAR
              const PetscScalar Kel[4][4] = {
                {Ke[2 * l + 0][2 * l + 0],  Ke[2 * l + 0][2 * l + 1],  Ke[2 * l + 0][2 * l4 + 0],  Ke[2 * l + 0][2 * l4 + 1] },
                {Ke[2 * l + 1][2 * l + 0],  Ke[2 * l + 1][2 * l + 1],  Ke[2 * l + 1][2 * l4 + 0],  Ke[2 * l + 1][2 * l4 + 1] },
                {Ke[2 * l4 + 0][2 * l + 0], Ke[2 * l4 + 0][2 * l + 1], Ke[2 * l4 + 0][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 1]},
                {Ke[2 * l4 + 1][2 * l + 0], Ke[2 * l4 + 1][2 * l + 1], Ke[2 * l4 + 1][2 * l4 + 0], Ke[2 * l4 + 1][2 * l4 + 1]}
              };
#else
              /* Same as above except for the lower-left block, which is reconstructed from the (symmetric) upper triangle */
              const PetscScalar Kel[4][4] = {
                {Ke[2 * l + 0][2 * l + 0],  Ke[2 * l + 0][2 * l + 1],  Ke[2 * l + 0][2 * l4 + 0],  Ke[2 * l + 0][2 * l4 + 1] },
                {Ke[2 * l + 1][2 * l + 0],  Ke[2 * l + 1][2 * l + 1],  Ke[2 * l + 1][2 * l4 + 0],  Ke[2 * l + 1][2 * l4 + 1] },
                {Ke[2 * l + 0][2 * l4 + 0], Ke[2 * l + 1][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 1]},
                {Ke[2 * l + 0][2 * l4 + 1], Ke[2 * l + 1][2 * l4 + 1], Ke[2 * l4 + 1][2 * l4 + 0], Ke[2 * l4 + 1][2 * l4 + 1]}
              };
#endif
              PetscCall(MatSetValuesBlockedStencil(B, 2, rcl, 2, rcl, &Kel[0][0], ADD_VALUES));
            }
          } else {
#if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
            for (l = 0; l < 8; l++) {
              for (ll = l + 1; ll < 8; ll++) {
                Ke[ll * 2 + 0][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 0];
                Ke[ll * 2 + 1][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 1];
                Ke[ll * 2 + 0][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 0];
                Ke[ll * 2 + 1][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 1];
              }
            }
#endif
            PetscCall(MatSetValuesBlockedStencil(B, 8, rc, 8, rc, &Ke[0][0], ADD_VALUES));
          }
        }
      }
    }
  }
  PetscCall(THIDARestorePrm(info->da, &prm));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE));
  if (thi->verbose) PetscCall(THIMatrixStatistics(thi, B, PETSC_VIEWER_STDOUT_WORLD));
  PetscFunctionReturn(0);
}
/* DMDASNESSetJacobianLocal callback: assemble the full 3D Jacobian into the
 * preconditioning matrix B. The operator A is untouched here. */
static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *info, Node ***x, Mat A, Mat B, THI thi) {
  PetscFunctionBeginUser;
  PetscCall(THIJacobianLocal_3D(info, x, B, thi, THIASSEMBLY_FULL));
  PetscFunctionReturn(0);
}

/* As above, but assemble only the within-column (block tridiagonal) coupling. */
static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *info, Node ***x, Mat A, Mat B, THI thi) {
  PetscFunctionBeginUser;
  PetscCall(THIJacobianLocal_3D(info, x, B, thi, THIASSEMBLY_TRIDIAGONAL));
  PetscFunctionReturn(0);
}

/* Custom refinement for the exotic coarse space: refine the 2D coarse DMDA
 * (nlevels-1) times with the standard hierarchy, then replace the finest level with
 * a 3D DMDA that has the same map-plane layout and contiguous (never distributed)
 * columns. The 3D DMDA inherits the 2D DMDA's creatematrix/createinterpolation/
 * getcoloring ops and interpolation type, which requires the private dmdaimpl.h
 * header (see include comment at the top of the file). */
static PetscErrorCode DMRefineHierarchy_THI(DM dac0, PetscInt nlevels, DM hierarchy[]) {
  THI             thi;
  PetscInt        dim, M, N, m, n, s, dof;
  DM              dac, daf;
  DMDAStencilType st;
  DM_DA          *ddf, *ddc;

  PetscFunctionBeginUser;
  PetscCall(PetscObjectQuery((PetscObject)dac0, "THI", (PetscObject *)&thi));
  PetscCheck(thi, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot refine this DMDA, missing composed THI instance");
  if (nlevels > 1) {
    PetscCall(DMRefineHierarchy(dac0, nlevels - 1, hierarchy));
    dac = hierarchy[nlevels - 2]; /* finest 2D level; the 3D DM goes on top of it */
  } else {
    dac = dac0;
  }
  PetscCall(DMDAGetInfo(dac, &dim, &N, &M, 0, &n, &m, 0, &dof, &s, 0, 0, 0, &st));
  PetscCheck(dim == 2, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "This function can only refine 2D DMDAs");

  /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */
  PetscCall(DMDACreate3d(PetscObjectComm((PetscObject)dac), DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, thi->zlevels, N, M, 1, n, m, dof, s, NULL, NULL, NULL, &daf));
  PetscCall(DMSetUp(daf));

  /* Reuse the custom matrix/interpolation/coloring machinery of the 2D coarse DM */
  daf->ops->creatematrix        = dac->ops->creatematrix;
  daf->ops->createinterpolation = dac->ops->createinterpolation;
  daf->ops->getcoloring         = dac->ops->getcoloring;
  ddf                           = (DM_DA *)daf->data;
  ddc                           = (DM_DA *)dac->data;
  ddf->interptype               = ddc->interptype;

  PetscCall(DMDASetFieldName(daf, 0, "x-velocity"));
  PetscCall(DMDASetFieldName(daf, 1, "y-velocity"));

  hierarchy[nlevels - 1] = daf;
  PetscFunctionReturn(0);
}
THI thi; 1216 PetscInt dim, M, N, m, n, s, dof; 1217 DM dac, daf; 1218 DMDAStencilType st; 1219 DM_DA *ddf, *ddc; 1220 1221 PetscFunctionBeginUser; 1222 PetscCall(PetscObjectQuery((PetscObject)dac0, "THI", (PetscObject *)&thi)); 1223 PetscCheck(thi, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot refine this DMDA, missing composed THI instance"); 1224 if (nlevels > 1) { 1225 PetscCall(DMRefineHierarchy(dac0, nlevels - 1, hierarchy)); 1226 dac = hierarchy[nlevels - 2]; 1227 } else { 1228 dac = dac0; 1229 } 1230 PetscCall(DMDAGetInfo(dac, &dim, &N, &M, 0, &n, &m, 0, &dof, &s, 0, 0, 0, &st)); 1231 PetscCheck(dim == 2, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "This function can only refine 2D DMDAs"); 1232 1233 /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */ 1234 PetscCall(DMDACreate3d(PetscObjectComm((PetscObject)dac), DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, thi->zlevels, N, M, 1, n, m, dof, s, NULL, NULL, NULL, &daf)); 1235 PetscCall(DMSetUp(daf)); 1236 1237 daf->ops->creatematrix = dac->ops->creatematrix; 1238 daf->ops->createinterpolation = dac->ops->createinterpolation; 1239 daf->ops->getcoloring = dac->ops->getcoloring; 1240 ddf = (DM_DA *)daf->data; 1241 ddc = (DM_DA *)dac->data; 1242 ddf->interptype = ddc->interptype; 1243 1244 PetscCall(DMDASetFieldName(daf, 0, "x-velocity")); 1245 PetscCall(DMDASetFieldName(daf, 1, "y-velocity")); 1246 1247 hierarchy[nlevels - 1] = daf; 1248 PetscFunctionReturn(0); 1249 } 1250 1251 static PetscErrorCode DMCreateInterpolation_DA_THI(DM dac, DM daf, Mat *A, Vec *scale) { 1252 PetscInt dim; 1253 1254 PetscFunctionBeginUser; 1255 PetscValidHeaderSpecific(dac, DM_CLASSID, 1); 1256 PetscValidHeaderSpecific(daf, DM_CLASSID, 2); 1257 PetscValidPointer(A, 3); 1258 if (scale) PetscValidPointer(scale, 4); 1259 PetscCall(DMDAGetInfo(daf, &dim, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); 1260 if (dim == 2) { 1261 /* We are in the 2D problem and use normal DMDA interpolation */ 
1262 PetscCall(DMCreateInterpolation(dac, daf, A, scale)); 1263 } else { 1264 PetscInt i, j, k, xs, ys, zs, xm, ym, zm, mx, my, mz, rstart, cstart; 1265 Mat B; 1266 1267 PetscCall(DMDAGetInfo(daf, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0)); 1268 PetscCall(DMDAGetCorners(daf, &zs, &ys, &xs, &zm, &ym, &xm)); 1269 PetscCheck(!zs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "unexpected"); 1270 PetscCall(MatCreate(PetscObjectComm((PetscObject)daf), &B)); 1271 PetscCall(MatSetSizes(B, xm * ym * zm, xm * ym, mx * my * mz, mx * my)); 1272 1273 PetscCall(MatSetType(B, MATAIJ)); 1274 PetscCall(MatSeqAIJSetPreallocation(B, 1, NULL)); 1275 PetscCall(MatMPIAIJSetPreallocation(B, 1, NULL, 0, NULL)); 1276 PetscCall(MatGetOwnershipRange(B, &rstart, NULL)); 1277 PetscCall(MatGetOwnershipRangeColumn(B, &cstart, NULL)); 1278 for (i = xs; i < xs + xm; i++) { 1279 for (j = ys; j < ys + ym; j++) { 1280 for (k = zs; k < zs + zm; k++) { 1281 PetscInt i2 = i * ym + j, i3 = i2 * zm + k; 1282 PetscScalar val = ((k == 0 || k == mz - 1) ? 0.5 : 1.) 
/ (mz - 1.); /* Integration using trapezoid rule */ 1283 PetscCall(MatSetValue(B, cstart + i3, rstart + i2, val, INSERT_VALUES)); 1284 } 1285 } 1286 } 1287 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1288 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1289 PetscCall(MatCreateMAIJ(B, sizeof(Node) / sizeof(PetscScalar), A)); 1290 PetscCall(MatDestroy(&B)); 1291 } 1292 PetscFunctionReturn(0); 1293 } 1294 1295 static PetscErrorCode DMCreateMatrix_THI_Tridiagonal(DM da, Mat *J) { 1296 Mat A; 1297 PetscInt xm, ym, zm, dim, dof = 2, starts[3], dims[3]; 1298 ISLocalToGlobalMapping ltog; 1299 1300 PetscFunctionBeginUser; 1301 PetscCall(DMDAGetInfo(da, &dim, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); 1302 PetscCheck(dim == 3, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Expected DMDA to be 3D"); 1303 PetscCall(DMDAGetCorners(da, 0, 0, 0, &zm, &ym, &xm)); 1304 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1305 PetscCall(MatCreate(PetscObjectComm((PetscObject)da), &A)); 1306 PetscCall(MatSetSizes(A, dof * xm * ym * zm, dof * xm * ym * zm, PETSC_DETERMINE, PETSC_DETERMINE)); 1307 PetscCall(MatSetType(A, da->mattype)); 1308 PetscCall(MatSetFromOptions(A)); 1309 PetscCall(MatSeqAIJSetPreallocation(A, 3 * 2, NULL)); 1310 PetscCall(MatMPIAIJSetPreallocation(A, 3 * 2, NULL, 0, NULL)); 1311 PetscCall(MatSeqBAIJSetPreallocation(A, 2, 3, NULL)); 1312 PetscCall(MatMPIBAIJSetPreallocation(A, 2, 3, NULL, 0, NULL)); 1313 PetscCall(MatSeqSBAIJSetPreallocation(A, 2, 2, NULL)); 1314 PetscCall(MatMPISBAIJSetPreallocation(A, 2, 2, NULL, 0, NULL)); 1315 PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog)); 1316 PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2])); 1317 PetscCall(MatSetStencil(A, dim, dims, starts, dof)); 1318 *J = A; 1319 PetscFunctionReturn(0); 1320 } 1321 1322 static PetscErrorCode THIDAVecView_VTK_XML(THI thi, DM da, Vec X, const char filename[]) { 1323 const PetscInt dof = 2; 1324 Units units = thi->units; 1325 MPI_Comm comm; 
1326 PetscViewer viewer; 1327 PetscMPIInt rank, size, tag, nn, nmax; 1328 PetscInt mx, my, mz, r, range[6]; 1329 const PetscScalar *x; 1330 1331 PetscFunctionBeginUser; 1332 PetscCall(PetscObjectGetComm((PetscObject)thi, &comm)); 1333 PetscCall(DMDAGetInfo(da, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0)); 1334 PetscCallMPI(MPI_Comm_size(comm, &size)); 1335 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1336 PetscCall(PetscViewerASCIIOpen(comm, filename, &viewer)); 1337 PetscCall(PetscViewerASCIIPrintf(viewer, "<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n")); 1338 PetscCall(PetscViewerASCIIPrintf(viewer, " <StructuredGrid WholeExtent=\"%d %" PetscInt_FMT " %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n", 0, mz - 1, 0, my - 1, 0, mx - 1)); 1339 1340 PetscCall(DMDAGetCorners(da, range, range + 1, range + 2, range + 3, range + 4, range + 5)); 1341 PetscCall(PetscMPIIntCast(range[3] * range[4] * range[5] * dof, &nn)); 1342 PetscCallMPI(MPI_Reduce(&nn, &nmax, 1, MPI_INT, MPI_MAX, 0, comm)); 1343 tag = ((PetscObject)viewer)->tag; 1344 PetscCall(VecGetArrayRead(X, &x)); 1345 if (rank == 0) { 1346 PetscScalar *array; 1347 PetscCall(PetscMalloc1(nmax, &array)); 1348 for (r = 0; r < size; r++) { 1349 PetscInt i, j, k, xs, xm, ys, ym, zs, zm; 1350 const PetscScalar *ptr; 1351 MPI_Status status; 1352 if (r) PetscCallMPI(MPI_Recv(range, 6, MPIU_INT, r, tag, comm, MPI_STATUS_IGNORE)); 1353 zs = range[0]; 1354 ys = range[1]; 1355 xs = range[2]; 1356 zm = range[3]; 1357 ym = range[4]; 1358 xm = range[5]; 1359 PetscCheck(xm * ym * zm * dof <= nmax, PETSC_COMM_SELF, PETSC_ERR_PLIB, "should not happen"); 1360 if (r) { 1361 PetscCallMPI(MPI_Recv(array, nmax, MPIU_SCALAR, r, tag, comm, &status)); 1362 PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &nn)); 1363 PetscCheck(nn == xm * ym * zm * dof, PETSC_COMM_SELF, PETSC_ERR_PLIB, "should not happen"); 1364 ptr = array; 1365 } else ptr = x; 1366 PetscCall(PetscViewerASCIIPrintf(viewer, " <Piece Extent=\"%" 
PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n", zs, zs + zm - 1, ys, ys + ym - 1, xs, xs + xm - 1)); 1367 1368 PetscCall(PetscViewerASCIIPrintf(viewer, " <Points>\n")); 1369 PetscCall(PetscViewerASCIIPrintf(viewer, " <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n")); 1370 for (i = xs; i < xs + xm; i++) { 1371 for (j = ys; j < ys + ym; j++) { 1372 for (k = zs; k < zs + zm; k++) { 1373 PrmNode p; 1374 PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my, zz; 1375 thi->initialize(thi, xx, yy, &p); 1376 zz = PetscRealPart(p.b) + PetscRealPart(p.h) * k / (mz - 1); 1377 PetscCall(PetscViewerASCIIPrintf(viewer, "%f %f %f\n", (double)xx, (double)yy, (double)zz)); 1378 } 1379 } 1380 } 1381 PetscCall(PetscViewerASCIIPrintf(viewer, " </DataArray>\n")); 1382 PetscCall(PetscViewerASCIIPrintf(viewer, " </Points>\n")); 1383 1384 PetscCall(PetscViewerASCIIPrintf(viewer, " <PointData>\n")); 1385 PetscCall(PetscViewerASCIIPrintf(viewer, " <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n")); 1386 for (i = 0; i < nn; i += dof) PetscCall(PetscViewerASCIIPrintf(viewer, "%f %f %f\n", (double)(PetscRealPart(ptr[i]) * units->year / units->meter), (double)(PetscRealPart(ptr[i + 1]) * units->year / units->meter), 0.0)); 1387 PetscCall(PetscViewerASCIIPrintf(viewer, " </DataArray>\n")); 1388 1389 PetscCall(PetscViewerASCIIPrintf(viewer, " <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n")); 1390 for (i = 0; i < nn; i += dof) PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT "\n", r)); 1391 PetscCall(PetscViewerASCIIPrintf(viewer, " </DataArray>\n")); 1392 PetscCall(PetscViewerASCIIPrintf(viewer, " </PointData>\n")); 1393 1394 PetscCall(PetscViewerASCIIPrintf(viewer, " </Piece>\n")); 1395 } 1396 PetscCall(PetscFree(array)); 1397 } else { 1398 PetscCallMPI(MPI_Send(range, 6, MPIU_INT, 0, tag, comm)); 1399 
PetscCallMPI(MPI_Send((PetscScalar *)x, nn, MPIU_SCALAR, 0, tag, comm)); 1400 } 1401 PetscCall(VecRestoreArrayRead(X, &x)); 1402 PetscCall(PetscViewerASCIIPrintf(viewer, " </StructuredGrid>\n")); 1403 PetscCall(PetscViewerASCIIPrintf(viewer, "</VTKFile>\n")); 1404 PetscCall(PetscViewerDestroy(&viewer)); 1405 PetscFunctionReturn(0); 1406 } 1407 1408 int main(int argc, char *argv[]) { 1409 MPI_Comm comm; 1410 THI thi; 1411 DM da; 1412 SNES snes; 1413 1414 PetscFunctionBeginUser; 1415 PetscCall(PetscInitialize(&argc, &argv, 0, help)); 1416 comm = PETSC_COMM_WORLD; 1417 1418 PetscCall(THICreate(comm, &thi)); 1419 { 1420 PetscInt M = 3, N = 3, P = 2; 1421 PetscOptionsBegin(comm, NULL, "Grid resolution options", ""); 1422 { 1423 PetscCall(PetscOptionsInt("-M", "Number of elements in x-direction on coarse level", "", M, &M, NULL)); 1424 N = M; 1425 PetscCall(PetscOptionsInt("-N", "Number of elements in y-direction on coarse level (if different from M)", "", N, &N, NULL)); 1426 if (thi->coarse2d) { 1427 PetscCall(PetscOptionsInt("-zlevels", "Number of elements in z-direction on fine level", "", thi->zlevels, &thi->zlevels, NULL)); 1428 } else { 1429 PetscCall(PetscOptionsInt("-P", "Number of elements in z-direction on coarse level", "", P, &P, NULL)); 1430 } 1431 } 1432 PetscOptionsEnd(); 1433 if (thi->coarse2d) { 1434 PetscCall(DMDACreate2d(comm, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX, N, M, PETSC_DETERMINE, PETSC_DETERMINE, sizeof(Node) / sizeof(PetscScalar), 1, 0, 0, &da)); 1435 PetscCall(DMSetFromOptions(da)); 1436 PetscCall(DMSetUp(da)); 1437 da->ops->refinehierarchy = DMRefineHierarchy_THI; 1438 da->ops->createinterpolation = DMCreateInterpolation_DA_THI; 1439 1440 PetscCall(PetscObjectCompose((PetscObject)da, "THI", (PetscObject)thi)); 1441 } else { 1442 PetscCall(DMDACreate3d(comm, DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX, P, N, M, 1, PETSC_DETERMINE, PETSC_DETERMINE, sizeof(Node) / sizeof(PetscScalar), 1, 
0, 0, 0, &da)); 1443 PetscCall(DMSetFromOptions(da)); 1444 PetscCall(DMSetUp(da)); 1445 } 1446 PetscCall(DMDASetFieldName(da, 0, "x-velocity")); 1447 PetscCall(DMDASetFieldName(da, 1, "y-velocity")); 1448 } 1449 PetscCall(THISetUpDM(thi, da)); 1450 if (thi->tridiagonal) da->ops->creatematrix = DMCreateMatrix_THI_Tridiagonal; 1451 1452 { /* Set the fine level matrix type if -da_refine */ 1453 PetscInt rlevel, clevel; 1454 PetscCall(DMGetRefineLevel(da, &rlevel)); 1455 PetscCall(DMGetCoarsenLevel(da, &clevel)); 1456 if (rlevel - clevel > 0) PetscCall(DMSetMatType(da, thi->mattype)); 1457 } 1458 1459 PetscCall(DMDASNESSetFunctionLocal(da, ADD_VALUES, (DMDASNESFunction)THIFunctionLocal, thi)); 1460 if (thi->tridiagonal) { 1461 PetscCall(DMDASNESSetJacobianLocal(da, (DMDASNESJacobian)THIJacobianLocal_3D_Tridiagonal, thi)); 1462 } else { 1463 PetscCall(DMDASNESSetJacobianLocal(da, (DMDASNESJacobian)THIJacobianLocal_3D_Full, thi)); 1464 } 1465 PetscCall(DMCoarsenHookAdd(da, DMCoarsenHook_THI, NULL, thi)); 1466 PetscCall(DMRefineHookAdd(da, DMRefineHook_THI, NULL, thi)); 1467 1468 PetscCall(DMSetApplicationContext(da, thi)); 1469 1470 PetscCall(SNESCreate(comm, &snes)); 1471 PetscCall(SNESSetDM(snes, da)); 1472 PetscCall(DMDestroy(&da)); 1473 PetscCall(SNESSetComputeInitialGuess(snes, THIInitial, NULL)); 1474 PetscCall(SNESSetFromOptions(snes)); 1475 1476 PetscCall(SNESSolve(snes, NULL, NULL)); 1477 1478 PetscCall(THISolveStatistics(thi, snes, 0, "Full")); 1479 1480 { 1481 PetscBool flg; 1482 char filename[PETSC_MAX_PATH_LEN] = ""; 1483 PetscCall(PetscOptionsGetString(NULL, NULL, "-o", filename, sizeof(filename), &flg)); 1484 if (flg) { 1485 Vec X; 1486 DM dm; 1487 PetscCall(SNESGetSolution(snes, &X)); 1488 PetscCall(SNESGetDM(snes, &dm)); 1489 PetscCall(THIDAVecView_VTK_XML(thi, dm, X, filename)); 1490 } 1491 } 1492 1493 PetscCall(DMDestroy(&da)); 1494 PetscCall(SNESDestroy(&snes)); 1495 PetscCall(THIDestroy(&thi)); 1496 PetscCall(PetscFinalize()); 1497 return 0; 1498 } 
1499 1500 /*TEST 1501 1502 build: 1503 requires: !single 1504 1505 test: 1506 args: -M 6 -P 4 -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type icc 1507 1508 test: 1509 suffix: 2 1510 nsize: 2 1511 args: -M 6 -P 4 -thi_hom z -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 6 -mg_levels_0_pc_type redundant -snes_grid_sequence 1 -mat_partitioning_type current -ksp_atol -1 1512 1513 test: 1514 suffix: 3 1515 nsize: 3 1516 args: -M 7 -P 4 -thi_hom z -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type baij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_pc_asm_type restrict -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 9 -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mat_partitioning_type current 1517 1518 test: 1519 suffix: 4 1520 nsize: 6 1521 args: -M 4 -P 2 -da_refine_hierarchy_x 1,1,3 -da_refine_hierarchy_y 2,2,1 -da_refine_hierarchy_z 2,2,1 -snes_grid_sequence 3 -ksp_converged_reason -ksp_type fgmres -ksp_rtol 1e-2 -pc_type mg -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type bjacobi -mg_levels_1_sub_pc_type cholesky -pc_mg_type multiplicative -snes_converged_reason -snes_stol 1e-12 -thi_L 80e3 -thi_alpha 0.05 -thi_friction_m 1 -thi_hom x -snes_view -mg_levels_0_pc_type redundant -mg_levels_0_ksp_type preonly -ksp_atol -1 1522 1523 test: 1524 suffix: 5 1525 nsize: 6 1526 args: -M 12 -P 5 -snes_monitor_short -ksp_converged_reason -pc_type asm -pc_asm_type restrict -dm_mat_type {{aij baij sbaij}} 1527 1528 TEST*/ 1529