1 static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\ 2 \n\ 3 Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\ 4 using multigrid. The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\ 5 to p=4/3 in a p-Laplacian). The focus is on ISMIP-HOM experiments which assume periodic\n\ 6 boundary conditions in the x- and y-directions.\n\ 7 \n\ 8 Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\ 9 can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\ 10 \n\ 11 A VTK StructuredGrid output file can be written using the option -o filename.vts\n\ 12 \n\n"; 13 14 /* 15 The equations for horizontal velocity (u,v) are 16 17 - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0 18 - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0 19 20 where 21 22 eta = B/2 (epsilon + gamma)^((p-2)/2) 23 24 is the nonlinear effective viscosity with regularization epsilon and hardness parameter B, 25 written in terms of the second invariant 26 27 gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2 28 29 The surface boundary conditions are the natural conditions. The basal boundary conditions 30 are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2. 31 32 In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1). 33 34 The discretization is Q1 finite elements, managed by a DMDA. The grid is never distorted in the 35 map (x,y) plane, but the bed and surface may be bumpy. This is handled as usual in FEM, through 36 the Jacobian of the coordinate transformation from a reference element to the physical element. 37 38 Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed 39 specially so that columns are never distributed, and are always contiguous in memory. 40 This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation, 41 and then indexing as vec[i][j][k]. The exotic coarse spaces require 2D DMDAs which are made to 42 use compatible domain decomposition relative to the 3D DMDAs. 43 44 There are two compile-time options: 45 46 NO_SSE2: 47 If the host supports SSE2, we use integration code that has been vectorized with SSE2 48 intrinsics, unless this macro is defined. The intrinsics speed up integration by about 49 30% on my architecture (P8700, gcc-4.5 snapshot). 50 51 COMPUTE_LOWER_TRIANGULAR: 52 The element matrices we assemble are lower-triangular so it is not necessary to compute 53 all entries explicitly. If this macro is defined, the lower-triangular entries are 54 computed explicitly. 55 56 */ 57 58 #if defined(PETSC_APPLE_FRAMEWORK) 59 #import <PETSc/petscsnes.h> 60 #import <PETSc/petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */ 61 #else 62 63 #include <petscsnes.h> 64 #include <petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */ 65 #endif 66 #include <ctype.h> /* toupper() */ 67 68 #if defined(__cplusplus) || defined(PETSC_HAVE_WINDOWS_COMPILERS) || defined(__PGI) 69 /* c++ cannot handle [_restrict_] notation like C does */ 70 #undef PETSC_RESTRICT 71 #define PETSC_RESTRICT 72 #endif 73 74 #if defined __SSE2__ 75 #include <emmintrin.h> 76 #endif 77 78 /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */ 79 #if !defined NO_SSE2 && !defined PETSC_USE_COMPLEX && !defined PETSC_USE_REAL_SINGLE && !defined PETSC_USE_REAL___FLOAT128 && !defined PETSC_USE_REAL___FP16 && defined __SSE2__ 80 #define USE_SSE2_KERNELS 1 81 #else 82 #define USE_SSE2_KERNELS 0 83 #endif 84 85 static PetscClassId THI_CLASSID; 86 87 typedef enum { 88 QUAD_GAUSS, 89 QUAD_LOBATTO 90 } QuadratureType; 91 static const char *QuadratureTypes[] = {"gauss", "lobatto", "QuadratureType", "QUAD_", 0}; 92 PETSC_UNUSED static const PetscReal HexQWeights[8] = {1, 1, 1, 1, 1, 1, 1, 1}; 93 PETSC_UNUSED static const PetscReal HexQNodes[] = {-0.57735026918962573, 0.57735026918962573}; 94 #define G 0.57735026918962573 95 #define H (0.5 * (1. + G)) 96 #define L (0.5 * (1. - G)) 97 #define M (-0.5) 98 #define P (0.5) 99 /* Special quadrature: Lobatto in horizontal, Gauss in vertical */ 100 static const PetscReal HexQInterp_Lobatto[8][8] = { 101 {H, 0, 0, 0, L, 0, 0, 0}, 102 {0, H, 0, 0, 0, L, 0, 0}, 103 {0, 0, H, 0, 0, 0, L, 0}, 104 {0, 0, 0, H, 0, 0, 0, L}, 105 {L, 0, 0, 0, H, 0, 0, 0}, 106 {0, L, 0, 0, 0, H, 0, 0}, 107 {0, 0, L, 0, 0, 0, H, 0}, 108 {0, 0, 0, L, 0, 0, 0, H} 109 }; 110 static const PetscReal HexQDeriv_Lobatto[8][8][3] = { 111 {{M * H, M *H, M}, {P * H, 0, 0}, {0, 0, 0}, {0, P *H, 0}, {M * L, M *L, P}, {P * L, 0, 0}, {0, 0, 0}, {0, P *L, 0} }, 112 {{M * H, 0, 0}, {P * H, M *H, M}, {0, P *H, 0}, {0, 0, 0}, {M * L, 0, 0}, {P * L, M *L, P}, {0, P *L, 0}, {0, 0, 0} }, 113 {{0, 0, 0}, {0, M *H, 0}, {P * H, P *H, M}, {M * H, 0, 0}, {0, 0, 0}, {0, M *L, 0}, {P * L, P *L, P}, {M * L, 0, 0} }, 114 {{0, M *H, 0}, {0, 0, 0}, {P * H, 0, 0}, {M * H, P *H, M}, {0, M *L, 0}, {0, 0, 0}, {P * L, 0, 0}, {M * L, P *L, P}}, 115 {{M * L, M *L, M}, {P * L, 0, 0}, {0, 0, 0}, {0, P *L, 0}, {M * H, M *H, P}, {P * H, 0, 0}, {0, 0, 0}, {0, P *H, 0} }, 116 {{M * L, 0, 0}, {P * L, M *L, M}, {0, P *L, 0}, {0, 0, 0}, {M * H, 0, 0}, {P * H, M *H, P}, {0, P *H, 0}, {0, 0, 0} }, 117 {{0, 0, 0}, {0, M *L, 0}, {P * L, P *L, M}, {M * L, 0, 0}, {0, 0, 0}, {0, M *H, 0}, {P * H, P *H, P}, {M * H, 0, 0} }, 118 {{0, M *L, 0}, {0, 0, 0}, {P * L, 0, 0}, {M * L, P *L, M}, {0, M *H, 0}, {0, 0, 0}, {P * H, 0, 0}, {M * H, P *H, P}} 119 }; 120 /* Stanndard Gauss */ 121 static const PetscReal HexQInterp_Gauss[8][8] = { 122 {H * H * H, L *H *H, L *L *H, H *L *H, H *H *L, L *H *L, L *L *L, H *L *L}, 123 {L * H * H, H *H *H, H *L *H, L *L *H, L *H *L, H *H *L, H *L *L, L *L *L}, 124 {L * L * H, H *L *H, H *H *H, L *H *H, L *L *L, H *L *L, H *H *L, L *H *L}, 125 {H * L * H, L *L *H, L *H *H, H *H *H, H *L *L, L *L *L, L *H *L, H *H *L}, 126 {H * H * L, L *H *L, L *L *L, H *L *L, H *H *H, L *H *H, L *L *H, H *L *H}, 127 {L * H * L, H *H *L, H *L *L, L *L *L, L *H *H, H *H *H, H *L *H, L *L *H}, 128 {L * L * L, H *L *L, H *H *L, L *H *L, L *L *H, H *L *H, H *H *H, L *H *H}, 129 {H * L * L, L *L *L, L *H *L, H *H *L, H *L *H, L *L *H, L *H *H, H *H *H} 130 }; 131 static const PetscReal HexQDeriv_Gauss[8][8][3] = { 132 {{M * H * H, H *M *H, H *H *M}, {P * H * H, L *M *H, L *H *M}, {P * L * H, L *P *H, L *L *M}, {M * L * H, H *P *H, H *L *M}, {M * H * L, H *M *L, H *H *P}, {P * H * L, L *M *L, L *H *P}, {P * L * L, L *P *L, L *L *P}, {M * L * L, H *P *L, H *L *P}}, 133 {{M * H * H, L *M *H, L *H *M}, {P * H * H, H *M *H, H *H *M}, {P * L * H, H *P *H, H *L *M}, {M * L * H, L *P *H, L *L *M}, {M * H * L, L *M *L, L *H *P}, {P * H * L, H *M *L, H *H *P}, {P * L * L, H *P *L, H *L *P}, {M * L * L, L *P *L, L *L *P}}, 134 {{M * L * H, L *M *H, L *L *M}, {P * L * H, H *M *H, H *L *M}, {P * H * H, H *P *H, H *H *M}, {M * H * H, L *P *H, L *H *M}, {M * L * L, L *M *L, L *L *P}, {P * L * L, H *M *L, H *L *P}, {P * H * L, H *P *L, H *H *P}, {M * H * L, L *P *L, L *H *P}}, 135 {{M * L * H, H *M *H, H *L *M}, {P * L * H, L *M *H, L *L *M}, {P * H * H, L *P *H, L *H *M}, {M * H * H, H *P *H, H *H *M}, {M * L * L, H *M *L, H *L *P}, {P * L * L, L *M *L, L *L *P}, {P * H * L, L *P *L, L *H *P}, {M * H * L, H *P *L, H *H *P}}, 136 {{M * H * L, H *M *L, H *H *M}, {P * H * L, L *M *L, L *H *M}, {P * L * L, L *P *L, L *L *M}, {M * L * L, H *P *L, H *L *M}, {M * H * H, H *M *H, H *H *P}, {P * H * H, L *M *H, L *H *P}, {P * L * H, L *P *H, L *L *P}, {M * L * H, H *P *H, H *L *P}}, 137 {{M * H * L, L *M *L, L *H *M}, {P * H * L, H *M *L, H *H *M}, {P * L * L, H *P *L, H *L *M}, {M * L * L, L *P *L, L *L *M}, {M * H * H, L *M *H, L *H *P}, {P * H * H, H *M *H, H *H *P}, {P * L * H, H *P *H, H *L *P}, {M * L * H, L *P *H, L *L *P}}, 138 {{M * L * L, L *M *L, L *L *M}, {P * L * L, H *M *L, H *L *M}, {P * H * L, H *P *L, H *H *M}, {M * H * L, L *P *L, L *H *M}, {M * L * H, L *M *H, L *L *P}, {P * L * H, H *M *H, H *L *P}, {P * H * H, H *P *H, H *H *P}, {M * H * H, L *P *H, L *H *P}}, 139 {{M * L * L, H *M *L, H *L *M}, {P * L * L, L *M *L, L *L *M}, {P * H * L, L *P *L, L *H *M}, {M * H * L, H *P *L, H *H *M}, {M * L * H, H *M *H, H *L *P}, {P * L * H, L *M *H, L *L *P}, {P * H * H, L *P *H, L *H *P}, {M * H * H, H *P *H, H *H *P}} 140 }; 141 static const PetscReal (*HexQInterp)[8], (*HexQDeriv)[8][3]; 142 /* Standard 2x2 Gauss quadrature for the bottom layer. */ 143 static const PetscReal QuadQInterp[4][4] = { 144 {H * H, L *H, L *L, H *L}, 145 {L * H, H *H, H *L, L *L}, 146 {L * L, H *L, H *H, L *H}, 147 {H * L, L *L, L *H, H *H} 148 }; 149 static const PetscReal QuadQDeriv[4][4][2] = { 150 {{M * H, M *H}, {P * H, M *L}, {P * L, P *L}, {M * L, P *H}}, 151 {{M * H, M *L}, {P * H, M *H}, {P * L, P *H}, {M * L, P *L}}, 152 {{M * L, M *L}, {P * L, M *H}, {P * H, P *H}, {M * H, P *L}}, 153 {{M * L, M *H}, {P * L, M *L}, {P * H, P *L}, {M * H, P *H}} 154 }; 155 #undef G 156 #undef H 157 #undef L 158 #undef M 159 #undef P 160 161 #define HexExtract(x, i, j, k, n) \ 162 do { \ 163 (n)[0] = (x)[i][j][k]; \ 164 (n)[1] = (x)[i + 1][j][k]; \ 165 (n)[2] = (x)[i + 1][j + 1][k]; \ 166 (n)[3] = (x)[i][j + 1][k]; \ 167 (n)[4] = (x)[i][j][k + 1]; \ 168 (n)[5] = (x)[i + 1][j][k + 1]; \ 169 (n)[6] = (x)[i + 1][j + 1][k + 1]; \ 170 (n)[7] = (x)[i][j + 1][k + 1]; \ 171 } while (0) 172 173 #define HexExtractRef(x, i, j, k, n) \ 174 do { \ 175 (n)[0] = &(x)[i][j][k]; \ 176 (n)[1] = &(x)[i + 1][j][k]; \ 177 (n)[2] = &(x)[i + 1][j + 1][k]; \ 178 (n)[3] = &(x)[i][j + 1][k]; \ 179 (n)[4] = &(x)[i][j][k + 1]; \ 180 (n)[5] = &(x)[i + 1][j][k + 1]; \ 181 (n)[6] = &(x)[i + 1][j + 1][k + 1]; \ 182 (n)[7] = &(x)[i][j + 1][k + 1]; \ 183 } while (0) 184 185 #define QuadExtract(x, i, j, n) \ 186 do { \ 187 (n)[0] = (x)[i][j]; \ 188 (n)[1] = (x)[i + 1][j]; \ 189 (n)[2] = (x)[i + 1][j + 1]; \ 190 (n)[3] = (x)[i][j + 1]; \ 191 } while (0) 192 193 static void HexGrad(const PetscReal dphi[][3], const PetscReal zn[], PetscReal dz[]) 194 { 195 PetscInt i; 196 dz[0] = dz[1] = dz[2] = 0; 197 for (i = 0; i < 8; i++) { 198 dz[0] += dphi[i][0] * zn[i]; 199 dz[1] += dphi[i][1] * zn[i]; 200 dz[2] += dphi[i][2] * zn[i]; 201 } 202 } 203 204 static void HexComputeGeometry(PetscInt q, PetscReal hx, PetscReal hy, const PetscReal dz[PETSC_RESTRICT], PetscReal phi[PETSC_RESTRICT], PetscReal dphi[PETSC_RESTRICT][3], PetscReal *PETSC_RESTRICT jw) 205 { 206 const PetscReal jac[3][3] = { 207 {hx / 2, 0, 0 }, 208 {0, hy / 2, 0 }, 209 {dz[0], dz[1], dz[2]} 210 }; 211 const PetscReal ijac[3][3] = { 212 {1 / jac[0][0], 0, 0 }, 213 {0, 1 / jac[1][1], 0 }, 214 {-jac[2][0] / (jac[0][0] * jac[2][2]), -jac[2][1] / (jac[1][1] * jac[2][2]), 1 / jac[2][2]} 215 }; 216 const PetscReal jdet = jac[0][0] * jac[1][1] * jac[2][2]; 217 PetscInt i; 218 219 for (i = 0; i < 8; i++) { 220 const PetscReal *dphir = HexQDeriv[q][i]; 221 phi[i] = HexQInterp[q][i]; 222 dphi[i][0] = dphir[0] * ijac[0][0] + dphir[1] * ijac[1][0] + dphir[2] * ijac[2][0]; 223 dphi[i][1] = dphir[0] * ijac[0][1] + dphir[1] * ijac[1][1] + dphir[2] * ijac[2][1]; 224 dphi[i][2] = dphir[0] * ijac[0][2] + dphir[1] * ijac[1][2] + dphir[2] * ijac[2][2]; 225 } 226 *jw = 1.0 * jdet; 227 } 228 229 typedef struct _p_THI *THI; 230 typedef struct _n_Units *Units; 231 232 typedef struct { 233 PetscScalar u, v; 234 } Node; 235 236 typedef struct { 237 PetscScalar b; /* bed */ 238 PetscScalar h; /* thickness */ 239 PetscScalar beta2; /* friction */ 240 } PrmNode; 241 242 typedef struct { 243 PetscReal min, max, cmin, cmax; 244 } PRange; 245 246 typedef enum { 247 THIASSEMBLY_TRIDIAGONAL, 248 THIASSEMBLY_FULL 249 } THIAssemblyMode; 250 251 struct _p_THI { 252 PETSCHEADER(int); 253 void (*initialize)(THI, PetscReal x, PetscReal y, PrmNode *p); 254 PetscInt zlevels; 255 PetscReal Lx, Ly, Lz; /* Model domain */ 256 PetscReal alpha; /* Bed angle */ 257 Units units; 258 PetscReal dirichlet_scale; 259 PetscReal ssa_friction_scale; 260 PRange eta; 261 PRange beta2; 262 struct { 263 PetscReal Bd2, eps, exponent; 264 } viscosity; 265 struct { 266 PetscReal irefgam, eps2, exponent, refvel, epsvel; 267 } friction; 268 PetscReal rhog; 269 PetscBool no_slip; 270 PetscBool tridiagonal; 271 PetscBool coarse2d; 272 PetscBool verbose; 273 MatType mattype; 274 }; 275 276 struct _n_Units { 277 /* fundamental */ 278 PetscReal meter; 279 PetscReal kilogram; 280 PetscReal second; 281 /* derived */ 282 PetscReal Pascal; 283 PetscReal year; 284 }; 285 286 static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *, Node ***, Mat, Mat, THI); 287 static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *, Node ***, Mat, Mat, THI); 288 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *, Node **, Mat, Mat, THI); 289 290 static void PrmHexGetZ(const PrmNode pn[], PetscInt k, PetscInt zm, PetscReal zn[]) 291 { 292 const PetscScalar zm1 = zm - 1, znl[8] = {pn[0].b + pn[0].h * (PetscScalar)k / zm1, pn[1].b + pn[1].h * (PetscScalar)k / zm1, pn[2].b + pn[2].h * (PetscScalar)k / zm1, pn[3].b + pn[3].h * (PetscScalar)k / zm1, 293 pn[0].b + pn[0].h * (PetscScalar)(k + 1) / zm1, pn[1].b + pn[1].h * (PetscScalar)(k + 1) / zm1, pn[2].b + pn[2].h * (PetscScalar)(k + 1) / zm1, pn[3].b + pn[3].h * (PetscScalar)(k + 1) / zm1}; 294 PetscInt i; 295 for (i = 0; i < 8; i++) zn[i] = PetscRealPart(znl[i]); 296 } 297 298 /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */ 299 static void THIInitialize_HOM_A(THI thi, PetscReal x, PetscReal y, PrmNode *p) 300 { 301 Units units = thi->units; 302 PetscReal s = -x * PetscSinReal(thi->alpha); 303 304 p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly); 305 p->h = s - p->b; 306 p->beta2 = 1e30; 307 } 308 309 static void THIInitialize_HOM_C(THI thi, PetscReal x, PetscReal y, PrmNode *p) 310 { 311 Units units = thi->units; 312 PetscReal s = -x * PetscSinReal(thi->alpha); 313 314 p->b = s - 1000 * units->meter; 315 p->h = s - p->b; 316 /* tau_b = beta2 v is a stress (Pa) */ 317 p->beta2 = 1000 * (1 + PetscSinReal(x * 2 * PETSC_PI / thi->Lx) * PetscSinReal(y * 2 * PETSC_PI / thi->Ly)) * units->Pascal * units->year / units->meter; 318 } 319 320 /* These are just toys */ 321 322 /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */ 323 static void THIInitialize_HOM_X(THI thi, PetscReal xx, PetscReal yy, PrmNode *p) 324 { 325 Units units = thi->units; 326 PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */ 327 PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha); 328 p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 329 p->h = s - p->b; 330 p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter; 331 } 332 333 /* Like Z, but with 200 meter cliffs */ 334 static void THIInitialize_HOM_Y(THI thi, PetscReal xx, PetscReal yy, PrmNode *p) 335 { 336 Units units = thi->units; 337 PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */ 338 PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha); 339 340 p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 341 if (PetscRealPart(p->b) > -700 * units->meter) p->b += 200 * units->meter; 342 p->h = s - p->b; 343 p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter; 344 } 345 346 /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */ 347 static void THIInitialize_HOM_Z(THI thi, PetscReal xx, PetscReal yy, PrmNode *p) 348 { 349 Units units = thi->units; 350 PetscReal x = xx * 2 * PETSC_PI / thi->Lx - PETSC_PI, y = yy * 2 * PETSC_PI / thi->Ly - PETSC_PI; /* [-pi,pi] */ 351 PetscReal r = PetscSqrtReal(x * x + y * y), s = -x * PetscSinReal(thi->alpha); 352 353 p->b = s - 1000 * units->meter + 500 * units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 354 p->h = s - p->b; 355 p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16 * r)) / PetscSqrtReal(1e-2 + 16 * r) * PetscCosReal(x * 3 / 2) * PetscCosReal(y * 3 / 2)) * units->Pascal * units->year / units->meter; 356 } 357 358 static void THIFriction(THI thi, PetscReal rbeta2, PetscReal gam, PetscReal *beta2, PetscReal *dbeta2) 359 { 360 if (thi->friction.irefgam == 0) { 361 Units units = thi->units; 362 thi->friction.irefgam = 1. / (0.5 * PetscSqr(thi->friction.refvel * units->meter / units->year)); 363 thi->friction.eps2 = 0.5 * PetscSqr(thi->friction.epsvel * units->meter / units->year) * thi->friction.irefgam; 364 } 365 if (thi->friction.exponent == 0) { 366 *beta2 = rbeta2; 367 *dbeta2 = 0; 368 } else { 369 *beta2 = rbeta2 * PetscPowReal(thi->friction.eps2 + gam * thi->friction.irefgam, thi->friction.exponent); 370 *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam * thi->friction.irefgam) * thi->friction.irefgam; 371 } 372 } 373 374 static void THIViscosity(THI thi, PetscReal gam, PetscReal *eta, PetscReal *deta) 375 { 376 PetscReal Bd2, eps, exponent; 377 if (thi->viscosity.Bd2 == 0) { 378 Units units = thi->units; 379 const PetscReal n = 3., /* Glen exponent */ 380 p = 1. + 1. / n, /* for Stokes */ 381 A = 1.e-16 * PetscPowReal(units->Pascal, -n) / units->year, /* softness parameter (Pa^{-n}/s) */ 382 B = PetscPowReal(A, -1. / n); /* hardness parameter */ 383 thi->viscosity.Bd2 = B / 2; 384 thi->viscosity.exponent = (p - 2) / 2; 385 thi->viscosity.eps = 0.5 * PetscSqr(1e-5 / units->year); 386 } 387 Bd2 = thi->viscosity.Bd2; 388 exponent = thi->viscosity.exponent; 389 eps = thi->viscosity.eps; 390 *eta = Bd2 * PetscPowReal(eps + gam, exponent); 391 *deta = exponent * (*eta) / (eps + gam); 392 } 393 394 static void RangeUpdate(PetscReal *min, PetscReal *max, PetscReal x) 395 { 396 if (x < *min) *min = x; 397 if (x > *max) *max = x; 398 } 399 400 static void PRangeClear(PRange *p) 401 { 402 p->cmin = p->min = 1e100; 403 p->cmax = p->max = -1e100; 404 } 405 406 static PetscErrorCode PRangeMinMax(PRange *p, PetscReal min, PetscReal max) 407 { 408 PetscFunctionBeginUser; 409 p->cmin = min; 410 p->cmax = max; 411 if (min < p->min) p->min = min; 412 if (max > p->max) p->max = max; 413 PetscFunctionReturn(PETSC_SUCCESS); 414 } 415 416 static PetscErrorCode THIDestroy(THI *thi) 417 { 418 PetscFunctionBeginUser; 419 if (!*thi) PetscFunctionReturn(PETSC_SUCCESS); 420 if (--((PetscObject)*thi)->refct > 0) { 421 *thi = 0; 422 PetscFunctionReturn(PETSC_SUCCESS); 423 } 424 PetscCall(PetscFree((*thi)->units)); 425 PetscCall(PetscFree((*thi)->mattype)); 426 PetscCall(PetscHeaderDestroy(thi)); 427 PetscFunctionReturn(PETSC_SUCCESS); 428 } 429 430 static PetscErrorCode THICreate(MPI_Comm comm, THI *inthi) 431 { 432 static PetscBool registered = PETSC_FALSE; 433 THI thi; 434 Units units; 435 436 PetscFunctionBeginUser; 437 *inthi = 0; 438 if (!registered) { 439 PetscCall(PetscClassIdRegister("Toy Hydrostatic Ice", &THI_CLASSID)); 440 registered = PETSC_TRUE; 441 } 442 PetscCall(PetscHeaderCreate(thi, THI_CLASSID, "THI", "Toy Hydrostatic Ice", "", comm, THIDestroy, 0)); 443 444 PetscCall(PetscNew(&thi->units)); 445 units = thi->units; 446 units->meter = 1e-2; 447 units->second = 1e-7; 448 units->kilogram = 1e-12; 449 450 PetscOptionsBegin(comm, NULL, "Scaled units options", ""); 451 { 452 PetscCall(PetscOptionsReal("-units_meter", "1 meter in scaled length units", "", units->meter, &units->meter, NULL)); 453 PetscCall(PetscOptionsReal("-units_second", "1 second in scaled time units", "", units->second, &units->second, NULL)); 454 PetscCall(PetscOptionsReal("-units_kilogram", "1 kilogram in scaled mass units", "", units->kilogram, &units->kilogram, NULL)); 455 } 456 PetscOptionsEnd(); 457 units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second)); 458 units->year = 31556926. * units->second; /* seconds per year */ 459 460 thi->Lx = 10.e3; 461 thi->Ly = 10.e3; 462 thi->Lz = 1000; 463 thi->dirichlet_scale = 1; 464 thi->verbose = PETSC_FALSE; 465 466 PetscOptionsBegin(comm, NULL, "Toy Hydrostatic Ice options", ""); 467 { 468 QuadratureType quad = QUAD_GAUSS; 469 char homexp[] = "A"; 470 char mtype[256] = MATSBAIJ; 471 PetscReal L, m = 1.0; 472 PetscBool flg; 473 L = thi->Lx; 474 PetscCall(PetscOptionsReal("-thi_L", "Domain size (m)", "", L, &L, &flg)); 475 if (flg) thi->Lx = thi->Ly = L; 476 PetscCall(PetscOptionsReal("-thi_Lx", "X Domain size (m)", "", thi->Lx, &thi->Lx, NULL)); 477 PetscCall(PetscOptionsReal("-thi_Ly", "Y Domain size (m)", "", thi->Ly, &thi->Ly, NULL)); 478 PetscCall(PetscOptionsReal("-thi_Lz", "Z Domain size (m)", "", thi->Lz, &thi->Lz, NULL)); 479 PetscCall(PetscOptionsString("-thi_hom", "ISMIP-HOM experiment (A or C)", "", homexp, homexp, sizeof(homexp), NULL)); 480 switch (homexp[0] = toupper(homexp[0])) { 481 case 'A': 482 thi->initialize = THIInitialize_HOM_A; 483 thi->no_slip = PETSC_TRUE; 484 thi->alpha = 0.5; 485 break; 486 case 'C': 487 thi->initialize = THIInitialize_HOM_C; 488 thi->no_slip = PETSC_FALSE; 489 thi->alpha = 0.1; 490 break; 491 case 'X': 492 thi->initialize = THIInitialize_HOM_X; 493 thi->no_slip = PETSC_FALSE; 494 thi->alpha = 0.3; 495 break; 496 case 'Y': 497 thi->initialize = THIInitialize_HOM_Y; 498 thi->no_slip = PETSC_FALSE; 499 thi->alpha = 0.5; 500 break; 501 case 'Z': 502 thi->initialize = THIInitialize_HOM_Z; 503 thi->no_slip = PETSC_FALSE; 504 thi->alpha = 0.5; 505 break; 506 default: 507 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "HOM experiment '%c' not implemented", homexp[0]); 508 } 509 PetscCall(PetscOptionsEnum("-thi_quadrature", "Quadrature to use for 3D elements", "", QuadratureTypes, (PetscEnum)quad, (PetscEnum *)&quad, NULL)); 510 switch (quad) { 511 case QUAD_GAUSS: 512 HexQInterp = HexQInterp_Gauss; 513 HexQDeriv = HexQDeriv_Gauss; 514 break; 515 case QUAD_LOBATTO: 516 HexQInterp = HexQInterp_Lobatto; 517 HexQDeriv = HexQDeriv_Lobatto; 518 break; 519 } 520 PetscCall(PetscOptionsReal("-thi_alpha", "Bed angle (degrees)", "", thi->alpha, &thi->alpha, NULL)); 521 522 thi->friction.refvel = 100.; 523 thi->friction.epsvel = 1.; 524 525 PetscCall(PetscOptionsReal("-thi_friction_refvel", "Reference velocity for sliding", "", thi->friction.refvel, &thi->friction.refvel, NULL)); 526 PetscCall(PetscOptionsReal("-thi_friction_epsvel", "Regularization velocity for sliding", "", thi->friction.epsvel, &thi->friction.epsvel, NULL)); 527 PetscCall(PetscOptionsReal("-thi_friction_m", "Friction exponent, 0=Coulomb, 1=Navier", "", m, &m, NULL)); 528 529 thi->friction.exponent = (m - 1) / 2; 530 531 PetscCall(PetscOptionsReal("-thi_dirichlet_scale", "Scale Dirichlet boundary conditions by this factor", "", thi->dirichlet_scale, &thi->dirichlet_scale, NULL)); 532 PetscCall(PetscOptionsReal("-thi_ssa_friction_scale", "Scale slip boundary conditions by this factor in SSA (2D) assembly", "", thi->ssa_friction_scale, &thi->ssa_friction_scale, NULL)); 533 PetscCall(PetscOptionsBool("-thi_coarse2d", "Use a 2D coarse space corresponding to SSA", "", thi->coarse2d, &thi->coarse2d, NULL)); 534 PetscCall(PetscOptionsBool("-thi_tridiagonal", "Assemble a tridiagonal system (column coupling only) on the finest level", "", thi->tridiagonal, &thi->tridiagonal, NULL)); 535 PetscCall(PetscOptionsFList("-thi_mat_type", "Matrix type", "MatSetType", MatList, mtype, (char *)mtype, sizeof(mtype), NULL)); 536 PetscCall(PetscStrallocpy(mtype, (char **)&thi->mattype)); 537 PetscCall(PetscOptionsBool("-thi_verbose", "Enable verbose output (like matrix sizes and statistics)", "", thi->verbose, &thi->verbose, NULL)); 538 } 539 PetscOptionsEnd(); 540 541 /* dimensionalize */ 542 thi->Lx *= units->meter; 543 thi->Ly *= units->meter; 544 thi->Lz *= units->meter; 545 thi->alpha *= PETSC_PI / 180; 546 547 PRangeClear(&thi->eta); 548 PRangeClear(&thi->beta2); 549 550 { 551 PetscReal u = 1000 * units->meter / (3e7 * units->second), gradu = u / (100 * units->meter), eta, deta, rho = 910 * units->kilogram / PetscPowReal(units->meter, 3), grav = 9.81 * units->meter / PetscSqr(units->second), 552 driving = rho * grav * PetscSinReal(thi->alpha) * 1000 * units->meter; 553 THIViscosity(thi, 0.5 * gradu * gradu, &eta, &deta); 554 thi->rhog = rho * grav; 555 if (thi->verbose) { 556 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Units: meter %8.2g second %8.2g kg %8.2g Pa %8.2g\n", (double)units->meter, (double)units->second, (double)units->kilogram, (double)units->Pascal)); 557 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n", (double)thi->Lx, (double)thi->Ly, (double)thi->Lz, (double)(rho * grav * 1e3 * units->meter), (double)driving)); 558 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)u, (double)gradu, (double)eta, (double)(2 * eta * gradu), (double)(2 * eta * gradu / driving))); 559 THIViscosity(thi, 0.5 * PetscSqr(1e-3 * gradu), &eta, &deta); 560 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Small velocity 1m/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n", (double)(1e-3 * u), (double)(1e-3 * gradu), (double)eta, (double)(2 * eta * 1e-3 * gradu), (double)(2 * eta * 1e-3 * gradu / driving))); 561 } 562 } 563 564 *inthi = thi; 565 PetscFunctionReturn(PETSC_SUCCESS); 566 } 567 568 static PetscErrorCode THIInitializePrm(THI thi, DM da2prm, Vec prm) 569 { 570 PrmNode **p; 571 PetscInt i, j, xs, xm, ys, ym, mx, my; 572 573 PetscFunctionBeginUser; 574 PetscCall(DMDAGetGhostCorners(da2prm, &ys, &xs, 0, &ym, &xm, 0)); 575 PetscCall(DMDAGetInfo(da2prm, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); 576 PetscCall(DMDAVecGetArray(da2prm, prm, &p)); 577 for (i = xs; i < xs + xm; i++) { 578 for (j = ys; j < ys + ym; j++) { 579 PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my; 580 thi->initialize(thi, xx, yy, &p[i][j]); 581 } 582 } 583 PetscCall(DMDAVecRestoreArray(da2prm, prm, &p)); 584 PetscFunctionReturn(PETSC_SUCCESS); 585 } 586 587 static PetscErrorCode THISetUpDM(THI thi, DM dm) 588 { 589 PetscInt refinelevel, coarsenlevel, level, dim, Mx, My, Mz, mx, my, s; 590 DMDAStencilType st; 591 DM da2prm; 592 Vec X; 593 594 PetscFunctionBeginUser; 595 PetscCall(DMDAGetInfo(dm, &dim, &Mz, &My, &Mx, 0, &my, &mx, 0, &s, 0, 0, 0, &st)); 596 if (dim == 2) PetscCall(DMDAGetInfo(dm, &dim, &My, &Mx, 0, &my, &mx, 0, 0, &s, 0, 0, 0, &st)); 597 PetscCall(DMGetRefineLevel(dm, &refinelevel)); 598 PetscCall(DMGetCoarsenLevel(dm, &coarsenlevel)); 599 level = refinelevel - coarsenlevel; 600 PetscCall(DMDACreate2d(PetscObjectComm((PetscObject)thi), DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, My, Mx, my, mx, sizeof(PrmNode) / sizeof(PetscScalar), s, 0, 0, &da2prm)); 601 PetscCall(DMSetUp(da2prm)); 602 PetscCall(DMCreateLocalVector(da2prm, &X)); 603 { 604 PetscReal Lx = thi->Lx / thi->units->meter, Ly = thi->Ly / thi->units->meter, Lz = thi->Lz / thi->units->meter; 605 if (dim == 2) { 606 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g, num elements %" PetscInt_FMT " x %" PetscInt_FMT " (%" PetscInt_FMT "), size (m) %g x %g\n", level, (double)Lx, (double)Ly, Mx, My, Mx * My, (double)(Lx / Mx), (double)(Ly / My))); 607 } else { 608 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi), "Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g x %8.2g, num elements %" PetscInt_FMT " x %" PetscInt_FMT " x %" PetscInt_FMT " (%" PetscInt_FMT "), size (m) %g x %g x %g\n", level, (double)Lx, (double)Ly, (double)Lz, Mx, My, Mz, Mx * My * Mz, (double)(Lx / Mx), (double)(Ly / My), (double)(1000. / (Mz - 1)))); 609 } 610 } 611 PetscCall(THIInitializePrm(thi, da2prm, X)); 612 if (thi->tridiagonal) { /* Reset coarse Jacobian evaluation */ 613 PetscCall(DMDASNESSetJacobianLocal(dm, (DMDASNESJacobianFn *)THIJacobianLocal_3D_Full, thi)); 614 } 615 if (thi->coarse2d) PetscCall(DMDASNESSetJacobianLocal(dm, (DMDASNESJacobianFn *)THIJacobianLocal_2D, thi)); 616 PetscCall(PetscObjectCompose((PetscObject)dm, "DMDA2Prm", (PetscObject)da2prm)); 617 PetscCall(PetscObjectCompose((PetscObject)dm, "DMDA2Prm_Vec", (PetscObject)X)); 618 PetscCall(DMDestroy(&da2prm)); 619 PetscCall(VecDestroy(&X)); 620 PetscFunctionReturn(PETSC_SUCCESS); 621 } 622 623 static PetscErrorCode DMCoarsenHook_THI(DM dmf, DM dmc, void *ctx) 624 { 625 THI thi = (THI)ctx; 626 PetscInt rlevel, clevel; 627 628 PetscFunctionBeginUser; 629 PetscCall(THISetUpDM(thi, dmc)); 630 PetscCall(DMGetRefineLevel(dmc, &rlevel)); 631 PetscCall(DMGetCoarsenLevel(dmc, &clevel)); 632 if (rlevel - clevel == 0) PetscCall(DMSetMatType(dmc, MATAIJ)); 633 PetscCall(DMCoarsenHookAdd(dmc, DMCoarsenHook_THI, NULL, thi)); 634 PetscFunctionReturn(PETSC_SUCCESS); 635 } 636 637 static PetscErrorCode DMRefineHook_THI(DM dmc, DM dmf, void *ctx) 638 { 639 THI thi = (THI)ctx; 640 641 PetscFunctionBeginUser; 642 PetscCall(THISetUpDM(thi, dmf)); 643 PetscCall(DMSetMatType(dmf, thi->mattype)); 644 PetscCall(DMRefineHookAdd(dmf, DMRefineHook_THI, NULL, thi)); 645 /* With grid sequencing, a formerly-refined DM will later be coarsened by PCSetUp_MG */ 646 PetscCall(DMCoarsenHookAdd(dmf, DMCoarsenHook_THI, NULL, thi)); 647 PetscFunctionReturn(PETSC_SUCCESS); 648 } 649 650 static PetscErrorCode THIDAGetPrm(DM da, PrmNode ***prm) 651 { 652 DM da2prm; 653 Vec X; 654 655 PetscFunctionBeginUser; 656 PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm", (PetscObject *)&da2prm)); 657 PetscCheck(da2prm, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm composed with given DMDA"); 658 PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm_Vec", (PetscObject *)&X)); 659 PetscCheck(X, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm_Vec composed with given DMDA"); 660 PetscCall(DMDAVecGetArray(da2prm, X, prm)); 661 PetscFunctionReturn(PETSC_SUCCESS); 662 } 663 664 static PetscErrorCode THIDARestorePrm(DM da, PrmNode ***prm) 665 { 666 DM da2prm; 667 Vec X; 668 669 PetscFunctionBeginUser; 670 PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm", (PetscObject *)&da2prm)); 671 PetscCheck(da2prm, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm composed with given DMDA"); 672 PetscCall(PetscObjectQuery((PetscObject)da, "DMDA2Prm_Vec", (PetscObject *)&X)); 673 PetscCheck(X, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "No DMDA2Prm_Vec composed with given DMDA"); 674 PetscCall(DMDAVecRestoreArray(da2prm, X, prm)); 675 PetscFunctionReturn(PETSC_SUCCESS); 676 } 677 678 static PetscErrorCode THIInitial(SNES snes, Vec X, void *ctx) 679 { 680 THI thi; 681 PetscInt i, j, k, xs, xm, ys, ym, zs, zm, mx, my; 682 PetscReal hx, hy; 683 PrmNode **prm; 684 Node ***x; 685 DM da; 686 687 PetscFunctionBeginUser; 688 PetscCall(SNESGetDM(snes, &da)); 689 PetscCall(DMGetApplicationContext(da, &thi)); 690 PetscCall(DMDAGetInfo(da, 0, 0, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0)); 691 PetscCall(DMDAGetCorners(da, &zs, &ys, &xs, &zm, &ym, &xm)); 692 PetscCall(DMDAVecGetArray(da, X, &x)); 693 PetscCall(THIDAGetPrm(da, &prm)); 694 hx = thi->Lx / mx; 695 hy = thi->Ly / my; 696 for (i = xs; i < xs + xm; i++) { 697 for (j = ys; j < ys + ym; j++) { 698 for (k = zs; k < zs + zm; k++) { 699 const PetscScalar zm1 = zm - 1, drivingx = thi->rhog * (prm[i + 1][j].b + prm[i + 1][j].h - prm[i - 1][j].b - prm[i - 1][j].h) / (2 * hx), drivingy = thi->rhog * (prm[i][j + 1].b + prm[i][j + 1].h - prm[i][j - 1].b - prm[i][j - 1].h) / (2 * hy); 700 x[i][j][k].u = 0. * drivingx * prm[i][j].h * (PetscScalar)k / zm1; 701 x[i][j][k].v = 0. * drivingy * prm[i][j].h * (PetscScalar)k / zm1; 702 } 703 } 704 } 705 PetscCall(DMDAVecRestoreArray(da, X, &x)); 706 PetscCall(THIDARestorePrm(da, &prm)); 707 PetscFunctionReturn(PETSC_SUCCESS); 708 } 709 710 static void PointwiseNonlinearity(THI thi, const Node n[PETSC_RESTRICT], const PetscReal phi[PETSC_RESTRICT], PetscReal dphi[PETSC_RESTRICT][3], PetscScalar *PETSC_RESTRICT u, PetscScalar *PETSC_RESTRICT v, PetscScalar du[PETSC_RESTRICT], PetscScalar dv[PETSC_RESTRICT], PetscReal *eta, PetscReal *deta) 711 { 712 PetscInt l, ll; 713 PetscScalar gam; 714 715 du[0] = du[1] = du[2] = 0; 716 dv[0] = dv[1] = dv[2] = 0; 717 *u = 0; 718 *v = 0; 719 for (l = 0; l < 8; l++) { 720 *u += phi[l] * n[l].u; 721 *v += phi[l] * n[l].v; 722 for (ll = 0; ll < 3; ll++) { 723 du[ll] += dphi[l][ll] * n[l].u; 724 dv[ll] += dphi[l][ll] * n[l].v; 725 } 726 } 727 gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0] * dv[1] + 0.25 * PetscSqr(du[1] + dv[0]) + 0.25 * PetscSqr(du[2]) + 0.25 * PetscSqr(dv[2]); 728 THIViscosity(thi, PetscRealPart(gam), eta, deta); 729 } 730 731 static void PointwiseNonlinearity2D(THI thi, Node n[], PetscReal phi[], PetscReal dphi[4][2], PetscScalar *u, PetscScalar *v, PetscScalar du[], PetscScalar dv[], PetscReal *eta, PetscReal *deta) 732 { 733 PetscInt l, ll; 734 PetscScalar gam; 735 736 du[0] = du[1] = 0; 737 dv[0] = dv[1] = 0; 738 *u = 0; 739 *v = 0; 740 for (l = 0; l < 4; l++) { 741 *u += phi[l] * n[l].u; 742 *v += phi[l] * n[l].v; 743 for (ll = 0; ll < 2; ll++) { 744 du[ll] += dphi[l][ll] * n[l].u; 745 dv[ll] += dphi[l][ll] * n[l].v; 746 } 747 } 748 gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0] * dv[1] + 0.25 * PetscSqr(du[1] + dv[0]); 749 THIViscosity(thi, PetscRealPart(gam), eta, deta); 750 } 751 752 static PetscErrorCode THIFunctionLocal(DMDALocalInfo *info, Node ***x, Node ***f, THI thi) 753 { 754 PetscInt xs, ys, xm, ym, zm, i, j, k, q, l; 755 PetscReal hx, hy, etamin, etamax, beta2min, beta2max; 756 PrmNode **prm; 757 758 PetscFunctionBeginUser; 759 xs = info->zs; 760 ys = info->ys; 761 xm = info->zm; 762 ym = info->ym; 763 zm = info->xm; 764 hx = thi->Lx / info->mz; 765 hy = thi->Ly / info->my; 766 767 etamin = 1e100; 768 etamax = 0; 769 beta2min = 1e100; 770 beta2max = 0; 771 772 PetscCall(THIDAGetPrm(info->da, &prm)); 773 774 for (i = xs; i < xs + xm; i++) { 775 for (j = ys; j < ys + ym; j++) { 776 PrmNode pn[4]; 777 QuadExtract(prm, i, j, pn); 778 for (k = 0; k < zm - 1; k++) { 779 PetscInt ls = 0; 780 Node n[8], *fn[8]; 781 PetscReal zn[8], etabase = 0; 782 PrmHexGetZ(pn, k, zm, zn); 783 HexExtract(x, i, j, k, n); 784 HexExtractRef(f, i, j, k, fn); 785 if (thi->no_slip && k == 0) { 786 for (l = 0; l < 4; l++) n[l].u = n[l].v = 0; 787 /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */ 788 ls = 4; 789 } 790 for (q = 0; q < 8; q++) { 791 PetscReal dz[3], phi[8], dphi[8][3], jw, eta, deta; 792 PetscScalar du[3], dv[3], u, v; 793 HexGrad(HexQDeriv[q], zn, dz); 794 HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw); 795 PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta); 796 jw /= thi->rhog; /* scales residuals to be O(1) */ 797 if (q == 0) etabase = eta; 798 RangeUpdate(&etamin, &etamax, eta); 799 for (l = ls; l < 8; l++) { /* test functions */ 800 const PetscReal ds[2] = {-PetscSinReal(thi->alpha), 0}; 801 const PetscReal pp = phi[l], *dp = dphi[l]; 802 fn[l]->u += dp[0] * jw * eta * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * du[2] + pp * jw * thi->rhog * ds[0]; 803 fn[l]->v += dp[1] * jw * eta * (2. * du[0] + 4. * dv[1]) + dp[0] * jw * eta * (du[1] + dv[0]) + dp[2] * jw * eta * dv[2] + pp * jw * thi->rhog * ds[1]; 804 } 805 } 806 if (k == 0) { /* we are on a bottom face */ 807 if (thi->no_slip) { 808 /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary 809 * conditions. After shenanigans above, etabase contains the effective viscosity at the closest quadrature 810 * point to the bed. We want the diagonal entry in the Dirichlet condition to have similar magnitude to the 811 * diagonal entry corresponding to the adjacent node. The fundamental scaling of the viscous part is in 812 * diagu, diagv below. This scaling is easy to recognize by considering the finite difference operator after 813 * scaling by element size. The no-slip Dirichlet condition is scaled by this factor, and also in the 814 * assembled matrix (see the similar block in THIJacobianLocal). 815 * 816 * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends 817 * (nonlinearly in general) on the neighboring interior nodes through the local viscosity. This will make 818 * a matrix-free Jacobian have extra entries in the corresponding row. We assemble only the diagonal part, 819 * so the solution will exactly satisfy the boundary condition after the first linear iteration. 820 */ 821 const PetscReal hz = PetscRealPart(pn[0].h) / (zm - 1.); 822 const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx); 823 fn[0]->u = thi->dirichlet_scale * diagu * x[i][j][k].u; 824 fn[0]->v = thi->dirichlet_scale * diagv * x[i][j][k].v; 825 } else { /* Integrate over bottom face to apply boundary condition */ 826 for (q = 0; q < 4; q++) { 827 const PetscReal jw = 0.25 * hx * hy / thi->rhog, *phi = QuadQInterp[q]; 828 PetscScalar u = 0, v = 0, rbeta2 = 0; 829 PetscReal beta2, dbeta2; 830 for (l = 0; l < 4; l++) { 831 u += phi[l] * n[l].u; 832 v += phi[l] * n[l].v; 833 rbeta2 += phi[l] * pn[l].beta2; 834 } 835 THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2); 836 RangeUpdate(&beta2min, &beta2max, beta2); 837 for (l = 0; l < 4; l++) { 838 const PetscReal pp = phi[l]; 839 fn[ls + l]->u += pp * jw * beta2 * u; 840 fn[ls + l]->v += pp * jw * beta2 * v; 841 } 842 } 843 } 844 } 845 } 846 } 847 } 848 849 PetscCall(THIDARestorePrm(info->da, &prm)); 850 851 PetscCall(PRangeMinMax(&thi->eta, etamin, etamax)); 852 PetscCall(PRangeMinMax(&thi->beta2, beta2min, beta2max)); 853 PetscFunctionReturn(PETSC_SUCCESS); 854 } 855 856 static PetscErrorCode THIMatrixStatistics(THI thi, Mat B, PetscViewer viewer) 857 { 858 PetscReal nrm; 859 PetscInt m; 860 PetscMPIInt rank; 861 862 PetscFunctionBeginUser; 863 PetscCall(MatNorm(B, NORM_FROBENIUS, &nrm)); 864 PetscCall(MatGetSize(B, &m, 0)); 865 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &rank)); 866 if (rank == 0) { 867 PetscScalar val0, val2; 868 PetscCall(MatGetValue(B, 0, 0, &val0)); 869 PetscCall(MatGetValue(B, 2, 2, &val2)); 870 PetscCall(PetscViewerASCIIPrintf(viewer, "Matrix dim %" PetscInt_FMT " norm %8.2e (0,0) %8.2e (2,2) %8.2e %8.2e <= eta <= %8.2e %8.2e <= beta2 <= %8.2e\n", m, (double)nrm, (double)PetscRealPart(val0), (double)PetscRealPart(val2), 871 (double)thi->eta.cmin, (double)thi->eta.cmax, (double)thi->beta2.cmin, (double)thi->beta2.cmax)); 872 } 873 PetscFunctionReturn(PETSC_SUCCESS); 874 } 875 876 static PetscErrorCode THISurfaceStatistics(DM da, Vec X, PetscReal *min, PetscReal *max, PetscReal *mean) 877 { 878 Node ***x; 879 PetscInt i, j, xs, ys, zs, xm, ym, zm, mx, my, mz; 880 PetscReal umin = 1e100, umax = -1e100; 881 PetscScalar usum = 0.0, gusum; 882 883 PetscFunctionBeginUser; 884 *min = *max = *mean = 0; 885 PetscCall(DMDAGetInfo(da, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0)); 886 PetscCall(DMDAGetCorners(da, &zs, &ys, &xs, &zm, &ym, &xm)); 887 PetscCheck(zs == 0 && zm == mz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected decomposition"); 888 PetscCall(DMDAVecGetArray(da, X, &x)); 889 for (i = xs; i < xs + xm; i++) { 890 for (j = ys; j < ys + ym; j++) { 891 PetscReal u = PetscRealPart(x[i][j][zm - 1].u); 892 RangeUpdate(&umin, &umax, u); 893 usum += u; 894 } 895 } 896 PetscCall(DMDAVecRestoreArray(da, X, &x)); 897 PetscCall(MPIU_Allreduce(&umin, min, 1, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)da))); 898 PetscCall(MPIU_Allreduce(&umax, max, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)da))); 899 PetscCall(MPIU_Allreduce(&usum, &gusum, 1, MPIU_SCALAR, MPIU_SUM, PetscObjectComm((PetscObject)da))); 900 *mean = PetscRealPart(gusum) / (mx * my); 901 PetscFunctionReturn(PETSC_SUCCESS); 902 } 903 904 static PetscErrorCode THISolveStatistics(THI thi, SNES snes, PetscInt coarsened, const char name[]) 905 { 906 MPI_Comm comm; 907 Vec X; 908 DM dm; 909 910 PetscFunctionBeginUser; 911 PetscCall(PetscObjectGetComm((PetscObject)thi, &comm)); 912 PetscCall(SNESGetSolution(snes, &X)); 913 PetscCall(SNESGetDM(snes, &dm)); 914 PetscCall(PetscPrintf(comm, "Solution statistics after solve: %s\n", name)); 915 { 916 PetscInt its, lits; 917 SNESConvergedReason reason; 918 PetscCall(SNESGetIterationNumber(snes, &its)); 919 PetscCall(SNESGetConvergedReason(snes, &reason)); 920 PetscCall(SNESGetLinearSolveIterations(snes, &lits)); 921 PetscCall(PetscPrintf(comm, "%s: Number of SNES iterations = %" PetscInt_FMT ", total linear iterations = %" PetscInt_FMT "\n", SNESConvergedReasons[reason], its, lits)); 922 } 923 { 924 PetscReal nrm2, tmin[3] = {1e100, 1e100, 1e100}, tmax[3] = {-1e100, -1e100, -1e100}, min[3], max[3]; 925 PetscInt i, j, m; 926 const PetscScalar *x; 927 PetscCall(VecNorm(X, NORM_2, &nrm2)); 928 PetscCall(VecGetLocalSize(X, &m)); 929 PetscCall(VecGetArrayRead(X, &x)); 930 for (i = 0; i < m; i += 2) { 931 PetscReal u = PetscRealPart(x[i]), v = PetscRealPart(x[i + 1]), c = PetscSqrtReal(u * u + v * v); 932 tmin[0] = PetscMin(u, tmin[0]); 933 tmin[1] = PetscMin(v, tmin[1]); 934 tmin[2] = PetscMin(c, tmin[2]); 935 tmax[0] = PetscMax(u, tmax[0]); 936 tmax[1] = PetscMax(v, tmax[1]); 937 tmax[2] = PetscMax(c, tmax[2]); 938 } 939 PetscCall(VecRestoreArrayRead(X, &x)); 940 PetscCall(MPIU_Allreduce(tmin, min, 3, MPIU_REAL, MPIU_MIN, PetscObjectComm((PetscObject)thi))); 941 PetscCall(MPIU_Allreduce(tmax, max, 3, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)thi))); 942 /* Dimensionalize to meters/year */ 943 nrm2 *= thi->units->year / thi->units->meter; 944 for (j = 0; j < 3; j++) { 945 min[j] *= thi->units->year / thi->units->meter; 946 max[j] *= thi->units->year / thi->units->meter; 947 } 948 if (min[0] == 0.0) min[0] = 0.0; 949 PetscCall(PetscPrintf(comm, "|X|_2 %g %g <= u <= %g %g <= v <= %g %g <= c <= %g \n", (double)nrm2, (double)min[0], (double)max[0], (double)min[1], (double)max[1], (double)min[2], (double)max[2])); 950 { 951 PetscReal umin, umax, umean; 952 PetscCall(THISurfaceStatistics(dm, X, &umin, &umax, &umean)); 953 umin *= thi->units->year / thi->units->meter; 954 umax *= thi->units->year / thi->units->meter; 955 umean *= thi->units->year / thi->units->meter; 956 PetscCall(PetscPrintf(comm, "Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n", (double)umin, (double)umax, (double)umean)); 957 } 958 /* These values stay nondimensional */ 959 PetscCall(PetscPrintf(comm, "Global eta range %g to %g converged range %g to %g\n", (double)thi->eta.min, (double)thi->eta.max, (double)thi->eta.cmin, (double)thi->eta.cmax)); 960 PetscCall(PetscPrintf(comm, "Global beta2 range %g to %g converged range %g to %g\n", (double)thi->beta2.min, (double)thi->beta2.max, (double)thi->beta2.cmin, (double)thi->beta2.cmax)); 961 } 962 PetscFunctionReturn(PETSC_SUCCESS); 963 } 964 965 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info, Node **x, Mat J, Mat B, THI thi) 966 { 967 PetscInt xs, ys, xm, ym, i, j, q, l, ll; 968 PetscReal hx, hy; 969 PrmNode **prm; 970 971 PetscFunctionBeginUser; 972 xs = info->ys; 973 ys = info->xs; 974 xm = info->ym; 975 ym = info->xm; 976 hx = thi->Lx / info->my; 977 hy = thi->Ly / info->mx; 978 979 PetscCall(MatZeroEntries(B)); 980 PetscCall(THIDAGetPrm(info->da, &prm)); 981 982 for (i = xs; i < xs + xm; i++) { 983 for (j = ys; j < ys + ym; j++) { 984 Node n[4]; 985 PrmNode pn[4]; 986 PetscScalar Ke[4 * 2][4 * 2]; 987 QuadExtract(prm, i, j, pn); 988 QuadExtract(x, i, j, n); 989 PetscCall(PetscMemzero(Ke, sizeof(Ke))); 990 for (q = 0; q < 4; q++) { 991 PetscReal phi[4], dphi[4][2], jw, eta, deta, beta2, dbeta2; 992 PetscScalar u, v, du[2], dv[2], h = 0, rbeta2 = 0; 993 for (l = 0; l < 4; l++) { 994 phi[l] = QuadQInterp[q][l]; 995 dphi[l][0] = QuadQDeriv[q][l][0] * 2. / hx; 996 dphi[l][1] = QuadQDeriv[q][l][1] * 2. / hy; 997 h += phi[l] * pn[l].h; 998 rbeta2 += phi[l] * pn[l].beta2; 999 } 1000 jw = 0.25 * hx * hy / thi->rhog; /* rhog is only scaling */ 1001 PointwiseNonlinearity2D(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta); 1002 THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2); 1003 for (l = 0; l < 4; l++) { 1004 const PetscReal pp = phi[l], *dp = dphi[l]; 1005 for (ll = 0; ll < 4; ll++) { 1006 const PetscReal ppl = phi[ll], *dpl = dphi[ll]; 1007 PetscScalar dgdu, dgdv; 1008 dgdu = 2. * du[0] * dpl[0] + dv[1] * dpl[0] + 0.5 * (du[1] + dv[0]) * dpl[1]; 1009 dgdv = 2. * dv[1] * dpl[1] + du[0] * dpl[1] + 0.5 * (du[1] + dv[0]) * dpl[0]; 1010 /* Picard part */ 1011 Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * eta * 4. * dpl[0] + dp[1] * jw * eta * dpl[1] + pp * jw * (beta2 / h) * ppl * thi->ssa_friction_scale; 1012 Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * eta * 2. * dpl[1] + dp[1] * jw * eta * dpl[0]; 1013 Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * eta * 2. * dpl[0] + dp[0] * jw * eta * dpl[1]; 1014 Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * eta * 4. * dpl[1] + dp[0] * jw * eta * dpl[0] + pp * jw * (beta2 / h) * ppl * thi->ssa_friction_scale; 1015 /* extra Newton terms */ 1016 Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * deta * dgdu * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdu * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * u * u * ppl * thi->ssa_friction_scale; 1017 Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * deta * dgdv * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdv * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * u * v * ppl * thi->ssa_friction_scale; 1018 Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * deta * dgdu * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdu * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * v * u * ppl * thi->ssa_friction_scale; 1019 Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * deta * dgdv * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdv * (du[1] + dv[0]) + pp * jw * (dbeta2 / h) * v * v * ppl * thi->ssa_friction_scale; 1020 } 1021 } 1022 } 1023 { 1024 const MatStencil rc[4] = { 1025 {0, i, j, 0}, 1026 {0, i + 1, j, 0}, 1027 {0, i + 1, j + 1, 0}, 1028 {0, i, j + 1, 0} 1029 }; 1030 PetscCall(MatSetValuesBlockedStencil(B, 4, rc, 4, rc, &Ke[0][0], ADD_VALUES)); 1031 } 1032 } 1033 } 1034 PetscCall(THIDARestorePrm(info->da, &prm)); 1035 1036 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1037 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1038 PetscCall(MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE)); 1039 if (thi->verbose) PetscCall(THIMatrixStatistics(thi, B, PETSC_VIEWER_STDOUT_WORLD)); 1040 PetscFunctionReturn(PETSC_SUCCESS); 1041 } 1042 1043 static PetscErrorCode THIJacobianLocal_3D(DMDALocalInfo *info, Node ***x, Mat B, THI thi, THIAssemblyMode amode) 1044 { 1045 PetscInt xs, ys, xm, ym, zm, i, j, k, q, l, ll; 1046 PetscReal hx, hy; 1047 PrmNode **prm; 1048 1049 PetscFunctionBeginUser; 1050 xs = info->zs; 1051 ys = info->ys; 1052 xm = info->zm; 1053 ym = info->ym; 1054 zm = info->xm; 1055 hx = thi->Lx / info->mz; 1056 hy = thi->Ly / info->my; 1057 1058 PetscCall(MatZeroEntries(B)); 1059 PetscCall(MatSetOption(B, MAT_SUBSET_OFF_PROC_ENTRIES, PETSC_TRUE)); 1060 PetscCall(THIDAGetPrm(info->da, &prm)); 1061 1062 for (i = xs; i < xs + xm; i++) { 1063 for (j = ys; j < ys + ym; j++) { 1064 PrmNode pn[4]; 1065 QuadExtract(prm, i, j, pn); 1066 for (k = 0; k < zm - 1; k++) { 1067 Node n[8]; 1068 PetscReal zn[8], etabase = 0; 1069 PetscScalar Ke[8 * 2][8 * 2]; 1070 PetscInt ls = 0; 1071 1072 PrmHexGetZ(pn, k, zm, zn); 1073 HexExtract(x, i, j, k, n); 1074 PetscCall(PetscMemzero(Ke, sizeof(Ke))); 1075 if (thi->no_slip && k == 0) { 1076 for (l = 0; l < 4; l++) n[l].u = n[l].v = 0; 1077 ls = 4; 1078 } 1079 for (q = 0; q < 8; q++) { 1080 PetscReal dz[3], phi[8], dphi[8][3], jw, eta, deta; 1081 PetscScalar du[3], dv[3], u, v; 1082 HexGrad(HexQDeriv[q], zn, dz); 1083 HexComputeGeometry(q, hx, hy, dz, phi, dphi, &jw); 1084 PointwiseNonlinearity(thi, n, phi, dphi, &u, &v, du, dv, &eta, &deta); 1085 jw /= thi->rhog; /* residuals are scaled by this factor */ 1086 if (q == 0) etabase = eta; 1087 for (l = ls; l < 8; l++) { /* test functions */ 1088 const PetscReal *PETSC_RESTRICT dp = dphi[l]; 1089 #if USE_SSE2_KERNELS 1090 /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */ 1091 __m128d p4 = _mm_set1_pd(4), p2 = _mm_set1_pd(2), p05 = _mm_set1_pd(0.5), p42 = _mm_setr_pd(4, 2), p24 = _mm_shuffle_pd(p42, p42, _MM_SHUFFLE2(0, 1)), du0 = _mm_set1_pd(du[0]), du1 = _mm_set1_pd(du[1]), du2 = _mm_set1_pd(du[2]), dv0 = _mm_set1_pd(dv[0]), dv1 = _mm_set1_pd(dv[1]), dv2 = _mm_set1_pd(dv[2]), jweta = _mm_set1_pd(jw * eta), jwdeta = _mm_set1_pd(jw * deta), dp0 = _mm_set1_pd(dp[0]), dp1 = _mm_set1_pd(dp[1]), dp2 = _mm_set1_pd(dp[2]), dp0jweta = _mm_mul_pd(dp0, jweta), dp1jweta = _mm_mul_pd(dp1, jweta), dp2jweta = _mm_mul_pd(dp2, jweta), p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4, du0), _mm_mul_pd(p2, dv1)), /* 4 du0 + 2 dv1 */ 1092 p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4, dv1), _mm_mul_pd(p2, du0)), /* 4 dv1 + 2 du0 */ 1093 pdu2dv2 = _mm_unpacklo_pd(du2, dv2), /* [du2, dv2] */ 1094 du1pdv0 = _mm_add_pd(du1, dv0), /* du1 + dv0 */ 1095 t1 = _mm_mul_pd(dp0, p4du0p2dv1), /* dp0 (4 du0 + 2 dv1) */ 1096 t2 = _mm_mul_pd(dp1, p4dv1p2du0); /* dp1 (4 dv1 + 2 du0) */ 1097 1098 #endif 1099 #if defined COMPUTE_LOWER_TRIANGULAR /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */ 1100 for (ll = ls; ll < 8; ll++) { /* trial functions */ 1101 #else 1102 for (ll = l; ll < 8; ll++) { 1103 #endif 1104 const PetscReal *PETSC_RESTRICT dpl = dphi[ll]; 1105 if (amode == THIASSEMBLY_TRIDIAGONAL && (l - ll) % 4) continue; /* these entries would not be inserted */ 1106 #if !USE_SSE2_KERNELS 1107 /* The analytic Jacobian in nice, easy-to-read form */ 1108 { 1109 PetscScalar dgdu, dgdv; 1110 dgdu = 2. * du[0] * dpl[0] + dv[1] * dpl[0] + 0.5 * (du[1] + dv[0]) * dpl[1] + 0.5 * du[2] * dpl[2]; 1111 dgdv = 2. * dv[1] * dpl[1] + du[0] * dpl[1] + 0.5 * (du[1] + dv[0]) * dpl[0] + 0.5 * dv[2] * dpl[2]; 1112 /* Picard part */ 1113 Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * eta * 4. * dpl[0] + dp[1] * jw * eta * dpl[1] + dp[2] * jw * eta * dpl[2]; 1114 Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * eta * 2. * dpl[1] + dp[1] * jw * eta * dpl[0]; 1115 Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * eta * 2. * dpl[0] + dp[0] * jw * eta * dpl[1]; 1116 Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * eta * 4. * dpl[1] + dp[0] * jw * eta * dpl[0] + dp[2] * jw * eta * dpl[2]; 1117 /* extra Newton terms */ 1118 Ke[l * 2 + 0][ll * 2 + 0] += dp[0] * jw * deta * dgdu * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * du[2]; 1119 Ke[l * 2 + 0][ll * 2 + 1] += dp[0] * jw * deta * dgdv * (4. * du[0] + 2. * dv[1]) + dp[1] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * du[2]; 1120 Ke[l * 2 + 1][ll * 2 + 0] += dp[1] * jw * deta * dgdu * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdu * (du[1] + dv[0]) + dp[2] * jw * deta * dgdu * dv[2]; 1121 Ke[l * 2 + 1][ll * 2 + 1] += dp[1] * jw * deta * dgdv * (4. * dv[1] + 2. * du[0]) + dp[0] * jw * deta * dgdv * (du[1] + dv[0]) + dp[2] * jw * deta * dgdv * dv[2]; 1122 } 1123 #else 1124 /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed 1125 * benefit. On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost 1126 * by 25 to 30 percent. */ 1127 { 1128 __m128d keu = _mm_loadu_pd(&Ke[l * 2 + 0][ll * 2 + 0]), kev = _mm_loadu_pd(&Ke[l * 2 + 1][ll * 2 + 0]), dpl01 = _mm_loadu_pd(&dpl[0]), dpl10 = _mm_shuffle_pd(dpl01, dpl01, _MM_SHUFFLE2(0, 1)), dpl2 = _mm_set_sd(dpl[2]), t0, t3, pdgduv; 1129 keu = _mm_add_pd(keu, _mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta, p42), dpl01), _mm_add_pd(_mm_mul_pd(dp1jweta, dpl10), _mm_mul_pd(dp2jweta, dpl2)))); 1130 kev = _mm_add_pd(kev, _mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta, p24), dpl01), _mm_add_pd(_mm_mul_pd(dp0jweta, dpl10), _mm_mul_pd(dp2jweta, _mm_shuffle_pd(dpl2, dpl2, _MM_SHUFFLE2(0, 1)))))); 1131 pdgduv = _mm_mul_pd(p05, _mm_add_pd(_mm_add_pd(_mm_mul_pd(p42, _mm_mul_pd(du0, dpl01)), _mm_mul_pd(p24, _mm_mul_pd(dv1, dpl01))), _mm_add_pd(_mm_mul_pd(du1pdv0, dpl10), _mm_mul_pd(pdu2dv2, _mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */ 1132 t0 = _mm_mul_pd(jwdeta, pdgduv); /* jw deta [dgdu, dgdv] */ 1133 t3 = _mm_mul_pd(t0, du1pdv0); /* t0 (du1 + dv0) */ 1134 _mm_storeu_pd(&Ke[l * 2 + 0][ll * 2 + 0], _mm_add_pd(keu, _mm_add_pd(_mm_mul_pd(t1, t0), _mm_add_pd(_mm_mul_pd(dp1, t3), _mm_mul_pd(t0, _mm_mul_pd(dp2, du2)))))); 1135 _mm_storeu_pd(&Ke[l * 2 + 1][ll * 2 + 0], _mm_add_pd(kev, _mm_add_pd(_mm_mul_pd(t2, t0), _mm_add_pd(_mm_mul_pd(dp0, t3), _mm_mul_pd(t0, _mm_mul_pd(dp2, dv2)))))); 1136 } 1137 #endif 1138 } 1139 } 1140 } 1141 if (k == 0) { /* on a bottom face */ 1142 if (thi->no_slip) { 1143 const PetscReal hz = PetscRealPart(pn[0].h) / (zm - 1); 1144 const PetscScalar diagu = 2 * etabase / thi->rhog * (hx * hy / hz + hx * hz / hy + 4 * hy * hz / hx), diagv = 2 * etabase / thi->rhog * (hx * hy / hz + 4 * hx * hz / hy + hy * hz / hx); 1145 Ke[0][0] = thi->dirichlet_scale * diagu; 1146 Ke[1][1] = thi->dirichlet_scale * diagv; 1147 } else { 1148 for (q = 0; q < 4; q++) { 1149 const PetscReal jw = 0.25 * hx * hy / thi->rhog, *phi = QuadQInterp[q]; 1150 PetscScalar u = 0, v = 0, rbeta2 = 0; 1151 PetscReal beta2, dbeta2; 1152 for (l = 0; l < 4; l++) { 1153 u += phi[l] * n[l].u; 1154 v += phi[l] * n[l].v; 1155 rbeta2 += phi[l] * pn[l].beta2; 1156 } 1157 THIFriction(thi, PetscRealPart(rbeta2), PetscRealPart(u * u + v * v) / 2, &beta2, &dbeta2); 1158 for (l = 0; l < 4; l++) { 1159 const PetscReal pp = phi[l]; 1160 for (ll = 0; ll < 4; ll++) { 1161 const PetscReal ppl = phi[ll]; 1162 Ke[l * 2 + 0][ll * 2 + 0] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * u * u * ppl; 1163 Ke[l * 2 + 0][ll * 2 + 1] += pp * jw * dbeta2 * u * v * ppl; 1164 Ke[l * 2 + 1][ll * 2 + 0] += pp * jw * dbeta2 * v * u * ppl; 1165 Ke[l * 2 + 1][ll * 2 + 1] += pp * jw * beta2 * ppl + pp * jw * dbeta2 * v * v * ppl; 1166 } 1167 } 1168 } 1169 } 1170 } 1171 { 1172 const MatStencil rc[8] = { 1173 {i, j, k, 0}, 1174 {i + 1, j, k, 0}, 1175 {i + 1, j + 1, k, 0}, 1176 {i, j + 1, k, 0}, 1177 {i, j, k + 1, 0}, 1178 {i + 1, j, k + 1, 0}, 1179 {i + 1, j + 1, k + 1, 0}, 1180 {i, j + 1, k + 1, 0} 1181 }; 1182 if (amode == THIASSEMBLY_TRIDIAGONAL) { 1183 for (l = 0; l < 4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */ 1184 const PetscInt l4 = l + 4; 1185 const MatStencil rcl[2] = { 1186 {rc[l].k, rc[l].j, rc[l].i, 0}, 1187 {rc[l4].k, rc[l4].j, rc[l4].i, 0} 1188 }; 1189 #if defined COMPUTE_LOWER_TRIANGULAR 1190 const PetscScalar Kel[4][4] = { 1191 {Ke[2 * l + 0][2 * l + 0], Ke[2 * l + 0][2 * l + 1], Ke[2 * l + 0][2 * l4 + 0], Ke[2 * l + 0][2 * l4 + 1] }, 1192 {Ke[2 * l + 1][2 * l + 0], Ke[2 * l + 1][2 * l + 1], Ke[2 * l + 1][2 * l4 + 0], Ke[2 * l + 1][2 * l4 + 1] }, 1193 {Ke[2 * l4 + 0][2 * l + 0], Ke[2 * l4 + 0][2 * l + 1], Ke[2 * l4 + 0][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 1]}, 1194 {Ke[2 * l4 + 1][2 * l + 0], Ke[2 * l4 + 1][2 * l + 1], Ke[2 * l4 + 1][2 * l4 + 0], Ke[2 * l4 + 1][2 * l4 + 1]} 1195 }; 1196 #else 1197 /* Same as above except for the lower-left block */ 1198 const PetscScalar Kel[4][4] = { 1199 {Ke[2 * l + 0][2 * l + 0], Ke[2 * l + 0][2 * l + 1], Ke[2 * l + 0][2 * l4 + 0], Ke[2 * l + 0][2 * l4 + 1] }, 1200 {Ke[2 * l + 1][2 * l + 0], Ke[2 * l + 1][2 * l + 1], Ke[2 * l + 1][2 * l4 + 0], Ke[2 * l + 1][2 * l4 + 1] }, 1201 {Ke[2 * l + 0][2 * l4 + 0], Ke[2 * l + 1][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 0], Ke[2 * l4 + 0][2 * l4 + 1]}, 1202 {Ke[2 * l + 0][2 * l4 + 1], Ke[2 * l + 1][2 * l4 + 1], Ke[2 * l4 + 1][2 * l4 + 0], Ke[2 * l4 + 1][2 * l4 + 1]} 1203 }; 1204 #endif 1205 PetscCall(MatSetValuesBlockedStencil(B, 2, rcl, 2, rcl, &Kel[0][0], ADD_VALUES)); 1206 } 1207 } else { 1208 #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */ 1209 for (l = 0; l < 8; l++) { 1210 for (ll = l + 1; ll < 8; ll++) { 1211 Ke[ll * 2 + 0][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 0]; 1212 Ke[ll * 2 + 1][l * 2 + 0] = Ke[l * 2 + 0][ll * 2 + 1]; 1213 Ke[ll * 2 + 0][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 0]; 1214 Ke[ll * 2 + 1][l * 2 + 1] = Ke[l * 2 + 1][ll * 2 + 1]; 1215 } 1216 } 1217 #endif 1218 PetscCall(MatSetValuesBlockedStencil(B, 8, rc, 8, rc, &Ke[0][0], ADD_VALUES)); 1219 } 1220 } 1221 } 1222 } 1223 } 1224 PetscCall(THIDARestorePrm(info->da, &prm)); 1225 1226 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1227 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1228 PetscCall(MatSetOption(B, MAT_SYMMETRIC, PETSC_TRUE)); 1229 if (thi->verbose) PetscCall(THIMatrixStatistics(thi, B, PETSC_VIEWER_STDOUT_WORLD)); 1230 PetscFunctionReturn(PETSC_SUCCESS); 1231 } 1232 1233 static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *info, Node ***x, Mat A, Mat B, THI thi) 1234 { 1235 PetscFunctionBeginUser; 1236 PetscCall(THIJacobianLocal_3D(info, x, B, thi, THIASSEMBLY_FULL)); 1237 PetscFunctionReturn(PETSC_SUCCESS); 1238 } 1239 1240 static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *info, Node ***x, Mat A, Mat B, THI thi) 1241 { 1242 PetscFunctionBeginUser; 1243 PetscCall(THIJacobianLocal_3D(info, x, B, thi, THIASSEMBLY_TRIDIAGONAL)); 1244 PetscFunctionReturn(PETSC_SUCCESS); 1245 } 1246 1247 static PetscErrorCode DMRefineHierarchy_THI(DM dac0, PetscInt nlevels, DM hierarchy[]) 1248 { 1249 THI thi; 1250 PetscInt dim, M, N, m, n, s, dof; 1251 DM dac, daf; 1252 DMDAStencilType st; 1253 DM_DA *ddf, *ddc; 1254 1255 PetscFunctionBeginUser; 1256 PetscCall(PetscObjectQuery((PetscObject)dac0, "THI", (PetscObject *)&thi)); 1257 PetscCheck(thi, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot refine this DMDA, missing composed THI instance"); 1258 if (nlevels > 1) { 1259 PetscCall(DMRefineHierarchy(dac0, nlevels - 1, hierarchy)); 1260 dac = hierarchy[nlevels - 2]; 1261 } else { 1262 dac = dac0; 1263 } 1264 PetscCall(DMDAGetInfo(dac, &dim, &N, &M, 0, &n, &m, 0, &dof, &s, 0, 0, 0, &st)); 1265 PetscCheck(dim == 2, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "This function can only refine 2D DMDAs"); 1266 1267 /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */ 1268 PetscCall(DMDACreate3d(PetscObjectComm((PetscObject)dac), DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, st, thi->zlevels, N, M, 1, n, m, dof, s, NULL, NULL, NULL, &daf)); 1269 PetscCall(DMSetUp(daf)); 1270 1271 daf->ops->creatematrix = dac->ops->creatematrix; 1272 daf->ops->createinterpolation = dac->ops->createinterpolation; 1273 daf->ops->getcoloring = dac->ops->getcoloring; 1274 ddf = (DM_DA *)daf->data; 1275 ddc = (DM_DA *)dac->data; 1276 ddf->interptype = ddc->interptype; 1277 1278 PetscCall(DMDASetFieldName(daf, 0, "x-velocity")); 1279 PetscCall(DMDASetFieldName(daf, 1, "y-velocity")); 1280 1281 hierarchy[nlevels - 1] = daf; 1282 PetscFunctionReturn(PETSC_SUCCESS); 1283 } 1284 1285 static PetscErrorCode DMCreateInterpolation_DA_THI(DM dac, DM daf, Mat *A, Vec *scale) 1286 { 1287 PetscInt dim; 1288 1289 PetscFunctionBeginUser; 1290 PetscValidHeaderSpecific(dac, DM_CLASSID, 1); 1291 PetscValidHeaderSpecific(daf, DM_CLASSID, 2); 1292 PetscAssertPointer(A, 3); 1293 if (scale) PetscAssertPointer(scale, 4); 1294 PetscCall(DMDAGetInfo(daf, &dim, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); 1295 if (dim == 2) { 1296 /* We are in the 2D problem and use normal DMDA interpolation */ 1297 PetscCall(DMCreateInterpolation(dac, daf, A, scale)); 1298 } else { 1299 PetscInt i, j, k, xs, ys, zs, xm, ym, zm, mx, my, mz, rstart, cstart; 1300 Mat B; 1301 1302 PetscCall(DMDAGetInfo(daf, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0)); 1303 PetscCall(DMDAGetCorners(daf, &zs, &ys, &xs, &zm, &ym, &xm)); 1304 PetscCheck(!zs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "unexpected"); 1305 PetscCall(MatCreate(PetscObjectComm((PetscObject)daf), &B)); 1306 PetscCall(MatSetSizes(B, xm * ym * zm, xm * ym, mx * my * mz, mx * my)); 1307 1308 PetscCall(MatSetType(B, MATAIJ)); 1309 PetscCall(MatSeqAIJSetPreallocation(B, 1, NULL)); 1310 PetscCall(MatMPIAIJSetPreallocation(B, 1, NULL, 0, NULL)); 1311 PetscCall(MatGetOwnershipRange(B, &rstart, NULL)); 1312 PetscCall(MatGetOwnershipRangeColumn(B, &cstart, NULL)); 1313 for (i = xs; i < xs + xm; i++) { 1314 for (j = ys; j < ys + ym; j++) { 1315 for (k = zs; k < zs + zm; k++) { 1316 PetscInt i2 = i * ym + j, i3 = i2 * zm + k; 1317 PetscScalar val = ((k == 0 || k == mz - 1) ? 0.5 : 1.) / (mz - 1.); /* Integration using trapezoid rule */ 1318 PetscCall(MatSetValue(B, cstart + i3, rstart + i2, val, INSERT_VALUES)); 1319 } 1320 } 1321 } 1322 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1323 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1324 PetscCall(MatCreateMAIJ(B, sizeof(Node) / sizeof(PetscScalar), A)); 1325 PetscCall(MatDestroy(&B)); 1326 } 1327 PetscFunctionReturn(PETSC_SUCCESS); 1328 } 1329 1330 static PetscErrorCode DMCreateMatrix_THI_Tridiagonal(DM da, Mat *J) 1331 { 1332 Mat A; 1333 PetscInt xm, ym, zm, dim, dof = 2, starts[3], dims[3]; 1334 ISLocalToGlobalMapping ltog; 1335 1336 PetscFunctionBeginUser; 1337 PetscCall(DMDAGetInfo(da, &dim, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); 1338 PetscCheck(dim == 3, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Expected DMDA to be 3D"); 1339 PetscCall(DMDAGetCorners(da, 0, 0, 0, &zm, &ym, &xm)); 1340 PetscCall(DMGetLocalToGlobalMapping(da, <og)); 1341 PetscCall(MatCreate(PetscObjectComm((PetscObject)da), &A)); 1342 PetscCall(MatSetSizes(A, dof * xm * ym * zm, dof * xm * ym * zm, PETSC_DETERMINE, PETSC_DETERMINE)); 1343 PetscCall(MatSetType(A, da->mattype)); 1344 PetscCall(MatSetFromOptions(A)); 1345 PetscCall(MatSeqAIJSetPreallocation(A, 3 * 2, NULL)); 1346 PetscCall(MatMPIAIJSetPreallocation(A, 3 * 2, NULL, 0, NULL)); 1347 PetscCall(MatSeqBAIJSetPreallocation(A, 2, 3, NULL)); 1348 PetscCall(MatMPIBAIJSetPreallocation(A, 2, 3, NULL, 0, NULL)); 1349 PetscCall(MatSeqSBAIJSetPreallocation(A, 2, 2, NULL)); 1350 PetscCall(MatMPISBAIJSetPreallocation(A, 2, 2, NULL, 0, NULL)); 1351 PetscCall(MatSetLocalToGlobalMapping(A, ltog, ltog)); 1352 PetscCall(DMDAGetGhostCorners(da, &starts[0], &starts[1], &starts[2], &dims[0], &dims[1], &dims[2])); 1353 PetscCall(MatSetStencil(A, dim, dims, starts, dof)); 1354 *J = A; 1355 PetscFunctionReturn(PETSC_SUCCESS); 1356 } 1357 1358 static PetscErrorCode THIDAVecView_VTK_XML(THI thi, DM da, Vec X, const char filename[]) 1359 { 1360 const PetscInt dof = 2; 1361 Units units = thi->units; 1362 MPI_Comm comm; 1363 PetscViewer viewer; 1364 PetscMPIInt rank, size, tag, nn, nmax; 1365 PetscInt mx, my, mz, r, range[6]; 1366 const PetscScalar *x; 1367 1368 PetscFunctionBeginUser; 1369 PetscCall(PetscObjectGetComm((PetscObject)thi, &comm)); 1370 PetscCall(DMDAGetInfo(da, 0, &mz, &my, &mx, 0, 0, 0, 0, 0, 0, 0, 0, 0)); 1371 PetscCallMPI(MPI_Comm_size(comm, &size)); 1372 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1373 PetscCall(PetscViewerASCIIOpen(comm, filename, &viewer)); 1374 PetscCall(PetscViewerASCIIPrintf(viewer, "<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n")); 1375 PetscCall(PetscViewerASCIIPrintf(viewer, " <StructuredGrid WholeExtent=\"%d %" PetscInt_FMT " %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n", 0, mz - 1, 0, my - 1, 0, mx - 1)); 1376 1377 PetscCall(DMDAGetCorners(da, range, range + 1, range + 2, range + 3, range + 4, range + 5)); 1378 PetscCall(PetscMPIIntCast(range[3] * range[4] * range[5] * dof, &nn)); 1379 PetscCallMPI(MPI_Reduce(&nn, &nmax, 1, MPI_INT, MPI_MAX, 0, comm)); 1380 tag = ((PetscObject)viewer)->tag; 1381 PetscCall(VecGetArrayRead(X, &x)); 1382 if (rank == 0) { 1383 PetscScalar *array; 1384 PetscCall(PetscMalloc1(nmax, &array)); 1385 for (r = 0; r < size; r++) { 1386 PetscInt i, j, k, xs, xm, ys, ym, zs, zm; 1387 const PetscScalar *ptr; 1388 MPI_Status status; 1389 if (r) PetscCallMPI(MPI_Recv(range, 6, MPIU_INT, r, tag, comm, MPI_STATUS_IGNORE)); 1390 zs = range[0]; 1391 ys = range[1]; 1392 xs = range[2]; 1393 zm = range[3]; 1394 ym = range[4]; 1395 xm = range[5]; 1396 PetscCheck(xm * ym * zm * dof <= nmax, PETSC_COMM_SELF, PETSC_ERR_PLIB, "should not happen"); 1397 if (r) { 1398 PetscCallMPI(MPI_Recv(array, nmax, MPIU_SCALAR, r, tag, comm, &status)); 1399 PetscCallMPI(MPI_Get_count(&status, MPIU_SCALAR, &nn)); 1400 PetscCheck(nn == xm * ym * zm * dof, PETSC_COMM_SELF, PETSC_ERR_PLIB, "should not happen"); 1401 ptr = array; 1402 } else ptr = x; 1403 PetscCall(PetscViewerASCIIPrintf(viewer, " <Piece Extent=\"%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n", zs, zs + zm - 1, ys, ys + ym - 1, xs, xs + xm - 1)); 1404 1405 PetscCall(PetscViewerASCIIPrintf(viewer, " <Points>\n")); 1406 PetscCall(PetscViewerASCIIPrintf(viewer, " <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n")); 1407 for (i = xs; i < xs + xm; i++) { 1408 for (j = ys; j < ys + ym; j++) { 1409 for (k = zs; k < zs + zm; k++) { 1410 PrmNode p; 1411 PetscReal xx = thi->Lx * i / mx, yy = thi->Ly * j / my, zz; 1412 thi->initialize(thi, xx, yy, &p); 1413 zz = PetscRealPart(p.b) + PetscRealPart(p.h) * k / (mz - 1); 1414 PetscCall(PetscViewerASCIIPrintf(viewer, "%f %f %f\n", (double)xx, (double)yy, (double)zz)); 1415 } 1416 } 1417 } 1418 PetscCall(PetscViewerASCIIPrintf(viewer, " </DataArray>\n")); 1419 PetscCall(PetscViewerASCIIPrintf(viewer, " </Points>\n")); 1420 1421 PetscCall(PetscViewerASCIIPrintf(viewer, " <PointData>\n")); 1422 PetscCall(PetscViewerASCIIPrintf(viewer, " <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n")); 1423 for (i = 0; i < nn; i += dof) PetscCall(PetscViewerASCIIPrintf(viewer, "%f %f %f\n", (double)(PetscRealPart(ptr[i]) * units->year / units->meter), (double)(PetscRealPart(ptr[i + 1]) * units->year / units->meter), 0.0)); 1424 PetscCall(PetscViewerASCIIPrintf(viewer, " </DataArray>\n")); 1425 1426 PetscCall(PetscViewerASCIIPrintf(viewer, " <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n")); 1427 for (i = 0; i < nn; i += dof) PetscCall(PetscViewerASCIIPrintf(viewer, "%" PetscInt_FMT "\n", r)); 1428 PetscCall(PetscViewerASCIIPrintf(viewer, " </DataArray>\n")); 1429 PetscCall(PetscViewerASCIIPrintf(viewer, " </PointData>\n")); 1430 1431 PetscCall(PetscViewerASCIIPrintf(viewer, " </Piece>\n")); 1432 } 1433 PetscCall(PetscFree(array)); 1434 } else { 1435 PetscCallMPI(MPI_Send(range, 6, MPIU_INT, 0, tag, comm)); 1436 PetscCallMPI(MPI_Send((PetscScalar *)x, nn, MPIU_SCALAR, 0, tag, comm)); 1437 } 1438 PetscCall(VecRestoreArrayRead(X, &x)); 1439 PetscCall(PetscViewerASCIIPrintf(viewer, " </StructuredGrid>\n")); 1440 PetscCall(PetscViewerASCIIPrintf(viewer, "</VTKFile>\n")); 1441 PetscCall(PetscViewerDestroy(&viewer)); 1442 PetscFunctionReturn(PETSC_SUCCESS); 1443 } 1444 1445 int main(int argc, char *argv[]) 1446 { 1447 MPI_Comm comm; 1448 THI thi; 1449 DM da; 1450 SNES snes; 1451 1452 PetscFunctionBeginUser; 1453 PetscCall(PetscInitialize(&argc, &argv, 0, help)); 1454 comm = PETSC_COMM_WORLD; 1455 1456 PetscCall(THICreate(comm, &thi)); 1457 { 1458 PetscInt M = 3, N = 3, P = 2; 1459 PetscOptionsBegin(comm, NULL, "Grid resolution options", ""); 1460 { 1461 PetscCall(PetscOptionsInt("-M", "Number of elements in x-direction on coarse level", "", M, &M, NULL)); 1462 N = M; 1463 PetscCall(PetscOptionsInt("-N", "Number of elements in y-direction on coarse level (if different from M)", "", N, &N, NULL)); 1464 if (thi->coarse2d) { 1465 PetscCall(PetscOptionsInt("-zlevels", "Number of elements in z-direction on fine level", "", thi->zlevels, &thi->zlevels, NULL)); 1466 } else { 1467 PetscCall(PetscOptionsInt("-P", "Number of elements in z-direction on coarse level", "", P, &P, NULL)); 1468 } 1469 } 1470 PetscOptionsEnd(); 1471 if (thi->coarse2d) { 1472 PetscCall(DMDACreate2d(comm, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX, N, M, PETSC_DETERMINE, PETSC_DETERMINE, sizeof(Node) / sizeof(PetscScalar), 1, 0, 0, &da)); 1473 PetscCall(DMSetFromOptions(da)); 1474 PetscCall(DMSetUp(da)); 1475 da->ops->refinehierarchy = DMRefineHierarchy_THI; 1476 da->ops->createinterpolation = DMCreateInterpolation_DA_THI; 1477 1478 PetscCall(PetscObjectCompose((PetscObject)da, "THI", (PetscObject)thi)); 1479 } else { 1480 PetscCall(DMDACreate3d(comm, DM_BOUNDARY_NONE, DM_BOUNDARY_PERIODIC, DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX, P, N, M, 1, PETSC_DETERMINE, PETSC_DETERMINE, sizeof(Node) / sizeof(PetscScalar), 1, 0, 0, 0, &da)); 1481 PetscCall(DMSetFromOptions(da)); 1482 PetscCall(DMSetUp(da)); 1483 } 1484 PetscCall(DMDASetFieldName(da, 0, "x-velocity")); 1485 PetscCall(DMDASetFieldName(da, 1, "y-velocity")); 1486 } 1487 PetscCall(THISetUpDM(thi, da)); 1488 if (thi->tridiagonal) da->ops->creatematrix = DMCreateMatrix_THI_Tridiagonal; 1489 1490 { /* Set the fine level matrix type if -da_refine */ 1491 PetscInt rlevel, clevel; 1492 PetscCall(DMGetRefineLevel(da, &rlevel)); 1493 PetscCall(DMGetCoarsenLevel(da, &clevel)); 1494 if (rlevel - clevel > 0) PetscCall(DMSetMatType(da, thi->mattype)); 1495 } 1496 1497 PetscCall(DMDASNESSetFunctionLocal(da, ADD_VALUES, (DMDASNESFunctionFn *)THIFunctionLocal, thi)); 1498 if (thi->tridiagonal) { 1499 PetscCall(DMDASNESSetJacobianLocal(da, (DMDASNESJacobianFn *)THIJacobianLocal_3D_Tridiagonal, thi)); 1500 } else { 1501 PetscCall(DMDASNESSetJacobianLocal(da, (DMDASNESJacobianFn *)THIJacobianLocal_3D_Full, thi)); 1502 } 1503 PetscCall(DMCoarsenHookAdd(da, DMCoarsenHook_THI, NULL, thi)); 1504 PetscCall(DMRefineHookAdd(da, DMRefineHook_THI, NULL, thi)); 1505 1506 PetscCall(DMSetApplicationContext(da, thi)); 1507 1508 PetscCall(SNESCreate(comm, &snes)); 1509 PetscCall(SNESSetDM(snes, da)); 1510 PetscCall(DMDestroy(&da)); 1511 PetscCall(SNESSetComputeInitialGuess(snes, THIInitial, NULL)); 1512 PetscCall(SNESSetFromOptions(snes)); 1513 1514 PetscCall(SNESSolve(snes, NULL, NULL)); 1515 1516 PetscCall(THISolveStatistics(thi, snes, 0, "Full")); 1517 1518 { 1519 PetscBool flg; 1520 char filename[PETSC_MAX_PATH_LEN] = ""; 1521 PetscCall(PetscOptionsGetString(NULL, NULL, "-o", filename, sizeof(filename), &flg)); 1522 if (flg) { 1523 Vec X; 1524 DM dm; 1525 PetscCall(SNESGetSolution(snes, &X)); 1526 PetscCall(SNESGetDM(snes, &dm)); 1527 PetscCall(THIDAVecView_VTK_XML(thi, dm, X, filename)); 1528 } 1529 } 1530 1531 PetscCall(DMDestroy(&da)); 1532 PetscCall(SNESDestroy(&snes)); 1533 PetscCall(THIDestroy(&thi)); 1534 PetscCall(PetscFinalize()); 1535 return 0; 1536 } 1537 1538 /*TEST 1539 1540 build: 1541 requires: !single 1542 1543 test: 1544 args: -M 6 -P 4 -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type icc 1545 1546 test: 1547 suffix: 2 1548 nsize: 2 1549 args: -M 6 -P 4 -thi_hom z -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 6 -mg_levels_0_pc_type redundant -snes_grid_sequence 1 -mat_partitioning_type current -ksp_atol -1 1550 1551 test: 1552 suffix: 3 1553 nsize: 3 1554 args: -M 7 -P 4 -thi_hom z -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type baij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_pc_asm_type restrict -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 9 -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mat_partitioning_type current 1555 1556 test: 1557 suffix: 4 1558 nsize: 6 1559 args: -M 4 -P 2 -da_refine_hierarchy_x 1,1,3 -da_refine_hierarchy_y 2,2,1 -da_refine_hierarchy_z 2,2,1 -snes_grid_sequence 3 -ksp_converged_reason -ksp_type fgmres -ksp_rtol 1e-2 -pc_type mg -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type bjacobi -mg_levels_1_sub_pc_type cholesky -pc_mg_type multiplicative -snes_converged_reason -snes_stol 1e-12 -thi_L 80e3 -thi_alpha 0.05 -thi_friction_m 1 -thi_hom x -snes_view -mg_levels_0_pc_type redundant -mg_levels_0_ksp_type preonly -ksp_atol -1 1560 1561 test: 1562 suffix: 5 1563 nsize: 6 1564 args: -M 12 -P 5 -snes_monitor_short -ksp_converged_reason -pc_type asm -pc_asm_type restrict -dm_mat_type {{aij baij sbaij}} 1565 1566 TEST*/ 1567