1 static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\ 2 \n\ 3 Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\ 4 using multigrid. The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\ 5 to p=4/3 in a p-Laplacian). The focus is on ISMIP-HOM experiments which assume periodic\n\ 6 boundary conditions in the x- and y-directions.\n\ 7 \n\ 8 Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\ 9 can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\ 10 \n\ 11 A VTK StructuredGrid output file can be written using the option -o filename.vts\n\ 12 \n\n"; 13 14 /* 15 The equations for horizontal velocity (u,v) are 16 17 - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0 18 - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0 19 20 where 21 22 eta = B/2 (epsilon + gamma)^((p-2)/2) 23 24 is the nonlinear effective viscosity with regularization epsilon and hardness parameter B, 25 written in terms of the second invariant 26 27 gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2 28 29 The surface boundary conditions are the natural conditions. The basal boundary conditions 30 are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2. 31 32 In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1). 33 34 The discretization is Q1 finite elements, managed by a DMDA. The grid is never distorted in the 35 map (x,y) plane, but the bed and surface may be bumpy. This is handled as usual in FEM, through 36 the Jacobian of the coordinate transformation from a reference element to the physical element. 37 38 Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed 39 specially so that columns are never distributed, and are always contiguous in memory. 40 This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation, 41 and then indexing as vec[i][j][k]. The exotic coarse spaces require 2D DMDAs which are made to 42 use compatible domain decomposition relative to the 3D DMDAs. 43 44 There are two compile-time options: 45 46 NO_SSE2: 47 If the host supports SSE2, we use integration code that has been vectorized with SSE2 48 intrinsics, unless this macro is defined. The intrinsics speed up integration by about 49 30% on my architecture (P8700, gcc-4.5 snapshot). 50 51 COMPUTE_LOWER_TRIANGULAR: 52 The element matrices we assemble are lower-triangular so it is not necessary to compute 53 all entries explicitly. If this macro is defined, the lower-triangular entries are 54 computed explicitly. 55 56 */ 57 58 #if defined(PETSC_APPLE_FRAMEWORK) 59 #import <PETSc/petscsnes.h> 60 #import <PETSc/petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */ 61 #else 62 63 #include <petscsnes.h> 64 #include <petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */ 65 #endif 66 #include <ctype.h> /* toupper() */ 67 68 #if defined(__cplusplus) || defined (PETSC_HAVE_WINDOWS_COMPILERS) || defined (__PGI) 69 /* c++ cannot handle [_restrict_] notation like C does */ 70 #undef PETSC_RESTRICT 71 #define PETSC_RESTRICT 72 #endif 73 74 #if defined __SSE2__ 75 # include <emmintrin.h> 76 #endif 77 78 /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */ 79 #if !defined NO_SSE2 \ 80 && !defined PETSC_USE_COMPLEX \ 81 && !defined PETSC_USE_REAL_SINGLE \ 82 && !defined PETSC_USE_REAL___FLOAT128 \ 83 && !defined PETSC_USE_REAL___FP16 \ 84 && defined __SSE2__ 85 #define USE_SSE2_KERNELS 1 86 #else 87 #define USE_SSE2_KERNELS 0 88 #endif 89 90 static PetscClassId THI_CLASSID; 91 92 typedef enum {QUAD_GAUSS,QUAD_LOBATTO} QuadratureType; 93 static const char *QuadratureTypes[] = {"gauss","lobatto","QuadratureType","QUAD_",0}; 94 PETSC_UNUSED static const PetscReal HexQWeights[8] = {1,1,1,1,1,1,1,1}; 95 PETSC_UNUSED static const PetscReal HexQNodes[] = {-0.57735026918962573, 0.57735026918962573}; 96 #define G 0.57735026918962573 97 #define H (0.5*(1.+G)) 98 #define L (0.5*(1.-G)) 99 #define M (-0.5) 100 #define P (0.5) 101 /* Special quadrature: Lobatto in horizontal, Gauss in vertical */ 102 static const PetscReal HexQInterp_Lobatto[8][8] = {{H,0,0,0,L,0,0,0}, 103 {0,H,0,0,0,L,0,0}, 104 {0,0,H,0,0,0,L,0}, 105 {0,0,0,H,0,0,0,L}, 106 {L,0,0,0,H,0,0,0}, 107 {0,L,0,0,0,H,0,0}, 108 {0,0,L,0,0,0,H,0}, 109 {0,0,0,L,0,0,0,H}}; 110 static const PetscReal HexQDeriv_Lobatto[8][8][3] = { 111 {{M*H,M*H,M},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} ,{M*L,M*L,P},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} }, 112 {{M*H,0,0} ,{P*H,M*H,M},{0,P*H,0} ,{0,0,0} ,{M*L,0,0} ,{P*L,M*L,P},{0,P*L,0} ,{0,0,0} }, 113 {{0,0,0} ,{0,M*H,0} ,{P*H,P*H,M},{M*H,0,0} ,{0,0,0} ,{0,M*L,0} ,{P*L,P*L,P},{M*L,0,0} }, 114 {{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,M},{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,P}}, 115 {{M*L,M*L,M},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} ,{M*H,M*H,P},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} }, 116 {{M*L,0,0} ,{P*L,M*L,M},{0,P*L,0} ,{0,0,0} ,{M*H,0,0} ,{P*H,M*H,P},{0,P*H,0} ,{0,0,0} }, 117 {{0,0,0} ,{0,M*L,0} ,{P*L,P*L,M},{M*L,0,0} ,{0,0,0} ,{0,M*H,0} ,{P*H,P*H,P},{M*H,0,0} }, 118 {{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,M},{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,P}}}; 119 /* Stanndard Gauss */ 120 static const PetscReal HexQInterp_Gauss[8][8] = {{H*H*H,L*H*H,L*L*H,H*L*H, H*H*L,L*H*L,L*L*L,H*L*L}, 121 {L*H*H,H*H*H,H*L*H,L*L*H, L*H*L,H*H*L,H*L*L,L*L*L}, 122 {L*L*H,H*L*H,H*H*H,L*H*H, L*L*L,H*L*L,H*H*L,L*H*L}, 123 {H*L*H,L*L*H,L*H*H,H*H*H, H*L*L,L*L*L,L*H*L,H*H*L}, 124 {H*H*L,L*H*L,L*L*L,H*L*L, H*H*H,L*H*H,L*L*H,H*L*H}, 125 {L*H*L,H*H*L,H*L*L,L*L*L, L*H*H,H*H*H,H*L*H,L*L*H}, 126 {L*L*L,H*L*L,H*H*L,L*H*L, L*L*H,H*L*H,H*H*H,L*H*H}, 127 {H*L*L,L*L*L,L*H*L,H*H*L, H*L*H,L*L*H,L*H*H,H*H*H}}; 128 static const PetscReal HexQDeriv_Gauss[8][8][3] = { 129 {{M*H*H,H*M*H,H*H*M},{P*H*H,L*M*H,L*H*M},{P*L*H,L*P*H,L*L*M},{M*L*H,H*P*H,H*L*M}, {M*H*L,H*M*L,H*H*P},{P*H*L,L*M*L,L*H*P},{P*L*L,L*P*L,L*L*P},{M*L*L,H*P*L,H*L*P}}, 130 {{M*H*H,L*M*H,L*H*M},{P*H*H,H*M*H,H*H*M},{P*L*H,H*P*H,H*L*M},{M*L*H,L*P*H,L*L*M}, {M*H*L,L*M*L,L*H*P},{P*H*L,H*M*L,H*H*P},{P*L*L,H*P*L,H*L*P},{M*L*L,L*P*L,L*L*P}}, 131 {{M*L*H,L*M*H,L*L*M},{P*L*H,H*M*H,H*L*M},{P*H*H,H*P*H,H*H*M},{M*H*H,L*P*H,L*H*M}, {M*L*L,L*M*L,L*L*P},{P*L*L,H*M*L,H*L*P},{P*H*L,H*P*L,H*H*P},{M*H*L,L*P*L,L*H*P}}, 132 {{M*L*H,H*M*H,H*L*M},{P*L*H,L*M*H,L*L*M},{P*H*H,L*P*H,L*H*M},{M*H*H,H*P*H,H*H*M}, {M*L*L,H*M*L,H*L*P},{P*L*L,L*M*L,L*L*P},{P*H*L,L*P*L,L*H*P},{M*H*L,H*P*L,H*H*P}}, 133 {{M*H*L,H*M*L,H*H*M},{P*H*L,L*M*L,L*H*M},{P*L*L,L*P*L,L*L*M},{M*L*L,H*P*L,H*L*M}, {M*H*H,H*M*H,H*H*P},{P*H*H,L*M*H,L*H*P},{P*L*H,L*P*H,L*L*P},{M*L*H,H*P*H,H*L*P}}, 134 {{M*H*L,L*M*L,L*H*M},{P*H*L,H*M*L,H*H*M},{P*L*L,H*P*L,H*L*M},{M*L*L,L*P*L,L*L*M}, {M*H*H,L*M*H,L*H*P},{P*H*H,H*M*H,H*H*P},{P*L*H,H*P*H,H*L*P},{M*L*H,L*P*H,L*L*P}}, 135 {{M*L*L,L*M*L,L*L*M},{P*L*L,H*M*L,H*L*M},{P*H*L,H*P*L,H*H*M},{M*H*L,L*P*L,L*H*M}, {M*L*H,L*M*H,L*L*P},{P*L*H,H*M*H,H*L*P},{P*H*H,H*P*H,H*H*P},{M*H*H,L*P*H,L*H*P}}, 136 {{M*L*L,H*M*L,H*L*M},{P*L*L,L*M*L,L*L*M},{P*H*L,L*P*L,L*H*M},{M*H*L,H*P*L,H*H*M}, {M*L*H,H*M*H,H*L*P},{P*L*H,L*M*H,L*L*P},{P*H*H,L*P*H,L*H*P},{M*H*H,H*P*H,H*H*P}}}; 137 static const PetscReal (*HexQInterp)[8],(*HexQDeriv)[8][3]; 138 /* Standard 2x2 Gauss quadrature for the bottom layer. */ 139 static const PetscReal QuadQInterp[4][4] = {{H*H,L*H,L*L,H*L}, 140 {L*H,H*H,H*L,L*L}, 141 {L*L,H*L,H*H,L*H}, 142 {H*L,L*L,L*H,H*H}}; 143 static const PetscReal QuadQDeriv[4][4][2] = { 144 {{M*H,M*H},{P*H,M*L},{P*L,P*L},{M*L,P*H}}, 145 {{M*H,M*L},{P*H,M*H},{P*L,P*H},{M*L,P*L}}, 146 {{M*L,M*L},{P*L,M*H},{P*H,P*H},{M*H,P*L}}, 147 {{M*L,M*H},{P*L,M*L},{P*H,P*L},{M*H,P*H}}}; 148 #undef G 149 #undef H 150 #undef L 151 #undef M 152 #undef P 153 154 #define HexExtract(x,i,j,k,n) do { \ 155 (n)[0] = (x)[i][j][k]; \ 156 (n)[1] = (x)[i+1][j][k]; \ 157 (n)[2] = (x)[i+1][j+1][k]; \ 158 (n)[3] = (x)[i][j+1][k]; \ 159 (n)[4] = (x)[i][j][k+1]; \ 160 (n)[5] = (x)[i+1][j][k+1]; \ 161 (n)[6] = (x)[i+1][j+1][k+1]; \ 162 (n)[7] = (x)[i][j+1][k+1]; \ 163 } while (0) 164 165 #define HexExtractRef(x,i,j,k,n) do { \ 166 (n)[0] = &(x)[i][j][k]; \ 167 (n)[1] = &(x)[i+1][j][k]; \ 168 (n)[2] = &(x)[i+1][j+1][k]; \ 169 (n)[3] = &(x)[i][j+1][k]; \ 170 (n)[4] = &(x)[i][j][k+1]; \ 171 (n)[5] = &(x)[i+1][j][k+1]; \ 172 (n)[6] = &(x)[i+1][j+1][k+1]; \ 173 (n)[7] = &(x)[i][j+1][k+1]; \ 174 } while (0) 175 176 #define QuadExtract(x,i,j,n) do { \ 177 (n)[0] = (x)[i][j]; \ 178 (n)[1] = (x)[i+1][j]; \ 179 (n)[2] = (x)[i+1][j+1]; \ 180 (n)[3] = (x)[i][j+1]; \ 181 } while (0) 182 183 static void HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[]) 184 { 185 PetscInt i; 186 dz[0] = dz[1] = dz[2] = 0; 187 for (i=0; i<8; i++) { 188 dz[0] += dphi[i][0] * zn[i]; 189 dz[1] += dphi[i][1] * zn[i]; 190 dz[2] += dphi[i][2] * zn[i]; 191 } 192 } 193 194 static void HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[PETSC_RESTRICT],PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscReal *PETSC_RESTRICT jw) 195 { 196 const PetscReal jac[3][3] = {{hx/2,0,0}, {0,hy/2,0}, {dz[0],dz[1],dz[2]}}; 197 const PetscReal ijac[3][3] = {{1/jac[0][0],0,0}, {0,1/jac[1][1],0}, {-jac[2][0]/(jac[0][0]*jac[2][2]),-jac[2][1]/(jac[1][1]*jac[2][2]),1/jac[2][2]}}; 198 const PetscReal jdet = jac[0][0]*jac[1][1]*jac[2][2]; 199 PetscInt i; 200 201 for (i=0; i<8; i++) { 202 const PetscReal *dphir = HexQDeriv[q][i]; 203 phi[i] = HexQInterp[q][i]; 204 dphi[i][0] = dphir[0]*ijac[0][0] + dphir[1]*ijac[1][0] + dphir[2]*ijac[2][0]; 205 dphi[i][1] = dphir[0]*ijac[0][1] + dphir[1]*ijac[1][1] + dphir[2]*ijac[2][1]; 206 dphi[i][2] = dphir[0]*ijac[0][2] + dphir[1]*ijac[1][2] + dphir[2]*ijac[2][2]; 207 } 208 *jw = 1.0 * jdet; 209 } 210 211 typedef struct _p_THI *THI; 212 typedef struct _n_Units *Units; 213 214 typedef struct { 215 PetscScalar u,v; 216 } Node; 217 218 typedef struct { 219 PetscScalar b; /* bed */ 220 PetscScalar h; /* thickness */ 221 PetscScalar beta2; /* friction */ 222 } PrmNode; 223 224 typedef struct { 225 PetscReal min,max,cmin,cmax; 226 } PRange; 227 228 typedef enum {THIASSEMBLY_TRIDIAGONAL,THIASSEMBLY_FULL} THIAssemblyMode; 229 230 struct _p_THI { 231 PETSCHEADER(int); 232 void (*initialize)(THI,PetscReal x,PetscReal y,PrmNode *p); 233 PetscInt zlevels; 234 PetscReal Lx,Ly,Lz; /* Model domain */ 235 PetscReal alpha; /* Bed angle */ 236 Units units; 237 PetscReal dirichlet_scale; 238 PetscReal ssa_friction_scale; 239 PRange eta; 240 PRange beta2; 241 struct { 242 PetscReal Bd2,eps,exponent; 243 } viscosity; 244 struct { 245 PetscReal irefgam,eps2,exponent,refvel,epsvel; 246 } friction; 247 PetscReal rhog; 248 PetscBool no_slip; 249 PetscBool tridiagonal; 250 PetscBool coarse2d; 251 PetscBool verbose; 252 MatType mattype; 253 }; 254 255 struct _n_Units { 256 /* fundamental */ 257 PetscReal meter; 258 PetscReal kilogram; 259 PetscReal second; 260 /* derived */ 261 PetscReal Pascal; 262 PetscReal year; 263 }; 264 265 static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo*,Node***,Mat,Mat,THI); 266 static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo*,Node***,Mat,Mat,THI); 267 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo*,Node**,Mat,Mat,THI); 268 269 static void PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[]) 270 { 271 const PetscScalar zm1 = zm-1, 272 znl[8] = {pn[0].b + pn[0].h*(PetscScalar)k/zm1, 273 pn[1].b + pn[1].h*(PetscScalar)k/zm1, 274 pn[2].b + pn[2].h*(PetscScalar)k/zm1, 275 pn[3].b + pn[3].h*(PetscScalar)k/zm1, 276 pn[0].b + pn[0].h*(PetscScalar)(k+1)/zm1, 277 pn[1].b + pn[1].h*(PetscScalar)(k+1)/zm1, 278 pn[2].b + pn[2].h*(PetscScalar)(k+1)/zm1, 279 pn[3].b + pn[3].h*(PetscScalar)(k+1)/zm1}; 280 PetscInt i; 281 for (i=0; i<8; i++) zn[i] = PetscRealPart(znl[i]); 282 } 283 284 /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */ 285 static void THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode *p) 286 { 287 Units units = thi->units; 288 PetscReal s = -x*PetscSinReal(thi->alpha); 289 290 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx) * PetscSinReal(y*2*PETSC_PI/thi->Ly); 291 p->h = s - p->b; 292 p->beta2 = 1e30; 293 } 294 295 static void THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode *p) 296 { 297 Units units = thi->units; 298 PetscReal s = -x*PetscSinReal(thi->alpha); 299 300 p->b = s - 1000*units->meter; 301 p->h = s - p->b; 302 /* tau_b = beta2 v is a stress (Pa) */ 303 p->beta2 = 1000 * (1 + PetscSinReal(x*2*PETSC_PI/thi->Lx)*PetscSinReal(y*2*PETSC_PI/thi->Ly)) * units->Pascal * units->year / units->meter; 304 } 305 306 /* These are just toys */ 307 308 /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */ 309 static void THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode *p) 310 { 311 Units units = thi->units; 312 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */ 313 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha); 314 p->b = s - 1000*units->meter + 500*units->meter*PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 315 p->h = s - p->b; 316 p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter; 317 } 318 319 /* Like Z, but with 200 meter cliffs */ 320 static void THIInitialize_HOM_Y(THI thi,PetscReal xx,PetscReal yy,PrmNode *p) 321 { 322 Units units = thi->units; 323 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */ 324 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha); 325 326 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 327 if (PetscRealPart(p->b) > -700*units->meter) p->b += 200*units->meter; 328 p->h = s - p->b; 329 p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter; 330 } 331 332 /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */ 333 static void THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode *p) 334 { 335 Units units = thi->units; 336 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */ 337 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha); 338 339 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 340 p->h = s - p->b; 341 p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter; 342 } 343 344 static void THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal *beta2,PetscReal *dbeta2) 345 { 346 if (thi->friction.irefgam == 0) { 347 Units units = thi->units; 348 thi->friction.irefgam = 1./(0.5*PetscSqr(thi->friction.refvel * units->meter / units->year)); 349 thi->friction.eps2 = 0.5*PetscSqr(thi->friction.epsvel * units->meter / units->year) * thi->friction.irefgam; 350 } 351 if (thi->friction.exponent == 0) { 352 *beta2 = rbeta2; 353 *dbeta2 = 0; 354 } else { 355 *beta2 = rbeta2 * PetscPowReal(thi->friction.eps2 + gam*thi->friction.irefgam,thi->friction.exponent); 356 *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam*thi->friction.irefgam) * thi->friction.irefgam; 357 } 358 } 359 360 static void THIViscosity(THI thi,PetscReal gam,PetscReal *eta,PetscReal *deta) 361 { 362 PetscReal Bd2,eps,exponent; 363 if (thi->viscosity.Bd2 == 0) { 364 Units units = thi->units; 365 const PetscReal 366 n = 3., /* Glen exponent */ 367 p = 1. + 1./n, /* for Stokes */ 368 A = 1.e-16 * PetscPowReal(units->Pascal,-n) / units->year, /* softness parameter (Pa^{-n}/s) */ 369 B = PetscPowReal(A,-1./n); /* hardness parameter */ 370 thi->viscosity.Bd2 = B/2; 371 thi->viscosity.exponent = (p-2)/2; 372 thi->viscosity.eps = 0.5*PetscSqr(1e-5 / units->year); 373 } 374 Bd2 = thi->viscosity.Bd2; 375 exponent = thi->viscosity.exponent; 376 eps = thi->viscosity.eps; 377 *eta = Bd2 * PetscPowReal(eps + gam,exponent); 378 *deta = exponent * (*eta) / (eps + gam); 379 } 380 381 static void RangeUpdate(PetscReal *min,PetscReal *max,PetscReal x) 382 { 383 if (x < *min) *min = x; 384 if (x > *max) *max = x; 385 } 386 387 static void PRangeClear(PRange *p) 388 { 389 p->cmin = p->min = 1e100; 390 p->cmax = p->max = -1e100; 391 } 392 393 static PetscErrorCode PRangeMinMax(PRange *p,PetscReal min,PetscReal max) 394 { 395 PetscFunctionBeginUser; 396 p->cmin = min; 397 p->cmax = max; 398 if (min < p->min) p->min = min; 399 if (max > p->max) p->max = max; 400 PetscFunctionReturn(0); 401 } 402 403 static PetscErrorCode THIDestroy(THI *thi) 404 { 405 PetscFunctionBeginUser; 406 if (!*thi) PetscFunctionReturn(0); 407 if (--((PetscObject)(*thi))->refct > 0) {*thi = 0; PetscFunctionReturn(0);} 408 PetscCall(PetscFree((*thi)->units)); 409 PetscCall(PetscFree((*thi)->mattype)); 410 PetscCall(PetscHeaderDestroy(thi)); 411 PetscFunctionReturn(0); 412 } 413 414 static PetscErrorCode THICreate(MPI_Comm comm,THI *inthi) 415 { 416 static PetscBool registered = PETSC_FALSE; 417 THI thi; 418 Units units; 419 420 PetscFunctionBeginUser; 421 *inthi = 0; 422 if (!registered) { 423 PetscCall(PetscClassIdRegister("Toy Hydrostatic Ice",&THI_CLASSID)); 424 registered = PETSC_TRUE; 425 } 426 PetscCall(PetscHeaderCreate(thi,THI_CLASSID,"THI","Toy Hydrostatic Ice","",comm,THIDestroy,0)); 427 428 PetscCall(PetscNew(&thi->units)); 429 units = thi->units; 430 units->meter = 1e-2; 431 units->second = 1e-7; 432 units->kilogram = 1e-12; 433 434 PetscOptionsBegin(comm,NULL,"Scaled units options",""); 435 { 436 PetscCall(PetscOptionsReal("-units_meter","1 meter in scaled length units","",units->meter,&units->meter,NULL)); 437 PetscCall(PetscOptionsReal("-units_second","1 second in scaled time units","",units->second,&units->second,NULL)); 438 PetscCall(PetscOptionsReal("-units_kilogram","1 kilogram in scaled mass units","",units->kilogram,&units->kilogram,NULL)); 439 } 440 PetscOptionsEnd(); 441 units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second)); 442 units->year = 31556926. * units->second; /* seconds per year */ 443 444 thi->Lx = 10.e3; 445 thi->Ly = 10.e3; 446 thi->Lz = 1000; 447 thi->dirichlet_scale = 1; 448 thi->verbose = PETSC_FALSE; 449 450 PetscOptionsBegin(comm,NULL,"Toy Hydrostatic Ice options",""); 451 { 452 QuadratureType quad = QUAD_GAUSS; 453 char homexp[] = "A"; 454 char mtype[256] = MATSBAIJ; 455 PetscReal L,m = 1.0; 456 PetscBool flg; 457 L = thi->Lx; 458 PetscCall(PetscOptionsReal("-thi_L","Domain size (m)","",L,&L,&flg)); 459 if (flg) thi->Lx = thi->Ly = L; 460 PetscCall(PetscOptionsReal("-thi_Lx","X Domain size (m)","",thi->Lx,&thi->Lx,NULL)); 461 PetscCall(PetscOptionsReal("-thi_Ly","Y Domain size (m)","",thi->Ly,&thi->Ly,NULL)); 462 PetscCall(PetscOptionsReal("-thi_Lz","Z Domain size (m)","",thi->Lz,&thi->Lz,NULL)); 463 PetscCall(PetscOptionsString("-thi_hom","ISMIP-HOM experiment (A or C)","",homexp,homexp,sizeof(homexp),NULL)); 464 switch (homexp[0] = toupper(homexp[0])) { 465 case 'A': 466 thi->initialize = THIInitialize_HOM_A; 467 thi->no_slip = PETSC_TRUE; 468 thi->alpha = 0.5; 469 break; 470 case 'C': 471 thi->initialize = THIInitialize_HOM_C; 472 thi->no_slip = PETSC_FALSE; 473 thi->alpha = 0.1; 474 break; 475 case 'X': 476 thi->initialize = THIInitialize_HOM_X; 477 thi->no_slip = PETSC_FALSE; 478 thi->alpha = 0.3; 479 break; 480 case 'Y': 481 thi->initialize = THIInitialize_HOM_Y; 482 thi->no_slip = PETSC_FALSE; 483 thi->alpha = 0.5; 484 break; 485 case 'Z': 486 thi->initialize = THIInitialize_HOM_Z; 487 thi->no_slip = PETSC_FALSE; 488 thi->alpha = 0.5; 489 break; 490 default: 491 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HOM experiment '%c' not implemented",homexp[0]); 492 } 493 PetscCall(PetscOptionsEnum("-thi_quadrature","Quadrature to use for 3D elements","",QuadratureTypes,(PetscEnum)quad,(PetscEnum*)&quad,NULL)); 494 switch (quad) { 495 case QUAD_GAUSS: 496 HexQInterp = HexQInterp_Gauss; 497 HexQDeriv = HexQDeriv_Gauss; 498 break; 499 case QUAD_LOBATTO: 500 HexQInterp = HexQInterp_Lobatto; 501 HexQDeriv = HexQDeriv_Lobatto; 502 break; 503 } 504 PetscCall(PetscOptionsReal("-thi_alpha","Bed angle (degrees)","",thi->alpha,&thi->alpha,NULL)); 505 506 thi->friction.refvel = 100.; 507 thi->friction.epsvel = 1.; 508 509 PetscCall(PetscOptionsReal("-thi_friction_refvel","Reference velocity for sliding","",thi->friction.refvel,&thi->friction.refvel,NULL)); 510 PetscCall(PetscOptionsReal("-thi_friction_epsvel","Regularization velocity for sliding","",thi->friction.epsvel,&thi->friction.epsvel,NULL)); 511 PetscCall(PetscOptionsReal("-thi_friction_m","Friction exponent, 0=Coulomb, 1=Navier","",m,&m,NULL)); 512 513 thi->friction.exponent = (m-1)/2; 514 515 PetscCall(PetscOptionsReal("-thi_dirichlet_scale","Scale Dirichlet boundary conditions by this factor","",thi->dirichlet_scale,&thi->dirichlet_scale,NULL)); 516 PetscCall(PetscOptionsReal("-thi_ssa_friction_scale","Scale slip boundary conditions by this factor in SSA (2D) assembly","",thi->ssa_friction_scale,&thi->ssa_friction_scale,NULL)); 517 PetscCall(PetscOptionsBool("-thi_coarse2d","Use a 2D coarse space corresponding to SSA","",thi->coarse2d,&thi->coarse2d,NULL)); 518 PetscCall(PetscOptionsBool("-thi_tridiagonal","Assemble a tridiagonal system (column coupling only) on the finest level","",thi->tridiagonal,&thi->tridiagonal,NULL)); 519 PetscCall(PetscOptionsFList("-thi_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)mtype,sizeof(mtype),NULL)); 520 PetscCall(PetscStrallocpy(mtype,(char**)&thi->mattype)); 521 PetscCall(PetscOptionsBool("-thi_verbose","Enable verbose output (like matrix sizes and statistics)","",thi->verbose,&thi->verbose,NULL)); 522 } 523 PetscOptionsEnd(); 524 525 /* dimensionalize */ 526 thi->Lx *= units->meter; 527 thi->Ly *= units->meter; 528 thi->Lz *= units->meter; 529 thi->alpha *= PETSC_PI / 180; 530 531 PRangeClear(&thi->eta); 532 PRangeClear(&thi->beta2); 533 534 { 535 PetscReal u = 1000*units->meter/(3e7*units->second), 536 gradu = u / (100*units->meter),eta,deta, 537 rho = 910 * units->kilogram/PetscPowReal(units->meter,3), 538 grav = 9.81 * units->meter/PetscSqr(units->second), 539 driving = rho * grav * PetscSinReal(thi->alpha) * 1000*units->meter; 540 THIViscosity(thi,0.5*gradu*gradu,&eta,&deta); 541 thi->rhog = rho * grav; 542 if (thi->verbose) { 543 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Units: meter %8.2g second %8.2g kg %8.2g Pa %8.2g\n",(double)units->meter,(double)units->second,(double)units->kilogram,(double)units->Pascal)); 544 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n",(double)thi->Lx,(double)thi->Ly,(double)thi->Lz,(double)(rho*grav*1e3*units->meter),(double)driving)); 545 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)u,(double)gradu,(double)eta,(double)(2*eta*gradu),(double)(2*eta*gradu/driving))); 546 THIViscosity(thi,0.5*PetscSqr(1e-3*gradu),&eta,&deta); 547 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Small velocity 1m/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)(1e-3*u),(double)(1e-3*gradu),(double)eta,(double)(2*eta*1e-3*gradu),(double)(2*eta*1e-3*gradu/driving))); 548 } 549 } 550 551 *inthi = thi; 552 PetscFunctionReturn(0); 553 } 554 555 static PetscErrorCode THIInitializePrm(THI thi,DM da2prm,Vec prm) 556 { 557 PrmNode **p; 558 PetscInt i,j,xs,xm,ys,ym,mx,my; 559 560 PetscFunctionBeginUser; 561 PetscCall(DMDAGetGhostCorners(da2prm,&ys,&xs,0,&ym,&xm,0)); 562 PetscCall(DMDAGetInfo(da2prm,0, &my,&mx,0, 0,0,0, 0,0,0,0,0,0)); 563 PetscCall(DMDAVecGetArray(da2prm,prm,&p)); 564 for (i=xs; i<xs+xm; i++) { 565 for (j=ys; j<ys+ym; j++) { 566 PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my; 567 thi->initialize(thi,xx,yy,&p[i][j]); 568 } 569 } 570 PetscCall(DMDAVecRestoreArray(da2prm,prm,&p)); 571 PetscFunctionReturn(0); 572 } 573 574 static PetscErrorCode THISetUpDM(THI thi,DM dm) 575 { 576 PetscInt refinelevel,coarsenlevel,level,dim,Mx,My,Mz,mx,my,s; 577 DMDAStencilType st; 578 DM da2prm; 579 Vec X; 580 581 PetscFunctionBeginUser; 582 PetscCall(DMDAGetInfo(dm,&dim, &Mz,&My,&Mx, 0,&my,&mx, 0,&s,0,0,0,&st)); 583 if (dim == 2) { 584 PetscCall(DMDAGetInfo(dm,&dim, &My,&Mx,0, &my,&mx,0, 0,&s,0,0,0,&st)); 585 } 586 PetscCall(DMGetRefineLevel(dm,&refinelevel)); 587 PetscCall(DMGetCoarsenLevel(dm,&coarsenlevel)); 588 level = refinelevel - coarsenlevel; 589 PetscCall(DMDACreate2d(PetscObjectComm((PetscObject)thi),DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,My,Mx,my,mx,sizeof(PrmNode)/sizeof(PetscScalar),s,0,0,&da2prm)); 590 PetscCall(DMSetUp(da2prm)); 591 PetscCall(DMCreateLocalVector(da2prm,&X)); 592 { 593 PetscReal Lx = thi->Lx / thi->units->meter,Ly = thi->Ly / thi->units->meter,Lz = thi->Lz / thi->units->meter; 594 if (dim == 2) { 595 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g, num elements %" PetscInt_FMT " x %" PetscInt_FMT " (%" PetscInt_FMT "), size (m) %g x %g\n",level,(double)Lx,(double)Ly,Mx,My,Mx*My,(double)(Lx/Mx),(double)(Ly/My))); 596 } else { 597 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %" PetscInt_FMT " domain size (m) %8.2g x %8.2g x %8.2g, num elements %" PetscInt_FMT " x %" PetscInt_FMT " x %" PetscInt_FMT " (%" PetscInt_FMT "), size (m) %g x %g x %g\n",level,(double)Lx,(double)Ly,(double)Lz,Mx,My,Mz,Mx*My*Mz,(double)(Lx/Mx),(double)(Ly/My),(double)(1000./(Mz-1)))); 598 } 599 } 600 PetscCall(THIInitializePrm(thi,da2prm,X)); 601 if (thi->tridiagonal) { /* Reset coarse Jacobian evaluation */ 602 PetscCall(DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi)); 603 } 604 if (thi->coarse2d) PetscCall(DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_2D,thi)); 605 PetscCall(PetscObjectCompose((PetscObject)dm,"DMDA2Prm",(PetscObject)da2prm)); 606 PetscCall(PetscObjectCompose((PetscObject)dm,"DMDA2Prm_Vec",(PetscObject)X)); 607 PetscCall(DMDestroy(&da2prm)); 608 PetscCall(VecDestroy(&X)); 609 PetscFunctionReturn(0); 610 } 611 612 static PetscErrorCode DMCoarsenHook_THI(DM dmf,DM dmc,void *ctx) 613 { 614 THI thi = (THI)ctx; 615 PetscInt rlevel,clevel; 616 617 PetscFunctionBeginUser; 618 PetscCall(THISetUpDM(thi,dmc)); 619 PetscCall(DMGetRefineLevel(dmc,&rlevel)); 620 PetscCall(DMGetCoarsenLevel(dmc,&clevel)); 621 if (rlevel-clevel == 0) PetscCall(DMSetMatType(dmc,MATAIJ)); 622 PetscCall(DMCoarsenHookAdd(dmc,DMCoarsenHook_THI,NULL,thi)); 623 PetscFunctionReturn(0); 624 } 625 626 static PetscErrorCode DMRefineHook_THI(DM dmc,DM dmf,void *ctx) 627 { 628 THI thi = (THI)ctx; 629 630 PetscFunctionBeginUser; 631 PetscCall(THISetUpDM(thi,dmf)); 632 PetscCall(DMSetMatType(dmf,thi->mattype)); 633 PetscCall(DMRefineHookAdd(dmf,DMRefineHook_THI,NULL,thi)); 634 /* With grid sequencing, a formerly-refined DM will later be coarsened by PCSetUp_MG */ 635 PetscCall(DMCoarsenHookAdd(dmf,DMCoarsenHook_THI,NULL,thi)); 636 PetscFunctionReturn(0); 637 } 638 639 static PetscErrorCode THIDAGetPrm(DM da,PrmNode ***prm) 640 { 641 DM da2prm; 642 Vec X; 643 644 PetscFunctionBeginUser; 645 PetscCall(PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm)); 646 PetscCheck(da2prm,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA"); 647 PetscCall(PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X)); 648 PetscCheck(X,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA"); 649 PetscCall(DMDAVecGetArray(da2prm,X,prm)); 650 PetscFunctionReturn(0); 651 } 652 653 static PetscErrorCode THIDARestorePrm(DM da,PrmNode ***prm) 654 { 655 DM da2prm; 656 Vec X; 657 658 PetscFunctionBeginUser; 659 PetscCall(PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm)); 660 PetscCheck(da2prm,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA"); 661 PetscCall(PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X)); 662 PetscCheck(X,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA"); 663 PetscCall(DMDAVecRestoreArray(da2prm,X,prm)); 664 PetscFunctionReturn(0); 665 } 666 667 static PetscErrorCode THIInitial(SNES snes,Vec X,void *ctx) 668 { 669 THI thi; 670 PetscInt i,j,k,xs,xm,ys,ym,zs,zm,mx,my; 671 PetscReal hx,hy; 672 PrmNode **prm; 673 Node ***x; 674 DM da; 675 676 PetscFunctionBeginUser; 677 PetscCall(SNESGetDM(snes,&da)); 678 PetscCall(DMGetApplicationContext(da,&thi)); 679 PetscCall(DMDAGetInfo(da,0, 0,&my,&mx, 0,0,0, 0,0,0,0,0,0)); 680 PetscCall(DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm)); 681 PetscCall(DMDAVecGetArray(da,X,&x)); 682 PetscCall(THIDAGetPrm(da,&prm)); 683 hx = thi->Lx / mx; 684 hy = thi->Ly / my; 685 for (i=xs; i<xs+xm; i++) { 686 for (j=ys; j<ys+ym; j++) { 687 for (k=zs; k<zs+zm; k++) { 688 const PetscScalar zm1 = zm-1, 689 drivingx = thi->rhog * (prm[i+1][j].b+prm[i+1][j].h - prm[i-1][j].b-prm[i-1][j].h) / (2*hx), 690 drivingy = thi->rhog * (prm[i][j+1].b+prm[i][j+1].h - prm[i][j-1].b-prm[i][j-1].h) / (2*hy); 691 x[i][j][k].u = 0. * drivingx * prm[i][j].h*(PetscScalar)k/zm1; 692 x[i][j][k].v = 0. * drivingy * prm[i][j].h*(PetscScalar)k/zm1; 693 } 694 } 695 } 696 PetscCall(DMDAVecRestoreArray(da,X,&x)); 697 PetscCall(THIDARestorePrm(da,&prm)); 698 PetscFunctionReturn(0); 699 } 700 701 static void PointwiseNonlinearity(THI thi,const Node n[PETSC_RESTRICT],const PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscScalar *PETSC_RESTRICT u,PetscScalar *PETSC_RESTRICT v,PetscScalar du[PETSC_RESTRICT],PetscScalar dv[PETSC_RESTRICT],PetscReal *eta,PetscReal *deta) 702 { 703 PetscInt l,ll; 704 PetscScalar gam; 705 706 du[0] = du[1] = du[2] = 0; 707 dv[0] = dv[1] = dv[2] = 0; 708 *u = 0; 709 *v = 0; 710 for (l=0; l<8; l++) { 711 *u += phi[l] * n[l].u; 712 *v += phi[l] * n[l].v; 713 for (ll=0; ll<3; ll++) { 714 du[ll] += dphi[l][ll] * n[l].u; 715 dv[ll] += dphi[l][ll] * n[l].v; 716 } 717 } 718 gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]) + 0.25*PetscSqr(du[2]) + 0.25*PetscSqr(dv[2]); 719 THIViscosity(thi,PetscRealPart(gam),eta,deta); 720 } 721 722 static void PointwiseNonlinearity2D(THI thi,Node n[],PetscReal phi[],PetscReal dphi[4][2],PetscScalar *u,PetscScalar *v,PetscScalar du[],PetscScalar dv[],PetscReal *eta,PetscReal *deta) 723 { 724 PetscInt l,ll; 725 PetscScalar gam; 726 727 du[0] = du[1] = 0; 728 dv[0] = dv[1] = 0; 729 *u = 0; 730 *v = 0; 731 for (l=0; l<4; l++) { 732 *u += phi[l] * n[l].u; 733 *v += phi[l] * n[l].v; 734 for (ll=0; ll<2; ll++) { 735 du[ll] += dphi[l][ll] * n[l].u; 736 dv[ll] += dphi[l][ll] * n[l].v; 737 } 738 } 739 gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]); 740 THIViscosity(thi,PetscRealPart(gam),eta,deta); 741 } 742 743 static PetscErrorCode THIFunctionLocal(DMDALocalInfo *info,Node ***x,Node ***f,THI thi) 744 { 745 PetscInt xs,ys,xm,ym,zm,i,j,k,q,l; 746 PetscReal hx,hy,etamin,etamax,beta2min,beta2max; 747 PrmNode **prm; 748 749 PetscFunctionBeginUser; 750 xs = info->zs; 751 ys = info->ys; 752 xm = info->zm; 753 ym = info->ym; 754 zm = info->xm; 755 hx = thi->Lx / info->mz; 756 hy = thi->Ly / info->my; 757 758 etamin = 1e100; 759 etamax = 0; 760 beta2min = 1e100; 761 beta2max = 0; 762 763 PetscCall(THIDAGetPrm(info->da,&prm)); 764 765 for (i=xs; i<xs+xm; i++) { 766 for (j=ys; j<ys+ym; j++) { 767 PrmNode pn[4]; 768 QuadExtract(prm,i,j,pn); 769 for (k=0; k<zm-1; k++) { 770 PetscInt ls = 0; 771 Node n[8],*fn[8]; 772 PetscReal zn[8],etabase = 0; 773 PrmHexGetZ(pn,k,zm,zn); 774 HexExtract(x,i,j,k,n); 775 HexExtractRef(f,i,j,k,fn); 776 if (thi->no_slip && k == 0) { 777 for (l=0; l<4; l++) n[l].u = n[l].v = 0; 778 /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */ 779 ls = 4; 780 } 781 for (q=0; q<8; q++) { 782 PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta; 783 PetscScalar du[3],dv[3],u,v; 784 HexGrad(HexQDeriv[q],zn,dz); 785 HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw); 786 PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta); 787 jw /= thi->rhog; /* scales residuals to be O(1) */ 788 if (q == 0) etabase = eta; 789 RangeUpdate(&etamin,&etamax,eta); 790 for (l=ls; l<8; l++) { /* test functions */ 791 const PetscReal ds[2] = {-PetscSinReal(thi->alpha),0}; 792 const PetscReal pp = phi[l],*dp = dphi[l]; 793 fn[l]->u += dp[0]*jw*eta*(4.*du[0]+2.*dv[1]) + dp[1]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*du[2] + pp*jw*thi->rhog*ds[0]; 794 fn[l]->v += dp[1]*jw*eta*(2.*du[0]+4.*dv[1]) + dp[0]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*dv[2] + pp*jw*thi->rhog*ds[1]; 795 } 796 } 797 if (k == 0) { /* we are on a bottom face */ 798 if (thi->no_slip) { 799 /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary 800 * conditions. After shenanigans above, etabase contains the effective viscosity at the closest quadrature 801 * point to the bed. We want the diagonal entry in the Dirichlet condition to have similar magnitude to the 802 * diagonal entry corresponding to the adjacent node. The fundamental scaling of the viscous part is in 803 * diagu, diagv below. This scaling is easy to recognize by considering the finite difference operator after 804 * scaling by element size. The no-slip Dirichlet condition is scaled by this factor, and also in the 805 * assembled matrix (see the similar block in THIJacobianLocal). 806 * 807 * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends 808 * (nonlinearly in general) on the neighboring interior nodes through the local viscosity. This will make 809 * a matrix-free Jacobian have extra entries in the corresponding row. We assemble only the diagonal part, 810 * so the solution will exactly satisfy the boundary condition after the first linear iteration. 811 */ 812 const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1.); 813 const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx); 814 fn[0]->u = thi->dirichlet_scale*diagu*x[i][j][k].u; 815 fn[0]->v = thi->dirichlet_scale*diagv*x[i][j][k].v; 816 } else { /* Integrate over bottom face to apply boundary condition */ 817 for (q=0; q<4; q++) { 818 const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q]; 819 PetscScalar u =0,v=0,rbeta2=0; 820 PetscReal beta2,dbeta2; 821 for (l=0; l<4; l++) { 822 u += phi[l]*n[l].u; 823 v += phi[l]*n[l].v; 824 rbeta2 += phi[l]*pn[l].beta2; 825 } 826 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2); 827 RangeUpdate(&beta2min,&beta2max,beta2); 828 for (l=0; l<4; l++) { 829 const PetscReal pp = phi[l]; 830 fn[ls+l]->u += pp*jw*beta2*u; 831 fn[ls+l]->v += pp*jw*beta2*v; 832 } 833 } 834 } 835 } 836 } 837 } 838 } 839 840 PetscCall(THIDARestorePrm(info->da,&prm)); 841 842 PetscCall(PRangeMinMax(&thi->eta,etamin,etamax)); 843 PetscCall(PRangeMinMax(&thi->beta2,beta2min,beta2max)); 844 PetscFunctionReturn(0); 845 } 846 847 static PetscErrorCode THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer) 848 { 849 PetscReal nrm; 850 PetscInt m; 851 PetscMPIInt rank; 852 853 PetscFunctionBeginUser; 854 PetscCall(MatNorm(B,NORM_FROBENIUS,&nrm)); 855 PetscCall(MatGetSize(B,&m,0)); 856 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&rank)); 857 if (rank == 0) { 858 PetscScalar val0,val2; 859 PetscCall(MatGetValue(B,0,0,&val0)); 860 PetscCall(MatGetValue(B,2,2,&val2)); 861 PetscCall(PetscViewerASCIIPrintf(viewer,"Matrix dim %" PetscInt_FMT " norm %8.2e (0,0) %8.2e (2,2) %8.2e %8.2e <= eta <= %8.2e %8.2e <= beta2 <= %8.2e\n",m,(double)nrm,(double)PetscRealPart(val0),(double)PetscRealPart(val2),(double)thi->eta.cmin,(double)thi->eta.cmax,(double)thi->beta2.cmin,(double)thi->beta2.cmax)); 862 } 863 PetscFunctionReturn(0); 864 } 865 866 static PetscErrorCode THISurfaceStatistics(DM da,Vec X,PetscReal *min,PetscReal *max,PetscReal *mean) 867 { 868 Node ***x; 869 PetscInt i,j,xs,ys,zs,xm,ym,zm,mx,my,mz; 870 PetscReal umin = 1e100,umax=-1e100; 871 PetscScalar usum = 0.0,gusum; 872 873 PetscFunctionBeginUser; 874 *min = *max = *mean = 0; 875 PetscCall(DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0)); 876 PetscCall(DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm)); 877 PetscCheck(zs == 0 && zm == mz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected decomposition"); 878 PetscCall(DMDAVecGetArray(da,X,&x)); 879 for (i=xs; i<xs+xm; i++) { 880 for (j=ys; j<ys+ym; j++) { 881 PetscReal u = PetscRealPart(x[i][j][zm-1].u); 882 RangeUpdate(&umin,&umax,u); 883 usum += u; 884 } 885 } 886 PetscCall(DMDAVecRestoreArray(da,X,&x)); 887 PetscCallMPI(MPI_Allreduce(&umin,min,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)da))); 888 PetscCallMPI(MPI_Allreduce(&umax,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)da))); 889 PetscCallMPI(MPI_Allreduce(&usum,&gusum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)da))); 890 *mean = PetscRealPart(gusum) / (mx*my); 891 PetscFunctionReturn(0); 892 } 893 894 static PetscErrorCode THISolveStatistics(THI thi,SNES snes,PetscInt coarsened,const char name[]) 895 { 896 MPI_Comm comm; 897 Vec X; 898 DM dm; 899 900 PetscFunctionBeginUser; 901 PetscCall(PetscObjectGetComm((PetscObject)thi,&comm)); 902 PetscCall(SNESGetSolution(snes,&X)); 903 PetscCall(SNESGetDM(snes,&dm)); 904 PetscCall(PetscPrintf(comm,"Solution statistics after solve: %s\n",name)); 905 { 906 PetscInt its,lits; 907 SNESConvergedReason reason; 908 PetscCall(SNESGetIterationNumber(snes,&its)); 909 PetscCall(SNESGetConvergedReason(snes,&reason)); 910 PetscCall(SNESGetLinearSolveIterations(snes,&lits)); 911 PetscCall(PetscPrintf(comm,"%s: Number of SNES iterations = %" PetscInt_FMT ", total linear iterations = %" PetscInt_FMT "\n",SNESConvergedReasons[reason],its,lits)); 912 } 913 { 914 PetscReal nrm2,tmin[3]={1e100,1e100,1e100},tmax[3]={-1e100,-1e100,-1e100},min[3],max[3]; 915 PetscInt i,j,m; 916 const PetscScalar *x; 917 PetscCall(VecNorm(X,NORM_2,&nrm2)); 918 PetscCall(VecGetLocalSize(X,&m)); 919 PetscCall(VecGetArrayRead(X,&x)); 920 for (i=0; i<m; i+=2) { 921 PetscReal u = PetscRealPart(x[i]),v = PetscRealPart(x[i+1]),c = PetscSqrtReal(u*u+v*v); 922 tmin[0] = PetscMin(u,tmin[0]); 923 tmin[1] = PetscMin(v,tmin[1]); 924 tmin[2] = PetscMin(c,tmin[2]); 925 tmax[0] = PetscMax(u,tmax[0]); 926 tmax[1] = PetscMax(v,tmax[1]); 927 tmax[2] = PetscMax(c,tmax[2]); 928 } 929 PetscCall(VecRestoreArrayRead(X,&x)); 930 PetscCallMPI(MPI_Allreduce(tmin,min,3,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)thi))); 931 PetscCallMPI(MPI_Allreduce(tmax,max,3,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)thi))); 932 /* Dimensionalize to meters/year */ 933 nrm2 *= thi->units->year / thi->units->meter; 934 for (j=0; j<3; j++) { 935 min[j] *= thi->units->year / thi->units->meter; 936 max[j] *= thi->units->year / thi->units->meter; 937 } 938 if (min[0] == 0.0) min[0] = 0.0; 939 PetscCall(PetscPrintf(comm,"|X|_2 %g %g <= u <= %g %g <= v <= %g %g <= c <= %g \n",(double)nrm2,(double)min[0],(double)max[0],(double)min[1],(double)max[1],(double)min[2],(double)max[2])); 940 { 941 PetscReal umin,umax,umean; 942 PetscCall(THISurfaceStatistics(dm,X,&umin,&umax,&umean)); 943 umin *= thi->units->year / thi->units->meter; 944 umax *= thi->units->year / thi->units->meter; 945 umean *= thi->units->year / thi->units->meter; 946 PetscCall(PetscPrintf(comm,"Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n",(double)umin,(double)umax,(double)umean)); 947 } 948 /* These values stay nondimensional */ 949 PetscCall(PetscPrintf(comm,"Global eta range %g to %g converged range %g to %g\n",(double)thi->eta.min,(double)thi->eta.max,(double)thi->eta.cmin,(double)thi->eta.cmax)); 950 PetscCall(PetscPrintf(comm,"Global beta2 range %g to %g converged range %g to %g\n",(double)thi->beta2.min,(double)thi->beta2.max,(double)thi->beta2.cmin,(double)thi->beta2.cmax)); 951 } 952 PetscFunctionReturn(0); 953 } 954 955 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info,Node **x,Mat J,Mat B,THI thi) 956 { 957 PetscInt xs,ys,xm,ym,i,j,q,l,ll; 958 PetscReal hx,hy; 959 PrmNode **prm; 960 961 PetscFunctionBeginUser; 962 xs = info->ys; 963 ys = info->xs; 964 xm = info->ym; 965 ym = info->xm; 966 hx = thi->Lx / info->my; 967 hy = thi->Ly / info->mx; 968 969 PetscCall(MatZeroEntries(B)); 970 PetscCall(THIDAGetPrm(info->da,&prm)); 971 972 for (i=xs; i<xs+xm; i++) { 973 for (j=ys; j<ys+ym; j++) { 974 Node n[4]; 975 PrmNode pn[4]; 976 PetscScalar Ke[4*2][4*2]; 977 QuadExtract(prm,i,j,pn); 978 QuadExtract(x,i,j,n); 979 PetscCall(PetscMemzero(Ke,sizeof(Ke))); 980 for (q=0; q<4; q++) { 981 PetscReal phi[4],dphi[4][2],jw,eta,deta,beta2,dbeta2; 982 PetscScalar u,v,du[2],dv[2],h = 0,rbeta2 = 0; 983 for (l=0; l<4; l++) { 984 phi[l] = QuadQInterp[q][l]; 985 dphi[l][0] = QuadQDeriv[q][l][0]*2./hx; 986 dphi[l][1] = QuadQDeriv[q][l][1]*2./hy; 987 h += phi[l] * pn[l].h; 988 rbeta2 += phi[l] * pn[l].beta2; 989 } 990 jw = 0.25*hx*hy / thi->rhog; /* rhog is only scaling */ 991 PointwiseNonlinearity2D(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta); 992 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2); 993 for (l=0; l<4; l++) { 994 const PetscReal pp = phi[l],*dp = dphi[l]; 995 for (ll=0; ll<4; ll++) { 996 const PetscReal ppl = phi[ll],*dpl = dphi[ll]; 997 PetscScalar dgdu,dgdv; 998 dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1]; 999 dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0]; 1000 /* Picard part */ 1001 Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale; 1002 Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0]; 1003 Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1]; 1004 Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale; 1005 /* extra Newton terms */ 1006 Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*u*ppl*thi->ssa_friction_scale; 1007 Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*v*ppl*thi->ssa_friction_scale; 1008 Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*u*ppl*thi->ssa_friction_scale; 1009 Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*v*ppl*thi->ssa_friction_scale; 1010 } 1011 } 1012 } 1013 { 1014 const MatStencil rc[4] = {{0,i,j,0},{0,i+1,j,0},{0,i+1,j+1,0},{0,i,j+1,0}}; 1015 PetscCall(MatSetValuesBlockedStencil(B,4,rc,4,rc,&Ke[0][0],ADD_VALUES)); 1016 } 1017 } 1018 } 1019 PetscCall(THIDARestorePrm(info->da,&prm)); 1020 1021 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1022 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1023 PetscCall(MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE)); 1024 if (thi->verbose) PetscCall(THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD)); 1025 PetscFunctionReturn(0); 1026 } 1027 1028 static PetscErrorCode THIJacobianLocal_3D(DMDALocalInfo *info,Node ***x,Mat B,THI thi,THIAssemblyMode amode) 1029 { 1030 PetscInt xs,ys,xm,ym,zm,i,j,k,q,l,ll; 1031 PetscReal hx,hy; 1032 PrmNode **prm; 1033 1034 PetscFunctionBeginUser; 1035 xs = info->zs; 1036 ys = info->ys; 1037 xm = info->zm; 1038 ym = info->ym; 1039 zm = info->xm; 1040 hx = thi->Lx / info->mz; 1041 hy = thi->Ly / info->my; 1042 1043 PetscCall(MatZeroEntries(B)); 1044 PetscCall(MatSetOption(B,MAT_SUBSET_OFF_PROC_ENTRIES,PETSC_TRUE)); 1045 PetscCall(THIDAGetPrm(info->da,&prm)); 1046 1047 for (i=xs; i<xs+xm; i++) { 1048 for (j=ys; j<ys+ym; j++) { 1049 PrmNode pn[4]; 1050 QuadExtract(prm,i,j,pn); 1051 for (k=0; k<zm-1; k++) { 1052 Node n[8]; 1053 PetscReal zn[8],etabase = 0; 1054 PetscScalar Ke[8*2][8*2]; 1055 PetscInt ls = 0; 1056 1057 PrmHexGetZ(pn,k,zm,zn); 1058 HexExtract(x,i,j,k,n); 1059 PetscCall(PetscMemzero(Ke,sizeof(Ke))); 1060 if (thi->no_slip && k == 0) { 1061 for (l=0; l<4; l++) n[l].u = n[l].v = 0; 1062 ls = 4; 1063 } 1064 for (q=0; q<8; q++) { 1065 PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta; 1066 PetscScalar du[3],dv[3],u,v; 1067 HexGrad(HexQDeriv[q],zn,dz); 1068 HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw); 1069 PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta); 1070 jw /= thi->rhog; /* residuals are scaled by this factor */ 1071 if (q == 0) etabase = eta; 1072 for (l=ls; l<8; l++) { /* test functions */ 1073 const PetscReal *PETSC_RESTRICT dp = dphi[l]; 1074 #if USE_SSE2_KERNELS 1075 /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */ 1076 __m128d 1077 p4 = _mm_set1_pd(4),p2 = _mm_set1_pd(2),p05 = _mm_set1_pd(0.5), 1078 p42 = _mm_setr_pd(4,2),p24 = _mm_shuffle_pd(p42,p42,_MM_SHUFFLE2(0,1)), 1079 du0 = _mm_set1_pd(du[0]),du1 = _mm_set1_pd(du[1]),du2 = _mm_set1_pd(du[2]), 1080 dv0 = _mm_set1_pd(dv[0]),dv1 = _mm_set1_pd(dv[1]),dv2 = _mm_set1_pd(dv[2]), 1081 jweta = _mm_set1_pd(jw*eta),jwdeta = _mm_set1_pd(jw*deta), 1082 dp0 = _mm_set1_pd(dp[0]),dp1 = _mm_set1_pd(dp[1]),dp2 = _mm_set1_pd(dp[2]), 1083 dp0jweta = _mm_mul_pd(dp0,jweta),dp1jweta = _mm_mul_pd(dp1,jweta),dp2jweta = _mm_mul_pd(dp2,jweta), 1084 p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4,du0),_mm_mul_pd(p2,dv1)), /* 4 du0 + 2 dv1 */ 1085 p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4,dv1),_mm_mul_pd(p2,du0)), /* 4 dv1 + 2 du0 */ 1086 pdu2dv2 = _mm_unpacklo_pd(du2,dv2), /* [du2, dv2] */ 1087 du1pdv0 = _mm_add_pd(du1,dv0), /* du1 + dv0 */ 1088 t1 = _mm_mul_pd(dp0,p4du0p2dv1), /* dp0 (4 du0 + 2 dv1) */ 1089 t2 = _mm_mul_pd(dp1,p4dv1p2du0); /* dp1 (4 dv1 + 2 du0) */ 1090 1091 #endif 1092 #if defined COMPUTE_LOWER_TRIANGULAR /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */ 1093 for (ll=ls; ll<8; ll++) { /* trial functions */ 1094 #else 1095 for (ll=l; ll<8; ll++) { 1096 #endif 1097 const PetscReal *PETSC_RESTRICT dpl = dphi[ll]; 1098 if (amode == THIASSEMBLY_TRIDIAGONAL && (l-ll)%4) continue; /* these entries would not be inserted */ 1099 #if !USE_SSE2_KERNELS 1100 /* The analytic Jacobian in nice, easy-to-read form */ 1101 { 1102 PetscScalar dgdu,dgdv; 1103 dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1] + 0.5*du[2]*dpl[2]; 1104 dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0] + 0.5*dv[2]*dpl[2]; 1105 /* Picard part */ 1106 Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + dp[2]*jw*eta*dpl[2]; 1107 Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0]; 1108 Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1]; 1109 Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + dp[2]*jw*eta*dpl[2]; 1110 /* extra Newton terms */ 1111 Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*du[2]; 1112 Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*du[2]; 1113 Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*dv[2]; 1114 Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*dv[2]; 1115 } 1116 #else 1117 /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed 1118 * benefit. On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost 1119 * by 25 to 30 percent. */ 1120 { 1121 __m128d 1122 keu = _mm_loadu_pd(&Ke[l*2+0][ll*2+0]), 1123 kev = _mm_loadu_pd(&Ke[l*2+1][ll*2+0]), 1124 dpl01 = _mm_loadu_pd(&dpl[0]),dpl10 = _mm_shuffle_pd(dpl01,dpl01,_MM_SHUFFLE2(0,1)),dpl2 = _mm_set_sd(dpl[2]), 1125 t0,t3,pdgduv; 1126 keu = _mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta,p42),dpl01), 1127 _mm_add_pd(_mm_mul_pd(dp1jweta,dpl10), 1128 _mm_mul_pd(dp2jweta,dpl2)))); 1129 kev = _mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta,p24),dpl01), 1130 _mm_add_pd(_mm_mul_pd(dp0jweta,dpl10), 1131 _mm_mul_pd(dp2jweta,_mm_shuffle_pd(dpl2,dpl2,_MM_SHUFFLE2(0,1)))))); 1132 pdgduv = _mm_mul_pd(p05,_mm_add_pd(_mm_add_pd(_mm_mul_pd(p42,_mm_mul_pd(du0,dpl01)), 1133 _mm_mul_pd(p24,_mm_mul_pd(dv1,dpl01))), 1134 _mm_add_pd(_mm_mul_pd(du1pdv0,dpl10), 1135 _mm_mul_pd(pdu2dv2,_mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */ 1136 t0 = _mm_mul_pd(jwdeta,pdgduv); /* jw deta [dgdu, dgdv] */ 1137 t3 = _mm_mul_pd(t0,du1pdv0); /* t0 (du1 + dv0) */ 1138 _mm_storeu_pd(&Ke[l*2+0][ll*2+0],_mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(t1,t0), 1139 _mm_add_pd(_mm_mul_pd(dp1,t3), 1140 _mm_mul_pd(t0,_mm_mul_pd(dp2,du2)))))); 1141 _mm_storeu_pd(&Ke[l*2+1][ll*2+0],_mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(t2,t0), 1142 _mm_add_pd(_mm_mul_pd(dp0,t3), 1143 _mm_mul_pd(t0,_mm_mul_pd(dp2,dv2)))))); 1144 } 1145 #endif 1146 } 1147 } 1148 } 1149 if (k == 0) { /* on a bottom face */ 1150 if (thi->no_slip) { 1151 const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1); 1152 const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx); 1153 Ke[0][0] = thi->dirichlet_scale*diagu; 1154 Ke[1][1] = thi->dirichlet_scale*diagv; 1155 } else { 1156 for (q=0; q<4; q++) { 1157 const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q]; 1158 PetscScalar u =0,v=0,rbeta2=0; 1159 PetscReal beta2,dbeta2; 1160 for (l=0; l<4; l++) { 1161 u += phi[l]*n[l].u; 1162 v += phi[l]*n[l].v; 1163 rbeta2 += phi[l]*pn[l].beta2; 1164 } 1165 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2); 1166 for (l=0; l<4; l++) { 1167 const PetscReal pp = phi[l]; 1168 for (ll=0; ll<4; ll++) { 1169 const PetscReal ppl = phi[ll]; 1170 Ke[l*2+0][ll*2+0] += pp*jw*beta2*ppl + pp*jw*dbeta2*u*u*ppl; 1171 Ke[l*2+0][ll*2+1] += pp*jw*dbeta2*u*v*ppl; 1172 Ke[l*2+1][ll*2+0] += pp*jw*dbeta2*v*u*ppl; 1173 Ke[l*2+1][ll*2+1] += pp*jw*beta2*ppl + pp*jw*dbeta2*v*v*ppl; 1174 } 1175 } 1176 } 1177 } 1178 } 1179 { 1180 const MatStencil rc[8] = {{i,j,k,0},{i+1,j,k,0},{i+1,j+1,k,0},{i,j+1,k,0},{i,j,k+1,0},{i+1,j,k+1,0},{i+1,j+1,k+1,0},{i,j+1,k+1,0}}; 1181 if (amode == THIASSEMBLY_TRIDIAGONAL) { 1182 for (l=0; l<4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */ 1183 const PetscInt l4 = l+4; 1184 const MatStencil rcl[2] = {{rc[l].k,rc[l].j,rc[l].i,0},{rc[l4].k,rc[l4].j,rc[l4].i,0}}; 1185 #if defined COMPUTE_LOWER_TRIANGULAR 1186 const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]}, 1187 {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]}, 1188 {Ke[2*l4+0][2*l+0],Ke[2*l4+0][2*l+1],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]}, 1189 {Ke[2*l4+1][2*l+0],Ke[2*l4+1][2*l+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}}; 1190 #else 1191 /* Same as above except for the lower-left block */ 1192 const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]}, 1193 {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]}, 1194 {Ke[2*l+0][2*l4+0],Ke[2*l+1][2*l4+0],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]}, 1195 {Ke[2*l+0][2*l4+1],Ke[2*l+1][2*l4+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}}; 1196 #endif 1197 PetscCall(MatSetValuesBlockedStencil(B,2,rcl,2,rcl,&Kel[0][0],ADD_VALUES)); 1198 } 1199 } else { 1200 #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */ 1201 for (l=0; l<8; l++) { 1202 for (ll=l+1; ll<8; ll++) { 1203 Ke[ll*2+0][l*2+0] = Ke[l*2+0][ll*2+0]; 1204 Ke[ll*2+1][l*2+0] = Ke[l*2+0][ll*2+1]; 1205 Ke[ll*2+0][l*2+1] = Ke[l*2+1][ll*2+0]; 1206 Ke[ll*2+1][l*2+1] = Ke[l*2+1][ll*2+1]; 1207 } 1208 } 1209 #endif 1210 PetscCall(MatSetValuesBlockedStencil(B,8,rc,8,rc,&Ke[0][0],ADD_VALUES)); 1211 } 1212 } 1213 } 1214 } 1215 } 1216 PetscCall(THIDARestorePrm(info->da,&prm)); 1217 1218 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1219 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1220 PetscCall(MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE)); 1221 if (thi->verbose) PetscCall(THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD)); 1222 PetscFunctionReturn(0); 1223 } 1224 1225 static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi) 1226 { 1227 PetscFunctionBeginUser; 1228 PetscCall(THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_FULL)); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi) 1233 { 1234 PetscFunctionBeginUser; 1235 PetscCall(THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_TRIDIAGONAL)); 1236 PetscFunctionReturn(0); 1237 } 1238 1239 static PetscErrorCode DMRefineHierarchy_THI(DM dac0,PetscInt nlevels,DM hierarchy[]) 1240 { 1241 THI thi; 1242 PetscInt dim,M,N,m,n,s,dof; 1243 DM dac,daf; 1244 DMDAStencilType st; 1245 DM_DA *ddf,*ddc; 1246 1247 PetscFunctionBeginUser; 1248 PetscCall(PetscObjectQuery((PetscObject)dac0,"THI",(PetscObject*)&thi)); 1249 PetscCheck(thi,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot refine this DMDA, missing composed THI instance"); 1250 if (nlevels > 1) { 1251 PetscCall(DMRefineHierarchy(dac0,nlevels-1,hierarchy)); 1252 dac = hierarchy[nlevels-2]; 1253 } else { 1254 dac = dac0; 1255 } 1256 PetscCall(DMDAGetInfo(dac,&dim, &N,&M,0, &n,&m,0, &dof,&s,0,0,0,&st)); 1257 PetscCheck(dim == 2,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"This function can only refine 2D DMDAs"); 1258 1259 /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */ 1260 PetscCall(DMDACreate3d(PetscObjectComm((PetscObject)dac),DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,thi->zlevels,N,M,1,n,m,dof,s,NULL,NULL,NULL,&daf)); 1261 PetscCall(DMSetUp(daf)); 1262 1263 daf->ops->creatematrix = dac->ops->creatematrix; 1264 daf->ops->createinterpolation = dac->ops->createinterpolation; 1265 daf->ops->getcoloring = dac->ops->getcoloring; 1266 ddf = (DM_DA*)daf->data; 1267 ddc = (DM_DA*)dac->data; 1268 ddf->interptype = ddc->interptype; 1269 1270 PetscCall(DMDASetFieldName(daf,0,"x-velocity")); 1271 PetscCall(DMDASetFieldName(daf,1,"y-velocity")); 1272 1273 hierarchy[nlevels-1] = daf; 1274 PetscFunctionReturn(0); 1275 } 1276 1277 static PetscErrorCode DMCreateInterpolation_DA_THI(DM dac,DM daf,Mat *A,Vec *scale) 1278 { 1279 PetscInt dim; 1280 1281 PetscFunctionBeginUser; 1282 PetscValidHeaderSpecific(dac,DM_CLASSID,1); 1283 PetscValidHeaderSpecific(daf,DM_CLASSID,2); 1284 PetscValidPointer(A,3); 1285 if (scale) PetscValidPointer(scale,4); 1286 PetscCall(DMDAGetInfo(daf,&dim,0,0,0,0,0,0,0,0,0,0,0,0)); 1287 if (dim == 2) { 1288 /* We are in the 2D problem and use normal DMDA interpolation */ 1289 PetscCall(DMCreateInterpolation(dac,daf,A,scale)); 1290 } else { 1291 PetscInt i,j,k,xs,ys,zs,xm,ym,zm,mx,my,mz,rstart,cstart; 1292 Mat B; 1293 1294 PetscCall(DMDAGetInfo(daf,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0)); 1295 PetscCall(DMDAGetCorners(daf,&zs,&ys,&xs,&zm,&ym,&xm)); 1296 PetscCheck(!zs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"unexpected"); 1297 PetscCall(MatCreate(PetscObjectComm((PetscObject)daf),&B)); 1298 PetscCall(MatSetSizes(B,xm*ym*zm,xm*ym,mx*my*mz,mx*my)); 1299 1300 PetscCall(MatSetType(B,MATAIJ)); 1301 PetscCall(MatSeqAIJSetPreallocation(B,1,NULL)); 1302 PetscCall(MatMPIAIJSetPreallocation(B,1,NULL,0,NULL)); 1303 PetscCall(MatGetOwnershipRange(B,&rstart,NULL)); 1304 PetscCall(MatGetOwnershipRangeColumn(B,&cstart,NULL)); 1305 for (i=xs; i<xs+xm; i++) { 1306 for (j=ys; j<ys+ym; j++) { 1307 for (k=zs; k<zs+zm; k++) { 1308 PetscInt i2 = i*ym+j,i3 = i2*zm+k; 1309 PetscScalar val = ((k == 0 || k == mz-1) ? 0.5 : 1.) / (mz-1.); /* Integration using trapezoid rule */ 1310 PetscCall(MatSetValue(B,cstart+i3,rstart+i2,val,INSERT_VALUES)); 1311 } 1312 } 1313 } 1314 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1315 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1316 PetscCall(MatCreateMAIJ(B,sizeof(Node)/sizeof(PetscScalar),A)); 1317 PetscCall(MatDestroy(&B)); 1318 } 1319 PetscFunctionReturn(0); 1320 } 1321 1322 static PetscErrorCode DMCreateMatrix_THI_Tridiagonal(DM da,Mat *J) 1323 { 1324 Mat A; 1325 PetscInt xm,ym,zm,dim,dof = 2,starts[3],dims[3]; 1326 ISLocalToGlobalMapping ltog; 1327 1328 PetscFunctionBeginUser; 1329 PetscCall(DMDAGetInfo(da,&dim, 0,0,0, 0,0,0, 0,0,0,0,0,0)); 1330 PetscCheck(dim == 3,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Expected DMDA to be 3D"); 1331 PetscCall(DMDAGetCorners(da,0,0,0,&zm,&ym,&xm)); 1332 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 1333 PetscCall(MatCreate(PetscObjectComm((PetscObject)da),&A)); 1334 PetscCall(MatSetSizes(A,dof*xm*ym*zm,dof*xm*ym*zm,PETSC_DETERMINE,PETSC_DETERMINE)); 1335 PetscCall(MatSetType(A,da->mattype)); 1336 PetscCall(MatSetFromOptions(A)); 1337 PetscCall(MatSeqAIJSetPreallocation(A,3*2,NULL)); 1338 PetscCall(MatMPIAIJSetPreallocation(A,3*2,NULL,0,NULL)); 1339 PetscCall(MatSeqBAIJSetPreallocation(A,2,3,NULL)); 1340 PetscCall(MatMPIBAIJSetPreallocation(A,2,3,NULL,0,NULL)); 1341 PetscCall(MatSeqSBAIJSetPreallocation(A,2,2,NULL)); 1342 PetscCall(MatMPISBAIJSetPreallocation(A,2,2,NULL,0,NULL)); 1343 PetscCall(MatSetLocalToGlobalMapping(A,ltog,ltog)); 1344 PetscCall(DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2])); 1345 PetscCall(MatSetStencil(A,dim,dims,starts,dof)); 1346 *J = A; 1347 PetscFunctionReturn(0); 1348 } 1349 1350 static PetscErrorCode THIDAVecView_VTK_XML(THI thi,DM da,Vec X,const char filename[]) 1351 { 1352 const PetscInt dof = 2; 1353 Units units = thi->units; 1354 MPI_Comm comm; 1355 PetscViewer viewer; 1356 PetscMPIInt rank,size,tag,nn,nmax; 1357 PetscInt mx,my,mz,r,range[6]; 1358 const PetscScalar *x; 1359 1360 PetscFunctionBeginUser; 1361 PetscCall(PetscObjectGetComm((PetscObject)thi,&comm)); 1362 PetscCall(DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0)); 1363 PetscCallMPI(MPI_Comm_size(comm,&size)); 1364 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 1365 PetscCall(PetscViewerASCIIOpen(comm,filename,&viewer)); 1366 PetscCall(PetscViewerASCIIPrintf(viewer,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n")); 1367 PetscCall(PetscViewerASCIIPrintf(viewer," <StructuredGrid WholeExtent=\"%d %" PetscInt_FMT " %d %" PetscInt_FMT " %d %" PetscInt_FMT "\">\n",0,mz-1,0,my-1,0,mx-1)); 1368 1369 PetscCall(DMDAGetCorners(da,range,range+1,range+2,range+3,range+4,range+5)); 1370 PetscCall(PetscMPIIntCast(range[3]*range[4]*range[5]*dof,&nn)); 1371 PetscCallMPI(MPI_Reduce(&nn,&nmax,1,MPI_INT,MPI_MAX,0,comm)); 1372 tag = ((PetscObject) viewer)->tag; 1373 PetscCall(VecGetArrayRead(X,&x)); 1374 if (rank == 0) { 1375 PetscScalar *array; 1376 PetscCall(PetscMalloc1(nmax,&array)); 1377 for (r=0; r<size; r++) { 1378 PetscInt i,j,k,xs,xm,ys,ym,zs,zm; 1379 const PetscScalar *ptr; 1380 MPI_Status status; 1381 if (r) { 1382 PetscCallMPI(MPI_Recv(range,6,MPIU_INT,r,tag,comm,MPI_STATUS_IGNORE)); 1383 } 1384 zs = range[0];ys = range[1];xs = range[2];zm = range[3];ym = range[4];xm = range[5]; 1385 PetscCheck(xm*ym*zm*dof <= nmax,PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen"); 1386 if (r) { 1387 PetscCallMPI(MPI_Recv(array,nmax,MPIU_SCALAR,r,tag,comm,&status)); 1388 PetscCallMPI(MPI_Get_count(&status,MPIU_SCALAR,&nn)); 1389 PetscCheck(nn == xm*ym*zm*dof,PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen"); 1390 ptr = array; 1391 } else ptr = x; 1392 PetscCall(PetscViewerASCIIPrintf(viewer," <Piece Extent=\"%" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT "\">\n",zs,zs+zm-1,ys,ys+ym-1,xs,xs+xm-1)); 1393 1394 PetscCall(PetscViewerASCIIPrintf(viewer," <Points>\n")); 1395 PetscCall(PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n")); 1396 for (i=xs; i<xs+xm; i++) { 1397 for (j=ys; j<ys+ym; j++) { 1398 for (k=zs; k<zs+zm; k++) { 1399 PrmNode p; 1400 PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my,zz; 1401 thi->initialize(thi,xx,yy,&p); 1402 zz = PetscRealPart(p.b) + PetscRealPart(p.h)*k/(mz-1); 1403 PetscCall(PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)xx,(double)yy,(double)zz)); 1404 } 1405 } 1406 } 1407 PetscCall(PetscViewerASCIIPrintf(viewer," </DataArray>\n")); 1408 PetscCall(PetscViewerASCIIPrintf(viewer," </Points>\n")); 1409 1410 PetscCall(PetscViewerASCIIPrintf(viewer," <PointData>\n")); 1411 PetscCall(PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n")); 1412 for (i=0; i<nn; i+=dof) { 1413 PetscCall(PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)(PetscRealPart(ptr[i])*units->year/units->meter),(double)(PetscRealPart(ptr[i+1])*units->year/units->meter),0.0)); 1414 } 1415 PetscCall(PetscViewerASCIIPrintf(viewer," </DataArray>\n")); 1416 1417 PetscCall(PetscViewerASCIIPrintf(viewer," <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n")); 1418 for (i=0; i<nn; i+=dof) { 1419 PetscCall(PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT "\n",r)); 1420 } 1421 PetscCall(PetscViewerASCIIPrintf(viewer," </DataArray>\n")); 1422 PetscCall(PetscViewerASCIIPrintf(viewer," </PointData>\n")); 1423 1424 PetscCall(PetscViewerASCIIPrintf(viewer," </Piece>\n")); 1425 } 1426 PetscCall(PetscFree(array)); 1427 } else { 1428 PetscCallMPI(MPI_Send(range,6,MPIU_INT,0,tag,comm)); 1429 PetscCallMPI(MPI_Send((PetscScalar*)x,nn,MPIU_SCALAR,0,tag,comm)); 1430 } 1431 PetscCall(VecRestoreArrayRead(X,&x)); 1432 PetscCall(PetscViewerASCIIPrintf(viewer," </StructuredGrid>\n")); 1433 PetscCall(PetscViewerASCIIPrintf(viewer,"</VTKFile>\n")); 1434 PetscCall(PetscViewerDestroy(&viewer)); 1435 PetscFunctionReturn(0); 1436 } 1437 1438 int main(int argc,char *argv[]) 1439 { 1440 MPI_Comm comm; 1441 THI thi; 1442 DM da; 1443 SNES snes; 1444 1445 PetscCall(PetscInitialize(&argc,&argv,0,help)); 1446 comm = PETSC_COMM_WORLD; 1447 1448 PetscCall(THICreate(comm,&thi)); 1449 { 1450 PetscInt M = 3,N = 3,P = 2; 1451 PetscOptionsBegin(comm,NULL,"Grid resolution options",""); 1452 { 1453 PetscCall(PetscOptionsInt("-M","Number of elements in x-direction on coarse level","",M,&M,NULL)); 1454 N = M; 1455 PetscCall(PetscOptionsInt("-N","Number of elements in y-direction on coarse level (if different from M)","",N,&N,NULL)); 1456 if (thi->coarse2d) { 1457 PetscCall(PetscOptionsInt("-zlevels","Number of elements in z-direction on fine level","",thi->zlevels,&thi->zlevels,NULL)); 1458 } else { 1459 PetscCall(PetscOptionsInt("-P","Number of elements in z-direction on coarse level","",P,&P,NULL)); 1460 } 1461 } 1462 PetscOptionsEnd(); 1463 if (thi->coarse2d) { 1464 PetscCall(DMDACreate2d(comm,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_BOX,N,M,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,&da)); 1465 PetscCall(DMSetFromOptions(da)); 1466 PetscCall(DMSetUp(da)); 1467 da->ops->refinehierarchy = DMRefineHierarchy_THI; 1468 da->ops->createinterpolation = DMCreateInterpolation_DA_THI; 1469 1470 PetscCall(PetscObjectCompose((PetscObject)da,"THI",(PetscObject)thi)); 1471 } else { 1472 PetscCall(DMDACreate3d(comm,DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX,P,N,M,1,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,0,&da)); 1473 PetscCall(DMSetFromOptions(da)); 1474 PetscCall(DMSetUp(da)); 1475 } 1476 PetscCall(DMDASetFieldName(da,0,"x-velocity")); 1477 PetscCall(DMDASetFieldName(da,1,"y-velocity")); 1478 } 1479 PetscCall(THISetUpDM(thi,da)); 1480 if (thi->tridiagonal) da->ops->creatematrix = DMCreateMatrix_THI_Tridiagonal; 1481 1482 { /* Set the fine level matrix type if -da_refine */ 1483 PetscInt rlevel,clevel; 1484 PetscCall(DMGetRefineLevel(da,&rlevel)); 1485 PetscCall(DMGetCoarsenLevel(da,&clevel)); 1486 if (rlevel - clevel > 0) PetscCall(DMSetMatType(da,thi->mattype)); 1487 } 1488 1489 PetscCall(DMDASNESSetFunctionLocal(da,ADD_VALUES,(DMDASNESFunction)THIFunctionLocal,thi)); 1490 if (thi->tridiagonal) { 1491 PetscCall(DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Tridiagonal,thi)); 1492 } else { 1493 PetscCall(DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi)); 1494 } 1495 PetscCall(DMCoarsenHookAdd(da,DMCoarsenHook_THI,NULL,thi)); 1496 PetscCall(DMRefineHookAdd(da,DMRefineHook_THI,NULL,thi)); 1497 1498 PetscCall(DMSetApplicationContext(da,thi)); 1499 1500 PetscCall(SNESCreate(comm,&snes)); 1501 PetscCall(SNESSetDM(snes,da)); 1502 PetscCall(DMDestroy(&da)); 1503 PetscCall(SNESSetComputeInitialGuess(snes,THIInitial,NULL)); 1504 PetscCall(SNESSetFromOptions(snes)); 1505 1506 PetscCall(SNESSolve(snes,NULL,NULL)); 1507 1508 PetscCall(THISolveStatistics(thi,snes,0,"Full")); 1509 1510 { 1511 PetscBool flg; 1512 char filename[PETSC_MAX_PATH_LEN] = ""; 1513 PetscCall(PetscOptionsGetString(NULL,NULL,"-o",filename,sizeof(filename),&flg)); 1514 if (flg) { 1515 Vec X; 1516 DM dm; 1517 PetscCall(SNESGetSolution(snes,&X)); 1518 PetscCall(SNESGetDM(snes,&dm)); 1519 PetscCall(THIDAVecView_VTK_XML(thi,dm,X,filename)); 1520 } 1521 } 1522 1523 PetscCall(DMDestroy(&da)); 1524 PetscCall(SNESDestroy(&snes)); 1525 PetscCall(THIDestroy(&thi)); 1526 PetscCall(PetscFinalize()); 1527 return 0; 1528 } 1529 1530 /*TEST 1531 1532 build: 1533 requires: !single 1534 1535 test: 1536 args: -M 6 -P 4 -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type icc 1537 1538 test: 1539 suffix: 2 1540 nsize: 2 1541 args: -M 6 -P 4 -thi_hom z -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 6 -mg_levels_0_pc_type redundant -snes_grid_sequence 1 -mat_partitioning_type current -ksp_atol -1 1542 1543 test: 1544 suffix: 3 1545 nsize: 3 1546 args: -M 7 -P 4 -thi_hom z -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type baij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_pc_asm_type restrict -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 9 -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mat_partitioning_type current 1547 1548 test: 1549 suffix: 4 1550 nsize: 6 1551 args: -M 4 -P 2 -da_refine_hierarchy_x 1,1,3 -da_refine_hierarchy_y 2,2,1 -da_refine_hierarchy_z 2,2,1 -snes_grid_sequence 3 -ksp_converged_reason -ksp_type fgmres -ksp_rtol 1e-2 -pc_type mg -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type bjacobi -mg_levels_1_sub_pc_type cholesky -pc_mg_type multiplicative -snes_converged_reason -snes_stol 1e-12 -thi_L 80e3 -thi_alpha 0.05 -thi_friction_m 1 -thi_hom x -snes_view -mg_levels_0_pc_type redundant -mg_levels_0_ksp_type preonly -ksp_atol -1 1552 1553 test: 1554 suffix: 5 1555 nsize: 6 1556 args: -M 12 -P 5 -snes_monitor_short -ksp_converged_reason -pc_type asm -pc_asm_type restrict -dm_mat_type {{aij baij sbaij}} 1557 1558 TEST*/ 1559