1 static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\ 2 \n\ 3 Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\ 4 using multigrid. The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\ 5 to p=4/3 in a p-Laplacian). The focus is on ISMIP-HOM experiments which assume periodic\n\ 6 boundary conditions in the x- and y-directions.\n\ 7 \n\ 8 Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\ 9 can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\ 10 \n\ 11 A VTK StructuredGrid output file can be written using the option -o filename.vts\n\ 12 \n\n"; 13 14 /* 15 The equations for horizontal velocity (u,v) are 16 17 - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0 18 - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0 19 20 where 21 22 eta = B/2 (epsilon + gamma)^((p-2)/2) 23 24 is the nonlinear effective viscosity with regularization epsilon and hardness parameter B, 25 written in terms of the second invariant 26 27 gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2 28 29 The surface boundary conditions are the natural conditions. The basal boundary conditions 30 are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2. 31 32 In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1). 33 34 The discretization is Q1 finite elements, managed by a DMDA. The grid is never distorted in the 35 map (x,y) plane, but the bed and surface may be bumpy. This is handled as usual in FEM, through 36 the Jacobian of the coordinate transformation from a reference element to the physical element. 37 38 Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed 39 specially so that columns are never distributed, and are always contiguous in memory. 40 This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation, 41 and then indexing as vec[i][j][k]. The exotic coarse spaces require 2D DMDAs which are made to 42 use compatible domain decomposition relative to the 3D DMDAs. 43 44 There are two compile-time options: 45 46 NO_SSE2: 47 If the host supports SSE2, we use integration code that has been vectorized with SSE2 48 intrinsics, unless this macro is defined. The intrinsics speed up integration by about 49 30% on my architecture (P8700, gcc-4.5 snapshot). 50 51 COMPUTE_LOWER_TRIANGULAR: 52 The element matrices we assemble are lower-triangular so it is not necessary to compute 53 all entries explicitly. If this macro is defined, the lower-triangular entries are 54 computed explicitly. 55 56 */ 57 58 #if defined(PETSC_APPLE_FRAMEWORK) 59 #import <PETSc/petscsnes.h> 60 #import <PETSc/petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */ 61 #else 62 63 #include <petscsnes.h> 64 #include <petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */ 65 #endif 66 #include <ctype.h> /* toupper() */ 67 68 #if defined(__cplusplus) || defined (PETSC_HAVE_WINDOWS_COMPILERS) || defined (__PGI) 69 /* c++ cannot handle [_restrict_] notation like C does */ 70 #undef PETSC_RESTRICT 71 #define PETSC_RESTRICT 72 #endif 73 74 #if defined __SSE2__ 75 # include <emmintrin.h> 76 #endif 77 78 /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */ 79 #if !defined NO_SSE2 \ 80 && !defined PETSC_USE_COMPLEX \ 81 && !defined PETSC_USE_REAL_SINGLE \ 82 && !defined PETSC_USE_REAL___FLOAT128 \ 83 && !defined PETSC_USE_REAL___FP16 \ 84 && defined __SSE2__ 85 #define USE_SSE2_KERNELS 1 86 #else 87 #define USE_SSE2_KERNELS 0 88 #endif 89 90 static PetscClassId THI_CLASSID; 91 92 typedef enum {QUAD_GAUSS,QUAD_LOBATTO} QuadratureType; 93 static const char *QuadratureTypes[] = {"gauss","lobatto","QuadratureType","QUAD_",0}; 94 PETSC_UNUSED static const PetscReal HexQWeights[8] = {1,1,1,1,1,1,1,1}; 95 PETSC_UNUSED static const PetscReal HexQNodes[] = {-0.57735026918962573, 0.57735026918962573}; 96 #define G 0.57735026918962573 97 #define H (0.5*(1.+G)) 98 #define L (0.5*(1.-G)) 99 #define M (-0.5) 100 #define P (0.5) 101 /* Special quadrature: Lobatto in horizontal, Gauss in vertical */ 102 static const PetscReal HexQInterp_Lobatto[8][8] = {{H,0,0,0,L,0,0,0}, 103 {0,H,0,0,0,L,0,0}, 104 {0,0,H,0,0,0,L,0}, 105 {0,0,0,H,0,0,0,L}, 106 {L,0,0,0,H,0,0,0}, 107 {0,L,0,0,0,H,0,0}, 108 {0,0,L,0,0,0,H,0}, 109 {0,0,0,L,0,0,0,H}}; 110 static const PetscReal HexQDeriv_Lobatto[8][8][3] = { 111 {{M*H,M*H,M},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} ,{M*L,M*L,P},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} }, 112 {{M*H,0,0} ,{P*H,M*H,M},{0,P*H,0} ,{0,0,0} ,{M*L,0,0} ,{P*L,M*L,P},{0,P*L,0} ,{0,0,0} }, 113 {{0,0,0} ,{0,M*H,0} ,{P*H,P*H,M},{M*H,0,0} ,{0,0,0} ,{0,M*L,0} ,{P*L,P*L,P},{M*L,0,0} }, 114 {{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,M},{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,P}}, 115 {{M*L,M*L,M},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} ,{M*H,M*H,P},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} }, 116 {{M*L,0,0} ,{P*L,M*L,M},{0,P*L,0} ,{0,0,0} ,{M*H,0,0} ,{P*H,M*H,P},{0,P*H,0} ,{0,0,0} }, 117 {{0,0,0} ,{0,M*L,0} ,{P*L,P*L,M},{M*L,0,0} ,{0,0,0} ,{0,M*H,0} ,{P*H,P*H,P},{M*H,0,0} }, 118 {{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,M},{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,P}}}; 119 /* Stanndard Gauss */ 120 static const PetscReal HexQInterp_Gauss[8][8] = {{H*H*H,L*H*H,L*L*H,H*L*H, H*H*L,L*H*L,L*L*L,H*L*L}, 121 {L*H*H,H*H*H,H*L*H,L*L*H, L*H*L,H*H*L,H*L*L,L*L*L}, 122 {L*L*H,H*L*H,H*H*H,L*H*H, L*L*L,H*L*L,H*H*L,L*H*L}, 123 {H*L*H,L*L*H,L*H*H,H*H*H, H*L*L,L*L*L,L*H*L,H*H*L}, 124 {H*H*L,L*H*L,L*L*L,H*L*L, H*H*H,L*H*H,L*L*H,H*L*H}, 125 {L*H*L,H*H*L,H*L*L,L*L*L, L*H*H,H*H*H,H*L*H,L*L*H}, 126 {L*L*L,H*L*L,H*H*L,L*H*L, L*L*H,H*L*H,H*H*H,L*H*H}, 127 {H*L*L,L*L*L,L*H*L,H*H*L, H*L*H,L*L*H,L*H*H,H*H*H}}; 128 static const PetscReal HexQDeriv_Gauss[8][8][3] = { 129 {{M*H*H,H*M*H,H*H*M},{P*H*H,L*M*H,L*H*M},{P*L*H,L*P*H,L*L*M},{M*L*H,H*P*H,H*L*M}, {M*H*L,H*M*L,H*H*P},{P*H*L,L*M*L,L*H*P},{P*L*L,L*P*L,L*L*P},{M*L*L,H*P*L,H*L*P}}, 130 {{M*H*H,L*M*H,L*H*M},{P*H*H,H*M*H,H*H*M},{P*L*H,H*P*H,H*L*M},{M*L*H,L*P*H,L*L*M}, {M*H*L,L*M*L,L*H*P},{P*H*L,H*M*L,H*H*P},{P*L*L,H*P*L,H*L*P},{M*L*L,L*P*L,L*L*P}}, 131 {{M*L*H,L*M*H,L*L*M},{P*L*H,H*M*H,H*L*M},{P*H*H,H*P*H,H*H*M},{M*H*H,L*P*H,L*H*M}, {M*L*L,L*M*L,L*L*P},{P*L*L,H*M*L,H*L*P},{P*H*L,H*P*L,H*H*P},{M*H*L,L*P*L,L*H*P}}, 132 {{M*L*H,H*M*H,H*L*M},{P*L*H,L*M*H,L*L*M},{P*H*H,L*P*H,L*H*M},{M*H*H,H*P*H,H*H*M}, {M*L*L,H*M*L,H*L*P},{P*L*L,L*M*L,L*L*P},{P*H*L,L*P*L,L*H*P},{M*H*L,H*P*L,H*H*P}}, 133 {{M*H*L,H*M*L,H*H*M},{P*H*L,L*M*L,L*H*M},{P*L*L,L*P*L,L*L*M},{M*L*L,H*P*L,H*L*M}, {M*H*H,H*M*H,H*H*P},{P*H*H,L*M*H,L*H*P},{P*L*H,L*P*H,L*L*P},{M*L*H,H*P*H,H*L*P}}, 134 {{M*H*L,L*M*L,L*H*M},{P*H*L,H*M*L,H*H*M},{P*L*L,H*P*L,H*L*M},{M*L*L,L*P*L,L*L*M}, {M*H*H,L*M*H,L*H*P},{P*H*H,H*M*H,H*H*P},{P*L*H,H*P*H,H*L*P},{M*L*H,L*P*H,L*L*P}}, 135 {{M*L*L,L*M*L,L*L*M},{P*L*L,H*M*L,H*L*M},{P*H*L,H*P*L,H*H*M},{M*H*L,L*P*L,L*H*M}, {M*L*H,L*M*H,L*L*P},{P*L*H,H*M*H,H*L*P},{P*H*H,H*P*H,H*H*P},{M*H*H,L*P*H,L*H*P}}, 136 {{M*L*L,H*M*L,H*L*M},{P*L*L,L*M*L,L*L*M},{P*H*L,L*P*L,L*H*M},{M*H*L,H*P*L,H*H*M}, {M*L*H,H*M*H,H*L*P},{P*L*H,L*M*H,L*L*P},{P*H*H,L*P*H,L*H*P},{M*H*H,H*P*H,H*H*P}}}; 137 static const PetscReal (*HexQInterp)[8],(*HexQDeriv)[8][3]; 138 /* Standard 2x2 Gauss quadrature for the bottom layer. */ 139 static const PetscReal QuadQInterp[4][4] = {{H*H,L*H,L*L,H*L}, 140 {L*H,H*H,H*L,L*L}, 141 {L*L,H*L,H*H,L*H}, 142 {H*L,L*L,L*H,H*H}}; 143 static const PetscReal QuadQDeriv[4][4][2] = { 144 {{M*H,M*H},{P*H,M*L},{P*L,P*L},{M*L,P*H}}, 145 {{M*H,M*L},{P*H,M*H},{P*L,P*H},{M*L,P*L}}, 146 {{M*L,M*L},{P*L,M*H},{P*H,P*H},{M*H,P*L}}, 147 {{M*L,M*H},{P*L,M*L},{P*H,P*L},{M*H,P*H}}}; 148 #undef G 149 #undef H 150 #undef L 151 #undef M 152 #undef P 153 154 #define HexExtract(x,i,j,k,n) do { \ 155 (n)[0] = (x)[i][j][k]; \ 156 (n)[1] = (x)[i+1][j][k]; \ 157 (n)[2] = (x)[i+1][j+1][k]; \ 158 (n)[3] = (x)[i][j+1][k]; \ 159 (n)[4] = (x)[i][j][k+1]; \ 160 (n)[5] = (x)[i+1][j][k+1]; \ 161 (n)[6] = (x)[i+1][j+1][k+1]; \ 162 (n)[7] = (x)[i][j+1][k+1]; \ 163 } while (0) 164 165 #define HexExtractRef(x,i,j,k,n) do { \ 166 (n)[0] = &(x)[i][j][k]; \ 167 (n)[1] = &(x)[i+1][j][k]; \ 168 (n)[2] = &(x)[i+1][j+1][k]; \ 169 (n)[3] = &(x)[i][j+1][k]; \ 170 (n)[4] = &(x)[i][j][k+1]; \ 171 (n)[5] = &(x)[i+1][j][k+1]; \ 172 (n)[6] = &(x)[i+1][j+1][k+1]; \ 173 (n)[7] = &(x)[i][j+1][k+1]; \ 174 } while (0) 175 176 #define QuadExtract(x,i,j,n) do { \ 177 (n)[0] = (x)[i][j]; \ 178 (n)[1] = (x)[i+1][j]; \ 179 (n)[2] = (x)[i+1][j+1]; \ 180 (n)[3] = (x)[i][j+1]; \ 181 } while (0) 182 183 static void HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[]) 184 { 185 PetscInt i; 186 dz[0] = dz[1] = dz[2] = 0; 187 for (i=0; i<8; i++) { 188 dz[0] += dphi[i][0] * zn[i]; 189 dz[1] += dphi[i][1] * zn[i]; 190 dz[2] += dphi[i][2] * zn[i]; 191 } 192 } 193 194 static void HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[PETSC_RESTRICT],PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscReal *PETSC_RESTRICT jw) 195 { 196 const PetscReal jac[3][3] = {{hx/2,0,0}, {0,hy/2,0}, {dz[0],dz[1],dz[2]}}; 197 const PetscReal ijac[3][3] = {{1/jac[0][0],0,0}, {0,1/jac[1][1],0}, {-jac[2][0]/(jac[0][0]*jac[2][2]),-jac[2][1]/(jac[1][1]*jac[2][2]),1/jac[2][2]}}; 198 const PetscReal jdet = jac[0][0]*jac[1][1]*jac[2][2]; 199 PetscInt i; 200 201 for (i=0; i<8; i++) { 202 const PetscReal *dphir = HexQDeriv[q][i]; 203 phi[i] = HexQInterp[q][i]; 204 dphi[i][0] = dphir[0]*ijac[0][0] + dphir[1]*ijac[1][0] + dphir[2]*ijac[2][0]; 205 dphi[i][1] = dphir[0]*ijac[0][1] + dphir[1]*ijac[1][1] + dphir[2]*ijac[2][1]; 206 dphi[i][2] = dphir[0]*ijac[0][2] + dphir[1]*ijac[1][2] + dphir[2]*ijac[2][2]; 207 } 208 *jw = 1.0 * jdet; 209 } 210 211 typedef struct _p_THI *THI; 212 typedef struct _n_Units *Units; 213 214 typedef struct { 215 PetscScalar u,v; 216 } Node; 217 218 typedef struct { 219 PetscScalar b; /* bed */ 220 PetscScalar h; /* thickness */ 221 PetscScalar beta2; /* friction */ 222 } PrmNode; 223 224 typedef struct { 225 PetscReal min,max,cmin,cmax; 226 } PRange; 227 228 typedef enum {THIASSEMBLY_TRIDIAGONAL,THIASSEMBLY_FULL} THIAssemblyMode; 229 230 struct _p_THI { 231 PETSCHEADER(int); 232 void (*initialize)(THI,PetscReal x,PetscReal y,PrmNode *p); 233 PetscInt zlevels; 234 PetscReal Lx,Ly,Lz; /* Model domain */ 235 PetscReal alpha; /* Bed angle */ 236 Units units; 237 PetscReal dirichlet_scale; 238 PetscReal ssa_friction_scale; 239 PRange eta; 240 PRange beta2; 241 struct { 242 PetscReal Bd2,eps,exponent; 243 } viscosity; 244 struct { 245 PetscReal irefgam,eps2,exponent,refvel,epsvel; 246 } friction; 247 PetscReal rhog; 248 PetscBool no_slip; 249 PetscBool tridiagonal; 250 PetscBool coarse2d; 251 PetscBool verbose; 252 MatType mattype; 253 }; 254 255 struct _n_Units { 256 /* fundamental */ 257 PetscReal meter; 258 PetscReal kilogram; 259 PetscReal second; 260 /* derived */ 261 PetscReal Pascal; 262 PetscReal year; 263 }; 264 265 static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo*,Node***,Mat,Mat,THI); 266 static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo*,Node***,Mat,Mat,THI); 267 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo*,Node**,Mat,Mat,THI); 268 269 static void PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[]) 270 { 271 const PetscScalar zm1 = zm-1, 272 znl[8] = {pn[0].b + pn[0].h*(PetscScalar)k/zm1, 273 pn[1].b + pn[1].h*(PetscScalar)k/zm1, 274 pn[2].b + pn[2].h*(PetscScalar)k/zm1, 275 pn[3].b + pn[3].h*(PetscScalar)k/zm1, 276 pn[0].b + pn[0].h*(PetscScalar)(k+1)/zm1, 277 pn[1].b + pn[1].h*(PetscScalar)(k+1)/zm1, 278 pn[2].b + pn[2].h*(PetscScalar)(k+1)/zm1, 279 pn[3].b + pn[3].h*(PetscScalar)(k+1)/zm1}; 280 PetscInt i; 281 for (i=0; i<8; i++) zn[i] = PetscRealPart(znl[i]); 282 } 283 284 /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */ 285 static void THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode *p) 286 { 287 Units units = thi->units; 288 PetscReal s = -x*PetscSinReal(thi->alpha); 289 290 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx) * PetscSinReal(y*2*PETSC_PI/thi->Ly); 291 p->h = s - p->b; 292 p->beta2 = 1e30; 293 } 294 295 static void THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode *p) 296 { 297 Units units = thi->units; 298 PetscReal s = -x*PetscSinReal(thi->alpha); 299 300 p->b = s - 1000*units->meter; 301 p->h = s - p->b; 302 /* tau_b = beta2 v is a stress (Pa) */ 303 p->beta2 = 1000 * (1 + PetscSinReal(x*2*PETSC_PI/thi->Lx)*PetscSinReal(y*2*PETSC_PI/thi->Ly)) * units->Pascal * units->year / units->meter; 304 } 305 306 /* These are just toys */ 307 308 /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */ 309 static void THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode *p) 310 { 311 Units units = thi->units; 312 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */ 313 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha); 314 p->b = s - 1000*units->meter + 500*units->meter*PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 315 p->h = s - p->b; 316 p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter; 317 } 318 319 /* Like Z, but with 200 meter cliffs */ 320 static void THIInitialize_HOM_Y(THI thi,PetscReal xx,PetscReal yy,PrmNode *p) 321 { 322 Units units = thi->units; 323 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */ 324 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha); 325 326 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 327 if (PetscRealPart(p->b) > -700*units->meter) p->b += 200*units->meter; 328 p->h = s - p->b; 329 p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter; 330 } 331 332 /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */ 333 static void THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode *p) 334 { 335 Units units = thi->units; 336 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */ 337 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha); 338 339 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 340 p->h = s - p->b; 341 p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter; 342 } 343 344 static void THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal *beta2,PetscReal *dbeta2) 345 { 346 if (thi->friction.irefgam == 0) { 347 Units units = thi->units; 348 thi->friction.irefgam = 1./(0.5*PetscSqr(thi->friction.refvel * units->meter / units->year)); 349 thi->friction.eps2 = 0.5*PetscSqr(thi->friction.epsvel * units->meter / units->year) * thi->friction.irefgam; 350 } 351 if (thi->friction.exponent == 0) { 352 *beta2 = rbeta2; 353 *dbeta2 = 0; 354 } else { 355 *beta2 = rbeta2 * PetscPowReal(thi->friction.eps2 + gam*thi->friction.irefgam,thi->friction.exponent); 356 *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam*thi->friction.irefgam) * thi->friction.irefgam; 357 } 358 } 359 360 static void THIViscosity(THI thi,PetscReal gam,PetscReal *eta,PetscReal *deta) 361 { 362 PetscReal Bd2,eps,exponent; 363 if (thi->viscosity.Bd2 == 0) { 364 Units units = thi->units; 365 const PetscReal 366 n = 3., /* Glen exponent */ 367 p = 1. + 1./n, /* for Stokes */ 368 A = 1.e-16 * PetscPowReal(units->Pascal,-n) / units->year, /* softness parameter (Pa^{-n}/s) */ 369 B = PetscPowReal(A,-1./n); /* hardness parameter */ 370 thi->viscosity.Bd2 = B/2; 371 thi->viscosity.exponent = (p-2)/2; 372 thi->viscosity.eps = 0.5*PetscSqr(1e-5 / units->year); 373 } 374 Bd2 = thi->viscosity.Bd2; 375 exponent = thi->viscosity.exponent; 376 eps = thi->viscosity.eps; 377 *eta = Bd2 * PetscPowReal(eps + gam,exponent); 378 *deta = exponent * (*eta) / (eps + gam); 379 } 380 381 static void RangeUpdate(PetscReal *min,PetscReal *max,PetscReal x) 382 { 383 if (x < *min) *min = x; 384 if (x > *max) *max = x; 385 } 386 387 static void PRangeClear(PRange *p) 388 { 389 p->cmin = p->min = 1e100; 390 p->cmax = p->max = -1e100; 391 } 392 393 static PetscErrorCode PRangeMinMax(PRange *p,PetscReal min,PetscReal max) 394 { 395 PetscFunctionBeginUser; 396 p->cmin = min; 397 p->cmax = max; 398 if (min < p->min) p->min = min; 399 if (max > p->max) p->max = max; 400 PetscFunctionReturn(0); 401 } 402 403 static PetscErrorCode THIDestroy(THI *thi) 404 { 405 PetscFunctionBeginUser; 406 if (!*thi) PetscFunctionReturn(0); 407 if (--((PetscObject)(*thi))->refct > 0) {*thi = 0; PetscFunctionReturn(0);} 408 CHKERRQ(PetscFree((*thi)->units)); 409 CHKERRQ(PetscFree((*thi)->mattype)); 410 CHKERRQ(PetscHeaderDestroy(thi)); 411 PetscFunctionReturn(0); 412 } 413 414 static PetscErrorCode THICreate(MPI_Comm comm,THI *inthi) 415 { 416 static PetscBool registered = PETSC_FALSE; 417 THI thi; 418 Units units; 419 PetscErrorCode ierr; 420 421 PetscFunctionBeginUser; 422 *inthi = 0; 423 if (!registered) { 424 CHKERRQ(PetscClassIdRegister("Toy Hydrostatic Ice",&THI_CLASSID)); 425 registered = PETSC_TRUE; 426 } 427 CHKERRQ(PetscHeaderCreate(thi,THI_CLASSID,"THI","Toy Hydrostatic Ice","",comm,THIDestroy,0)); 428 429 CHKERRQ(PetscNew(&thi->units)); 430 units = thi->units; 431 units->meter = 1e-2; 432 units->second = 1e-7; 433 units->kilogram = 1e-12; 434 435 ierr = PetscOptionsBegin(comm,NULL,"Scaled units options","");CHKERRQ(ierr); 436 { 437 CHKERRQ(PetscOptionsReal("-units_meter","1 meter in scaled length units","",units->meter,&units->meter,NULL)); 438 CHKERRQ(PetscOptionsReal("-units_second","1 second in scaled time units","",units->second,&units->second,NULL)); 439 CHKERRQ(PetscOptionsReal("-units_kilogram","1 kilogram in scaled mass units","",units->kilogram,&units->kilogram,NULL)); 440 } 441 ierr = PetscOptionsEnd();CHKERRQ(ierr); 442 units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second)); 443 units->year = 31556926. * units->second; /* seconds per year */ 444 445 thi->Lx = 10.e3; 446 thi->Ly = 10.e3; 447 thi->Lz = 1000; 448 thi->dirichlet_scale = 1; 449 thi->verbose = PETSC_FALSE; 450 451 ierr = PetscOptionsBegin(comm,NULL,"Toy Hydrostatic Ice options","");CHKERRQ(ierr); 452 { 453 QuadratureType quad = QUAD_GAUSS; 454 char homexp[] = "A"; 455 char mtype[256] = MATSBAIJ; 456 PetscReal L,m = 1.0; 457 PetscBool flg; 458 L = thi->Lx; 459 CHKERRQ(PetscOptionsReal("-thi_L","Domain size (m)","",L,&L,&flg)); 460 if (flg) thi->Lx = thi->Ly = L; 461 CHKERRQ(PetscOptionsReal("-thi_Lx","X Domain size (m)","",thi->Lx,&thi->Lx,NULL)); 462 CHKERRQ(PetscOptionsReal("-thi_Ly","Y Domain size (m)","",thi->Ly,&thi->Ly,NULL)); 463 CHKERRQ(PetscOptionsReal("-thi_Lz","Z Domain size (m)","",thi->Lz,&thi->Lz,NULL)); 464 CHKERRQ(PetscOptionsString("-thi_hom","ISMIP-HOM experiment (A or C)","",homexp,homexp,sizeof(homexp),NULL)); 465 switch (homexp[0] = toupper(homexp[0])) { 466 case 'A': 467 thi->initialize = THIInitialize_HOM_A; 468 thi->no_slip = PETSC_TRUE; 469 thi->alpha = 0.5; 470 break; 471 case 'C': 472 thi->initialize = THIInitialize_HOM_C; 473 thi->no_slip = PETSC_FALSE; 474 thi->alpha = 0.1; 475 break; 476 case 'X': 477 thi->initialize = THIInitialize_HOM_X; 478 thi->no_slip = PETSC_FALSE; 479 thi->alpha = 0.3; 480 break; 481 case 'Y': 482 thi->initialize = THIInitialize_HOM_Y; 483 thi->no_slip = PETSC_FALSE; 484 thi->alpha = 0.5; 485 break; 486 case 'Z': 487 thi->initialize = THIInitialize_HOM_Z; 488 thi->no_slip = PETSC_FALSE; 489 thi->alpha = 0.5; 490 break; 491 default: 492 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HOM experiment '%c' not implemented",homexp[0]); 493 } 494 CHKERRQ(PetscOptionsEnum("-thi_quadrature","Quadrature to use for 3D elements","",QuadratureTypes,(PetscEnum)quad,(PetscEnum*)&quad,NULL)); 495 switch (quad) { 496 case QUAD_GAUSS: 497 HexQInterp = HexQInterp_Gauss; 498 HexQDeriv = HexQDeriv_Gauss; 499 break; 500 case QUAD_LOBATTO: 501 HexQInterp = HexQInterp_Lobatto; 502 HexQDeriv = HexQDeriv_Lobatto; 503 break; 504 } 505 CHKERRQ(PetscOptionsReal("-thi_alpha","Bed angle (degrees)","",thi->alpha,&thi->alpha,NULL)); 506 507 thi->friction.refvel = 100.; 508 thi->friction.epsvel = 1.; 509 510 CHKERRQ(PetscOptionsReal("-thi_friction_refvel","Reference velocity for sliding","",thi->friction.refvel,&thi->friction.refvel,NULL)); 511 CHKERRQ(PetscOptionsReal("-thi_friction_epsvel","Regularization velocity for sliding","",thi->friction.epsvel,&thi->friction.epsvel,NULL)); 512 CHKERRQ(PetscOptionsReal("-thi_friction_m","Friction exponent, 0=Coulomb, 1=Navier","",m,&m,NULL)); 513 514 thi->friction.exponent = (m-1)/2; 515 516 CHKERRQ(PetscOptionsReal("-thi_dirichlet_scale","Scale Dirichlet boundary conditions by this factor","",thi->dirichlet_scale,&thi->dirichlet_scale,NULL)); 517 CHKERRQ(PetscOptionsReal("-thi_ssa_friction_scale","Scale slip boundary conditions by this factor in SSA (2D) assembly","",thi->ssa_friction_scale,&thi->ssa_friction_scale,NULL)); 518 CHKERRQ(PetscOptionsBool("-thi_coarse2d","Use a 2D coarse space corresponding to SSA","",thi->coarse2d,&thi->coarse2d,NULL)); 519 CHKERRQ(PetscOptionsBool("-thi_tridiagonal","Assemble a tridiagonal system (column coupling only) on the finest level","",thi->tridiagonal,&thi->tridiagonal,NULL)); 520 CHKERRQ(PetscOptionsFList("-thi_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)mtype,sizeof(mtype),NULL)); 521 CHKERRQ(PetscStrallocpy(mtype,(char**)&thi->mattype)); 522 CHKERRQ(PetscOptionsBool("-thi_verbose","Enable verbose output (like matrix sizes and statistics)","",thi->verbose,&thi->verbose,NULL)); 523 } 524 ierr = PetscOptionsEnd();CHKERRQ(ierr); 525 526 /* dimensionalize */ 527 thi->Lx *= units->meter; 528 thi->Ly *= units->meter; 529 thi->Lz *= units->meter; 530 thi->alpha *= PETSC_PI / 180; 531 532 PRangeClear(&thi->eta); 533 PRangeClear(&thi->beta2); 534 535 { 536 PetscReal u = 1000*units->meter/(3e7*units->second), 537 gradu = u / (100*units->meter),eta,deta, 538 rho = 910 * units->kilogram/PetscPowReal(units->meter,3), 539 grav = 9.81 * units->meter/PetscSqr(units->second), 540 driving = rho * grav * PetscSinReal(thi->alpha) * 1000*units->meter; 541 THIViscosity(thi,0.5*gradu*gradu,&eta,&deta); 542 thi->rhog = rho * grav; 543 if (thi->verbose) { 544 CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)thi),"Units: meter %8.2g second %8.2g kg %8.2g Pa %8.2g\n",(double)units->meter,(double)units->second,(double)units->kilogram,(double)units->Pascal)); 545 CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)thi),"Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n",(double)thi->Lx,(double)thi->Ly,(double)thi->Lz,(double)(rho*grav*1e3*units->meter),(double)driving)); 546 CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)thi),"Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)u,(double)gradu,(double)eta,(double)(2*eta*gradu),(double)(2*eta*gradu/driving))); 547 THIViscosity(thi,0.5*PetscSqr(1e-3*gradu),&eta,&deta); 548 CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)thi),"Small velocity 1m/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)(1e-3*u),(double)(1e-3*gradu),(double)eta,(double)(2*eta*1e-3*gradu),(double)(2*eta*1e-3*gradu/driving))); 549 } 550 } 551 552 *inthi = thi; 553 PetscFunctionReturn(0); 554 } 555 556 static PetscErrorCode THIInitializePrm(THI thi,DM da2prm,Vec prm) 557 { 558 PrmNode **p; 559 PetscInt i,j,xs,xm,ys,ym,mx,my; 560 561 PetscFunctionBeginUser; 562 CHKERRQ(DMDAGetGhostCorners(da2prm,&ys,&xs,0,&ym,&xm,0)); 563 CHKERRQ(DMDAGetInfo(da2prm,0, &my,&mx,0, 0,0,0, 0,0,0,0,0,0)); 564 CHKERRQ(DMDAVecGetArray(da2prm,prm,&p)); 565 for (i=xs; i<xs+xm; i++) { 566 for (j=ys; j<ys+ym; j++) { 567 PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my; 568 thi->initialize(thi,xx,yy,&p[i][j]); 569 } 570 } 571 CHKERRQ(DMDAVecRestoreArray(da2prm,prm,&p)); 572 PetscFunctionReturn(0); 573 } 574 575 static PetscErrorCode THISetUpDM(THI thi,DM dm) 576 { 577 PetscInt refinelevel,coarsenlevel,level,dim,Mx,My,Mz,mx,my,s; 578 DMDAStencilType st; 579 DM da2prm; 580 Vec X; 581 582 PetscFunctionBeginUser; 583 CHKERRQ(DMDAGetInfo(dm,&dim, &Mz,&My,&Mx, 0,&my,&mx, 0,&s,0,0,0,&st)); 584 if (dim == 2) { 585 CHKERRQ(DMDAGetInfo(dm,&dim, &My,&Mx,0, &my,&mx,0, 0,&s,0,0,0,&st)); 586 } 587 CHKERRQ(DMGetRefineLevel(dm,&refinelevel)); 588 CHKERRQ(DMGetCoarsenLevel(dm,&coarsenlevel)); 589 level = refinelevel - coarsenlevel; 590 CHKERRQ(DMDACreate2d(PetscObjectComm((PetscObject)thi),DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,My,Mx,my,mx,sizeof(PrmNode)/sizeof(PetscScalar),s,0,0,&da2prm)); 591 CHKERRQ(DMSetUp(da2prm)); 592 CHKERRQ(DMCreateLocalVector(da2prm,&X)); 593 { 594 PetscReal Lx = thi->Lx / thi->units->meter,Ly = thi->Ly / thi->units->meter,Lz = thi->Lz / thi->units->meter; 595 if (dim == 2) { 596 CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g, num elements %D x %D (%D), size (m) %g x %g\n",level,(double)Lx,(double)Ly,Mx,My,Mx*My,(double)(Lx/Mx),(double)(Ly/My))); 597 } else { 598 CHKERRQ(PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g x %8.2g, num elements %D x %D x %D (%D), size (m) %g x %g x %g\n",level,(double)Lx,(double)Ly,(double)Lz,Mx,My,Mz,Mx*My*Mz,(double)(Lx/Mx),(double)(Ly/My),(double)(1000./(Mz-1)))); 599 } 600 } 601 CHKERRQ(THIInitializePrm(thi,da2prm,X)); 602 if (thi->tridiagonal) { /* Reset coarse Jacobian evaluation */ 603 CHKERRQ(DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi)); 604 } 605 if (thi->coarse2d) { 606 CHKERRQ(DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_2D,thi)); 607 } 608 CHKERRQ(PetscObjectCompose((PetscObject)dm,"DMDA2Prm",(PetscObject)da2prm)); 609 CHKERRQ(PetscObjectCompose((PetscObject)dm,"DMDA2Prm_Vec",(PetscObject)X)); 610 CHKERRQ(DMDestroy(&da2prm)); 611 CHKERRQ(VecDestroy(&X)); 612 PetscFunctionReturn(0); 613 } 614 615 static PetscErrorCode DMCoarsenHook_THI(DM dmf,DM dmc,void *ctx) 616 { 617 THI thi = (THI)ctx; 618 PetscInt rlevel,clevel; 619 620 PetscFunctionBeginUser; 621 CHKERRQ(THISetUpDM(thi,dmc)); 622 CHKERRQ(DMGetRefineLevel(dmc,&rlevel)); 623 CHKERRQ(DMGetCoarsenLevel(dmc,&clevel)); 624 if (rlevel-clevel == 0) CHKERRQ(DMSetMatType(dmc,MATAIJ)); 625 CHKERRQ(DMCoarsenHookAdd(dmc,DMCoarsenHook_THI,NULL,thi)); 626 PetscFunctionReturn(0); 627 } 628 629 static PetscErrorCode DMRefineHook_THI(DM dmc,DM dmf,void *ctx) 630 { 631 THI thi = (THI)ctx; 632 633 PetscFunctionBeginUser; 634 CHKERRQ(THISetUpDM(thi,dmf)); 635 CHKERRQ(DMSetMatType(dmf,thi->mattype)); 636 CHKERRQ(DMRefineHookAdd(dmf,DMRefineHook_THI,NULL,thi)); 637 /* With grid sequencing, a formerly-refined DM will later be coarsened by PCSetUp_MG */ 638 CHKERRQ(DMCoarsenHookAdd(dmf,DMCoarsenHook_THI,NULL,thi)); 639 PetscFunctionReturn(0); 640 } 641 642 static PetscErrorCode THIDAGetPrm(DM da,PrmNode ***prm) 643 { 644 DM da2prm; 645 Vec X; 646 647 PetscFunctionBeginUser; 648 CHKERRQ(PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm)); 649 PetscCheck(da2prm,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA"); 650 CHKERRQ(PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X)); 651 PetscCheck(X,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA"); 652 CHKERRQ(DMDAVecGetArray(da2prm,X,prm)); 653 PetscFunctionReturn(0); 654 } 655 656 static PetscErrorCode THIDARestorePrm(DM da,PrmNode ***prm) 657 { 658 DM da2prm; 659 Vec X; 660 661 PetscFunctionBeginUser; 662 CHKERRQ(PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm)); 663 PetscCheck(da2prm,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA"); 664 CHKERRQ(PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X)); 665 PetscCheck(X,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA"); 666 CHKERRQ(DMDAVecRestoreArray(da2prm,X,prm)); 667 PetscFunctionReturn(0); 668 } 669 670 static PetscErrorCode THIInitial(SNES snes,Vec X,void *ctx) 671 { 672 THI thi; 673 PetscInt i,j,k,xs,xm,ys,ym,zs,zm,mx,my; 674 PetscReal hx,hy; 675 PrmNode **prm; 676 Node ***x; 677 DM da; 678 679 PetscFunctionBeginUser; 680 CHKERRQ(SNESGetDM(snes,&da)); 681 CHKERRQ(DMGetApplicationContext(da,&thi)); 682 CHKERRQ(DMDAGetInfo(da,0, 0,&my,&mx, 0,0,0, 0,0,0,0,0,0)); 683 CHKERRQ(DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm)); 684 CHKERRQ(DMDAVecGetArray(da,X,&x)); 685 CHKERRQ(THIDAGetPrm(da,&prm)); 686 hx = thi->Lx / mx; 687 hy = thi->Ly / my; 688 for (i=xs; i<xs+xm; i++) { 689 for (j=ys; j<ys+ym; j++) { 690 for (k=zs; k<zs+zm; k++) { 691 const PetscScalar zm1 = zm-1, 692 drivingx = thi->rhog * (prm[i+1][j].b+prm[i+1][j].h - prm[i-1][j].b-prm[i-1][j].h) / (2*hx), 693 drivingy = thi->rhog * (prm[i][j+1].b+prm[i][j+1].h - prm[i][j-1].b-prm[i][j-1].h) / (2*hy); 694 x[i][j][k].u = 0. * drivingx * prm[i][j].h*(PetscScalar)k/zm1; 695 x[i][j][k].v = 0. * drivingy * prm[i][j].h*(PetscScalar)k/zm1; 696 } 697 } 698 } 699 CHKERRQ(DMDAVecRestoreArray(da,X,&x)); 700 CHKERRQ(THIDARestorePrm(da,&prm)); 701 PetscFunctionReturn(0); 702 } 703 704 static void PointwiseNonlinearity(THI thi,const Node n[PETSC_RESTRICT],const PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscScalar *PETSC_RESTRICT u,PetscScalar *PETSC_RESTRICT v,PetscScalar du[PETSC_RESTRICT],PetscScalar dv[PETSC_RESTRICT],PetscReal *eta,PetscReal *deta) 705 { 706 PetscInt l,ll; 707 PetscScalar gam; 708 709 du[0] = du[1] = du[2] = 0; 710 dv[0] = dv[1] = dv[2] = 0; 711 *u = 0; 712 *v = 0; 713 for (l=0; l<8; l++) { 714 *u += phi[l] * n[l].u; 715 *v += phi[l] * n[l].v; 716 for (ll=0; ll<3; ll++) { 717 du[ll] += dphi[l][ll] * n[l].u; 718 dv[ll] += dphi[l][ll] * n[l].v; 719 } 720 } 721 gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]) + 0.25*PetscSqr(du[2]) + 0.25*PetscSqr(dv[2]); 722 THIViscosity(thi,PetscRealPart(gam),eta,deta); 723 } 724 725 static void PointwiseNonlinearity2D(THI thi,Node n[],PetscReal phi[],PetscReal dphi[4][2],PetscScalar *u,PetscScalar *v,PetscScalar du[],PetscScalar dv[],PetscReal *eta,PetscReal *deta) 726 { 727 PetscInt l,ll; 728 PetscScalar gam; 729 730 du[0] = du[1] = 0; 731 dv[0] = dv[1] = 0; 732 *u = 0; 733 *v = 0; 734 for (l=0; l<4; l++) { 735 *u += phi[l] * n[l].u; 736 *v += phi[l] * n[l].v; 737 for (ll=0; ll<2; ll++) { 738 du[ll] += dphi[l][ll] * n[l].u; 739 dv[ll] += dphi[l][ll] * n[l].v; 740 } 741 } 742 gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]); 743 THIViscosity(thi,PetscRealPart(gam),eta,deta); 744 } 745 746 static PetscErrorCode THIFunctionLocal(DMDALocalInfo *info,Node ***x,Node ***f,THI thi) 747 { 748 PetscInt xs,ys,xm,ym,zm,i,j,k,q,l; 749 PetscReal hx,hy,etamin,etamax,beta2min,beta2max; 750 PrmNode **prm; 751 752 PetscFunctionBeginUser; 753 xs = info->zs; 754 ys = info->ys; 755 xm = info->zm; 756 ym = info->ym; 757 zm = info->xm; 758 hx = thi->Lx / info->mz; 759 hy = thi->Ly / info->my; 760 761 etamin = 1e100; 762 etamax = 0; 763 beta2min = 1e100; 764 beta2max = 0; 765 766 CHKERRQ(THIDAGetPrm(info->da,&prm)); 767 768 for (i=xs; i<xs+xm; i++) { 769 for (j=ys; j<ys+ym; j++) { 770 PrmNode pn[4]; 771 QuadExtract(prm,i,j,pn); 772 for (k=0; k<zm-1; k++) { 773 PetscInt ls = 0; 774 Node n[8],*fn[8]; 775 PetscReal zn[8],etabase = 0; 776 PrmHexGetZ(pn,k,zm,zn); 777 HexExtract(x,i,j,k,n); 778 HexExtractRef(f,i,j,k,fn); 779 if (thi->no_slip && k == 0) { 780 for (l=0; l<4; l++) n[l].u = n[l].v = 0; 781 /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */ 782 ls = 4; 783 } 784 for (q=0; q<8; q++) { 785 PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta; 786 PetscScalar du[3],dv[3],u,v; 787 HexGrad(HexQDeriv[q],zn,dz); 788 HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw); 789 PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta); 790 jw /= thi->rhog; /* scales residuals to be O(1) */ 791 if (q == 0) etabase = eta; 792 RangeUpdate(&etamin,&etamax,eta); 793 for (l=ls; l<8; l++) { /* test functions */ 794 const PetscReal ds[2] = {-PetscSinReal(thi->alpha),0}; 795 const PetscReal pp = phi[l],*dp = dphi[l]; 796 fn[l]->u += dp[0]*jw*eta*(4.*du[0]+2.*dv[1]) + dp[1]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*du[2] + pp*jw*thi->rhog*ds[0]; 797 fn[l]->v += dp[1]*jw*eta*(2.*du[0]+4.*dv[1]) + dp[0]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*dv[2] + pp*jw*thi->rhog*ds[1]; 798 } 799 } 800 if (k == 0) { /* we are on a bottom face */ 801 if (thi->no_slip) { 802 /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary 803 * conditions. After shenanigans above, etabase contains the effective viscosity at the closest quadrature 804 * point to the bed. We want the diagonal entry in the Dirichlet condition to have similar magnitude to the 805 * diagonal entry corresponding to the adjacent node. The fundamental scaling of the viscous part is in 806 * diagu, diagv below. This scaling is easy to recognize by considering the finite difference operator after 807 * scaling by element size. The no-slip Dirichlet condition is scaled by this factor, and also in the 808 * assembled matrix (see the similar block in THIJacobianLocal). 809 * 810 * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends 811 * (nonlinearly in general) on the neighboring interior nodes through the local viscosity. This will make 812 * a matrix-free Jacobian have extra entries in the corresponding row. We assemble only the diagonal part, 813 * so the solution will exactly satisfy the boundary condition after the first linear iteration. 814 */ 815 const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1.); 816 const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx); 817 fn[0]->u = thi->dirichlet_scale*diagu*x[i][j][k].u; 818 fn[0]->v = thi->dirichlet_scale*diagv*x[i][j][k].v; 819 } else { /* Integrate over bottom face to apply boundary condition */ 820 for (q=0; q<4; q++) { 821 const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q]; 822 PetscScalar u =0,v=0,rbeta2=0; 823 PetscReal beta2,dbeta2; 824 for (l=0; l<4; l++) { 825 u += phi[l]*n[l].u; 826 v += phi[l]*n[l].v; 827 rbeta2 += phi[l]*pn[l].beta2; 828 } 829 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2); 830 RangeUpdate(&beta2min,&beta2max,beta2); 831 for (l=0; l<4; l++) { 832 const PetscReal pp = phi[l]; 833 fn[ls+l]->u += pp*jw*beta2*u; 834 fn[ls+l]->v += pp*jw*beta2*v; 835 } 836 } 837 } 838 } 839 } 840 } 841 } 842 843 CHKERRQ(THIDARestorePrm(info->da,&prm)); 844 845 CHKERRQ(PRangeMinMax(&thi->eta,etamin,etamax)); 846 CHKERRQ(PRangeMinMax(&thi->beta2,beta2min,beta2max)); 847 PetscFunctionReturn(0); 848 } 849 850 static PetscErrorCode THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer) 851 { 852 PetscReal nrm; 853 PetscInt m; 854 PetscMPIInt rank; 855 856 PetscFunctionBeginUser; 857 CHKERRQ(MatNorm(B,NORM_FROBENIUS,&nrm)); 858 CHKERRQ(MatGetSize(B,&m,0)); 859 CHKERRMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&rank)); 860 if (rank == 0) { 861 PetscScalar val0,val2; 862 CHKERRQ(MatGetValue(B,0,0,&val0)); 863 CHKERRQ(MatGetValue(B,2,2,&val2)); 864 CHKERRQ(PetscViewerASCIIPrintf(viewer,"Matrix dim %D norm %8.2e (0,0) %8.2e (2,2) %8.2e %8.2e <= eta <= %8.2e %8.2e <= beta2 <= %8.2e\n",m,(double)nrm,(double)PetscRealPart(val0),(double)PetscRealPart(val2),(double)thi->eta.cmin,(double)thi->eta.cmax,(double)thi->beta2.cmin,(double)thi->beta2.cmax)); 865 } 866 PetscFunctionReturn(0); 867 } 868 869 static PetscErrorCode THISurfaceStatistics(DM da,Vec X,PetscReal *min,PetscReal *max,PetscReal *mean) 870 { 871 Node ***x; 872 PetscInt i,j,xs,ys,zs,xm,ym,zm,mx,my,mz; 873 PetscReal umin = 1e100,umax=-1e100; 874 PetscScalar usum = 0.0,gusum; 875 876 PetscFunctionBeginUser; 877 *min = *max = *mean = 0; 878 CHKERRQ(DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0)); 879 CHKERRQ(DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm)); 880 PetscCheckFalse(zs != 0 || zm != mz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected decomposition"); 881 CHKERRQ(DMDAVecGetArray(da,X,&x)); 882 for (i=xs; i<xs+xm; i++) { 883 for (j=ys; j<ys+ym; j++) { 884 PetscReal u = PetscRealPart(x[i][j][zm-1].u); 885 RangeUpdate(&umin,&umax,u); 886 usum += u; 887 } 888 } 889 CHKERRQ(DMDAVecRestoreArray(da,X,&x)); 890 CHKERRMPI(MPI_Allreduce(&umin,min,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)da))); 891 CHKERRMPI(MPI_Allreduce(&umax,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)da))); 892 CHKERRMPI(MPI_Allreduce(&usum,&gusum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)da))); 893 *mean = PetscRealPart(gusum) / (mx*my); 894 PetscFunctionReturn(0); 895 } 896 897 static PetscErrorCode THISolveStatistics(THI thi,SNES snes,PetscInt coarsened,const char name[]) 898 { 899 MPI_Comm comm; 900 Vec X; 901 DM dm; 902 903 PetscFunctionBeginUser; 904 CHKERRQ(PetscObjectGetComm((PetscObject)thi,&comm)); 905 CHKERRQ(SNESGetSolution(snes,&X)); 906 CHKERRQ(SNESGetDM(snes,&dm)); 907 CHKERRQ(PetscPrintf(comm,"Solution statistics after solve: %s\n",name)); 908 { 909 PetscInt its,lits; 910 SNESConvergedReason reason; 911 CHKERRQ(SNESGetIterationNumber(snes,&its)); 912 CHKERRQ(SNESGetConvergedReason(snes,&reason)); 913 CHKERRQ(SNESGetLinearSolveIterations(snes,&lits)); 914 CHKERRQ(PetscPrintf(comm,"%s: Number of SNES iterations = %D, total linear iterations = %D\n",SNESConvergedReasons[reason],its,lits)); 915 } 916 { 917 PetscReal nrm2,tmin[3]={1e100,1e100,1e100},tmax[3]={-1e100,-1e100,-1e100},min[3],max[3]; 918 PetscInt i,j,m; 919 const PetscScalar *x; 920 CHKERRQ(VecNorm(X,NORM_2,&nrm2)); 921 CHKERRQ(VecGetLocalSize(X,&m)); 922 CHKERRQ(VecGetArrayRead(X,&x)); 923 for (i=0; i<m; i+=2) { 924 PetscReal u = PetscRealPart(x[i]),v = PetscRealPart(x[i+1]),c = PetscSqrtReal(u*u+v*v); 925 tmin[0] = PetscMin(u,tmin[0]); 926 tmin[1] = PetscMin(v,tmin[1]); 927 tmin[2] = PetscMin(c,tmin[2]); 928 tmax[0] = PetscMax(u,tmax[0]); 929 tmax[1] = PetscMax(v,tmax[1]); 930 tmax[2] = PetscMax(c,tmax[2]); 931 } 932 CHKERRQ(VecRestoreArrayRead(X,&x)); 933 CHKERRMPI(MPI_Allreduce(tmin,min,3,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)thi))); 934 CHKERRMPI(MPI_Allreduce(tmax,max,3,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)thi))); 935 /* Dimensionalize to meters/year */ 936 nrm2 *= thi->units->year / thi->units->meter; 937 for (j=0; j<3; j++) { 938 min[j] *= thi->units->year / thi->units->meter; 939 max[j] *= thi->units->year / thi->units->meter; 940 } 941 if (min[0] == 0.0) min[0] = 0.0; 942 CHKERRQ(PetscPrintf(comm,"|X|_2 %g %g <= u <= %g %g <= v <= %g %g <= c <= %g \n",(double)nrm2,(double)min[0],(double)max[0],(double)min[1],(double)max[1],(double)min[2],(double)max[2])); 943 { 944 PetscReal umin,umax,umean; 945 CHKERRQ(THISurfaceStatistics(dm,X,&umin,&umax,&umean)); 946 umin *= thi->units->year / thi->units->meter; 947 umax *= thi->units->year / thi->units->meter; 948 umean *= thi->units->year / thi->units->meter; 949 CHKERRQ(PetscPrintf(comm,"Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n",(double)umin,(double)umax,(double)umean)); 950 } 951 /* These values stay nondimensional */ 952 CHKERRQ(PetscPrintf(comm,"Global eta range %g to %g converged range %g to %g\n",(double)thi->eta.min,(double)thi->eta.max,(double)thi->eta.cmin,(double)thi->eta.cmax)); 953 CHKERRQ(PetscPrintf(comm,"Global beta2 range %g to %g converged range %g to %g\n",(double)thi->beta2.min,(double)thi->beta2.max,(double)thi->beta2.cmin,(double)thi->beta2.cmax)); 954 } 955 PetscFunctionReturn(0); 956 } 957 958 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info,Node **x,Mat J,Mat B,THI thi) 959 { 960 PetscInt xs,ys,xm,ym,i,j,q,l,ll; 961 PetscReal hx,hy; 962 PrmNode **prm; 963 964 PetscFunctionBeginUser; 965 xs = info->ys; 966 ys = info->xs; 967 xm = info->ym; 968 ym = info->xm; 969 hx = thi->Lx / info->my; 970 hy = thi->Ly / info->mx; 971 972 CHKERRQ(MatZeroEntries(B)); 973 CHKERRQ(THIDAGetPrm(info->da,&prm)); 974 975 for (i=xs; i<xs+xm; i++) { 976 for (j=ys; j<ys+ym; j++) { 977 Node n[4]; 978 PrmNode pn[4]; 979 PetscScalar Ke[4*2][4*2]; 980 QuadExtract(prm,i,j,pn); 981 QuadExtract(x,i,j,n); 982 CHKERRQ(PetscMemzero(Ke,sizeof(Ke))); 983 for (q=0; q<4; q++) { 984 PetscReal phi[4],dphi[4][2],jw,eta,deta,beta2,dbeta2; 985 PetscScalar u,v,du[2],dv[2],h = 0,rbeta2 = 0; 986 for (l=0; l<4; l++) { 987 phi[l] = QuadQInterp[q][l]; 988 dphi[l][0] = QuadQDeriv[q][l][0]*2./hx; 989 dphi[l][1] = QuadQDeriv[q][l][1]*2./hy; 990 h += phi[l] * pn[l].h; 991 rbeta2 += phi[l] * pn[l].beta2; 992 } 993 jw = 0.25*hx*hy / thi->rhog; /* rhog is only scaling */ 994 PointwiseNonlinearity2D(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta); 995 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2); 996 for (l=0; l<4; l++) { 997 const PetscReal pp = phi[l],*dp = dphi[l]; 998 for (ll=0; ll<4; ll++) { 999 const PetscReal ppl = phi[ll],*dpl = dphi[ll]; 1000 PetscScalar dgdu,dgdv; 1001 dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1]; 1002 dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0]; 1003 /* Picard part */ 1004 Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale; 1005 Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0]; 1006 Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1]; 1007 Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale; 1008 /* extra Newton terms */ 1009 Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*u*ppl*thi->ssa_friction_scale; 1010 Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*v*ppl*thi->ssa_friction_scale; 1011 Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*u*ppl*thi->ssa_friction_scale; 1012 Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*v*ppl*thi->ssa_friction_scale; 1013 } 1014 } 1015 } 1016 { 1017 const MatStencil rc[4] = {{0,i,j,0},{0,i+1,j,0},{0,i+1,j+1,0},{0,i,j+1,0}}; 1018 CHKERRQ(MatSetValuesBlockedStencil(B,4,rc,4,rc,&Ke[0][0],ADD_VALUES)); 1019 } 1020 } 1021 } 1022 CHKERRQ(THIDARestorePrm(info->da,&prm)); 1023 1024 CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1025 CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1026 CHKERRQ(MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE)); 1027 if (thi->verbose) CHKERRQ(THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD)); 1028 PetscFunctionReturn(0); 1029 } 1030 1031 static PetscErrorCode THIJacobianLocal_3D(DMDALocalInfo *info,Node ***x,Mat B,THI thi,THIAssemblyMode amode) 1032 { 1033 PetscInt xs,ys,xm,ym,zm,i,j,k,q,l,ll; 1034 PetscReal hx,hy; 1035 PrmNode **prm; 1036 1037 PetscFunctionBeginUser; 1038 xs = info->zs; 1039 ys = info->ys; 1040 xm = info->zm; 1041 ym = info->ym; 1042 zm = info->xm; 1043 hx = thi->Lx / info->mz; 1044 hy = thi->Ly / info->my; 1045 1046 CHKERRQ(MatZeroEntries(B)); 1047 CHKERRQ(MatSetOption(B,MAT_SUBSET_OFF_PROC_ENTRIES,PETSC_TRUE)); 1048 CHKERRQ(THIDAGetPrm(info->da,&prm)); 1049 1050 for (i=xs; i<xs+xm; i++) { 1051 for (j=ys; j<ys+ym; j++) { 1052 PrmNode pn[4]; 1053 QuadExtract(prm,i,j,pn); 1054 for (k=0; k<zm-1; k++) { 1055 Node n[8]; 1056 PetscReal zn[8],etabase = 0; 1057 PetscScalar Ke[8*2][8*2]; 1058 PetscInt ls = 0; 1059 1060 PrmHexGetZ(pn,k,zm,zn); 1061 HexExtract(x,i,j,k,n); 1062 CHKERRQ(PetscMemzero(Ke,sizeof(Ke))); 1063 if (thi->no_slip && k == 0) { 1064 for (l=0; l<4; l++) n[l].u = n[l].v = 0; 1065 ls = 4; 1066 } 1067 for (q=0; q<8; q++) { 1068 PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta; 1069 PetscScalar du[3],dv[3],u,v; 1070 HexGrad(HexQDeriv[q],zn,dz); 1071 HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw); 1072 PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta); 1073 jw /= thi->rhog; /* residuals are scaled by this factor */ 1074 if (q == 0) etabase = eta; 1075 for (l=ls; l<8; l++) { /* test functions */ 1076 const PetscReal *PETSC_RESTRICT dp = dphi[l]; 1077 #if USE_SSE2_KERNELS 1078 /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */ 1079 __m128d 1080 p4 = _mm_set1_pd(4),p2 = _mm_set1_pd(2),p05 = _mm_set1_pd(0.5), 1081 p42 = _mm_setr_pd(4,2),p24 = _mm_shuffle_pd(p42,p42,_MM_SHUFFLE2(0,1)), 1082 du0 = _mm_set1_pd(du[0]),du1 = _mm_set1_pd(du[1]),du2 = _mm_set1_pd(du[2]), 1083 dv0 = _mm_set1_pd(dv[0]),dv1 = _mm_set1_pd(dv[1]),dv2 = _mm_set1_pd(dv[2]), 1084 jweta = _mm_set1_pd(jw*eta),jwdeta = _mm_set1_pd(jw*deta), 1085 dp0 = _mm_set1_pd(dp[0]),dp1 = _mm_set1_pd(dp[1]),dp2 = _mm_set1_pd(dp[2]), 1086 dp0jweta = _mm_mul_pd(dp0,jweta),dp1jweta = _mm_mul_pd(dp1,jweta),dp2jweta = _mm_mul_pd(dp2,jweta), 1087 p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4,du0),_mm_mul_pd(p2,dv1)), /* 4 du0 + 2 dv1 */ 1088 p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4,dv1),_mm_mul_pd(p2,du0)), /* 4 dv1 + 2 du0 */ 1089 pdu2dv2 = _mm_unpacklo_pd(du2,dv2), /* [du2, dv2] */ 1090 du1pdv0 = _mm_add_pd(du1,dv0), /* du1 + dv0 */ 1091 t1 = _mm_mul_pd(dp0,p4du0p2dv1), /* dp0 (4 du0 + 2 dv1) */ 1092 t2 = _mm_mul_pd(dp1,p4dv1p2du0); /* dp1 (4 dv1 + 2 du0) */ 1093 1094 #endif 1095 #if defined COMPUTE_LOWER_TRIANGULAR /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */ 1096 for (ll=ls; ll<8; ll++) { /* trial functions */ 1097 #else 1098 for (ll=l; ll<8; ll++) { 1099 #endif 1100 const PetscReal *PETSC_RESTRICT dpl = dphi[ll]; 1101 if (amode == THIASSEMBLY_TRIDIAGONAL && (l-ll)%4) continue; /* these entries would not be inserted */ 1102 #if !USE_SSE2_KERNELS 1103 /* The analytic Jacobian in nice, easy-to-read form */ 1104 { 1105 PetscScalar dgdu,dgdv; 1106 dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1] + 0.5*du[2]*dpl[2]; 1107 dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0] + 0.5*dv[2]*dpl[2]; 1108 /* Picard part */ 1109 Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + dp[2]*jw*eta*dpl[2]; 1110 Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0]; 1111 Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1]; 1112 Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + dp[2]*jw*eta*dpl[2]; 1113 /* extra Newton terms */ 1114 Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*du[2]; 1115 Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*du[2]; 1116 Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*dv[2]; 1117 Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*dv[2]; 1118 } 1119 #else 1120 /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed 1121 * benefit. On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost 1122 * by 25 to 30 percent. */ 1123 { 1124 __m128d 1125 keu = _mm_loadu_pd(&Ke[l*2+0][ll*2+0]), 1126 kev = _mm_loadu_pd(&Ke[l*2+1][ll*2+0]), 1127 dpl01 = _mm_loadu_pd(&dpl[0]),dpl10 = _mm_shuffle_pd(dpl01,dpl01,_MM_SHUFFLE2(0,1)),dpl2 = _mm_set_sd(dpl[2]), 1128 t0,t3,pdgduv; 1129 keu = _mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta,p42),dpl01), 1130 _mm_add_pd(_mm_mul_pd(dp1jweta,dpl10), 1131 _mm_mul_pd(dp2jweta,dpl2)))); 1132 kev = _mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta,p24),dpl01), 1133 _mm_add_pd(_mm_mul_pd(dp0jweta,dpl10), 1134 _mm_mul_pd(dp2jweta,_mm_shuffle_pd(dpl2,dpl2,_MM_SHUFFLE2(0,1)))))); 1135 pdgduv = _mm_mul_pd(p05,_mm_add_pd(_mm_add_pd(_mm_mul_pd(p42,_mm_mul_pd(du0,dpl01)), 1136 _mm_mul_pd(p24,_mm_mul_pd(dv1,dpl01))), 1137 _mm_add_pd(_mm_mul_pd(du1pdv0,dpl10), 1138 _mm_mul_pd(pdu2dv2,_mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */ 1139 t0 = _mm_mul_pd(jwdeta,pdgduv); /* jw deta [dgdu, dgdv] */ 1140 t3 = _mm_mul_pd(t0,du1pdv0); /* t0 (du1 + dv0) */ 1141 _mm_storeu_pd(&Ke[l*2+0][ll*2+0],_mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(t1,t0), 1142 _mm_add_pd(_mm_mul_pd(dp1,t3), 1143 _mm_mul_pd(t0,_mm_mul_pd(dp2,du2)))))); 1144 _mm_storeu_pd(&Ke[l*2+1][ll*2+0],_mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(t2,t0), 1145 _mm_add_pd(_mm_mul_pd(dp0,t3), 1146 _mm_mul_pd(t0,_mm_mul_pd(dp2,dv2)))))); 1147 } 1148 #endif 1149 } 1150 } 1151 } 1152 if (k == 0) { /* on a bottom face */ 1153 if (thi->no_slip) { 1154 const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1); 1155 const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx); 1156 Ke[0][0] = thi->dirichlet_scale*diagu; 1157 Ke[1][1] = thi->dirichlet_scale*diagv; 1158 } else { 1159 for (q=0; q<4; q++) { 1160 const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q]; 1161 PetscScalar u =0,v=0,rbeta2=0; 1162 PetscReal beta2,dbeta2; 1163 for (l=0; l<4; l++) { 1164 u += phi[l]*n[l].u; 1165 v += phi[l]*n[l].v; 1166 rbeta2 += phi[l]*pn[l].beta2; 1167 } 1168 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2); 1169 for (l=0; l<4; l++) { 1170 const PetscReal pp = phi[l]; 1171 for (ll=0; ll<4; ll++) { 1172 const PetscReal ppl = phi[ll]; 1173 Ke[l*2+0][ll*2+0] += pp*jw*beta2*ppl + pp*jw*dbeta2*u*u*ppl; 1174 Ke[l*2+0][ll*2+1] += pp*jw*dbeta2*u*v*ppl; 1175 Ke[l*2+1][ll*2+0] += pp*jw*dbeta2*v*u*ppl; 1176 Ke[l*2+1][ll*2+1] += pp*jw*beta2*ppl + pp*jw*dbeta2*v*v*ppl; 1177 } 1178 } 1179 } 1180 } 1181 } 1182 { 1183 const MatStencil rc[8] = {{i,j,k,0},{i+1,j,k,0},{i+1,j+1,k,0},{i,j+1,k,0},{i,j,k+1,0},{i+1,j,k+1,0},{i+1,j+1,k+1,0},{i,j+1,k+1,0}}; 1184 if (amode == THIASSEMBLY_TRIDIAGONAL) { 1185 for (l=0; l<4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */ 1186 const PetscInt l4 = l+4; 1187 const MatStencil rcl[2] = {{rc[l].k,rc[l].j,rc[l].i,0},{rc[l4].k,rc[l4].j,rc[l4].i,0}}; 1188 #if defined COMPUTE_LOWER_TRIANGULAR 1189 const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]}, 1190 {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]}, 1191 {Ke[2*l4+0][2*l+0],Ke[2*l4+0][2*l+1],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]}, 1192 {Ke[2*l4+1][2*l+0],Ke[2*l4+1][2*l+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}}; 1193 #else 1194 /* Same as above except for the lower-left block */ 1195 const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]}, 1196 {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]}, 1197 {Ke[2*l+0][2*l4+0],Ke[2*l+1][2*l4+0],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]}, 1198 {Ke[2*l+0][2*l4+1],Ke[2*l+1][2*l4+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}}; 1199 #endif 1200 CHKERRQ(MatSetValuesBlockedStencil(B,2,rcl,2,rcl,&Kel[0][0],ADD_VALUES)); 1201 } 1202 } else { 1203 #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */ 1204 for (l=0; l<8; l++) { 1205 for (ll=l+1; ll<8; ll++) { 1206 Ke[ll*2+0][l*2+0] = Ke[l*2+0][ll*2+0]; 1207 Ke[ll*2+1][l*2+0] = Ke[l*2+0][ll*2+1]; 1208 Ke[ll*2+0][l*2+1] = Ke[l*2+1][ll*2+0]; 1209 Ke[ll*2+1][l*2+1] = Ke[l*2+1][ll*2+1]; 1210 } 1211 } 1212 #endif 1213 CHKERRQ(MatSetValuesBlockedStencil(B,8,rc,8,rc,&Ke[0][0],ADD_VALUES)); 1214 } 1215 } 1216 } 1217 } 1218 } 1219 CHKERRQ(THIDARestorePrm(info->da,&prm)); 1220 1221 CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1222 CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1223 CHKERRQ(MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE)); 1224 if (thi->verbose) CHKERRQ(THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD)); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi) 1229 { 1230 PetscFunctionBeginUser; 1231 CHKERRQ(THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_FULL)); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi) 1236 { 1237 PetscFunctionBeginUser; 1238 CHKERRQ(THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_TRIDIAGONAL)); 1239 PetscFunctionReturn(0); 1240 } 1241 1242 static PetscErrorCode DMRefineHierarchy_THI(DM dac0,PetscInt nlevels,DM hierarchy[]) 1243 { 1244 THI thi; 1245 PetscInt dim,M,N,m,n,s,dof; 1246 DM dac,daf; 1247 DMDAStencilType st; 1248 DM_DA *ddf,*ddc; 1249 1250 PetscFunctionBeginUser; 1251 CHKERRQ(PetscObjectQuery((PetscObject)dac0,"THI",(PetscObject*)&thi)); 1252 PetscCheck(thi,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot refine this DMDA, missing composed THI instance"); 1253 if (nlevels > 1) { 1254 CHKERRQ(DMRefineHierarchy(dac0,nlevels-1,hierarchy)); 1255 dac = hierarchy[nlevels-2]; 1256 } else { 1257 dac = dac0; 1258 } 1259 CHKERRQ(DMDAGetInfo(dac,&dim, &N,&M,0, &n,&m,0, &dof,&s,0,0,0,&st)); 1260 PetscCheckFalse(dim != 2,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"This function can only refine 2D DMDAs"); 1261 1262 /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */ 1263 CHKERRQ(DMDACreate3d(PetscObjectComm((PetscObject)dac),DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,thi->zlevels,N,M,1,n,m,dof,s,NULL,NULL,NULL,&daf)); 1264 CHKERRQ(DMSetUp(daf)); 1265 1266 daf->ops->creatematrix = dac->ops->creatematrix; 1267 daf->ops->createinterpolation = dac->ops->createinterpolation; 1268 daf->ops->getcoloring = dac->ops->getcoloring; 1269 ddf = (DM_DA*)daf->data; 1270 ddc = (DM_DA*)dac->data; 1271 ddf->interptype = ddc->interptype; 1272 1273 CHKERRQ(DMDASetFieldName(daf,0,"x-velocity")); 1274 CHKERRQ(DMDASetFieldName(daf,1,"y-velocity")); 1275 1276 hierarchy[nlevels-1] = daf; 1277 PetscFunctionReturn(0); 1278 } 1279 1280 static PetscErrorCode DMCreateInterpolation_DA_THI(DM dac,DM daf,Mat *A,Vec *scale) 1281 { 1282 PetscInt dim; 1283 1284 PetscFunctionBeginUser; 1285 PetscValidHeaderSpecific(dac,DM_CLASSID,1); 1286 PetscValidHeaderSpecific(daf,DM_CLASSID,2); 1287 PetscValidPointer(A,3); 1288 if (scale) PetscValidPointer(scale,4); 1289 CHKERRQ(DMDAGetInfo(daf,&dim,0,0,0,0,0,0,0,0,0,0,0,0)); 1290 if (dim == 2) { 1291 /* We are in the 2D problem and use normal DMDA interpolation */ 1292 CHKERRQ(DMCreateInterpolation(dac,daf,A,scale)); 1293 } else { 1294 PetscInt i,j,k,xs,ys,zs,xm,ym,zm,mx,my,mz,rstart,cstart; 1295 Mat B; 1296 1297 CHKERRQ(DMDAGetInfo(daf,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0)); 1298 CHKERRQ(DMDAGetCorners(daf,&zs,&ys,&xs,&zm,&ym,&xm)); 1299 PetscCheck(!zs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"unexpected"); 1300 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)daf),&B)); 1301 CHKERRQ(MatSetSizes(B,xm*ym*zm,xm*ym,mx*my*mz,mx*my)); 1302 1303 CHKERRQ(MatSetType(B,MATAIJ)); 1304 CHKERRQ(MatSeqAIJSetPreallocation(B,1,NULL)); 1305 CHKERRQ(MatMPIAIJSetPreallocation(B,1,NULL,0,NULL)); 1306 CHKERRQ(MatGetOwnershipRange(B,&rstart,NULL)); 1307 CHKERRQ(MatGetOwnershipRangeColumn(B,&cstart,NULL)); 1308 for (i=xs; i<xs+xm; i++) { 1309 for (j=ys; j<ys+ym; j++) { 1310 for (k=zs; k<zs+zm; k++) { 1311 PetscInt i2 = i*ym+j,i3 = i2*zm+k; 1312 PetscScalar val = ((k == 0 || k == mz-1) ? 0.5 : 1.) / (mz-1.); /* Integration using trapezoid rule */ 1313 CHKERRQ(MatSetValue(B,cstart+i3,rstart+i2,val,INSERT_VALUES)); 1314 } 1315 } 1316 } 1317 CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1318 CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1319 CHKERRQ(MatCreateMAIJ(B,sizeof(Node)/sizeof(PetscScalar),A)); 1320 CHKERRQ(MatDestroy(&B)); 1321 } 1322 PetscFunctionReturn(0); 1323 } 1324 1325 static PetscErrorCode DMCreateMatrix_THI_Tridiagonal(DM da,Mat *J) 1326 { 1327 Mat A; 1328 PetscInt xm,ym,zm,dim,dof = 2,starts[3],dims[3]; 1329 ISLocalToGlobalMapping ltog; 1330 1331 PetscFunctionBeginUser; 1332 CHKERRQ(DMDAGetInfo(da,&dim, 0,0,0, 0,0,0, 0,0,0,0,0,0)); 1333 PetscCheckFalse(dim != 3,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Expected DMDA to be 3D"); 1334 CHKERRQ(DMDAGetCorners(da,0,0,0,&zm,&ym,&xm)); 1335 CHKERRQ(DMGetLocalToGlobalMapping(da,<og)); 1336 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)da),&A)); 1337 CHKERRQ(MatSetSizes(A,dof*xm*ym*zm,dof*xm*ym*zm,PETSC_DETERMINE,PETSC_DETERMINE)); 1338 CHKERRQ(MatSetType(A,da->mattype)); 1339 CHKERRQ(MatSetFromOptions(A)); 1340 CHKERRQ(MatSeqAIJSetPreallocation(A,3*2,NULL)); 1341 CHKERRQ(MatMPIAIJSetPreallocation(A,3*2,NULL,0,NULL)); 1342 CHKERRQ(MatSeqBAIJSetPreallocation(A,2,3,NULL)); 1343 CHKERRQ(MatMPIBAIJSetPreallocation(A,2,3,NULL,0,NULL)); 1344 CHKERRQ(MatSeqSBAIJSetPreallocation(A,2,2,NULL)); 1345 CHKERRQ(MatMPISBAIJSetPreallocation(A,2,2,NULL,0,NULL)); 1346 CHKERRQ(MatSetLocalToGlobalMapping(A,ltog,ltog)); 1347 CHKERRQ(DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2])); 1348 CHKERRQ(MatSetStencil(A,dim,dims,starts,dof)); 1349 *J = A; 1350 PetscFunctionReturn(0); 1351 } 1352 1353 static PetscErrorCode THIDAVecView_VTK_XML(THI thi,DM da,Vec X,const char filename[]) 1354 { 1355 const PetscInt dof = 2; 1356 Units units = thi->units; 1357 MPI_Comm comm; 1358 PetscViewer viewer; 1359 PetscMPIInt rank,size,tag,nn,nmax; 1360 PetscInt mx,my,mz,r,range[6]; 1361 const PetscScalar *x; 1362 1363 PetscFunctionBeginUser; 1364 CHKERRQ(PetscObjectGetComm((PetscObject)thi,&comm)); 1365 CHKERRQ(DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0)); 1366 CHKERRMPI(MPI_Comm_size(comm,&size)); 1367 CHKERRMPI(MPI_Comm_rank(comm,&rank)); 1368 CHKERRQ(PetscViewerASCIIOpen(comm,filename,&viewer)); 1369 CHKERRQ(PetscViewerASCIIPrintf(viewer,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n")); 1370 CHKERRQ(PetscViewerASCIIPrintf(viewer," <StructuredGrid WholeExtent=\"%d %D %d %D %d %D\">\n",0,mz-1,0,my-1,0,mx-1)); 1371 1372 CHKERRQ(DMDAGetCorners(da,range,range+1,range+2,range+3,range+4,range+5)); 1373 CHKERRQ(PetscMPIIntCast(range[3]*range[4]*range[5]*dof,&nn)); 1374 CHKERRMPI(MPI_Reduce(&nn,&nmax,1,MPI_INT,MPI_MAX,0,comm)); 1375 tag = ((PetscObject) viewer)->tag; 1376 CHKERRQ(VecGetArrayRead(X,&x)); 1377 if (rank == 0) { 1378 PetscScalar *array; 1379 CHKERRQ(PetscMalloc1(nmax,&array)); 1380 for (r=0; r<size; r++) { 1381 PetscInt i,j,k,xs,xm,ys,ym,zs,zm; 1382 const PetscScalar *ptr; 1383 MPI_Status status; 1384 if (r) { 1385 CHKERRMPI(MPI_Recv(range,6,MPIU_INT,r,tag,comm,MPI_STATUS_IGNORE)); 1386 } 1387 zs = range[0];ys = range[1];xs = range[2];zm = range[3];ym = range[4];xm = range[5]; 1388 PetscCheckFalse(xm*ym*zm*dof > nmax,PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen"); 1389 if (r) { 1390 CHKERRMPI(MPI_Recv(array,nmax,MPIU_SCALAR,r,tag,comm,&status)); 1391 CHKERRMPI(MPI_Get_count(&status,MPIU_SCALAR,&nn)); 1392 PetscCheckFalse(nn != xm*ym*zm*dof,PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen"); 1393 ptr = array; 1394 } else ptr = x; 1395 CHKERRQ(PetscViewerASCIIPrintf(viewer," <Piece Extent=\"%D %D %D %D %D %D\">\n",zs,zs+zm-1,ys,ys+ym-1,xs,xs+xm-1)); 1396 1397 CHKERRQ(PetscViewerASCIIPrintf(viewer," <Points>\n")); 1398 CHKERRQ(PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n")); 1399 for (i=xs; i<xs+xm; i++) { 1400 for (j=ys; j<ys+ym; j++) { 1401 for (k=zs; k<zs+zm; k++) { 1402 PrmNode p; 1403 PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my,zz; 1404 thi->initialize(thi,xx,yy,&p); 1405 zz = PetscRealPart(p.b) + PetscRealPart(p.h)*k/(mz-1); 1406 CHKERRQ(PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)xx,(double)yy,(double)zz)); 1407 } 1408 } 1409 } 1410 CHKERRQ(PetscViewerASCIIPrintf(viewer," </DataArray>\n")); 1411 CHKERRQ(PetscViewerASCIIPrintf(viewer," </Points>\n")); 1412 1413 CHKERRQ(PetscViewerASCIIPrintf(viewer," <PointData>\n")); 1414 CHKERRQ(PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n")); 1415 for (i=0; i<nn; i+=dof) { 1416 CHKERRQ(PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)(PetscRealPart(ptr[i])*units->year/units->meter),(double)(PetscRealPart(ptr[i+1])*units->year/units->meter),0.0)); 1417 } 1418 CHKERRQ(PetscViewerASCIIPrintf(viewer," </DataArray>\n")); 1419 1420 CHKERRQ(PetscViewerASCIIPrintf(viewer," <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n")); 1421 for (i=0; i<nn; i+=dof) { 1422 CHKERRQ(PetscViewerASCIIPrintf(viewer,"%D\n",r)); 1423 } 1424 CHKERRQ(PetscViewerASCIIPrintf(viewer," </DataArray>\n")); 1425 CHKERRQ(PetscViewerASCIIPrintf(viewer," </PointData>\n")); 1426 1427 CHKERRQ(PetscViewerASCIIPrintf(viewer," </Piece>\n")); 1428 } 1429 CHKERRQ(PetscFree(array)); 1430 } else { 1431 CHKERRMPI(MPI_Send(range,6,MPIU_INT,0,tag,comm)); 1432 CHKERRMPI(MPI_Send((PetscScalar*)x,nn,MPIU_SCALAR,0,tag,comm)); 1433 } 1434 CHKERRQ(VecRestoreArrayRead(X,&x)); 1435 CHKERRQ(PetscViewerASCIIPrintf(viewer," </StructuredGrid>\n")); 1436 CHKERRQ(PetscViewerASCIIPrintf(viewer,"</VTKFile>\n")); 1437 CHKERRQ(PetscViewerDestroy(&viewer)); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 int main(int argc,char *argv[]) 1442 { 1443 MPI_Comm comm; 1444 THI thi; 1445 PetscErrorCode ierr; 1446 DM da; 1447 SNES snes; 1448 1449 ierr = PetscInitialize(&argc,&argv,0,help);if (ierr) return ierr; 1450 comm = PETSC_COMM_WORLD; 1451 1452 CHKERRQ(THICreate(comm,&thi)); 1453 { 1454 PetscInt M = 3,N = 3,P = 2; 1455 ierr = PetscOptionsBegin(comm,NULL,"Grid resolution options","");CHKERRQ(ierr); 1456 { 1457 CHKERRQ(PetscOptionsInt("-M","Number of elements in x-direction on coarse level","",M,&M,NULL)); 1458 N = M; 1459 CHKERRQ(PetscOptionsInt("-N","Number of elements in y-direction on coarse level (if different from M)","",N,&N,NULL)); 1460 if (thi->coarse2d) { 1461 CHKERRQ(PetscOptionsInt("-zlevels","Number of elements in z-direction on fine level","",thi->zlevels,&thi->zlevels,NULL)); 1462 } else { 1463 CHKERRQ(PetscOptionsInt("-P","Number of elements in z-direction on coarse level","",P,&P,NULL)); 1464 } 1465 } 1466 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1467 if (thi->coarse2d) { 1468 CHKERRQ(DMDACreate2d(comm,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_BOX,N,M,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,&da)); 1469 CHKERRQ(DMSetFromOptions(da)); 1470 CHKERRQ(DMSetUp(da)); 1471 da->ops->refinehierarchy = DMRefineHierarchy_THI; 1472 da->ops->createinterpolation = DMCreateInterpolation_DA_THI; 1473 1474 CHKERRQ(PetscObjectCompose((PetscObject)da,"THI",(PetscObject)thi)); 1475 } else { 1476 CHKERRQ(DMDACreate3d(comm,DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX,P,N,M,1,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,0,&da)); 1477 CHKERRQ(DMSetFromOptions(da)); 1478 CHKERRQ(DMSetUp(da)); 1479 } 1480 CHKERRQ(DMDASetFieldName(da,0,"x-velocity")); 1481 CHKERRQ(DMDASetFieldName(da,1,"y-velocity")); 1482 } 1483 CHKERRQ(THISetUpDM(thi,da)); 1484 if (thi->tridiagonal) da->ops->creatematrix = DMCreateMatrix_THI_Tridiagonal; 1485 1486 { /* Set the fine level matrix type if -da_refine */ 1487 PetscInt rlevel,clevel; 1488 CHKERRQ(DMGetRefineLevel(da,&rlevel)); 1489 CHKERRQ(DMGetCoarsenLevel(da,&clevel)); 1490 if (rlevel - clevel > 0) CHKERRQ(DMSetMatType(da,thi->mattype)); 1491 } 1492 1493 CHKERRQ(DMDASNESSetFunctionLocal(da,ADD_VALUES,(DMDASNESFunction)THIFunctionLocal,thi)); 1494 if (thi->tridiagonal) { 1495 CHKERRQ(DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Tridiagonal,thi)); 1496 } else { 1497 CHKERRQ(DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi)); 1498 } 1499 CHKERRQ(DMCoarsenHookAdd(da,DMCoarsenHook_THI,NULL,thi)); 1500 CHKERRQ(DMRefineHookAdd(da,DMRefineHook_THI,NULL,thi)); 1501 1502 CHKERRQ(DMSetApplicationContext(da,thi)); 1503 1504 CHKERRQ(SNESCreate(comm,&snes)); 1505 CHKERRQ(SNESSetDM(snes,da)); 1506 CHKERRQ(DMDestroy(&da)); 1507 CHKERRQ(SNESSetComputeInitialGuess(snes,THIInitial,NULL)); 1508 CHKERRQ(SNESSetFromOptions(snes)); 1509 1510 CHKERRQ(SNESSolve(snes,NULL,NULL)); 1511 1512 CHKERRQ(THISolveStatistics(thi,snes,0,"Full")); 1513 1514 { 1515 PetscBool flg; 1516 char filename[PETSC_MAX_PATH_LEN] = ""; 1517 CHKERRQ(PetscOptionsGetString(NULL,NULL,"-o",filename,sizeof(filename),&flg)); 1518 if (flg) { 1519 Vec X; 1520 DM dm; 1521 CHKERRQ(SNESGetSolution(snes,&X)); 1522 CHKERRQ(SNESGetDM(snes,&dm)); 1523 CHKERRQ(THIDAVecView_VTK_XML(thi,dm,X,filename)); 1524 } 1525 } 1526 1527 CHKERRQ(DMDestroy(&da)); 1528 CHKERRQ(SNESDestroy(&snes)); 1529 CHKERRQ(THIDestroy(&thi)); 1530 ierr = PetscFinalize(); 1531 return ierr; 1532 } 1533 1534 /*TEST 1535 1536 build: 1537 requires: !single 1538 1539 test: 1540 args: -M 6 -P 4 -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type icc 1541 1542 test: 1543 suffix: 2 1544 nsize: 2 1545 args: -M 6 -P 4 -thi_hom z -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 6 -mg_levels_0_pc_type redundant -snes_grid_sequence 1 -mat_partitioning_type current -ksp_atol -1 1546 1547 test: 1548 suffix: 3 1549 nsize: 3 1550 args: -M 7 -P 4 -thi_hom z -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type baij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_pc_asm_type restrict -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 9 -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mat_partitioning_type current 1551 1552 test: 1553 suffix: 4 1554 nsize: 6 1555 args: -M 4 -P 2 -da_refine_hierarchy_x 1,1,3 -da_refine_hierarchy_y 2,2,1 -da_refine_hierarchy_z 2,2,1 -snes_grid_sequence 3 -ksp_converged_reason -ksp_type fgmres -ksp_rtol 1e-2 -pc_type mg -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type bjacobi -mg_levels_1_sub_pc_type cholesky -pc_mg_type multiplicative -snes_converged_reason -snes_stol 1e-12 -thi_L 80e3 -thi_alpha 0.05 -thi_friction_m 1 -thi_hom x -snes_view -mg_levels_0_pc_type redundant -mg_levels_0_ksp_type preonly -ksp_atol -1 1556 1557 test: 1558 suffix: 5 1559 nsize: 6 1560 args: -M 12 -P 5 -snes_monitor_short -ksp_converged_reason -pc_type asm -pc_asm_type restrict -dm_mat_type {{aij baij sbaij}} 1561 1562 TEST*/ 1563