1 static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\ 2 \n\ 3 Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\ 4 using multigrid. The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\ 5 to p=4/3 in a p-Laplacian). The focus is on ISMIP-HOM experiments which assume periodic\n\ 6 boundary conditions in the x- and y-directions.\n\ 7 \n\ 8 Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\ 9 can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\ 10 \n\ 11 A VTK StructuredGrid output file can be written using the option -o filename.vts\n\ 12 \n\n"; 13 14 /* 15 The equations for horizontal velocity (u,v) are 16 17 - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0 18 - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0 19 20 where 21 22 eta = B/2 (epsilon + gamma)^((p-2)/2) 23 24 is the nonlinear effective viscosity with regularization epsilon and hardness parameter B, 25 written in terms of the second invariant 26 27 gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2 28 29 The surface boundary conditions are the natural conditions. The basal boundary conditions 30 are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2. 31 32 In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1). 33 34 The discretization is Q1 finite elements, managed by a DMDA. The grid is never distorted in the 35 map (x,y) plane, but the bed and surface may be bumpy. This is handled as usual in FEM, through 36 the Jacobian of the coordinate transformation from a reference element to the physical element. 37 38 Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed 39 specially so that columns are never distributed, and are always contiguous in memory. 40 This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation, 41 and then indexing as vec[i][j][k]. The exotic coarse spaces require 2D DMDAs which are made to 42 use compatible domain decomposition relative to the 3D DMDAs. 43 44 There are two compile-time options: 45 46 NO_SSE2: 47 If the host supports SSE2, we use integration code that has been vectorized with SSE2 48 intrinsics, unless this macro is defined. The intrinsics speed up integration by about 49 30% on my architecture (P8700, gcc-4.5 snapshot). 50 51 COMPUTE_LOWER_TRIANGULAR: 52 The element matrices we assemble are lower-triangular so it is not necessary to compute 53 all entries explicitly. If this macro is defined, the lower-triangular entries are 54 computed explicitly. 55 56 */ 57 58 #if defined(PETSC_APPLE_FRAMEWORK) 59 #import <PETSc/petscsnes.h> 60 #import <PETSc/petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */ 61 #else 62 63 #include <petscsnes.h> 64 #include <petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */ 65 #endif 66 #include <ctype.h> /* toupper() */ 67 68 #if defined(__cplusplus) || defined (PETSC_HAVE_WINDOWS_COMPILERS) || defined (__PGI) 69 /* c++ cannot handle [_restrict_] notation like C does */ 70 #undef PETSC_RESTRICT 71 #define PETSC_RESTRICT 72 #endif 73 74 #if defined __SSE2__ 75 # include <emmintrin.h> 76 #endif 77 78 /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */ 79 #if !defined NO_SSE2 \ 80 && !defined PETSC_USE_COMPLEX \ 81 && !defined PETSC_USE_REAL_SINGLE \ 82 && !defined PETSC_USE_REAL___FLOAT128 \ 83 && !defined PETSC_USE_REAL___FP16 \ 84 && defined __SSE2__ 85 #define USE_SSE2_KERNELS 1 86 #else 87 #define USE_SSE2_KERNELS 0 88 #endif 89 90 static PetscClassId THI_CLASSID; 91 92 typedef enum {QUAD_GAUSS,QUAD_LOBATTO} QuadratureType; 93 static const char *QuadratureTypes[] = {"gauss","lobatto","QuadratureType","QUAD_",0}; 94 PETSC_UNUSED static const PetscReal HexQWeights[8] = {1,1,1,1,1,1,1,1}; 95 PETSC_UNUSED static const PetscReal HexQNodes[] = {-0.57735026918962573, 0.57735026918962573}; 96 #define G 0.57735026918962573 97 #define H (0.5*(1.+G)) 98 #define L (0.5*(1.-G)) 99 #define M (-0.5) 100 #define P (0.5) 101 /* Special quadrature: Lobatto in horizontal, Gauss in vertical */ 102 static const PetscReal HexQInterp_Lobatto[8][8] = {{H,0,0,0,L,0,0,0}, 103 {0,H,0,0,0,L,0,0}, 104 {0,0,H,0,0,0,L,0}, 105 {0,0,0,H,0,0,0,L}, 106 {L,0,0,0,H,0,0,0}, 107 {0,L,0,0,0,H,0,0}, 108 {0,0,L,0,0,0,H,0}, 109 {0,0,0,L,0,0,0,H}}; 110 static const PetscReal HexQDeriv_Lobatto[8][8][3] = { 111 {{M*H,M*H,M},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} ,{M*L,M*L,P},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} }, 112 {{M*H,0,0} ,{P*H,M*H,M},{0,P*H,0} ,{0,0,0} ,{M*L,0,0} ,{P*L,M*L,P},{0,P*L,0} ,{0,0,0} }, 113 {{0,0,0} ,{0,M*H,0} ,{P*H,P*H,M},{M*H,0,0} ,{0,0,0} ,{0,M*L,0} ,{P*L,P*L,P},{M*L,0,0} }, 114 {{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,M},{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,P}}, 115 {{M*L,M*L,M},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} ,{M*H,M*H,P},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} }, 116 {{M*L,0,0} ,{P*L,M*L,M},{0,P*L,0} ,{0,0,0} ,{M*H,0,0} ,{P*H,M*H,P},{0,P*H,0} ,{0,0,0} }, 117 {{0,0,0} ,{0,M*L,0} ,{P*L,P*L,M},{M*L,0,0} ,{0,0,0} ,{0,M*H,0} ,{P*H,P*H,P},{M*H,0,0} }, 118 {{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,M},{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,P}}}; 119 /* Stanndard Gauss */ 120 static const PetscReal HexQInterp_Gauss[8][8] = {{H*H*H,L*H*H,L*L*H,H*L*H, H*H*L,L*H*L,L*L*L,H*L*L}, 121 {L*H*H,H*H*H,H*L*H,L*L*H, L*H*L,H*H*L,H*L*L,L*L*L}, 122 {L*L*H,H*L*H,H*H*H,L*H*H, L*L*L,H*L*L,H*H*L,L*H*L}, 123 {H*L*H,L*L*H,L*H*H,H*H*H, H*L*L,L*L*L,L*H*L,H*H*L}, 124 {H*H*L,L*H*L,L*L*L,H*L*L, H*H*H,L*H*H,L*L*H,H*L*H}, 125 {L*H*L,H*H*L,H*L*L,L*L*L, L*H*H,H*H*H,H*L*H,L*L*H}, 126 {L*L*L,H*L*L,H*H*L,L*H*L, L*L*H,H*L*H,H*H*H,L*H*H}, 127 {H*L*L,L*L*L,L*H*L,H*H*L, H*L*H,L*L*H,L*H*H,H*H*H}}; 128 static const PetscReal HexQDeriv_Gauss[8][8][3] = { 129 {{M*H*H,H*M*H,H*H*M},{P*H*H,L*M*H,L*H*M},{P*L*H,L*P*H,L*L*M},{M*L*H,H*P*H,H*L*M}, {M*H*L,H*M*L,H*H*P},{P*H*L,L*M*L,L*H*P},{P*L*L,L*P*L,L*L*P},{M*L*L,H*P*L,H*L*P}}, 130 {{M*H*H,L*M*H,L*H*M},{P*H*H,H*M*H,H*H*M},{P*L*H,H*P*H,H*L*M},{M*L*H,L*P*H,L*L*M}, {M*H*L,L*M*L,L*H*P},{P*H*L,H*M*L,H*H*P},{P*L*L,H*P*L,H*L*P},{M*L*L,L*P*L,L*L*P}}, 131 {{M*L*H,L*M*H,L*L*M},{P*L*H,H*M*H,H*L*M},{P*H*H,H*P*H,H*H*M},{M*H*H,L*P*H,L*H*M}, {M*L*L,L*M*L,L*L*P},{P*L*L,H*M*L,H*L*P},{P*H*L,H*P*L,H*H*P},{M*H*L,L*P*L,L*H*P}}, 132 {{M*L*H,H*M*H,H*L*M},{P*L*H,L*M*H,L*L*M},{P*H*H,L*P*H,L*H*M},{M*H*H,H*P*H,H*H*M}, {M*L*L,H*M*L,H*L*P},{P*L*L,L*M*L,L*L*P},{P*H*L,L*P*L,L*H*P},{M*H*L,H*P*L,H*H*P}}, 133 {{M*H*L,H*M*L,H*H*M},{P*H*L,L*M*L,L*H*M},{P*L*L,L*P*L,L*L*M},{M*L*L,H*P*L,H*L*M}, {M*H*H,H*M*H,H*H*P},{P*H*H,L*M*H,L*H*P},{P*L*H,L*P*H,L*L*P},{M*L*H,H*P*H,H*L*P}}, 134 {{M*H*L,L*M*L,L*H*M},{P*H*L,H*M*L,H*H*M},{P*L*L,H*P*L,H*L*M},{M*L*L,L*P*L,L*L*M}, {M*H*H,L*M*H,L*H*P},{P*H*H,H*M*H,H*H*P},{P*L*H,H*P*H,H*L*P},{M*L*H,L*P*H,L*L*P}}, 135 {{M*L*L,L*M*L,L*L*M},{P*L*L,H*M*L,H*L*M},{P*H*L,H*P*L,H*H*M},{M*H*L,L*P*L,L*H*M}, {M*L*H,L*M*H,L*L*P},{P*L*H,H*M*H,H*L*P},{P*H*H,H*P*H,H*H*P},{M*H*H,L*P*H,L*H*P}}, 136 {{M*L*L,H*M*L,H*L*M},{P*L*L,L*M*L,L*L*M},{P*H*L,L*P*L,L*H*M},{M*H*L,H*P*L,H*H*M}, {M*L*H,H*M*H,H*L*P},{P*L*H,L*M*H,L*L*P},{P*H*H,L*P*H,L*H*P},{M*H*H,H*P*H,H*H*P}}}; 137 static const PetscReal (*HexQInterp)[8],(*HexQDeriv)[8][3]; 138 /* Standard 2x2 Gauss quadrature for the bottom layer. */ 139 static const PetscReal QuadQInterp[4][4] = {{H*H,L*H,L*L,H*L}, 140 {L*H,H*H,H*L,L*L}, 141 {L*L,H*L,H*H,L*H}, 142 {H*L,L*L,L*H,H*H}}; 143 static const PetscReal QuadQDeriv[4][4][2] = { 144 {{M*H,M*H},{P*H,M*L},{P*L,P*L},{M*L,P*H}}, 145 {{M*H,M*L},{P*H,M*H},{P*L,P*H},{M*L,P*L}}, 146 {{M*L,M*L},{P*L,M*H},{P*H,P*H},{M*H,P*L}}, 147 {{M*L,M*H},{P*L,M*L},{P*H,P*L},{M*H,P*H}}}; 148 #undef G 149 #undef H 150 #undef L 151 #undef M 152 #undef P 153 154 #define HexExtract(x,i,j,k,n) do { \ 155 (n)[0] = (x)[i][j][k]; \ 156 (n)[1] = (x)[i+1][j][k]; \ 157 (n)[2] = (x)[i+1][j+1][k]; \ 158 (n)[3] = (x)[i][j+1][k]; \ 159 (n)[4] = (x)[i][j][k+1]; \ 160 (n)[5] = (x)[i+1][j][k+1]; \ 161 (n)[6] = (x)[i+1][j+1][k+1]; \ 162 (n)[7] = (x)[i][j+1][k+1]; \ 163 } while (0) 164 165 #define HexExtractRef(x,i,j,k,n) do { \ 166 (n)[0] = &(x)[i][j][k]; \ 167 (n)[1] = &(x)[i+1][j][k]; \ 168 (n)[2] = &(x)[i+1][j+1][k]; \ 169 (n)[3] = &(x)[i][j+1][k]; \ 170 (n)[4] = &(x)[i][j][k+1]; \ 171 (n)[5] = &(x)[i+1][j][k+1]; \ 172 (n)[6] = &(x)[i+1][j+1][k+1]; \ 173 (n)[7] = &(x)[i][j+1][k+1]; \ 174 } while (0) 175 176 #define QuadExtract(x,i,j,n) do { \ 177 (n)[0] = (x)[i][j]; \ 178 (n)[1] = (x)[i+1][j]; \ 179 (n)[2] = (x)[i+1][j+1]; \ 180 (n)[3] = (x)[i][j+1]; \ 181 } while (0) 182 183 static void HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[]) 184 { 185 PetscInt i; 186 dz[0] = dz[1] = dz[2] = 0; 187 for (i=0; i<8; i++) { 188 dz[0] += dphi[i][0] * zn[i]; 189 dz[1] += dphi[i][1] * zn[i]; 190 dz[2] += dphi[i][2] * zn[i]; 191 } 192 } 193 194 static void HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[PETSC_RESTRICT],PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscReal *PETSC_RESTRICT jw) 195 { 196 const PetscReal jac[3][3] = {{hx/2,0,0}, {0,hy/2,0}, {dz[0],dz[1],dz[2]}}; 197 const PetscReal ijac[3][3] = {{1/jac[0][0],0,0}, {0,1/jac[1][1],0}, {-jac[2][0]/(jac[0][0]*jac[2][2]),-jac[2][1]/(jac[1][1]*jac[2][2]),1/jac[2][2]}}; 198 const PetscReal jdet = jac[0][0]*jac[1][1]*jac[2][2]; 199 PetscInt i; 200 201 for (i=0; i<8; i++) { 202 const PetscReal *dphir = HexQDeriv[q][i]; 203 phi[i] = HexQInterp[q][i]; 204 dphi[i][0] = dphir[0]*ijac[0][0] + dphir[1]*ijac[1][0] + dphir[2]*ijac[2][0]; 205 dphi[i][1] = dphir[0]*ijac[0][1] + dphir[1]*ijac[1][1] + dphir[2]*ijac[2][1]; 206 dphi[i][2] = dphir[0]*ijac[0][2] + dphir[1]*ijac[1][2] + dphir[2]*ijac[2][2]; 207 } 208 *jw = 1.0 * jdet; 209 } 210 211 typedef struct _p_THI *THI; 212 typedef struct _n_Units *Units; 213 214 typedef struct { 215 PetscScalar u,v; 216 } Node; 217 218 typedef struct { 219 PetscScalar b; /* bed */ 220 PetscScalar h; /* thickness */ 221 PetscScalar beta2; /* friction */ 222 } PrmNode; 223 224 typedef struct { 225 PetscReal min,max,cmin,cmax; 226 } PRange; 227 228 typedef enum {THIASSEMBLY_TRIDIAGONAL,THIASSEMBLY_FULL} THIAssemblyMode; 229 230 struct _p_THI { 231 PETSCHEADER(int); 232 void (*initialize)(THI,PetscReal x,PetscReal y,PrmNode *p); 233 PetscInt zlevels; 234 PetscReal Lx,Ly,Lz; /* Model domain */ 235 PetscReal alpha; /* Bed angle */ 236 Units units; 237 PetscReal dirichlet_scale; 238 PetscReal ssa_friction_scale; 239 PRange eta; 240 PRange beta2; 241 struct { 242 PetscReal Bd2,eps,exponent; 243 } viscosity; 244 struct { 245 PetscReal irefgam,eps2,exponent,refvel,epsvel; 246 } friction; 247 PetscReal rhog; 248 PetscBool no_slip; 249 PetscBool tridiagonal; 250 PetscBool coarse2d; 251 PetscBool verbose; 252 MatType mattype; 253 }; 254 255 struct _n_Units { 256 /* fundamental */ 257 PetscReal meter; 258 PetscReal kilogram; 259 PetscReal second; 260 /* derived */ 261 PetscReal Pascal; 262 PetscReal year; 263 }; 264 265 static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo*,Node***,Mat,Mat,THI); 266 static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo*,Node***,Mat,Mat,THI); 267 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo*,Node**,Mat,Mat,THI); 268 269 static void PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[]) 270 { 271 const PetscScalar zm1 = zm-1, 272 znl[8] = {pn[0].b + pn[0].h*(PetscScalar)k/zm1, 273 pn[1].b + pn[1].h*(PetscScalar)k/zm1, 274 pn[2].b + pn[2].h*(PetscScalar)k/zm1, 275 pn[3].b + pn[3].h*(PetscScalar)k/zm1, 276 pn[0].b + pn[0].h*(PetscScalar)(k+1)/zm1, 277 pn[1].b + pn[1].h*(PetscScalar)(k+1)/zm1, 278 pn[2].b + pn[2].h*(PetscScalar)(k+1)/zm1, 279 pn[3].b + pn[3].h*(PetscScalar)(k+1)/zm1}; 280 PetscInt i; 281 for (i=0; i<8; i++) zn[i] = PetscRealPart(znl[i]); 282 } 283 284 /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */ 285 static void THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode *p) 286 { 287 Units units = thi->units; 288 PetscReal s = -x*PetscSinReal(thi->alpha); 289 290 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx) * PetscSinReal(y*2*PETSC_PI/thi->Ly); 291 p->h = s - p->b; 292 p->beta2 = 1e30; 293 } 294 295 static void THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode *p) 296 { 297 Units units = thi->units; 298 PetscReal s = -x*PetscSinReal(thi->alpha); 299 300 p->b = s - 1000*units->meter; 301 p->h = s - p->b; 302 /* tau_b = beta2 v is a stress (Pa) */ 303 p->beta2 = 1000 * (1 + PetscSinReal(x*2*PETSC_PI/thi->Lx)*PetscSinReal(y*2*PETSC_PI/thi->Ly)) * units->Pascal * units->year / units->meter; 304 } 305 306 /* These are just toys */ 307 308 /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */ 309 static void THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode *p) 310 { 311 Units units = thi->units; 312 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */ 313 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha); 314 p->b = s - 1000*units->meter + 500*units->meter*PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 315 p->h = s - p->b; 316 p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter; 317 } 318 319 /* Like Z, but with 200 meter cliffs */ 320 static void THIInitialize_HOM_Y(THI thi,PetscReal xx,PetscReal yy,PrmNode *p) 321 { 322 Units units = thi->units; 323 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */ 324 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha); 325 326 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 327 if (PetscRealPart(p->b) > -700*units->meter) p->b += 200*units->meter; 328 p->h = s - p->b; 329 p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter; 330 } 331 332 /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */ 333 static void THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode *p) 334 { 335 Units units = thi->units; 336 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */ 337 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha); 338 339 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 340 p->h = s - p->b; 341 p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter; 342 } 343 344 static void THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal *beta2,PetscReal *dbeta2) 345 { 346 if (thi->friction.irefgam == 0) { 347 Units units = thi->units; 348 thi->friction.irefgam = 1./(0.5*PetscSqr(thi->friction.refvel * units->meter / units->year)); 349 thi->friction.eps2 = 0.5*PetscSqr(thi->friction.epsvel * units->meter / units->year) * thi->friction.irefgam; 350 } 351 if (thi->friction.exponent == 0) { 352 *beta2 = rbeta2; 353 *dbeta2 = 0; 354 } else { 355 *beta2 = rbeta2 * PetscPowReal(thi->friction.eps2 + gam*thi->friction.irefgam,thi->friction.exponent); 356 *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam*thi->friction.irefgam) * thi->friction.irefgam; 357 } 358 } 359 360 static void THIViscosity(THI thi,PetscReal gam,PetscReal *eta,PetscReal *deta) 361 { 362 PetscReal Bd2,eps,exponent; 363 if (thi->viscosity.Bd2 == 0) { 364 Units units = thi->units; 365 const PetscReal 366 n = 3., /* Glen exponent */ 367 p = 1. + 1./n, /* for Stokes */ 368 A = 1.e-16 * PetscPowReal(units->Pascal,-n) / units->year, /* softness parameter (Pa^{-n}/s) */ 369 B = PetscPowReal(A,-1./n); /* hardness parameter */ 370 thi->viscosity.Bd2 = B/2; 371 thi->viscosity.exponent = (p-2)/2; 372 thi->viscosity.eps = 0.5*PetscSqr(1e-5 / units->year); 373 } 374 Bd2 = thi->viscosity.Bd2; 375 exponent = thi->viscosity.exponent; 376 eps = thi->viscosity.eps; 377 *eta = Bd2 * PetscPowReal(eps + gam,exponent); 378 *deta = exponent * (*eta) / (eps + gam); 379 } 380 381 static void RangeUpdate(PetscReal *min,PetscReal *max,PetscReal x) 382 { 383 if (x < *min) *min = x; 384 if (x > *max) *max = x; 385 } 386 387 static void PRangeClear(PRange *p) 388 { 389 p->cmin = p->min = 1e100; 390 p->cmax = p->max = -1e100; 391 } 392 393 static PetscErrorCode PRangeMinMax(PRange *p,PetscReal min,PetscReal max) 394 { 395 396 PetscFunctionBeginUser; 397 p->cmin = min; 398 p->cmax = max; 399 if (min < p->min) p->min = min; 400 if (max > p->max) p->max = max; 401 PetscFunctionReturn(0); 402 } 403 404 static PetscErrorCode THIDestroy(THI *thi) 405 { 406 PetscErrorCode ierr; 407 408 PetscFunctionBeginUser; 409 if (!*thi) PetscFunctionReturn(0); 410 if (--((PetscObject)(*thi))->refct > 0) {*thi = 0; PetscFunctionReturn(0);} 411 ierr = PetscFree((*thi)->units);CHKERRQ(ierr); 412 ierr = PetscFree((*thi)->mattype);CHKERRQ(ierr); 413 ierr = PetscHeaderDestroy(thi);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 static PetscErrorCode THICreate(MPI_Comm comm,THI *inthi) 418 { 419 static PetscBool registered = PETSC_FALSE; 420 THI thi; 421 Units units; 422 PetscErrorCode ierr; 423 424 PetscFunctionBeginUser; 425 *inthi = 0; 426 if (!registered) { 427 ierr = PetscClassIdRegister("Toy Hydrostatic Ice",&THI_CLASSID);CHKERRQ(ierr); 428 registered = PETSC_TRUE; 429 } 430 ierr = PetscHeaderCreate(thi,THI_CLASSID,"THI","Toy Hydrostatic Ice","",comm,THIDestroy,0);CHKERRQ(ierr); 431 432 ierr = PetscNew(&thi->units);CHKERRQ(ierr); 433 units = thi->units; 434 units->meter = 1e-2; 435 units->second = 1e-7; 436 units->kilogram = 1e-12; 437 438 ierr = PetscOptionsBegin(comm,NULL,"Scaled units options","");CHKERRQ(ierr); 439 { 440 ierr = PetscOptionsReal("-units_meter","1 meter in scaled length units","",units->meter,&units->meter,NULL);CHKERRQ(ierr); 441 ierr = PetscOptionsReal("-units_second","1 second in scaled time units","",units->second,&units->second,NULL);CHKERRQ(ierr); 442 ierr = PetscOptionsReal("-units_kilogram","1 kilogram in scaled mass units","",units->kilogram,&units->kilogram,NULL);CHKERRQ(ierr); 443 } 444 ierr = PetscOptionsEnd();CHKERRQ(ierr); 445 units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second)); 446 units->year = 31556926. * units->second; /* seconds per year */ 447 448 thi->Lx = 10.e3; 449 thi->Ly = 10.e3; 450 thi->Lz = 1000; 451 thi->dirichlet_scale = 1; 452 thi->verbose = PETSC_FALSE; 453 454 ierr = PetscOptionsBegin(comm,NULL,"Toy Hydrostatic Ice options","");CHKERRQ(ierr); 455 { 456 QuadratureType quad = QUAD_GAUSS; 457 char homexp[] = "A"; 458 char mtype[256] = MATSBAIJ; 459 PetscReal L,m = 1.0; 460 PetscBool flg; 461 L = thi->Lx; 462 ierr = PetscOptionsReal("-thi_L","Domain size (m)","",L,&L,&flg);CHKERRQ(ierr); 463 if (flg) thi->Lx = thi->Ly = L; 464 ierr = PetscOptionsReal("-thi_Lx","X Domain size (m)","",thi->Lx,&thi->Lx,NULL);CHKERRQ(ierr); 465 ierr = PetscOptionsReal("-thi_Ly","Y Domain size (m)","",thi->Ly,&thi->Ly,NULL);CHKERRQ(ierr); 466 ierr = PetscOptionsReal("-thi_Lz","Z Domain size (m)","",thi->Lz,&thi->Lz,NULL);CHKERRQ(ierr); 467 ierr = PetscOptionsString("-thi_hom","ISMIP-HOM experiment (A or C)","",homexp,homexp,sizeof(homexp),NULL);CHKERRQ(ierr); 468 switch (homexp[0] = toupper(homexp[0])) { 469 case 'A': 470 thi->initialize = THIInitialize_HOM_A; 471 thi->no_slip = PETSC_TRUE; 472 thi->alpha = 0.5; 473 break; 474 case 'C': 475 thi->initialize = THIInitialize_HOM_C; 476 thi->no_slip = PETSC_FALSE; 477 thi->alpha = 0.1; 478 break; 479 case 'X': 480 thi->initialize = THIInitialize_HOM_X; 481 thi->no_slip = PETSC_FALSE; 482 thi->alpha = 0.3; 483 break; 484 case 'Y': 485 thi->initialize = THIInitialize_HOM_Y; 486 thi->no_slip = PETSC_FALSE; 487 thi->alpha = 0.5; 488 break; 489 case 'Z': 490 thi->initialize = THIInitialize_HOM_Z; 491 thi->no_slip = PETSC_FALSE; 492 thi->alpha = 0.5; 493 break; 494 default: 495 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"HOM experiment '%c' not implemented",homexp[0]); 496 } 497 ierr = PetscOptionsEnum("-thi_quadrature","Quadrature to use for 3D elements","",QuadratureTypes,(PetscEnum)quad,(PetscEnum*)&quad,NULL);CHKERRQ(ierr); 498 switch (quad) { 499 case QUAD_GAUSS: 500 HexQInterp = HexQInterp_Gauss; 501 HexQDeriv = HexQDeriv_Gauss; 502 break; 503 case QUAD_LOBATTO: 504 HexQInterp = HexQInterp_Lobatto; 505 HexQDeriv = HexQDeriv_Lobatto; 506 break; 507 } 508 ierr = PetscOptionsReal("-thi_alpha","Bed angle (degrees)","",thi->alpha,&thi->alpha,NULL);CHKERRQ(ierr); 509 510 thi->friction.refvel = 100.; 511 thi->friction.epsvel = 1.; 512 513 ierr = PetscOptionsReal("-thi_friction_refvel","Reference velocity for sliding","",thi->friction.refvel,&thi->friction.refvel,NULL);CHKERRQ(ierr); 514 ierr = PetscOptionsReal("-thi_friction_epsvel","Regularization velocity for sliding","",thi->friction.epsvel,&thi->friction.epsvel,NULL);CHKERRQ(ierr); 515 ierr = PetscOptionsReal("-thi_friction_m","Friction exponent, 0=Coulomb, 1=Navier","",m,&m,NULL);CHKERRQ(ierr); 516 517 thi->friction.exponent = (m-1)/2; 518 519 ierr = PetscOptionsReal("-thi_dirichlet_scale","Scale Dirichlet boundary conditions by this factor","",thi->dirichlet_scale,&thi->dirichlet_scale,NULL);CHKERRQ(ierr); 520 ierr = PetscOptionsReal("-thi_ssa_friction_scale","Scale slip boundary conditions by this factor in SSA (2D) assembly","",thi->ssa_friction_scale,&thi->ssa_friction_scale,NULL);CHKERRQ(ierr); 521 ierr = PetscOptionsBool("-thi_coarse2d","Use a 2D coarse space corresponding to SSA","",thi->coarse2d,&thi->coarse2d,NULL);CHKERRQ(ierr); 522 ierr = PetscOptionsBool("-thi_tridiagonal","Assemble a tridiagonal system (column coupling only) on the finest level","",thi->tridiagonal,&thi->tridiagonal,NULL);CHKERRQ(ierr); 523 ierr = PetscOptionsFList("-thi_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)mtype,sizeof(mtype),NULL);CHKERRQ(ierr); 524 ierr = PetscStrallocpy(mtype,(char**)&thi->mattype);CHKERRQ(ierr); 525 ierr = PetscOptionsBool("-thi_verbose","Enable verbose output (like matrix sizes and statistics)","",thi->verbose,&thi->verbose,NULL);CHKERRQ(ierr); 526 } 527 ierr = PetscOptionsEnd();CHKERRQ(ierr); 528 529 /* dimensionalize */ 530 thi->Lx *= units->meter; 531 thi->Ly *= units->meter; 532 thi->Lz *= units->meter; 533 thi->alpha *= PETSC_PI / 180; 534 535 PRangeClear(&thi->eta); 536 PRangeClear(&thi->beta2); 537 538 { 539 PetscReal u = 1000*units->meter/(3e7*units->second), 540 gradu = u / (100*units->meter),eta,deta, 541 rho = 910 * units->kilogram/PetscPowReal(units->meter,3), 542 grav = 9.81 * units->meter/PetscSqr(units->second), 543 driving = rho * grav * PetscSinReal(thi->alpha) * 1000*units->meter; 544 THIViscosity(thi,0.5*gradu*gradu,&eta,&deta); 545 thi->rhog = rho * grav; 546 if (thi->verbose) { 547 ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Units: meter %8.2g second %8.2g kg %8.2g Pa %8.2g\n",(double)units->meter,(double)units->second,(double)units->kilogram,(double)units->Pascal);CHKERRQ(ierr); 548 ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n",(double)thi->Lx,(double)thi->Ly,(double)thi->Lz,(double)(rho*grav*1e3*units->meter),(double)driving);CHKERRQ(ierr); 549 ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)u,(double)gradu,(double)eta,(double)(2*eta*gradu),(double)(2*eta*gradu/driving));CHKERRQ(ierr); 550 THIViscosity(thi,0.5*PetscSqr(1e-3*gradu),&eta,&deta); 551 ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Small velocity 1m/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)(1e-3*u),(double)(1e-3*gradu),(double)eta,(double)(2*eta*1e-3*gradu),(double)(2*eta*1e-3*gradu/driving));CHKERRQ(ierr); 552 } 553 } 554 555 *inthi = thi; 556 PetscFunctionReturn(0); 557 } 558 559 static PetscErrorCode THIInitializePrm(THI thi,DM da2prm,Vec prm) 560 { 561 PrmNode **p; 562 PetscInt i,j,xs,xm,ys,ym,mx,my; 563 PetscErrorCode ierr; 564 565 PetscFunctionBeginUser; 566 ierr = DMDAGetGhostCorners(da2prm,&ys,&xs,0,&ym,&xm,0);CHKERRQ(ierr); 567 ierr = DMDAGetInfo(da2prm,0, &my,&mx,0, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr); 568 ierr = DMDAVecGetArray(da2prm,prm,&p);CHKERRQ(ierr); 569 for (i=xs; i<xs+xm; i++) { 570 for (j=ys; j<ys+ym; j++) { 571 PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my; 572 thi->initialize(thi,xx,yy,&p[i][j]); 573 } 574 } 575 ierr = DMDAVecRestoreArray(da2prm,prm,&p);CHKERRQ(ierr); 576 PetscFunctionReturn(0); 577 } 578 579 static PetscErrorCode THISetUpDM(THI thi,DM dm) 580 { 581 PetscErrorCode ierr; 582 PetscInt refinelevel,coarsenlevel,level,dim,Mx,My,Mz,mx,my,s; 583 DMDAStencilType st; 584 DM da2prm; 585 Vec X; 586 587 PetscFunctionBeginUser; 588 ierr = DMDAGetInfo(dm,&dim, &Mz,&My,&Mx, 0,&my,&mx, 0,&s,0,0,0,&st);CHKERRQ(ierr); 589 if (dim == 2) { 590 ierr = DMDAGetInfo(dm,&dim, &My,&Mx,0, &my,&mx,0, 0,&s,0,0,0,&st);CHKERRQ(ierr); 591 } 592 ierr = DMGetRefineLevel(dm,&refinelevel);CHKERRQ(ierr); 593 ierr = DMGetCoarsenLevel(dm,&coarsenlevel);CHKERRQ(ierr); 594 level = refinelevel - coarsenlevel; 595 ierr = DMDACreate2d(PetscObjectComm((PetscObject)thi),DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,My,Mx,my,mx,sizeof(PrmNode)/sizeof(PetscScalar),s,0,0,&da2prm);CHKERRQ(ierr); 596 ierr = DMSetUp(da2prm);CHKERRQ(ierr); 597 ierr = DMCreateLocalVector(da2prm,&X);CHKERRQ(ierr); 598 { 599 PetscReal Lx = thi->Lx / thi->units->meter,Ly = thi->Ly / thi->units->meter,Lz = thi->Lz / thi->units->meter; 600 if (dim == 2) { 601 ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g, num elements %D x %D (%D), size (m) %g x %g\n",level,(double)Lx,(double)Ly,Mx,My,Mx*My,(double)(Lx/Mx),(double)(Ly/My));CHKERRQ(ierr); 602 } else { 603 ierr = PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g x %8.2g, num elements %D x %D x %D (%D), size (m) %g x %g x %g\n",level,(double)Lx,(double)Ly,(double)Lz,Mx,My,Mz,Mx*My*Mz,(double)(Lx/Mx),(double)(Ly/My),(double)(1000./(Mz-1)));CHKERRQ(ierr); 604 } 605 } 606 ierr = THIInitializePrm(thi,da2prm,X);CHKERRQ(ierr); 607 if (thi->tridiagonal) { /* Reset coarse Jacobian evaluation */ 608 ierr = DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi);CHKERRQ(ierr); 609 } 610 if (thi->coarse2d) { 611 ierr = DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_2D,thi);CHKERRQ(ierr); 612 } 613 ierr = PetscObjectCompose((PetscObject)dm,"DMDA2Prm",(PetscObject)da2prm);CHKERRQ(ierr); 614 ierr = PetscObjectCompose((PetscObject)dm,"DMDA2Prm_Vec",(PetscObject)X);CHKERRQ(ierr); 615 ierr = DMDestroy(&da2prm);CHKERRQ(ierr); 616 ierr = VecDestroy(&X);CHKERRQ(ierr); 617 PetscFunctionReturn(0); 618 } 619 620 static PetscErrorCode DMCoarsenHook_THI(DM dmf,DM dmc,void *ctx) 621 { 622 THI thi = (THI)ctx; 623 PetscErrorCode ierr; 624 PetscInt rlevel,clevel; 625 626 PetscFunctionBeginUser; 627 ierr = THISetUpDM(thi,dmc);CHKERRQ(ierr); 628 ierr = DMGetRefineLevel(dmc,&rlevel);CHKERRQ(ierr); 629 ierr = DMGetCoarsenLevel(dmc,&clevel);CHKERRQ(ierr); 630 if (rlevel-clevel == 0) {ierr = DMSetMatType(dmc,MATAIJ);CHKERRQ(ierr);} 631 ierr = DMCoarsenHookAdd(dmc,DMCoarsenHook_THI,NULL,thi);CHKERRQ(ierr); 632 PetscFunctionReturn(0); 633 } 634 635 static PetscErrorCode DMRefineHook_THI(DM dmc,DM dmf,void *ctx) 636 { 637 THI thi = (THI)ctx; 638 PetscErrorCode ierr; 639 640 PetscFunctionBeginUser; 641 ierr = THISetUpDM(thi,dmf);CHKERRQ(ierr); 642 ierr = DMSetMatType(dmf,thi->mattype);CHKERRQ(ierr); 643 ierr = DMRefineHookAdd(dmf,DMRefineHook_THI,NULL,thi);CHKERRQ(ierr); 644 /* With grid sequencing, a formerly-refined DM will later be coarsened by PCSetUp_MG */ 645 ierr = DMCoarsenHookAdd(dmf,DMCoarsenHook_THI,NULL,thi);CHKERRQ(ierr); 646 PetscFunctionReturn(0); 647 } 648 649 static PetscErrorCode THIDAGetPrm(DM da,PrmNode ***prm) 650 { 651 PetscErrorCode ierr; 652 DM da2prm; 653 Vec X; 654 655 PetscFunctionBeginUser; 656 ierr = PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm);CHKERRQ(ierr); 657 if (!da2prm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA"); 658 ierr = PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X);CHKERRQ(ierr); 659 if (!X) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA"); 660 ierr = DMDAVecGetArray(da2prm,X,prm);CHKERRQ(ierr); 661 PetscFunctionReturn(0); 662 } 663 664 static PetscErrorCode THIDARestorePrm(DM da,PrmNode ***prm) 665 { 666 PetscErrorCode ierr; 667 DM da2prm; 668 Vec X; 669 670 PetscFunctionBeginUser; 671 ierr = PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm);CHKERRQ(ierr); 672 if (!da2prm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA"); 673 ierr = PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X);CHKERRQ(ierr); 674 if (!X) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA"); 675 ierr = DMDAVecRestoreArray(da2prm,X,prm);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 } 678 679 static PetscErrorCode THIInitial(SNES snes,Vec X,void *ctx) 680 { 681 THI thi; 682 PetscInt i,j,k,xs,xm,ys,ym,zs,zm,mx,my; 683 PetscReal hx,hy; 684 PrmNode **prm; 685 Node ***x; 686 PetscErrorCode ierr; 687 DM da; 688 689 PetscFunctionBeginUser; 690 ierr = SNESGetDM(snes,&da);CHKERRQ(ierr); 691 ierr = DMGetApplicationContext(da,&thi);CHKERRQ(ierr); 692 ierr = DMDAGetInfo(da,0, 0,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr); 693 ierr = DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm);CHKERRQ(ierr); 694 ierr = DMDAVecGetArray(da,X,&x);CHKERRQ(ierr); 695 ierr = THIDAGetPrm(da,&prm);CHKERRQ(ierr); 696 hx = thi->Lx / mx; 697 hy = thi->Ly / my; 698 for (i=xs; i<xs+xm; i++) { 699 for (j=ys; j<ys+ym; j++) { 700 for (k=zs; k<zs+zm; k++) { 701 const PetscScalar zm1 = zm-1, 702 drivingx = thi->rhog * (prm[i+1][j].b+prm[i+1][j].h - prm[i-1][j].b-prm[i-1][j].h) / (2*hx), 703 drivingy = thi->rhog * (prm[i][j+1].b+prm[i][j+1].h - prm[i][j-1].b-prm[i][j-1].h) / (2*hy); 704 x[i][j][k].u = 0. * drivingx * prm[i][j].h*(PetscScalar)k/zm1; 705 x[i][j][k].v = 0. * drivingy * prm[i][j].h*(PetscScalar)k/zm1; 706 } 707 } 708 } 709 ierr = DMDAVecRestoreArray(da,X,&x);CHKERRQ(ierr); 710 ierr = THIDARestorePrm(da,&prm);CHKERRQ(ierr); 711 PetscFunctionReturn(0); 712 } 713 714 static void PointwiseNonlinearity(THI thi,const Node n[PETSC_RESTRICT],const PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscScalar *PETSC_RESTRICT u,PetscScalar *PETSC_RESTRICT v,PetscScalar du[PETSC_RESTRICT],PetscScalar dv[PETSC_RESTRICT],PetscReal *eta,PetscReal *deta) 715 { 716 PetscInt l,ll; 717 PetscScalar gam; 718 719 du[0] = du[1] = du[2] = 0; 720 dv[0] = dv[1] = dv[2] = 0; 721 *u = 0; 722 *v = 0; 723 for (l=0; l<8; l++) { 724 *u += phi[l] * n[l].u; 725 *v += phi[l] * n[l].v; 726 for (ll=0; ll<3; ll++) { 727 du[ll] += dphi[l][ll] * n[l].u; 728 dv[ll] += dphi[l][ll] * n[l].v; 729 } 730 } 731 gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]) + 0.25*PetscSqr(du[2]) + 0.25*PetscSqr(dv[2]); 732 THIViscosity(thi,PetscRealPart(gam),eta,deta); 733 } 734 735 static void PointwiseNonlinearity2D(THI thi,Node n[],PetscReal phi[],PetscReal dphi[4][2],PetscScalar *u,PetscScalar *v,PetscScalar du[],PetscScalar dv[],PetscReal *eta,PetscReal *deta) 736 { 737 PetscInt l,ll; 738 PetscScalar gam; 739 740 du[0] = du[1] = 0; 741 dv[0] = dv[1] = 0; 742 *u = 0; 743 *v = 0; 744 for (l=0; l<4; l++) { 745 *u += phi[l] * n[l].u; 746 *v += phi[l] * n[l].v; 747 for (ll=0; ll<2; ll++) { 748 du[ll] += dphi[l][ll] * n[l].u; 749 dv[ll] += dphi[l][ll] * n[l].v; 750 } 751 } 752 gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]); 753 THIViscosity(thi,PetscRealPart(gam),eta,deta); 754 } 755 756 static PetscErrorCode THIFunctionLocal(DMDALocalInfo *info,Node ***x,Node ***f,THI thi) 757 { 758 PetscInt xs,ys,xm,ym,zm,i,j,k,q,l; 759 PetscReal hx,hy,etamin,etamax,beta2min,beta2max; 760 PrmNode **prm; 761 PetscErrorCode ierr; 762 763 PetscFunctionBeginUser; 764 xs = info->zs; 765 ys = info->ys; 766 xm = info->zm; 767 ym = info->ym; 768 zm = info->xm; 769 hx = thi->Lx / info->mz; 770 hy = thi->Ly / info->my; 771 772 etamin = 1e100; 773 etamax = 0; 774 beta2min = 1e100; 775 beta2max = 0; 776 777 ierr = THIDAGetPrm(info->da,&prm);CHKERRQ(ierr); 778 779 for (i=xs; i<xs+xm; i++) { 780 for (j=ys; j<ys+ym; j++) { 781 PrmNode pn[4]; 782 QuadExtract(prm,i,j,pn); 783 for (k=0; k<zm-1; k++) { 784 PetscInt ls = 0; 785 Node n[8],*fn[8]; 786 PetscReal zn[8],etabase = 0; 787 PrmHexGetZ(pn,k,zm,zn); 788 HexExtract(x,i,j,k,n); 789 HexExtractRef(f,i,j,k,fn); 790 if (thi->no_slip && k == 0) { 791 for (l=0; l<4; l++) n[l].u = n[l].v = 0; 792 /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */ 793 ls = 4; 794 } 795 for (q=0; q<8; q++) { 796 PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta; 797 PetscScalar du[3],dv[3],u,v; 798 HexGrad(HexQDeriv[q],zn,dz); 799 HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw); 800 PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta); 801 jw /= thi->rhog; /* scales residuals to be O(1) */ 802 if (q == 0) etabase = eta; 803 RangeUpdate(&etamin,&etamax,eta); 804 for (l=ls; l<8; l++) { /* test functions */ 805 const PetscReal ds[2] = {-PetscSinReal(thi->alpha),0}; 806 const PetscReal pp = phi[l],*dp = dphi[l]; 807 fn[l]->u += dp[0]*jw*eta*(4.*du[0]+2.*dv[1]) + dp[1]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*du[2] + pp*jw*thi->rhog*ds[0]; 808 fn[l]->v += dp[1]*jw*eta*(2.*du[0]+4.*dv[1]) + dp[0]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*dv[2] + pp*jw*thi->rhog*ds[1]; 809 } 810 } 811 if (k == 0) { /* we are on a bottom face */ 812 if (thi->no_slip) { 813 /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary 814 * conditions. After shenanigans above, etabase contains the effective viscosity at the closest quadrature 815 * point to the bed. We want the diagonal entry in the Dirichlet condition to have similar magnitude to the 816 * diagonal entry corresponding to the adjacent node. The fundamental scaling of the viscous part is in 817 * diagu, diagv below. This scaling is easy to recognize by considering the finite difference operator after 818 * scaling by element size. The no-slip Dirichlet condition is scaled by this factor, and also in the 819 * assembled matrix (see the similar block in THIJacobianLocal). 820 * 821 * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends 822 * (nonlinearly in general) on the neighboring interior nodes through the local viscosity. This will make 823 * a matrix-free Jacobian have extra entries in the corresponding row. We assemble only the diagonal part, 824 * so the solution will exactly satisfy the boundary condition after the first linear iteration. 825 */ 826 const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1.); 827 const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx); 828 fn[0]->u = thi->dirichlet_scale*diagu*x[i][j][k].u; 829 fn[0]->v = thi->dirichlet_scale*diagv*x[i][j][k].v; 830 } else { /* Integrate over bottom face to apply boundary condition */ 831 for (q=0; q<4; q++) { 832 const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q]; 833 PetscScalar u =0,v=0,rbeta2=0; 834 PetscReal beta2,dbeta2; 835 for (l=0; l<4; l++) { 836 u += phi[l]*n[l].u; 837 v += phi[l]*n[l].v; 838 rbeta2 += phi[l]*pn[l].beta2; 839 } 840 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2); 841 RangeUpdate(&beta2min,&beta2max,beta2); 842 for (l=0; l<4; l++) { 843 const PetscReal pp = phi[l]; 844 fn[ls+l]->u += pp*jw*beta2*u; 845 fn[ls+l]->v += pp*jw*beta2*v; 846 } 847 } 848 } 849 } 850 } 851 } 852 } 853 854 ierr = THIDARestorePrm(info->da,&prm);CHKERRQ(ierr); 855 856 ierr = PRangeMinMax(&thi->eta,etamin,etamax);CHKERRQ(ierr); 857 ierr = PRangeMinMax(&thi->beta2,beta2min,beta2max);CHKERRQ(ierr); 858 PetscFunctionReturn(0); 859 } 860 861 static PetscErrorCode THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer) 862 { 863 PetscErrorCode ierr; 864 PetscReal nrm; 865 PetscInt m; 866 PetscMPIInt rank; 867 868 PetscFunctionBeginUser; 869 ierr = MatNorm(B,NORM_FROBENIUS,&nrm);CHKERRQ(ierr); 870 ierr = MatGetSize(B,&m,0);CHKERRQ(ierr); 871 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&rank);CHKERRQ(ierr); 872 if (!rank) { 873 PetscScalar val0,val2; 874 ierr = MatGetValue(B,0,0,&val0);CHKERRQ(ierr); 875 ierr = MatGetValue(B,2,2,&val2);CHKERRQ(ierr); 876 ierr = PetscViewerASCIIPrintf(viewer,"Matrix dim %D norm %8.2e (0,0) %8.2e (2,2) %8.2e %8.2e <= eta <= %8.2e %8.2e <= beta2 <= %8.2e\n",m,(double)nrm,(double)PetscRealPart(val0),(double)PetscRealPart(val2),(double)thi->eta.cmin,(double)thi->eta.cmax,(double)thi->beta2.cmin,(double)thi->beta2.cmax);CHKERRQ(ierr); 877 } 878 PetscFunctionReturn(0); 879 } 880 881 static PetscErrorCode THISurfaceStatistics(DM da,Vec X,PetscReal *min,PetscReal *max,PetscReal *mean) 882 { 883 PetscErrorCode ierr; 884 Node ***x; 885 PetscInt i,j,xs,ys,zs,xm,ym,zm,mx,my,mz; 886 PetscReal umin = 1e100,umax=-1e100; 887 PetscScalar usum = 0.0,gusum; 888 889 PetscFunctionBeginUser; 890 *min = *max = *mean = 0; 891 ierr = DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr); 892 ierr = DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm);CHKERRQ(ierr); 893 if (zs != 0 || zm != mz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected decomposition"); 894 ierr = DMDAVecGetArray(da,X,&x);CHKERRQ(ierr); 895 for (i=xs; i<xs+xm; i++) { 896 for (j=ys; j<ys+ym; j++) { 897 PetscReal u = PetscRealPart(x[i][j][zm-1].u); 898 RangeUpdate(&umin,&umax,u); 899 usum += u; 900 } 901 } 902 ierr = DMDAVecRestoreArray(da,X,&x);CHKERRQ(ierr); 903 ierr = MPI_Allreduce(&umin,min,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)da));CHKERRQ(ierr); 904 ierr = MPI_Allreduce(&umax,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)da));CHKERRQ(ierr); 905 ierr = MPI_Allreduce(&usum,&gusum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)da));CHKERRQ(ierr); 906 *mean = PetscRealPart(gusum) / (mx*my); 907 PetscFunctionReturn(0); 908 } 909 910 static PetscErrorCode THISolveStatistics(THI thi,SNES snes,PetscInt coarsened,const char name[]) 911 { 912 MPI_Comm comm; 913 Vec X; 914 DM dm; 915 PetscErrorCode ierr; 916 917 PetscFunctionBeginUser; 918 ierr = PetscObjectGetComm((PetscObject)thi,&comm);CHKERRQ(ierr); 919 ierr = SNESGetSolution(snes,&X);CHKERRQ(ierr); 920 ierr = SNESGetDM(snes,&dm);CHKERRQ(ierr); 921 ierr = PetscPrintf(comm,"Solution statistics after solve: %s\n",name);CHKERRQ(ierr); 922 { 923 PetscInt its,lits; 924 SNESConvergedReason reason; 925 ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr); 926 ierr = SNESGetConvergedReason(snes,&reason);CHKERRQ(ierr); 927 ierr = SNESGetLinearSolveIterations(snes,&lits);CHKERRQ(ierr); 928 ierr = PetscPrintf(comm,"%s: Number of SNES iterations = %D, total linear iterations = %D\n",SNESConvergedReasons[reason],its,lits);CHKERRQ(ierr); 929 } 930 { 931 PetscReal nrm2,tmin[3]={1e100,1e100,1e100},tmax[3]={-1e100,-1e100,-1e100},min[3],max[3]; 932 PetscInt i,j,m; 933 const PetscScalar *x; 934 ierr = VecNorm(X,NORM_2,&nrm2);CHKERRQ(ierr); 935 ierr = VecGetLocalSize(X,&m);CHKERRQ(ierr); 936 ierr = VecGetArrayRead(X,&x);CHKERRQ(ierr); 937 for (i=0; i<m; i+=2) { 938 PetscReal u = PetscRealPart(x[i]),v = PetscRealPart(x[i+1]),c = PetscSqrtReal(u*u+v*v); 939 tmin[0] = PetscMin(u,tmin[0]); 940 tmin[1] = PetscMin(v,tmin[1]); 941 tmin[2] = PetscMin(c,tmin[2]); 942 tmax[0] = PetscMax(u,tmax[0]); 943 tmax[1] = PetscMax(v,tmax[1]); 944 tmax[2] = PetscMax(c,tmax[2]); 945 } 946 ierr = VecRestoreArrayRead(X,&x);CHKERRQ(ierr); 947 ierr = MPI_Allreduce(tmin,min,3,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)thi));CHKERRQ(ierr); 948 ierr = MPI_Allreduce(tmax,max,3,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)thi));CHKERRQ(ierr); 949 /* Dimensionalize to meters/year */ 950 nrm2 *= thi->units->year / thi->units->meter; 951 for (j=0; j<3; j++) { 952 min[j] *= thi->units->year / thi->units->meter; 953 max[j] *= thi->units->year / thi->units->meter; 954 } 955 if (min[0] == 0.0) min[0] = 0.0; 956 ierr = PetscPrintf(comm,"|X|_2 %g %g <= u <= %g %g <= v <= %g %g <= c <= %g \n",(double)nrm2,(double)min[0],(double)max[0],(double)min[1],(double)max[1],(double)min[2],(double)max[2]);CHKERRQ(ierr); 957 { 958 PetscReal umin,umax,umean; 959 ierr = THISurfaceStatistics(dm,X,&umin,&umax,&umean);CHKERRQ(ierr); 960 umin *= thi->units->year / thi->units->meter; 961 umax *= thi->units->year / thi->units->meter; 962 umean *= thi->units->year / thi->units->meter; 963 ierr = PetscPrintf(comm,"Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n",(double)umin,(double)umax,(double)umean);CHKERRQ(ierr); 964 } 965 /* These values stay nondimensional */ 966 ierr = PetscPrintf(comm,"Global eta range %g to %g converged range %g to %g\n",(double)thi->eta.min,(double)thi->eta.max,(double)thi->eta.cmin,(double)thi->eta.cmax);CHKERRQ(ierr); 967 ierr = PetscPrintf(comm,"Global beta2 range %g to %g converged range %g to %g\n",(double)thi->beta2.min,(double)thi->beta2.max,(double)thi->beta2.cmin,(double)thi->beta2.cmax);CHKERRQ(ierr); 968 } 969 PetscFunctionReturn(0); 970 } 971 972 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info,Node **x,Mat J,Mat B,THI thi) 973 { 974 PetscInt xs,ys,xm,ym,i,j,q,l,ll; 975 PetscReal hx,hy; 976 PrmNode **prm; 977 PetscErrorCode ierr; 978 979 PetscFunctionBeginUser; 980 xs = info->ys; 981 ys = info->xs; 982 xm = info->ym; 983 ym = info->xm; 984 hx = thi->Lx / info->my; 985 hy = thi->Ly / info->mx; 986 987 ierr = MatZeroEntries(B);CHKERRQ(ierr); 988 ierr = THIDAGetPrm(info->da,&prm);CHKERRQ(ierr); 989 990 for (i=xs; i<xs+xm; i++) { 991 for (j=ys; j<ys+ym; j++) { 992 Node n[4]; 993 PrmNode pn[4]; 994 PetscScalar Ke[4*2][4*2]; 995 QuadExtract(prm,i,j,pn); 996 QuadExtract(x,i,j,n); 997 ierr = PetscMemzero(Ke,sizeof(Ke));CHKERRQ(ierr); 998 for (q=0; q<4; q++) { 999 PetscReal phi[4],dphi[4][2],jw,eta,deta,beta2,dbeta2; 1000 PetscScalar u,v,du[2],dv[2],h = 0,rbeta2 = 0; 1001 for (l=0; l<4; l++) { 1002 phi[l] = QuadQInterp[q][l]; 1003 dphi[l][0] = QuadQDeriv[q][l][0]*2./hx; 1004 dphi[l][1] = QuadQDeriv[q][l][1]*2./hy; 1005 h += phi[l] * pn[l].h; 1006 rbeta2 += phi[l] * pn[l].beta2; 1007 } 1008 jw = 0.25*hx*hy / thi->rhog; /* rhog is only scaling */ 1009 PointwiseNonlinearity2D(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta); 1010 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2); 1011 for (l=0; l<4; l++) { 1012 const PetscReal pp = phi[l],*dp = dphi[l]; 1013 for (ll=0; ll<4; ll++) { 1014 const PetscReal ppl = phi[ll],*dpl = dphi[ll]; 1015 PetscScalar dgdu,dgdv; 1016 dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1]; 1017 dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0]; 1018 /* Picard part */ 1019 Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale; 1020 Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0]; 1021 Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1]; 1022 Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale; 1023 /* extra Newton terms */ 1024 Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*u*ppl*thi->ssa_friction_scale; 1025 Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*v*ppl*thi->ssa_friction_scale; 1026 Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*u*ppl*thi->ssa_friction_scale; 1027 Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*v*ppl*thi->ssa_friction_scale; 1028 } 1029 } 1030 } 1031 { 1032 const MatStencil rc[4] = {{0,i,j,0},{0,i+1,j,0},{0,i+1,j+1,0},{0,i,j+1,0}}; 1033 ierr = MatSetValuesBlockedStencil(B,4,rc,4,rc,&Ke[0][0],ADD_VALUES);CHKERRQ(ierr); 1034 } 1035 } 1036 } 1037 ierr = THIDARestorePrm(info->da,&prm);CHKERRQ(ierr); 1038 1039 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1040 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1041 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1042 if (thi->verbose) {ierr = THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} 1043 PetscFunctionReturn(0); 1044 } 1045 1046 static PetscErrorCode THIJacobianLocal_3D(DMDALocalInfo *info,Node ***x,Mat B,THI thi,THIAssemblyMode amode) 1047 { 1048 PetscInt xs,ys,xm,ym,zm,i,j,k,q,l,ll; 1049 PetscReal hx,hy; 1050 PrmNode **prm; 1051 PetscErrorCode ierr; 1052 1053 PetscFunctionBeginUser; 1054 xs = info->zs; 1055 ys = info->ys; 1056 xm = info->zm; 1057 ym = info->ym; 1058 zm = info->xm; 1059 hx = thi->Lx / info->mz; 1060 hy = thi->Ly / info->my; 1061 1062 ierr = MatZeroEntries(B);CHKERRQ(ierr); 1063 ierr = MatSetOption(B,MAT_SUBSET_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 1064 ierr = THIDAGetPrm(info->da,&prm);CHKERRQ(ierr); 1065 1066 for (i=xs; i<xs+xm; i++) { 1067 for (j=ys; j<ys+ym; j++) { 1068 PrmNode pn[4]; 1069 QuadExtract(prm,i,j,pn); 1070 for (k=0; k<zm-1; k++) { 1071 Node n[8]; 1072 PetscReal zn[8],etabase = 0; 1073 PetscScalar Ke[8*2][8*2]; 1074 PetscInt ls = 0; 1075 1076 PrmHexGetZ(pn,k,zm,zn); 1077 HexExtract(x,i,j,k,n); 1078 ierr = PetscMemzero(Ke,sizeof(Ke));CHKERRQ(ierr); 1079 if (thi->no_slip && k == 0) { 1080 for (l=0; l<4; l++) n[l].u = n[l].v = 0; 1081 ls = 4; 1082 } 1083 for (q=0; q<8; q++) { 1084 PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta; 1085 PetscScalar du[3],dv[3],u,v; 1086 HexGrad(HexQDeriv[q],zn,dz); 1087 HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw); 1088 PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta); 1089 jw /= thi->rhog; /* residuals are scaled by this factor */ 1090 if (q == 0) etabase = eta; 1091 for (l=ls; l<8; l++) { /* test functions */ 1092 const PetscReal *PETSC_RESTRICT dp = dphi[l]; 1093 #if USE_SSE2_KERNELS 1094 /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */ 1095 __m128d 1096 p4 = _mm_set1_pd(4),p2 = _mm_set1_pd(2),p05 = _mm_set1_pd(0.5), 1097 p42 = _mm_setr_pd(4,2),p24 = _mm_shuffle_pd(p42,p42,_MM_SHUFFLE2(0,1)), 1098 du0 = _mm_set1_pd(du[0]),du1 = _mm_set1_pd(du[1]),du2 = _mm_set1_pd(du[2]), 1099 dv0 = _mm_set1_pd(dv[0]),dv1 = _mm_set1_pd(dv[1]),dv2 = _mm_set1_pd(dv[2]), 1100 jweta = _mm_set1_pd(jw*eta),jwdeta = _mm_set1_pd(jw*deta), 1101 dp0 = _mm_set1_pd(dp[0]),dp1 = _mm_set1_pd(dp[1]),dp2 = _mm_set1_pd(dp[2]), 1102 dp0jweta = _mm_mul_pd(dp0,jweta),dp1jweta = _mm_mul_pd(dp1,jweta),dp2jweta = _mm_mul_pd(dp2,jweta), 1103 p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4,du0),_mm_mul_pd(p2,dv1)), /* 4 du0 + 2 dv1 */ 1104 p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4,dv1),_mm_mul_pd(p2,du0)), /* 4 dv1 + 2 du0 */ 1105 pdu2dv2 = _mm_unpacklo_pd(du2,dv2), /* [du2, dv2] */ 1106 du1pdv0 = _mm_add_pd(du1,dv0), /* du1 + dv0 */ 1107 t1 = _mm_mul_pd(dp0,p4du0p2dv1), /* dp0 (4 du0 + 2 dv1) */ 1108 t2 = _mm_mul_pd(dp1,p4dv1p2du0); /* dp1 (4 dv1 + 2 du0) */ 1109 1110 #endif 1111 #if defined COMPUTE_LOWER_TRIANGULAR /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */ 1112 for (ll=ls; ll<8; ll++) { /* trial functions */ 1113 #else 1114 for (ll=l; ll<8; ll++) { 1115 #endif 1116 const PetscReal *PETSC_RESTRICT dpl = dphi[ll]; 1117 if (amode == THIASSEMBLY_TRIDIAGONAL && (l-ll)%4) continue; /* these entries would not be inserted */ 1118 #if !USE_SSE2_KERNELS 1119 /* The analytic Jacobian in nice, easy-to-read form */ 1120 { 1121 PetscScalar dgdu,dgdv; 1122 dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1] + 0.5*du[2]*dpl[2]; 1123 dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0] + 0.5*dv[2]*dpl[2]; 1124 /* Picard part */ 1125 Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + dp[2]*jw*eta*dpl[2]; 1126 Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0]; 1127 Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1]; 1128 Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + dp[2]*jw*eta*dpl[2]; 1129 /* extra Newton terms */ 1130 Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*du[2]; 1131 Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*du[2]; 1132 Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*dv[2]; 1133 Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*dv[2]; 1134 } 1135 #else 1136 /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed 1137 * benefit. On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost 1138 * by 25 to 30 percent. */ 1139 { 1140 __m128d 1141 keu = _mm_loadu_pd(&Ke[l*2+0][ll*2+0]), 1142 kev = _mm_loadu_pd(&Ke[l*2+1][ll*2+0]), 1143 dpl01 = _mm_loadu_pd(&dpl[0]),dpl10 = _mm_shuffle_pd(dpl01,dpl01,_MM_SHUFFLE2(0,1)),dpl2 = _mm_set_sd(dpl[2]), 1144 t0,t3,pdgduv; 1145 keu = _mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta,p42),dpl01), 1146 _mm_add_pd(_mm_mul_pd(dp1jweta,dpl10), 1147 _mm_mul_pd(dp2jweta,dpl2)))); 1148 kev = _mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta,p24),dpl01), 1149 _mm_add_pd(_mm_mul_pd(dp0jweta,dpl10), 1150 _mm_mul_pd(dp2jweta,_mm_shuffle_pd(dpl2,dpl2,_MM_SHUFFLE2(0,1)))))); 1151 pdgduv = _mm_mul_pd(p05,_mm_add_pd(_mm_add_pd(_mm_mul_pd(p42,_mm_mul_pd(du0,dpl01)), 1152 _mm_mul_pd(p24,_mm_mul_pd(dv1,dpl01))), 1153 _mm_add_pd(_mm_mul_pd(du1pdv0,dpl10), 1154 _mm_mul_pd(pdu2dv2,_mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */ 1155 t0 = _mm_mul_pd(jwdeta,pdgduv); /* jw deta [dgdu, dgdv] */ 1156 t3 = _mm_mul_pd(t0,du1pdv0); /* t0 (du1 + dv0) */ 1157 _mm_storeu_pd(&Ke[l*2+0][ll*2+0],_mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(t1,t0), 1158 _mm_add_pd(_mm_mul_pd(dp1,t3), 1159 _mm_mul_pd(t0,_mm_mul_pd(dp2,du2)))))); 1160 _mm_storeu_pd(&Ke[l*2+1][ll*2+0],_mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(t2,t0), 1161 _mm_add_pd(_mm_mul_pd(dp0,t3), 1162 _mm_mul_pd(t0,_mm_mul_pd(dp2,dv2)))))); 1163 } 1164 #endif 1165 } 1166 } 1167 } 1168 if (k == 0) { /* on a bottom face */ 1169 if (thi->no_slip) { 1170 const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1); 1171 const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx); 1172 Ke[0][0] = thi->dirichlet_scale*diagu; 1173 Ke[1][1] = thi->dirichlet_scale*diagv; 1174 } else { 1175 for (q=0; q<4; q++) { 1176 const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q]; 1177 PetscScalar u =0,v=0,rbeta2=0; 1178 PetscReal beta2,dbeta2; 1179 for (l=0; l<4; l++) { 1180 u += phi[l]*n[l].u; 1181 v += phi[l]*n[l].v; 1182 rbeta2 += phi[l]*pn[l].beta2; 1183 } 1184 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2); 1185 for (l=0; l<4; l++) { 1186 const PetscReal pp = phi[l]; 1187 for (ll=0; ll<4; ll++) { 1188 const PetscReal ppl = phi[ll]; 1189 Ke[l*2+0][ll*2+0] += pp*jw*beta2*ppl + pp*jw*dbeta2*u*u*ppl; 1190 Ke[l*2+0][ll*2+1] += pp*jw*dbeta2*u*v*ppl; 1191 Ke[l*2+1][ll*2+0] += pp*jw*dbeta2*v*u*ppl; 1192 Ke[l*2+1][ll*2+1] += pp*jw*beta2*ppl + pp*jw*dbeta2*v*v*ppl; 1193 } 1194 } 1195 } 1196 } 1197 } 1198 { 1199 const MatStencil rc[8] = {{i,j,k,0},{i+1,j,k,0},{i+1,j+1,k,0},{i,j+1,k,0},{i,j,k+1,0},{i+1,j,k+1,0},{i+1,j+1,k+1,0},{i,j+1,k+1,0}}; 1200 if (amode == THIASSEMBLY_TRIDIAGONAL) { 1201 for (l=0; l<4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */ 1202 const PetscInt l4 = l+4; 1203 const MatStencil rcl[2] = {{rc[l].k,rc[l].j,rc[l].i,0},{rc[l4].k,rc[l4].j,rc[l4].i,0}}; 1204 #if defined COMPUTE_LOWER_TRIANGULAR 1205 const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]}, 1206 {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]}, 1207 {Ke[2*l4+0][2*l+0],Ke[2*l4+0][2*l+1],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]}, 1208 {Ke[2*l4+1][2*l+0],Ke[2*l4+1][2*l+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}}; 1209 #else 1210 /* Same as above except for the lower-left block */ 1211 const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]}, 1212 {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]}, 1213 {Ke[2*l+0][2*l4+0],Ke[2*l+1][2*l4+0],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]}, 1214 {Ke[2*l+0][2*l4+1],Ke[2*l+1][2*l4+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}}; 1215 #endif 1216 ierr = MatSetValuesBlockedStencil(B,2,rcl,2,rcl,&Kel[0][0],ADD_VALUES);CHKERRQ(ierr); 1217 } 1218 } else { 1219 #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */ 1220 for (l=0; l<8; l++) { 1221 for (ll=l+1; ll<8; ll++) { 1222 Ke[ll*2+0][l*2+0] = Ke[l*2+0][ll*2+0]; 1223 Ke[ll*2+1][l*2+0] = Ke[l*2+0][ll*2+1]; 1224 Ke[ll*2+0][l*2+1] = Ke[l*2+1][ll*2+0]; 1225 Ke[ll*2+1][l*2+1] = Ke[l*2+1][ll*2+1]; 1226 } 1227 } 1228 #endif 1229 ierr = MatSetValuesBlockedStencil(B,8,rc,8,rc,&Ke[0][0],ADD_VALUES);CHKERRQ(ierr); 1230 } 1231 } 1232 } 1233 } 1234 } 1235 ierr = THIDARestorePrm(info->da,&prm);CHKERRQ(ierr); 1236 1237 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1238 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1239 ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 1240 if (thi->verbose) {ierr = THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);} 1241 PetscFunctionReturn(0); 1242 } 1243 1244 static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi) 1245 { 1246 PetscErrorCode ierr; 1247 1248 PetscFunctionBeginUser; 1249 ierr = THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_FULL);CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi) 1254 { 1255 PetscErrorCode ierr; 1256 1257 PetscFunctionBeginUser; 1258 ierr = THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_TRIDIAGONAL);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 static PetscErrorCode DMRefineHierarchy_THI(DM dac0,PetscInt nlevels,DM hierarchy[]) 1263 { 1264 PetscErrorCode ierr; 1265 THI thi; 1266 PetscInt dim,M,N,m,n,s,dof; 1267 DM dac,daf; 1268 DMDAStencilType st; 1269 DM_DA *ddf,*ddc; 1270 1271 PetscFunctionBeginUser; 1272 ierr = PetscObjectQuery((PetscObject)dac0,"THI",(PetscObject*)&thi);CHKERRQ(ierr); 1273 if (!thi) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot refine this DMDA, missing composed THI instance"); 1274 if (nlevels > 1) { 1275 ierr = DMRefineHierarchy(dac0,nlevels-1,hierarchy);CHKERRQ(ierr); 1276 dac = hierarchy[nlevels-2]; 1277 } else { 1278 dac = dac0; 1279 } 1280 ierr = DMDAGetInfo(dac,&dim, &N,&M,0, &n,&m,0, &dof,&s,0,0,0,&st);CHKERRQ(ierr); 1281 if (dim != 2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"This function can only refine 2D DMDAs"); 1282 1283 /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */ 1284 ierr = DMDACreate3d(PetscObjectComm((PetscObject)dac),DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,thi->zlevels,N,M,1,n,m,dof,s,NULL,NULL,NULL,&daf);CHKERRQ(ierr); 1285 ierr = DMSetUp(daf);CHKERRQ(ierr); 1286 1287 daf->ops->creatematrix = dac->ops->creatematrix; 1288 daf->ops->createinterpolation = dac->ops->createinterpolation; 1289 daf->ops->getcoloring = dac->ops->getcoloring; 1290 ddf = (DM_DA*)daf->data; 1291 ddc = (DM_DA*)dac->data; 1292 ddf->interptype = ddc->interptype; 1293 1294 ierr = DMDASetFieldName(daf,0,"x-velocity");CHKERRQ(ierr); 1295 ierr = DMDASetFieldName(daf,1,"y-velocity");CHKERRQ(ierr); 1296 1297 hierarchy[nlevels-1] = daf; 1298 PetscFunctionReturn(0); 1299 } 1300 1301 static PetscErrorCode DMCreateInterpolation_DA_THI(DM dac,DM daf,Mat *A,Vec *scale) 1302 { 1303 PetscErrorCode ierr; 1304 PetscInt dim; 1305 1306 PetscFunctionBeginUser; 1307 PetscValidHeaderSpecific(dac,DM_CLASSID,1); 1308 PetscValidHeaderSpecific(daf,DM_CLASSID,2); 1309 PetscValidPointer(A,3); 1310 if (scale) PetscValidPointer(scale,4); 1311 ierr = DMDAGetInfo(daf,&dim,0,0,0,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr); 1312 if (dim == 2) { 1313 /* We are in the 2D problem and use normal DMDA interpolation */ 1314 ierr = DMCreateInterpolation(dac,daf,A,scale);CHKERRQ(ierr); 1315 } else { 1316 PetscInt i,j,k,xs,ys,zs,xm,ym,zm,mx,my,mz,rstart,cstart; 1317 Mat B; 1318 1319 ierr = DMDAGetInfo(daf,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr); 1320 ierr = DMDAGetCorners(daf,&zs,&ys,&xs,&zm,&ym,&xm);CHKERRQ(ierr); 1321 if (zs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"unexpected"); 1322 ierr = MatCreate(PetscObjectComm((PetscObject)daf),&B);CHKERRQ(ierr); 1323 ierr = MatSetSizes(B,xm*ym*zm,xm*ym,mx*my*mz,mx*my);CHKERRQ(ierr); 1324 1325 ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr); 1326 ierr = MatSeqAIJSetPreallocation(B,1,NULL);CHKERRQ(ierr); 1327 ierr = MatMPIAIJSetPreallocation(B,1,NULL,0,NULL);CHKERRQ(ierr); 1328 ierr = MatGetOwnershipRange(B,&rstart,NULL);CHKERRQ(ierr); 1329 ierr = MatGetOwnershipRangeColumn(B,&cstart,NULL);CHKERRQ(ierr); 1330 for (i=xs; i<xs+xm; i++) { 1331 for (j=ys; j<ys+ym; j++) { 1332 for (k=zs; k<zs+zm; k++) { 1333 PetscInt i2 = i*ym+j,i3 = i2*zm+k; 1334 PetscScalar val = ((k == 0 || k == mz-1) ? 0.5 : 1.) / (mz-1.); /* Integration using trapezoid rule */ 1335 ierr = MatSetValue(B,cstart+i3,rstart+i2,val,INSERT_VALUES);CHKERRQ(ierr); 1336 } 1337 } 1338 } 1339 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1340 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1341 ierr = MatCreateMAIJ(B,sizeof(Node)/sizeof(PetscScalar),A);CHKERRQ(ierr); 1342 ierr = MatDestroy(&B);CHKERRQ(ierr); 1343 } 1344 PetscFunctionReturn(0); 1345 } 1346 1347 static PetscErrorCode DMCreateMatrix_THI_Tridiagonal(DM da,Mat *J) 1348 { 1349 PetscErrorCode ierr; 1350 Mat A; 1351 PetscInt xm,ym,zm,dim,dof = 2,starts[3],dims[3]; 1352 ISLocalToGlobalMapping ltog; 1353 1354 PetscFunctionBeginUser; 1355 ierr = DMDAGetInfo(da,&dim, 0,0,0, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr); 1356 if (dim != 3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Expected DMDA to be 3D"); 1357 ierr = DMDAGetCorners(da,0,0,0,&zm,&ym,&xm);CHKERRQ(ierr); 1358 ierr = DMGetLocalToGlobalMapping(da,<og);CHKERRQ(ierr); 1359 ierr = MatCreate(PetscObjectComm((PetscObject)da),&A);CHKERRQ(ierr); 1360 ierr = MatSetSizes(A,dof*xm*ym*zm,dof*xm*ym*zm,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 1361 ierr = MatSetType(A,da->mattype);CHKERRQ(ierr); 1362 ierr = MatSetFromOptions(A);CHKERRQ(ierr); 1363 ierr = MatSeqAIJSetPreallocation(A,3*2,NULL);CHKERRQ(ierr); 1364 ierr = MatMPIAIJSetPreallocation(A,3*2,NULL,0,NULL);CHKERRQ(ierr); 1365 ierr = MatSeqBAIJSetPreallocation(A,2,3,NULL);CHKERRQ(ierr); 1366 ierr = MatMPIBAIJSetPreallocation(A,2,3,NULL,0,NULL);CHKERRQ(ierr); 1367 ierr = MatSeqSBAIJSetPreallocation(A,2,2,NULL);CHKERRQ(ierr); 1368 ierr = MatMPISBAIJSetPreallocation(A,2,2,NULL,0,NULL);CHKERRQ(ierr); 1369 ierr = MatSetLocalToGlobalMapping(A,ltog,ltog);CHKERRQ(ierr); 1370 ierr = DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);CHKERRQ(ierr); 1371 ierr = MatSetStencil(A,dim,dims,starts,dof);CHKERRQ(ierr); 1372 *J = A; 1373 PetscFunctionReturn(0); 1374 } 1375 1376 static PetscErrorCode THIDAVecView_VTK_XML(THI thi,DM da,Vec X,const char filename[]) 1377 { 1378 const PetscInt dof = 2; 1379 Units units = thi->units; 1380 MPI_Comm comm; 1381 PetscErrorCode ierr; 1382 PetscViewer viewer; 1383 PetscMPIInt rank,size,tag,nn,nmax; 1384 PetscInt mx,my,mz,r,range[6]; 1385 const PetscScalar *x; 1386 1387 PetscFunctionBeginUser; 1388 ierr = PetscObjectGetComm((PetscObject)thi,&comm);CHKERRQ(ierr); 1389 ierr = DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0);CHKERRQ(ierr); 1390 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1391 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1392 ierr = PetscViewerASCIIOpen(comm,filename,&viewer);CHKERRQ(ierr); 1393 ierr = PetscViewerASCIIPrintf(viewer,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n");CHKERRQ(ierr); 1394 ierr = PetscViewerASCIIPrintf(viewer," <StructuredGrid WholeExtent=\"%d %D %d %D %d %D\">\n",0,mz-1,0,my-1,0,mx-1);CHKERRQ(ierr); 1395 1396 ierr = DMDAGetCorners(da,range,range+1,range+2,range+3,range+4,range+5);CHKERRQ(ierr); 1397 ierr = PetscMPIIntCast(range[3]*range[4]*range[5]*dof,&nn);CHKERRQ(ierr); 1398 ierr = MPI_Reduce(&nn,&nmax,1,MPI_INT,MPI_MAX,0,comm);CHKERRQ(ierr); 1399 tag = ((PetscObject) viewer)->tag; 1400 ierr = VecGetArrayRead(X,&x);CHKERRQ(ierr); 1401 if (!rank) { 1402 PetscScalar *array; 1403 ierr = PetscMalloc1(nmax,&array);CHKERRQ(ierr); 1404 for (r=0; r<size; r++) { 1405 PetscInt i,j,k,xs,xm,ys,ym,zs,zm; 1406 const PetscScalar *ptr; 1407 MPI_Status status; 1408 if (r) { 1409 ierr = MPI_Recv(range,6,MPIU_INT,r,tag,comm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 1410 } 1411 zs = range[0];ys = range[1];xs = range[2];zm = range[3];ym = range[4];xm = range[5]; 1412 if (xm*ym*zm*dof > nmax) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen"); 1413 if (r) { 1414 ierr = MPI_Recv(array,nmax,MPIU_SCALAR,r,tag,comm,&status);CHKERRQ(ierr); 1415 ierr = MPI_Get_count(&status,MPIU_SCALAR,&nn);CHKERRQ(ierr); 1416 if (nn != xm*ym*zm*dof) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen"); 1417 ptr = array; 1418 } else ptr = x; 1419 ierr = PetscViewerASCIIPrintf(viewer," <Piece Extent=\"%D %D %D %D %D %D\">\n",zs,zs+zm-1,ys,ys+ym-1,xs,xs+xm-1);CHKERRQ(ierr); 1420 1421 ierr = PetscViewerASCIIPrintf(viewer," <Points>\n");CHKERRQ(ierr); 1422 ierr = PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n");CHKERRQ(ierr); 1423 for (i=xs; i<xs+xm; i++) { 1424 for (j=ys; j<ys+ym; j++) { 1425 for (k=zs; k<zs+zm; k++) { 1426 PrmNode p; 1427 PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my,zz; 1428 thi->initialize(thi,xx,yy,&p); 1429 zz = PetscRealPart(p.b) + PetscRealPart(p.h)*k/(mz-1); 1430 ierr = PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)xx,(double)yy,(double)zz);CHKERRQ(ierr); 1431 } 1432 } 1433 } 1434 ierr = PetscViewerASCIIPrintf(viewer," </DataArray>\n");CHKERRQ(ierr); 1435 ierr = PetscViewerASCIIPrintf(viewer," </Points>\n");CHKERRQ(ierr); 1436 1437 ierr = PetscViewerASCIIPrintf(viewer," <PointData>\n");CHKERRQ(ierr); 1438 ierr = PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n");CHKERRQ(ierr); 1439 for (i=0; i<nn; i+=dof) { 1440 ierr = PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)(PetscRealPart(ptr[i])*units->year/units->meter),(double)(PetscRealPart(ptr[i+1])*units->year/units->meter),0.0);CHKERRQ(ierr); 1441 } 1442 ierr = PetscViewerASCIIPrintf(viewer," </DataArray>\n");CHKERRQ(ierr); 1443 1444 ierr = PetscViewerASCIIPrintf(viewer," <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n");CHKERRQ(ierr); 1445 for (i=0; i<nn; i+=dof) { 1446 ierr = PetscViewerASCIIPrintf(viewer,"%D\n",r);CHKERRQ(ierr); 1447 } 1448 ierr = PetscViewerASCIIPrintf(viewer," </DataArray>\n");CHKERRQ(ierr); 1449 ierr = PetscViewerASCIIPrintf(viewer," </PointData>\n");CHKERRQ(ierr); 1450 1451 ierr = PetscViewerASCIIPrintf(viewer," </Piece>\n");CHKERRQ(ierr); 1452 } 1453 ierr = PetscFree(array);CHKERRQ(ierr); 1454 } else { 1455 ierr = MPI_Send(range,6,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 1456 ierr = MPI_Send((PetscScalar*)x,nn,MPIU_SCALAR,0,tag,comm);CHKERRQ(ierr); 1457 } 1458 ierr = VecRestoreArrayRead(X,&x);CHKERRQ(ierr); 1459 ierr = PetscViewerASCIIPrintf(viewer," </StructuredGrid>\n");CHKERRQ(ierr); 1460 ierr = PetscViewerASCIIPrintf(viewer,"</VTKFile>\n");CHKERRQ(ierr); 1461 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 1462 PetscFunctionReturn(0); 1463 } 1464 1465 int main(int argc,char *argv[]) 1466 { 1467 MPI_Comm comm; 1468 THI thi; 1469 PetscErrorCode ierr; 1470 DM da; 1471 SNES snes; 1472 1473 ierr = PetscInitialize(&argc,&argv,0,help);if (ierr) return ierr; 1474 comm = PETSC_COMM_WORLD; 1475 1476 ierr = THICreate(comm,&thi);CHKERRQ(ierr); 1477 { 1478 PetscInt M = 3,N = 3,P = 2; 1479 ierr = PetscOptionsBegin(comm,NULL,"Grid resolution options","");CHKERRQ(ierr); 1480 { 1481 ierr = PetscOptionsInt("-M","Number of elements in x-direction on coarse level","",M,&M,NULL);CHKERRQ(ierr); 1482 N = M; 1483 ierr = PetscOptionsInt("-N","Number of elements in y-direction on coarse level (if different from M)","",N,&N,NULL);CHKERRQ(ierr); 1484 if (thi->coarse2d) { 1485 ierr = PetscOptionsInt("-zlevels","Number of elements in z-direction on fine level","",thi->zlevels,&thi->zlevels,NULL);CHKERRQ(ierr); 1486 } else { 1487 ierr = PetscOptionsInt("-P","Number of elements in z-direction on coarse level","",P,&P,NULL);CHKERRQ(ierr); 1488 } 1489 } 1490 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1491 if (thi->coarse2d) { 1492 ierr = DMDACreate2d(comm,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_BOX,N,M,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,&da);CHKERRQ(ierr); 1493 ierr = DMSetFromOptions(da);CHKERRQ(ierr); 1494 ierr = DMSetUp(da);CHKERRQ(ierr); 1495 da->ops->refinehierarchy = DMRefineHierarchy_THI; 1496 da->ops->createinterpolation = DMCreateInterpolation_DA_THI; 1497 1498 ierr = PetscObjectCompose((PetscObject)da,"THI",(PetscObject)thi);CHKERRQ(ierr); 1499 } else { 1500 ierr = DMDACreate3d(comm,DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX,P,N,M,1,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,0,&da);CHKERRQ(ierr); 1501 ierr = DMSetFromOptions(da);CHKERRQ(ierr); 1502 ierr = DMSetUp(da);CHKERRQ(ierr); 1503 } 1504 ierr = DMDASetFieldName(da,0,"x-velocity");CHKERRQ(ierr); 1505 ierr = DMDASetFieldName(da,1,"y-velocity");CHKERRQ(ierr); 1506 } 1507 ierr = THISetUpDM(thi,da);CHKERRQ(ierr); 1508 if (thi->tridiagonal) da->ops->creatematrix = DMCreateMatrix_THI_Tridiagonal; 1509 1510 { /* Set the fine level matrix type if -da_refine */ 1511 PetscInt rlevel,clevel; 1512 ierr = DMGetRefineLevel(da,&rlevel);CHKERRQ(ierr); 1513 ierr = DMGetCoarsenLevel(da,&clevel);CHKERRQ(ierr); 1514 if (rlevel - clevel > 0) {ierr = DMSetMatType(da,thi->mattype);CHKERRQ(ierr);} 1515 } 1516 1517 ierr = DMDASNESSetFunctionLocal(da,ADD_VALUES,(DMDASNESFunction)THIFunctionLocal,thi);CHKERRQ(ierr); 1518 if (thi->tridiagonal) { 1519 ierr = DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Tridiagonal,thi);CHKERRQ(ierr); 1520 } else { 1521 ierr = DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi);CHKERRQ(ierr); 1522 } 1523 ierr = DMCoarsenHookAdd(da,DMCoarsenHook_THI,NULL,thi);CHKERRQ(ierr); 1524 ierr = DMRefineHookAdd(da,DMRefineHook_THI,NULL,thi);CHKERRQ(ierr); 1525 1526 ierr = DMSetApplicationContext(da,thi);CHKERRQ(ierr); 1527 1528 ierr = SNESCreate(comm,&snes);CHKERRQ(ierr); 1529 ierr = SNESSetDM(snes,da);CHKERRQ(ierr); 1530 ierr = DMDestroy(&da);CHKERRQ(ierr); 1531 ierr = SNESSetComputeInitialGuess(snes,THIInitial,NULL);CHKERRQ(ierr); 1532 ierr = SNESSetFromOptions(snes);CHKERRQ(ierr); 1533 1534 ierr = SNESSolve(snes,NULL,NULL);CHKERRQ(ierr); 1535 1536 ierr = THISolveStatistics(thi,snes,0,"Full");CHKERRQ(ierr); 1537 1538 { 1539 PetscBool flg; 1540 char filename[PETSC_MAX_PATH_LEN] = ""; 1541 ierr = PetscOptionsGetString(NULL,NULL,"-o",filename,sizeof(filename),&flg);CHKERRQ(ierr); 1542 if (flg) { 1543 Vec X; 1544 DM dm; 1545 ierr = SNESGetSolution(snes,&X);CHKERRQ(ierr); 1546 ierr = SNESGetDM(snes,&dm);CHKERRQ(ierr); 1547 ierr = THIDAVecView_VTK_XML(thi,dm,X,filename);CHKERRQ(ierr); 1548 } 1549 } 1550 1551 ierr = DMDestroy(&da);CHKERRQ(ierr); 1552 ierr = SNESDestroy(&snes);CHKERRQ(ierr); 1553 ierr = THIDestroy(&thi);CHKERRQ(ierr); 1554 ierr = PetscFinalize(); 1555 return ierr; 1556 } 1557 1558 1559 /*TEST 1560 1561 build: 1562 requires: c99 !single 1563 1564 test: 1565 args: -M 6 -P 4 -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type icc 1566 1567 test: 1568 suffix: 2 1569 nsize: 2 1570 args: -M 6 -P 4 -thi_hom z -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 6 -mg_levels_0_pc_type redundant -snes_grid_sequence 1 -mat_partitioning_type current -ksp_atol -1 1571 1572 test: 1573 suffix: 3 1574 nsize: 3 1575 args: -M 7 -P 4 -thi_hom z -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type baij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_pc_asm_type restrict -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 9 -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mat_partitioning_type current 1576 1577 test: 1578 suffix: 4 1579 nsize: 6 1580 args: -M 4 -P 2 -da_refine_hierarchy_x 1,1,3 -da_refine_hierarchy_y 2,2,1 -da_refine_hierarchy_z 2,2,1 -snes_grid_sequence 3 -ksp_converged_reason -ksp_type fgmres -ksp_rtol 1e-2 -pc_type mg -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type bjacobi -mg_levels_1_sub_pc_type cholesky -pc_mg_type multiplicative -snes_converged_reason -snes_stol 1e-12 -thi_L 80e3 -thi_alpha 0.05 -thi_friction_m 1 -thi_hom x -snes_view -mg_levels_0_pc_type redundant -mg_levels_0_ksp_type preonly -ksp_atol -1 1581 1582 test: 1583 suffix: 5 1584 nsize: 6 1585 args: -M 12 -P 5 -snes_monitor_short -ksp_converged_reason -pc_type asm -pc_asm_type restrict -dm_mat_type {{aij baij sbaij}} 1586 1587 TEST*/ 1588