1 static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\ 2 \n\ 3 Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\ 4 using multigrid. The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\ 5 to p=4/3 in a p-Laplacian). The focus is on ISMIP-HOM experiments which assume periodic\n\ 6 boundary conditions in the x- and y-directions.\n\ 7 \n\ 8 Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\ 9 can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\ 10 \n\ 11 A VTK StructuredGrid output file can be written using the option -o filename.vts\n\ 12 \n\n"; 13 14 /* 15 The equations for horizontal velocity (u,v) are 16 17 - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0 18 - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0 19 20 where 21 22 eta = B/2 (epsilon + gamma)^((p-2)/2) 23 24 is the nonlinear effective viscosity with regularization epsilon and hardness parameter B, 25 written in terms of the second invariant 26 27 gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2 28 29 The surface boundary conditions are the natural conditions. The basal boundary conditions 30 are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2. 31 32 In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1). 33 34 The discretization is Q1 finite elements, managed by a DMDA. The grid is never distorted in the 35 map (x,y) plane, but the bed and surface may be bumpy. This is handled as usual in FEM, through 36 the Jacobian of the coordinate transformation from a reference element to the physical element. 37 38 Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed 39 specially so that columns are never distributed, and are always contiguous in memory. 40 This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation, 41 and then indexing as vec[i][j][k]. The exotic coarse spaces require 2D DMDAs which are made to 42 use compatible domain decomposition relative to the 3D DMDAs. 43 44 There are two compile-time options: 45 46 NO_SSE2: 47 If the host supports SSE2, we use integration code that has been vectorized with SSE2 48 intrinsics, unless this macro is defined. The intrinsics speed up integration by about 49 30% on my architecture (P8700, gcc-4.5 snapshot). 50 51 COMPUTE_LOWER_TRIANGULAR: 52 The element matrices we assemble are lower-triangular so it is not necessary to compute 53 all entries explicitly. If this macro is defined, the lower-triangular entries are 54 computed explicitly. 55 56 */ 57 58 #if defined(PETSC_APPLE_FRAMEWORK) 59 #import <PETSc/petscsnes.h> 60 #import <PETSc/petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */ 61 #else 62 63 #include <petscsnes.h> 64 #include <petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */ 65 #endif 66 #include <ctype.h> /* toupper() */ 67 68 #if defined(__cplusplus) || defined (PETSC_HAVE_WINDOWS_COMPILERS) || defined (__PGI) 69 /* c++ cannot handle [_restrict_] notation like C does */ 70 #undef PETSC_RESTRICT 71 #define PETSC_RESTRICT 72 #endif 73 74 #if defined __SSE2__ 75 # include <emmintrin.h> 76 #endif 77 78 /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */ 79 #if !defined NO_SSE2 \ 80 && !defined PETSC_USE_COMPLEX \ 81 && !defined PETSC_USE_REAL_SINGLE \ 82 && !defined PETSC_USE_REAL___FLOAT128 \ 83 && !defined PETSC_USE_REAL___FP16 \ 84 && defined __SSE2__ 85 #define USE_SSE2_KERNELS 1 86 #else 87 #define USE_SSE2_KERNELS 0 88 #endif 89 90 static PetscClassId THI_CLASSID; 91 92 typedef enum {QUAD_GAUSS,QUAD_LOBATTO} QuadratureType; 93 static const char *QuadratureTypes[] = {"gauss","lobatto","QuadratureType","QUAD_",0}; 94 PETSC_UNUSED static const PetscReal HexQWeights[8] = {1,1,1,1,1,1,1,1}; 95 PETSC_UNUSED static const PetscReal HexQNodes[] = {-0.57735026918962573, 0.57735026918962573}; 96 #define G 0.57735026918962573 97 #define H (0.5*(1.+G)) 98 #define L (0.5*(1.-G)) 99 #define M (-0.5) 100 #define P (0.5) 101 /* Special quadrature: Lobatto in horizontal, Gauss in vertical */ 102 static const PetscReal HexQInterp_Lobatto[8][8] = {{H,0,0,0,L,0,0,0}, 103 {0,H,0,0,0,L,0,0}, 104 {0,0,H,0,0,0,L,0}, 105 {0,0,0,H,0,0,0,L}, 106 {L,0,0,0,H,0,0,0}, 107 {0,L,0,0,0,H,0,0}, 108 {0,0,L,0,0,0,H,0}, 109 {0,0,0,L,0,0,0,H}}; 110 static const PetscReal HexQDeriv_Lobatto[8][8][3] = { 111 {{M*H,M*H,M},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} ,{M*L,M*L,P},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} }, 112 {{M*H,0,0} ,{P*H,M*H,M},{0,P*H,0} ,{0,0,0} ,{M*L,0,0} ,{P*L,M*L,P},{0,P*L,0} ,{0,0,0} }, 113 {{0,0,0} ,{0,M*H,0} ,{P*H,P*H,M},{M*H,0,0} ,{0,0,0} ,{0,M*L,0} ,{P*L,P*L,P},{M*L,0,0} }, 114 {{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,M},{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,P}}, 115 {{M*L,M*L,M},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} ,{M*H,M*H,P},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} }, 116 {{M*L,0,0} ,{P*L,M*L,M},{0,P*L,0} ,{0,0,0} ,{M*H,0,0} ,{P*H,M*H,P},{0,P*H,0} ,{0,0,0} }, 117 {{0,0,0} ,{0,M*L,0} ,{P*L,P*L,M},{M*L,0,0} ,{0,0,0} ,{0,M*H,0} ,{P*H,P*H,P},{M*H,0,0} }, 118 {{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,M},{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,P}}}; 119 /* Stanndard Gauss */ 120 static const PetscReal HexQInterp_Gauss[8][8] = {{H*H*H,L*H*H,L*L*H,H*L*H, H*H*L,L*H*L,L*L*L,H*L*L}, 121 {L*H*H,H*H*H,H*L*H,L*L*H, L*H*L,H*H*L,H*L*L,L*L*L}, 122 {L*L*H,H*L*H,H*H*H,L*H*H, L*L*L,H*L*L,H*H*L,L*H*L}, 123 {H*L*H,L*L*H,L*H*H,H*H*H, H*L*L,L*L*L,L*H*L,H*H*L}, 124 {H*H*L,L*H*L,L*L*L,H*L*L, H*H*H,L*H*H,L*L*H,H*L*H}, 125 {L*H*L,H*H*L,H*L*L,L*L*L, L*H*H,H*H*H,H*L*H,L*L*H}, 126 {L*L*L,H*L*L,H*H*L,L*H*L, L*L*H,H*L*H,H*H*H,L*H*H}, 127 {H*L*L,L*L*L,L*H*L,H*H*L, H*L*H,L*L*H,L*H*H,H*H*H}}; 128 static const PetscReal HexQDeriv_Gauss[8][8][3] = { 129 {{M*H*H,H*M*H,H*H*M},{P*H*H,L*M*H,L*H*M},{P*L*H,L*P*H,L*L*M},{M*L*H,H*P*H,H*L*M}, {M*H*L,H*M*L,H*H*P},{P*H*L,L*M*L,L*H*P},{P*L*L,L*P*L,L*L*P},{M*L*L,H*P*L,H*L*P}}, 130 {{M*H*H,L*M*H,L*H*M},{P*H*H,H*M*H,H*H*M},{P*L*H,H*P*H,H*L*M},{M*L*H,L*P*H,L*L*M}, {M*H*L,L*M*L,L*H*P},{P*H*L,H*M*L,H*H*P},{P*L*L,H*P*L,H*L*P},{M*L*L,L*P*L,L*L*P}}, 131 {{M*L*H,L*M*H,L*L*M},{P*L*H,H*M*H,H*L*M},{P*H*H,H*P*H,H*H*M},{M*H*H,L*P*H,L*H*M}, {M*L*L,L*M*L,L*L*P},{P*L*L,H*M*L,H*L*P},{P*H*L,H*P*L,H*H*P},{M*H*L,L*P*L,L*H*P}}, 132 {{M*L*H,H*M*H,H*L*M},{P*L*H,L*M*H,L*L*M},{P*H*H,L*P*H,L*H*M},{M*H*H,H*P*H,H*H*M}, {M*L*L,H*M*L,H*L*P},{P*L*L,L*M*L,L*L*P},{P*H*L,L*P*L,L*H*P},{M*H*L,H*P*L,H*H*P}}, 133 {{M*H*L,H*M*L,H*H*M},{P*H*L,L*M*L,L*H*M},{P*L*L,L*P*L,L*L*M},{M*L*L,H*P*L,H*L*M}, {M*H*H,H*M*H,H*H*P},{P*H*H,L*M*H,L*H*P},{P*L*H,L*P*H,L*L*P},{M*L*H,H*P*H,H*L*P}}, 134 {{M*H*L,L*M*L,L*H*M},{P*H*L,H*M*L,H*H*M},{P*L*L,H*P*L,H*L*M},{M*L*L,L*P*L,L*L*M}, {M*H*H,L*M*H,L*H*P},{P*H*H,H*M*H,H*H*P},{P*L*H,H*P*H,H*L*P},{M*L*H,L*P*H,L*L*P}}, 135 {{M*L*L,L*M*L,L*L*M},{P*L*L,H*M*L,H*L*M},{P*H*L,H*P*L,H*H*M},{M*H*L,L*P*L,L*H*M}, {M*L*H,L*M*H,L*L*P},{P*L*H,H*M*H,H*L*P},{P*H*H,H*P*H,H*H*P},{M*H*H,L*P*H,L*H*P}}, 136 {{M*L*L,H*M*L,H*L*M},{P*L*L,L*M*L,L*L*M},{P*H*L,L*P*L,L*H*M},{M*H*L,H*P*L,H*H*M}, {M*L*H,H*M*H,H*L*P},{P*L*H,L*M*H,L*L*P},{P*H*H,L*P*H,L*H*P},{M*H*H,H*P*H,H*H*P}}}; 137 static const PetscReal (*HexQInterp)[8],(*HexQDeriv)[8][3]; 138 /* Standard 2x2 Gauss quadrature for the bottom layer. */ 139 static const PetscReal QuadQInterp[4][4] = {{H*H,L*H,L*L,H*L}, 140 {L*H,H*H,H*L,L*L}, 141 {L*L,H*L,H*H,L*H}, 142 {H*L,L*L,L*H,H*H}}; 143 static const PetscReal QuadQDeriv[4][4][2] = { 144 {{M*H,M*H},{P*H,M*L},{P*L,P*L},{M*L,P*H}}, 145 {{M*H,M*L},{P*H,M*H},{P*L,P*H},{M*L,P*L}}, 146 {{M*L,M*L},{P*L,M*H},{P*H,P*H},{M*H,P*L}}, 147 {{M*L,M*H},{P*L,M*L},{P*H,P*L},{M*H,P*H}}}; 148 #undef G 149 #undef H 150 #undef L 151 #undef M 152 #undef P 153 154 #define HexExtract(x,i,j,k,n) do { \ 155 (n)[0] = (x)[i][j][k]; \ 156 (n)[1] = (x)[i+1][j][k]; \ 157 (n)[2] = (x)[i+1][j+1][k]; \ 158 (n)[3] = (x)[i][j+1][k]; \ 159 (n)[4] = (x)[i][j][k+1]; \ 160 (n)[5] = (x)[i+1][j][k+1]; \ 161 (n)[6] = (x)[i+1][j+1][k+1]; \ 162 (n)[7] = (x)[i][j+1][k+1]; \ 163 } while (0) 164 165 #define HexExtractRef(x,i,j,k,n) do { \ 166 (n)[0] = &(x)[i][j][k]; \ 167 (n)[1] = &(x)[i+1][j][k]; \ 168 (n)[2] = &(x)[i+1][j+1][k]; \ 169 (n)[3] = &(x)[i][j+1][k]; \ 170 (n)[4] = &(x)[i][j][k+1]; \ 171 (n)[5] = &(x)[i+1][j][k+1]; \ 172 (n)[6] = &(x)[i+1][j+1][k+1]; \ 173 (n)[7] = &(x)[i][j+1][k+1]; \ 174 } while (0) 175 176 #define QuadExtract(x,i,j,n) do { \ 177 (n)[0] = (x)[i][j]; \ 178 (n)[1] = (x)[i+1][j]; \ 179 (n)[2] = (x)[i+1][j+1]; \ 180 (n)[3] = (x)[i][j+1]; \ 181 } while (0) 182 183 static void HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[]) 184 { 185 PetscInt i; 186 dz[0] = dz[1] = dz[2] = 0; 187 for (i=0; i<8; i++) { 188 dz[0] += dphi[i][0] * zn[i]; 189 dz[1] += dphi[i][1] * zn[i]; 190 dz[2] += dphi[i][2] * zn[i]; 191 } 192 } 193 194 static void HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[PETSC_RESTRICT],PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscReal *PETSC_RESTRICT jw) 195 { 196 const PetscReal jac[3][3] = {{hx/2,0,0}, {0,hy/2,0}, {dz[0],dz[1],dz[2]}}; 197 const PetscReal ijac[3][3] = {{1/jac[0][0],0,0}, {0,1/jac[1][1],0}, {-jac[2][0]/(jac[0][0]*jac[2][2]),-jac[2][1]/(jac[1][1]*jac[2][2]),1/jac[2][2]}}; 198 const PetscReal jdet = jac[0][0]*jac[1][1]*jac[2][2]; 199 PetscInt i; 200 201 for (i=0; i<8; i++) { 202 const PetscReal *dphir = HexQDeriv[q][i]; 203 phi[i] = HexQInterp[q][i]; 204 dphi[i][0] = dphir[0]*ijac[0][0] + dphir[1]*ijac[1][0] + dphir[2]*ijac[2][0]; 205 dphi[i][1] = dphir[0]*ijac[0][1] + dphir[1]*ijac[1][1] + dphir[2]*ijac[2][1]; 206 dphi[i][2] = dphir[0]*ijac[0][2] + dphir[1]*ijac[1][2] + dphir[2]*ijac[2][2]; 207 } 208 *jw = 1.0 * jdet; 209 } 210 211 typedef struct _p_THI *THI; 212 typedef struct _n_Units *Units; 213 214 typedef struct { 215 PetscScalar u,v; 216 } Node; 217 218 typedef struct { 219 PetscScalar b; /* bed */ 220 PetscScalar h; /* thickness */ 221 PetscScalar beta2; /* friction */ 222 } PrmNode; 223 224 typedef struct { 225 PetscReal min,max,cmin,cmax; 226 } PRange; 227 228 typedef enum {THIASSEMBLY_TRIDIAGONAL,THIASSEMBLY_FULL} THIAssemblyMode; 229 230 struct _p_THI { 231 PETSCHEADER(int); 232 void (*initialize)(THI,PetscReal x,PetscReal y,PrmNode *p); 233 PetscInt zlevels; 234 PetscReal Lx,Ly,Lz; /* Model domain */ 235 PetscReal alpha; /* Bed angle */ 236 Units units; 237 PetscReal dirichlet_scale; 238 PetscReal ssa_friction_scale; 239 PRange eta; 240 PRange beta2; 241 struct { 242 PetscReal Bd2,eps,exponent; 243 } viscosity; 244 struct { 245 PetscReal irefgam,eps2,exponent,refvel,epsvel; 246 } friction; 247 PetscReal rhog; 248 PetscBool no_slip; 249 PetscBool tridiagonal; 250 PetscBool coarse2d; 251 PetscBool verbose; 252 MatType mattype; 253 }; 254 255 struct _n_Units { 256 /* fundamental */ 257 PetscReal meter; 258 PetscReal kilogram; 259 PetscReal second; 260 /* derived */ 261 PetscReal Pascal; 262 PetscReal year; 263 }; 264 265 static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo*,Node***,Mat,Mat,THI); 266 static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo*,Node***,Mat,Mat,THI); 267 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo*,Node**,Mat,Mat,THI); 268 269 static void PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[]) 270 { 271 const PetscScalar zm1 = zm-1, 272 znl[8] = {pn[0].b + pn[0].h*(PetscScalar)k/zm1, 273 pn[1].b + pn[1].h*(PetscScalar)k/zm1, 274 pn[2].b + pn[2].h*(PetscScalar)k/zm1, 275 pn[3].b + pn[3].h*(PetscScalar)k/zm1, 276 pn[0].b + pn[0].h*(PetscScalar)(k+1)/zm1, 277 pn[1].b + pn[1].h*(PetscScalar)(k+1)/zm1, 278 pn[2].b + pn[2].h*(PetscScalar)(k+1)/zm1, 279 pn[3].b + pn[3].h*(PetscScalar)(k+1)/zm1}; 280 PetscInt i; 281 for (i=0; i<8; i++) zn[i] = PetscRealPart(znl[i]); 282 } 283 284 /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */ 285 static void THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode *p) 286 { 287 Units units = thi->units; 288 PetscReal s = -x*PetscSinReal(thi->alpha); 289 290 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx) * PetscSinReal(y*2*PETSC_PI/thi->Ly); 291 p->h = s - p->b; 292 p->beta2 = 1e30; 293 } 294 295 static void THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode *p) 296 { 297 Units units = thi->units; 298 PetscReal s = -x*PetscSinReal(thi->alpha); 299 300 p->b = s - 1000*units->meter; 301 p->h = s - p->b; 302 /* tau_b = beta2 v is a stress (Pa) */ 303 p->beta2 = 1000 * (1 + PetscSinReal(x*2*PETSC_PI/thi->Lx)*PetscSinReal(y*2*PETSC_PI/thi->Ly)) * units->Pascal * units->year / units->meter; 304 } 305 306 /* These are just toys */ 307 308 /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */ 309 static void THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode *p) 310 { 311 Units units = thi->units; 312 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */ 313 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha); 314 p->b = s - 1000*units->meter + 500*units->meter*PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 315 p->h = s - p->b; 316 p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter; 317 } 318 319 /* Like Z, but with 200 meter cliffs */ 320 static void THIInitialize_HOM_Y(THI thi,PetscReal xx,PetscReal yy,PrmNode *p) 321 { 322 Units units = thi->units; 323 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */ 324 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha); 325 326 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 327 if (PetscRealPart(p->b) > -700*units->meter) p->b += 200*units->meter; 328 p->h = s - p->b; 329 p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter; 330 } 331 332 /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */ 333 static void THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode *p) 334 { 335 Units units = thi->units; 336 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */ 337 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha); 338 339 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 340 p->h = s - p->b; 341 p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter; 342 } 343 344 static void THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal *beta2,PetscReal *dbeta2) 345 { 346 if (thi->friction.irefgam == 0) { 347 Units units = thi->units; 348 thi->friction.irefgam = 1./(0.5*PetscSqr(thi->friction.refvel * units->meter / units->year)); 349 thi->friction.eps2 = 0.5*PetscSqr(thi->friction.epsvel * units->meter / units->year) * thi->friction.irefgam; 350 } 351 if (thi->friction.exponent == 0) { 352 *beta2 = rbeta2; 353 *dbeta2 = 0; 354 } else { 355 *beta2 = rbeta2 * PetscPowReal(thi->friction.eps2 + gam*thi->friction.irefgam,thi->friction.exponent); 356 *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam*thi->friction.irefgam) * thi->friction.irefgam; 357 } 358 } 359 360 static void THIViscosity(THI thi,PetscReal gam,PetscReal *eta,PetscReal *deta) 361 { 362 PetscReal Bd2,eps,exponent; 363 if (thi->viscosity.Bd2 == 0) { 364 Units units = thi->units; 365 const PetscReal 366 n = 3., /* Glen exponent */ 367 p = 1. + 1./n, /* for Stokes */ 368 A = 1.e-16 * PetscPowReal(units->Pascal,-n) / units->year, /* softness parameter (Pa^{-n}/s) */ 369 B = PetscPowReal(A,-1./n); /* hardness parameter */ 370 thi->viscosity.Bd2 = B/2; 371 thi->viscosity.exponent = (p-2)/2; 372 thi->viscosity.eps = 0.5*PetscSqr(1e-5 / units->year); 373 } 374 Bd2 = thi->viscosity.Bd2; 375 exponent = thi->viscosity.exponent; 376 eps = thi->viscosity.eps; 377 *eta = Bd2 * PetscPowReal(eps + gam,exponent); 378 *deta = exponent * (*eta) / (eps + gam); 379 } 380 381 static void RangeUpdate(PetscReal *min,PetscReal *max,PetscReal x) 382 { 383 if (x < *min) *min = x; 384 if (x > *max) *max = x; 385 } 386 387 static void PRangeClear(PRange *p) 388 { 389 p->cmin = p->min = 1e100; 390 p->cmax = p->max = -1e100; 391 } 392 393 static PetscErrorCode PRangeMinMax(PRange *p,PetscReal min,PetscReal max) 394 { 395 PetscFunctionBeginUser; 396 p->cmin = min; 397 p->cmax = max; 398 if (min < p->min) p->min = min; 399 if (max > p->max) p->max = max; 400 PetscFunctionReturn(0); 401 } 402 403 static PetscErrorCode THIDestroy(THI *thi) 404 { 405 PetscFunctionBeginUser; 406 if (!*thi) PetscFunctionReturn(0); 407 if (--((PetscObject)(*thi))->refct > 0) {*thi = 0; PetscFunctionReturn(0);} 408 PetscCall(PetscFree((*thi)->units)); 409 PetscCall(PetscFree((*thi)->mattype)); 410 PetscCall(PetscHeaderDestroy(thi)); 411 PetscFunctionReturn(0); 412 } 413 414 static PetscErrorCode THICreate(MPI_Comm comm,THI *inthi) 415 { 416 static PetscBool registered = PETSC_FALSE; 417 THI thi; 418 Units units; 419 420 PetscFunctionBeginUser; 421 *inthi = 0; 422 if (!registered) { 423 PetscCall(PetscClassIdRegister("Toy Hydrostatic Ice",&THI_CLASSID)); 424 registered = PETSC_TRUE; 425 } 426 PetscCall(PetscHeaderCreate(thi,THI_CLASSID,"THI","Toy Hydrostatic Ice","",comm,THIDestroy,0)); 427 428 PetscCall(PetscNew(&thi->units)); 429 units = thi->units; 430 units->meter = 1e-2; 431 units->second = 1e-7; 432 units->kilogram = 1e-12; 433 434 PetscOptionsBegin(comm,NULL,"Scaled units options",""); 435 { 436 PetscCall(PetscOptionsReal("-units_meter","1 meter in scaled length units","",units->meter,&units->meter,NULL)); 437 PetscCall(PetscOptionsReal("-units_second","1 second in scaled time units","",units->second,&units->second,NULL)); 438 PetscCall(PetscOptionsReal("-units_kilogram","1 kilogram in scaled mass units","",units->kilogram,&units->kilogram,NULL)); 439 } 440 PetscOptionsEnd(); 441 units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second)); 442 units->year = 31556926. * units->second; /* seconds per year */ 443 444 thi->Lx = 10.e3; 445 thi->Ly = 10.e3; 446 thi->Lz = 1000; 447 thi->dirichlet_scale = 1; 448 thi->verbose = PETSC_FALSE; 449 450 PetscOptionsBegin(comm,NULL,"Toy Hydrostatic Ice options",""); 451 { 452 QuadratureType quad = QUAD_GAUSS; 453 char homexp[] = "A"; 454 char mtype[256] = MATSBAIJ; 455 PetscReal L,m = 1.0; 456 PetscBool flg; 457 L = thi->Lx; 458 PetscCall(PetscOptionsReal("-thi_L","Domain size (m)","",L,&L,&flg)); 459 if (flg) thi->Lx = thi->Ly = L; 460 PetscCall(PetscOptionsReal("-thi_Lx","X Domain size (m)","",thi->Lx,&thi->Lx,NULL)); 461 PetscCall(PetscOptionsReal("-thi_Ly","Y Domain size (m)","",thi->Ly,&thi->Ly,NULL)); 462 PetscCall(PetscOptionsReal("-thi_Lz","Z Domain size (m)","",thi->Lz,&thi->Lz,NULL)); 463 PetscCall(PetscOptionsString("-thi_hom","ISMIP-HOM experiment (A or C)","",homexp,homexp,sizeof(homexp),NULL)); 464 switch (homexp[0] = toupper(homexp[0])) { 465 case 'A': 466 thi->initialize = THIInitialize_HOM_A; 467 thi->no_slip = PETSC_TRUE; 468 thi->alpha = 0.5; 469 break; 470 case 'C': 471 thi->initialize = THIInitialize_HOM_C; 472 thi->no_slip = PETSC_FALSE; 473 thi->alpha = 0.1; 474 break; 475 case 'X': 476 thi->initialize = THIInitialize_HOM_X; 477 thi->no_slip = PETSC_FALSE; 478 thi->alpha = 0.3; 479 break; 480 case 'Y': 481 thi->initialize = THIInitialize_HOM_Y; 482 thi->no_slip = PETSC_FALSE; 483 thi->alpha = 0.5; 484 break; 485 case 'Z': 486 thi->initialize = THIInitialize_HOM_Z; 487 thi->no_slip = PETSC_FALSE; 488 thi->alpha = 0.5; 489 break; 490 default: 491 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HOM experiment '%c' not implemented",homexp[0]); 492 } 493 PetscCall(PetscOptionsEnum("-thi_quadrature","Quadrature to use for 3D elements","",QuadratureTypes,(PetscEnum)quad,(PetscEnum*)&quad,NULL)); 494 switch (quad) { 495 case QUAD_GAUSS: 496 HexQInterp = HexQInterp_Gauss; 497 HexQDeriv = HexQDeriv_Gauss; 498 break; 499 case QUAD_LOBATTO: 500 HexQInterp = HexQInterp_Lobatto; 501 HexQDeriv = HexQDeriv_Lobatto; 502 break; 503 } 504 PetscCall(PetscOptionsReal("-thi_alpha","Bed angle (degrees)","",thi->alpha,&thi->alpha,NULL)); 505 506 thi->friction.refvel = 100.; 507 thi->friction.epsvel = 1.; 508 509 PetscCall(PetscOptionsReal("-thi_friction_refvel","Reference velocity for sliding","",thi->friction.refvel,&thi->friction.refvel,NULL)); 510 PetscCall(PetscOptionsReal("-thi_friction_epsvel","Regularization velocity for sliding","",thi->friction.epsvel,&thi->friction.epsvel,NULL)); 511 PetscCall(PetscOptionsReal("-thi_friction_m","Friction exponent, 0=Coulomb, 1=Navier","",m,&m,NULL)); 512 513 thi->friction.exponent = (m-1)/2; 514 515 PetscCall(PetscOptionsReal("-thi_dirichlet_scale","Scale Dirichlet boundary conditions by this factor","",thi->dirichlet_scale,&thi->dirichlet_scale,NULL)); 516 PetscCall(PetscOptionsReal("-thi_ssa_friction_scale","Scale slip boundary conditions by this factor in SSA (2D) assembly","",thi->ssa_friction_scale,&thi->ssa_friction_scale,NULL)); 517 PetscCall(PetscOptionsBool("-thi_coarse2d","Use a 2D coarse space corresponding to SSA","",thi->coarse2d,&thi->coarse2d,NULL)); 518 PetscCall(PetscOptionsBool("-thi_tridiagonal","Assemble a tridiagonal system (column coupling only) on the finest level","",thi->tridiagonal,&thi->tridiagonal,NULL)); 519 PetscCall(PetscOptionsFList("-thi_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)mtype,sizeof(mtype),NULL)); 520 PetscCall(PetscStrallocpy(mtype,(char**)&thi->mattype)); 521 PetscCall(PetscOptionsBool("-thi_verbose","Enable verbose output (like matrix sizes and statistics)","",thi->verbose,&thi->verbose,NULL)); 522 } 523 PetscOptionsEnd(); 524 525 /* dimensionalize */ 526 thi->Lx *= units->meter; 527 thi->Ly *= units->meter; 528 thi->Lz *= units->meter; 529 thi->alpha *= PETSC_PI / 180; 530 531 PRangeClear(&thi->eta); 532 PRangeClear(&thi->beta2); 533 534 { 535 PetscReal u = 1000*units->meter/(3e7*units->second), 536 gradu = u / (100*units->meter),eta,deta, 537 rho = 910 * units->kilogram/PetscPowReal(units->meter,3), 538 grav = 9.81 * units->meter/PetscSqr(units->second), 539 driving = rho * grav * PetscSinReal(thi->alpha) * 1000*units->meter; 540 THIViscosity(thi,0.5*gradu*gradu,&eta,&deta); 541 thi->rhog = rho * grav; 542 if (thi->verbose) { 543 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Units: meter %8.2g second %8.2g kg %8.2g Pa %8.2g\n",(double)units->meter,(double)units->second,(double)units->kilogram,(double)units->Pascal)); 544 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n",(double)thi->Lx,(double)thi->Ly,(double)thi->Lz,(double)(rho*grav*1e3*units->meter),(double)driving)); 545 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)u,(double)gradu,(double)eta,(double)(2*eta*gradu),(double)(2*eta*gradu/driving))); 546 THIViscosity(thi,0.5*PetscSqr(1e-3*gradu),&eta,&deta); 547 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Small velocity 1m/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)(1e-3*u),(double)(1e-3*gradu),(double)eta,(double)(2*eta*1e-3*gradu),(double)(2*eta*1e-3*gradu/driving))); 548 } 549 } 550 551 *inthi = thi; 552 PetscFunctionReturn(0); 553 } 554 555 static PetscErrorCode THIInitializePrm(THI thi,DM da2prm,Vec prm) 556 { 557 PrmNode **p; 558 PetscInt i,j,xs,xm,ys,ym,mx,my; 559 560 PetscFunctionBeginUser; 561 PetscCall(DMDAGetGhostCorners(da2prm,&ys,&xs,0,&ym,&xm,0)); 562 PetscCall(DMDAGetInfo(da2prm,0, &my,&mx,0, 0,0,0, 0,0,0,0,0,0)); 563 PetscCall(DMDAVecGetArray(da2prm,prm,&p)); 564 for (i=xs; i<xs+xm; i++) { 565 for (j=ys; j<ys+ym; j++) { 566 PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my; 567 thi->initialize(thi,xx,yy,&p[i][j]); 568 } 569 } 570 PetscCall(DMDAVecRestoreArray(da2prm,prm,&p)); 571 PetscFunctionReturn(0); 572 } 573 574 static PetscErrorCode THISetUpDM(THI thi,DM dm) 575 { 576 PetscInt refinelevel,coarsenlevel,level,dim,Mx,My,Mz,mx,my,s; 577 DMDAStencilType st; 578 DM da2prm; 579 Vec X; 580 581 PetscFunctionBeginUser; 582 PetscCall(DMDAGetInfo(dm,&dim, &Mz,&My,&Mx, 0,&my,&mx, 0,&s,0,0,0,&st)); 583 if (dim == 2) { 584 PetscCall(DMDAGetInfo(dm,&dim, &My,&Mx,0, &my,&mx,0, 0,&s,0,0,0,&st)); 585 } 586 PetscCall(DMGetRefineLevel(dm,&refinelevel)); 587 PetscCall(DMGetCoarsenLevel(dm,&coarsenlevel)); 588 level = refinelevel - coarsenlevel; 589 PetscCall(DMDACreate2d(PetscObjectComm((PetscObject)thi),DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,My,Mx,my,mx,sizeof(PrmNode)/sizeof(PetscScalar),s,0,0,&da2prm)); 590 PetscCall(DMSetUp(da2prm)); 591 PetscCall(DMCreateLocalVector(da2prm,&X)); 592 { 593 PetscReal Lx = thi->Lx / thi->units->meter,Ly = thi->Ly / thi->units->meter,Lz = thi->Lz / thi->units->meter; 594 if (dim == 2) { 595 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g, num elements %D x %D (%D), size (m) %g x %g\n",level,(double)Lx,(double)Ly,Mx,My,Mx*My,(double)(Lx/Mx),(double)(Ly/My))); 596 } else { 597 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g x %8.2g, num elements %D x %D x %D (%D), size (m) %g x %g x %g\n",level,(double)Lx,(double)Ly,(double)Lz,Mx,My,Mz,Mx*My*Mz,(double)(Lx/Mx),(double)(Ly/My),(double)(1000./(Mz-1)))); 598 } 599 } 600 PetscCall(THIInitializePrm(thi,da2prm,X)); 601 if (thi->tridiagonal) { /* Reset coarse Jacobian evaluation */ 602 PetscCall(DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi)); 603 } 604 if (thi->coarse2d) { 605 PetscCall(DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_2D,thi)); 606 } 607 PetscCall(PetscObjectCompose((PetscObject)dm,"DMDA2Prm",(PetscObject)da2prm)); 608 PetscCall(PetscObjectCompose((PetscObject)dm,"DMDA2Prm_Vec",(PetscObject)X)); 609 PetscCall(DMDestroy(&da2prm)); 610 PetscCall(VecDestroy(&X)); 611 PetscFunctionReturn(0); 612 } 613 614 static PetscErrorCode DMCoarsenHook_THI(DM dmf,DM dmc,void *ctx) 615 { 616 THI thi = (THI)ctx; 617 PetscInt rlevel,clevel; 618 619 PetscFunctionBeginUser; 620 PetscCall(THISetUpDM(thi,dmc)); 621 PetscCall(DMGetRefineLevel(dmc,&rlevel)); 622 PetscCall(DMGetCoarsenLevel(dmc,&clevel)); 623 if (rlevel-clevel == 0) PetscCall(DMSetMatType(dmc,MATAIJ)); 624 PetscCall(DMCoarsenHookAdd(dmc,DMCoarsenHook_THI,NULL,thi)); 625 PetscFunctionReturn(0); 626 } 627 628 static PetscErrorCode DMRefineHook_THI(DM dmc,DM dmf,void *ctx) 629 { 630 THI thi = (THI)ctx; 631 632 PetscFunctionBeginUser; 633 PetscCall(THISetUpDM(thi,dmf)); 634 PetscCall(DMSetMatType(dmf,thi->mattype)); 635 PetscCall(DMRefineHookAdd(dmf,DMRefineHook_THI,NULL,thi)); 636 /* With grid sequencing, a formerly-refined DM will later be coarsened by PCSetUp_MG */ 637 PetscCall(DMCoarsenHookAdd(dmf,DMCoarsenHook_THI,NULL,thi)); 638 PetscFunctionReturn(0); 639 } 640 641 static PetscErrorCode THIDAGetPrm(DM da,PrmNode ***prm) 642 { 643 DM da2prm; 644 Vec X; 645 646 PetscFunctionBeginUser; 647 PetscCall(PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm)); 648 PetscCheck(da2prm,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA"); 649 PetscCall(PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X)); 650 PetscCheck(X,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA"); 651 PetscCall(DMDAVecGetArray(da2prm,X,prm)); 652 PetscFunctionReturn(0); 653 } 654 655 static PetscErrorCode THIDARestorePrm(DM da,PrmNode ***prm) 656 { 657 DM da2prm; 658 Vec X; 659 660 PetscFunctionBeginUser; 661 PetscCall(PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm)); 662 PetscCheck(da2prm,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA"); 663 PetscCall(PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X)); 664 PetscCheck(X,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA"); 665 PetscCall(DMDAVecRestoreArray(da2prm,X,prm)); 666 PetscFunctionReturn(0); 667 } 668 669 static PetscErrorCode THIInitial(SNES snes,Vec X,void *ctx) 670 { 671 THI thi; 672 PetscInt i,j,k,xs,xm,ys,ym,zs,zm,mx,my; 673 PetscReal hx,hy; 674 PrmNode **prm; 675 Node ***x; 676 DM da; 677 678 PetscFunctionBeginUser; 679 PetscCall(SNESGetDM(snes,&da)); 680 PetscCall(DMGetApplicationContext(da,&thi)); 681 PetscCall(DMDAGetInfo(da,0, 0,&my,&mx, 0,0,0, 0,0,0,0,0,0)); 682 PetscCall(DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm)); 683 PetscCall(DMDAVecGetArray(da,X,&x)); 684 PetscCall(THIDAGetPrm(da,&prm)); 685 hx = thi->Lx / mx; 686 hy = thi->Ly / my; 687 for (i=xs; i<xs+xm; i++) { 688 for (j=ys; j<ys+ym; j++) { 689 for (k=zs; k<zs+zm; k++) { 690 const PetscScalar zm1 = zm-1, 691 drivingx = thi->rhog * (prm[i+1][j].b+prm[i+1][j].h - prm[i-1][j].b-prm[i-1][j].h) / (2*hx), 692 drivingy = thi->rhog * (prm[i][j+1].b+prm[i][j+1].h - prm[i][j-1].b-prm[i][j-1].h) / (2*hy); 693 x[i][j][k].u = 0. * drivingx * prm[i][j].h*(PetscScalar)k/zm1; 694 x[i][j][k].v = 0. * drivingy * prm[i][j].h*(PetscScalar)k/zm1; 695 } 696 } 697 } 698 PetscCall(DMDAVecRestoreArray(da,X,&x)); 699 PetscCall(THIDARestorePrm(da,&prm)); 700 PetscFunctionReturn(0); 701 } 702 703 static void PointwiseNonlinearity(THI thi,const Node n[PETSC_RESTRICT],const PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscScalar *PETSC_RESTRICT u,PetscScalar *PETSC_RESTRICT v,PetscScalar du[PETSC_RESTRICT],PetscScalar dv[PETSC_RESTRICT],PetscReal *eta,PetscReal *deta) 704 { 705 PetscInt l,ll; 706 PetscScalar gam; 707 708 du[0] = du[1] = du[2] = 0; 709 dv[0] = dv[1] = dv[2] = 0; 710 *u = 0; 711 *v = 0; 712 for (l=0; l<8; l++) { 713 *u += phi[l] * n[l].u; 714 *v += phi[l] * n[l].v; 715 for (ll=0; ll<3; ll++) { 716 du[ll] += dphi[l][ll] * n[l].u; 717 dv[ll] += dphi[l][ll] * n[l].v; 718 } 719 } 720 gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]) + 0.25*PetscSqr(du[2]) + 0.25*PetscSqr(dv[2]); 721 THIViscosity(thi,PetscRealPart(gam),eta,deta); 722 } 723 724 static void PointwiseNonlinearity2D(THI thi,Node n[],PetscReal phi[],PetscReal dphi[4][2],PetscScalar *u,PetscScalar *v,PetscScalar du[],PetscScalar dv[],PetscReal *eta,PetscReal *deta) 725 { 726 PetscInt l,ll; 727 PetscScalar gam; 728 729 du[0] = du[1] = 0; 730 dv[0] = dv[1] = 0; 731 *u = 0; 732 *v = 0; 733 for (l=0; l<4; l++) { 734 *u += phi[l] * n[l].u; 735 *v += phi[l] * n[l].v; 736 for (ll=0; ll<2; ll++) { 737 du[ll] += dphi[l][ll] * n[l].u; 738 dv[ll] += dphi[l][ll] * n[l].v; 739 } 740 } 741 gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]); 742 THIViscosity(thi,PetscRealPart(gam),eta,deta); 743 } 744 745 static PetscErrorCode THIFunctionLocal(DMDALocalInfo *info,Node ***x,Node ***f,THI thi) 746 { 747 PetscInt xs,ys,xm,ym,zm,i,j,k,q,l; 748 PetscReal hx,hy,etamin,etamax,beta2min,beta2max; 749 PrmNode **prm; 750 751 PetscFunctionBeginUser; 752 xs = info->zs; 753 ys = info->ys; 754 xm = info->zm; 755 ym = info->ym; 756 zm = info->xm; 757 hx = thi->Lx / info->mz; 758 hy = thi->Ly / info->my; 759 760 etamin = 1e100; 761 etamax = 0; 762 beta2min = 1e100; 763 beta2max = 0; 764 765 PetscCall(THIDAGetPrm(info->da,&prm)); 766 767 for (i=xs; i<xs+xm; i++) { 768 for (j=ys; j<ys+ym; j++) { 769 PrmNode pn[4]; 770 QuadExtract(prm,i,j,pn); 771 for (k=0; k<zm-1; k++) { 772 PetscInt ls = 0; 773 Node n[8],*fn[8]; 774 PetscReal zn[8],etabase = 0; 775 PrmHexGetZ(pn,k,zm,zn); 776 HexExtract(x,i,j,k,n); 777 HexExtractRef(f,i,j,k,fn); 778 if (thi->no_slip && k == 0) { 779 for (l=0; l<4; l++) n[l].u = n[l].v = 0; 780 /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */ 781 ls = 4; 782 } 783 for (q=0; q<8; q++) { 784 PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta; 785 PetscScalar du[3],dv[3],u,v; 786 HexGrad(HexQDeriv[q],zn,dz); 787 HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw); 788 PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta); 789 jw /= thi->rhog; /* scales residuals to be O(1) */ 790 if (q == 0) etabase = eta; 791 RangeUpdate(&etamin,&etamax,eta); 792 for (l=ls; l<8; l++) { /* test functions */ 793 const PetscReal ds[2] = {-PetscSinReal(thi->alpha),0}; 794 const PetscReal pp = phi[l],*dp = dphi[l]; 795 fn[l]->u += dp[0]*jw*eta*(4.*du[0]+2.*dv[1]) + dp[1]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*du[2] + pp*jw*thi->rhog*ds[0]; 796 fn[l]->v += dp[1]*jw*eta*(2.*du[0]+4.*dv[1]) + dp[0]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*dv[2] + pp*jw*thi->rhog*ds[1]; 797 } 798 } 799 if (k == 0) { /* we are on a bottom face */ 800 if (thi->no_slip) { 801 /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary 802 * conditions. After shenanigans above, etabase contains the effective viscosity at the closest quadrature 803 * point to the bed. We want the diagonal entry in the Dirichlet condition to have similar magnitude to the 804 * diagonal entry corresponding to the adjacent node. The fundamental scaling of the viscous part is in 805 * diagu, diagv below. This scaling is easy to recognize by considering the finite difference operator after 806 * scaling by element size. The no-slip Dirichlet condition is scaled by this factor, and also in the 807 * assembled matrix (see the similar block in THIJacobianLocal). 808 * 809 * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends 810 * (nonlinearly in general) on the neighboring interior nodes through the local viscosity. This will make 811 * a matrix-free Jacobian have extra entries in the corresponding row. We assemble only the diagonal part, 812 * so the solution will exactly satisfy the boundary condition after the first linear iteration. 813 */ 814 const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1.); 815 const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx); 816 fn[0]->u = thi->dirichlet_scale*diagu*x[i][j][k].u; 817 fn[0]->v = thi->dirichlet_scale*diagv*x[i][j][k].v; 818 } else { /* Integrate over bottom face to apply boundary condition */ 819 for (q=0; q<4; q++) { 820 const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q]; 821 PetscScalar u =0,v=0,rbeta2=0; 822 PetscReal beta2,dbeta2; 823 for (l=0; l<4; l++) { 824 u += phi[l]*n[l].u; 825 v += phi[l]*n[l].v; 826 rbeta2 += phi[l]*pn[l].beta2; 827 } 828 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2); 829 RangeUpdate(&beta2min,&beta2max,beta2); 830 for (l=0; l<4; l++) { 831 const PetscReal pp = phi[l]; 832 fn[ls+l]->u += pp*jw*beta2*u; 833 fn[ls+l]->v += pp*jw*beta2*v; 834 } 835 } 836 } 837 } 838 } 839 } 840 } 841 842 PetscCall(THIDARestorePrm(info->da,&prm)); 843 844 PetscCall(PRangeMinMax(&thi->eta,etamin,etamax)); 845 PetscCall(PRangeMinMax(&thi->beta2,beta2min,beta2max)); 846 PetscFunctionReturn(0); 847 } 848 849 static PetscErrorCode THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer) 850 { 851 PetscReal nrm; 852 PetscInt m; 853 PetscMPIInt rank; 854 855 PetscFunctionBeginUser; 856 PetscCall(MatNorm(B,NORM_FROBENIUS,&nrm)); 857 PetscCall(MatGetSize(B,&m,0)); 858 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&rank)); 859 if (rank == 0) { 860 PetscScalar val0,val2; 861 PetscCall(MatGetValue(B,0,0,&val0)); 862 PetscCall(MatGetValue(B,2,2,&val2)); 863 PetscCall(PetscViewerASCIIPrintf(viewer,"Matrix dim %D norm %8.2e (0,0) %8.2e (2,2) %8.2e %8.2e <= eta <= %8.2e %8.2e <= beta2 <= %8.2e\n",m,(double)nrm,(double)PetscRealPart(val0),(double)PetscRealPart(val2),(double)thi->eta.cmin,(double)thi->eta.cmax,(double)thi->beta2.cmin,(double)thi->beta2.cmax)); 864 } 865 PetscFunctionReturn(0); 866 } 867 868 static PetscErrorCode THISurfaceStatistics(DM da,Vec X,PetscReal *min,PetscReal *max,PetscReal *mean) 869 { 870 Node ***x; 871 PetscInt i,j,xs,ys,zs,xm,ym,zm,mx,my,mz; 872 PetscReal umin = 1e100,umax=-1e100; 873 PetscScalar usum = 0.0,gusum; 874 875 PetscFunctionBeginUser; 876 *min = *max = *mean = 0; 877 PetscCall(DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0)); 878 PetscCall(DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm)); 879 PetscCheck(zs == 0 && zm == mz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected decomposition"); 880 PetscCall(DMDAVecGetArray(da,X,&x)); 881 for (i=xs; i<xs+xm; i++) { 882 for (j=ys; j<ys+ym; j++) { 883 PetscReal u = PetscRealPart(x[i][j][zm-1].u); 884 RangeUpdate(&umin,&umax,u); 885 usum += u; 886 } 887 } 888 PetscCall(DMDAVecRestoreArray(da,X,&x)); 889 PetscCallMPI(MPI_Allreduce(&umin,min,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)da))); 890 PetscCallMPI(MPI_Allreduce(&umax,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)da))); 891 PetscCallMPI(MPI_Allreduce(&usum,&gusum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)da))); 892 *mean = PetscRealPart(gusum) / (mx*my); 893 PetscFunctionReturn(0); 894 } 895 896 static PetscErrorCode THISolveStatistics(THI thi,SNES snes,PetscInt coarsened,const char name[]) 897 { 898 MPI_Comm comm; 899 Vec X; 900 DM dm; 901 902 PetscFunctionBeginUser; 903 PetscCall(PetscObjectGetComm((PetscObject)thi,&comm)); 904 PetscCall(SNESGetSolution(snes,&X)); 905 PetscCall(SNESGetDM(snes,&dm)); 906 PetscCall(PetscPrintf(comm,"Solution statistics after solve: %s\n",name)); 907 { 908 PetscInt its,lits; 909 SNESConvergedReason reason; 910 PetscCall(SNESGetIterationNumber(snes,&its)); 911 PetscCall(SNESGetConvergedReason(snes,&reason)); 912 PetscCall(SNESGetLinearSolveIterations(snes,&lits)); 913 PetscCall(PetscPrintf(comm,"%s: Number of SNES iterations = %D, total linear iterations = %D\n",SNESConvergedReasons[reason],its,lits)); 914 } 915 { 916 PetscReal nrm2,tmin[3]={1e100,1e100,1e100},tmax[3]={-1e100,-1e100,-1e100},min[3],max[3]; 917 PetscInt i,j,m; 918 const PetscScalar *x; 919 PetscCall(VecNorm(X,NORM_2,&nrm2)); 920 PetscCall(VecGetLocalSize(X,&m)); 921 PetscCall(VecGetArrayRead(X,&x)); 922 for (i=0; i<m; i+=2) { 923 PetscReal u = PetscRealPart(x[i]),v = PetscRealPart(x[i+1]),c = PetscSqrtReal(u*u+v*v); 924 tmin[0] = PetscMin(u,tmin[0]); 925 tmin[1] = PetscMin(v,tmin[1]); 926 tmin[2] = PetscMin(c,tmin[2]); 927 tmax[0] = PetscMax(u,tmax[0]); 928 tmax[1] = PetscMax(v,tmax[1]); 929 tmax[2] = PetscMax(c,tmax[2]); 930 } 931 PetscCall(VecRestoreArrayRead(X,&x)); 932 PetscCallMPI(MPI_Allreduce(tmin,min,3,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)thi))); 933 PetscCallMPI(MPI_Allreduce(tmax,max,3,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)thi))); 934 /* Dimensionalize to meters/year */ 935 nrm2 *= thi->units->year / thi->units->meter; 936 for (j=0; j<3; j++) { 937 min[j] *= thi->units->year / thi->units->meter; 938 max[j] *= thi->units->year / thi->units->meter; 939 } 940 if (min[0] == 0.0) min[0] = 0.0; 941 PetscCall(PetscPrintf(comm,"|X|_2 %g %g <= u <= %g %g <= v <= %g %g <= c <= %g \n",(double)nrm2,(double)min[0],(double)max[0],(double)min[1],(double)max[1],(double)min[2],(double)max[2])); 942 { 943 PetscReal umin,umax,umean; 944 PetscCall(THISurfaceStatistics(dm,X,&umin,&umax,&umean)); 945 umin *= thi->units->year / thi->units->meter; 946 umax *= thi->units->year / thi->units->meter; 947 umean *= thi->units->year / thi->units->meter; 948 PetscCall(PetscPrintf(comm,"Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n",(double)umin,(double)umax,(double)umean)); 949 } 950 /* These values stay nondimensional */ 951 PetscCall(PetscPrintf(comm,"Global eta range %g to %g converged range %g to %g\n",(double)thi->eta.min,(double)thi->eta.max,(double)thi->eta.cmin,(double)thi->eta.cmax)); 952 PetscCall(PetscPrintf(comm,"Global beta2 range %g to %g converged range %g to %g\n",(double)thi->beta2.min,(double)thi->beta2.max,(double)thi->beta2.cmin,(double)thi->beta2.cmax)); 953 } 954 PetscFunctionReturn(0); 955 } 956 957 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info,Node **x,Mat J,Mat B,THI thi) 958 { 959 PetscInt xs,ys,xm,ym,i,j,q,l,ll; 960 PetscReal hx,hy; 961 PrmNode **prm; 962 963 PetscFunctionBeginUser; 964 xs = info->ys; 965 ys = info->xs; 966 xm = info->ym; 967 ym = info->xm; 968 hx = thi->Lx / info->my; 969 hy = thi->Ly / info->mx; 970 971 PetscCall(MatZeroEntries(B)); 972 PetscCall(THIDAGetPrm(info->da,&prm)); 973 974 for (i=xs; i<xs+xm; i++) { 975 for (j=ys; j<ys+ym; j++) { 976 Node n[4]; 977 PrmNode pn[4]; 978 PetscScalar Ke[4*2][4*2]; 979 QuadExtract(prm,i,j,pn); 980 QuadExtract(x,i,j,n); 981 PetscCall(PetscMemzero(Ke,sizeof(Ke))); 982 for (q=0; q<4; q++) { 983 PetscReal phi[4],dphi[4][2],jw,eta,deta,beta2,dbeta2; 984 PetscScalar u,v,du[2],dv[2],h = 0,rbeta2 = 0; 985 for (l=0; l<4; l++) { 986 phi[l] = QuadQInterp[q][l]; 987 dphi[l][0] = QuadQDeriv[q][l][0]*2./hx; 988 dphi[l][1] = QuadQDeriv[q][l][1]*2./hy; 989 h += phi[l] * pn[l].h; 990 rbeta2 += phi[l] * pn[l].beta2; 991 } 992 jw = 0.25*hx*hy / thi->rhog; /* rhog is only scaling */ 993 PointwiseNonlinearity2D(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta); 994 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2); 995 for (l=0; l<4; l++) { 996 const PetscReal pp = phi[l],*dp = dphi[l]; 997 for (ll=0; ll<4; ll++) { 998 const PetscReal ppl = phi[ll],*dpl = dphi[ll]; 999 PetscScalar dgdu,dgdv; 1000 dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1]; 1001 dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0]; 1002 /* Picard part */ 1003 Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale; 1004 Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0]; 1005 Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1]; 1006 Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale; 1007 /* extra Newton terms */ 1008 Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*u*ppl*thi->ssa_friction_scale; 1009 Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*v*ppl*thi->ssa_friction_scale; 1010 Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*u*ppl*thi->ssa_friction_scale; 1011 Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*v*ppl*thi->ssa_friction_scale; 1012 } 1013 } 1014 } 1015 { 1016 const MatStencil rc[4] = {{0,i,j,0},{0,i+1,j,0},{0,i+1,j+1,0},{0,i,j+1,0}}; 1017 PetscCall(MatSetValuesBlockedStencil(B,4,rc,4,rc,&Ke[0][0],ADD_VALUES)); 1018 } 1019 } 1020 } 1021 PetscCall(THIDARestorePrm(info->da,&prm)); 1022 1023 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1024 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1025 PetscCall(MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE)); 1026 if (thi->verbose) PetscCall(THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD)); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 static PetscErrorCode THIJacobianLocal_3D(DMDALocalInfo *info,Node ***x,Mat B,THI thi,THIAssemblyMode amode) 1031 { 1032 PetscInt xs,ys,xm,ym,zm,i,j,k,q,l,ll; 1033 PetscReal hx,hy; 1034 PrmNode **prm; 1035 1036 PetscFunctionBeginUser; 1037 xs = info->zs; 1038 ys = info->ys; 1039 xm = info->zm; 1040 ym = info->ym; 1041 zm = info->xm; 1042 hx = thi->Lx / info->mz; 1043 hy = thi->Ly / info->my; 1044 1045 PetscCall(MatZeroEntries(B)); 1046 PetscCall(MatSetOption(B,MAT_SUBSET_OFF_PROC_ENTRIES,PETSC_TRUE)); 1047 PetscCall(THIDAGetPrm(info->da,&prm)); 1048 1049 for (i=xs; i<xs+xm; i++) { 1050 for (j=ys; j<ys+ym; j++) { 1051 PrmNode pn[4]; 1052 QuadExtract(prm,i,j,pn); 1053 for (k=0; k<zm-1; k++) { 1054 Node n[8]; 1055 PetscReal zn[8],etabase = 0; 1056 PetscScalar Ke[8*2][8*2]; 1057 PetscInt ls = 0; 1058 1059 PrmHexGetZ(pn,k,zm,zn); 1060 HexExtract(x,i,j,k,n); 1061 PetscCall(PetscMemzero(Ke,sizeof(Ke))); 1062 if (thi->no_slip && k == 0) { 1063 for (l=0; l<4; l++) n[l].u = n[l].v = 0; 1064 ls = 4; 1065 } 1066 for (q=0; q<8; q++) { 1067 PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta; 1068 PetscScalar du[3],dv[3],u,v; 1069 HexGrad(HexQDeriv[q],zn,dz); 1070 HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw); 1071 PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta); 1072 jw /= thi->rhog; /* residuals are scaled by this factor */ 1073 if (q == 0) etabase = eta; 1074 for (l=ls; l<8; l++) { /* test functions */ 1075 const PetscReal *PETSC_RESTRICT dp = dphi[l]; 1076 #if USE_SSE2_KERNELS 1077 /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */ 1078 __m128d 1079 p4 = _mm_set1_pd(4),p2 = _mm_set1_pd(2),p05 = _mm_set1_pd(0.5), 1080 p42 = _mm_setr_pd(4,2),p24 = _mm_shuffle_pd(p42,p42,_MM_SHUFFLE2(0,1)), 1081 du0 = _mm_set1_pd(du[0]),du1 = _mm_set1_pd(du[1]),du2 = _mm_set1_pd(du[2]), 1082 dv0 = _mm_set1_pd(dv[0]),dv1 = _mm_set1_pd(dv[1]),dv2 = _mm_set1_pd(dv[2]), 1083 jweta = _mm_set1_pd(jw*eta),jwdeta = _mm_set1_pd(jw*deta), 1084 dp0 = _mm_set1_pd(dp[0]),dp1 = _mm_set1_pd(dp[1]),dp2 = _mm_set1_pd(dp[2]), 1085 dp0jweta = _mm_mul_pd(dp0,jweta),dp1jweta = _mm_mul_pd(dp1,jweta),dp2jweta = _mm_mul_pd(dp2,jweta), 1086 p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4,du0),_mm_mul_pd(p2,dv1)), /* 4 du0 + 2 dv1 */ 1087 p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4,dv1),_mm_mul_pd(p2,du0)), /* 4 dv1 + 2 du0 */ 1088 pdu2dv2 = _mm_unpacklo_pd(du2,dv2), /* [du2, dv2] */ 1089 du1pdv0 = _mm_add_pd(du1,dv0), /* du1 + dv0 */ 1090 t1 = _mm_mul_pd(dp0,p4du0p2dv1), /* dp0 (4 du0 + 2 dv1) */ 1091 t2 = _mm_mul_pd(dp1,p4dv1p2du0); /* dp1 (4 dv1 + 2 du0) */ 1092 1093 #endif 1094 #if defined COMPUTE_LOWER_TRIANGULAR /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */ 1095 for (ll=ls; ll<8; ll++) { /* trial functions */ 1096 #else 1097 for (ll=l; ll<8; ll++) { 1098 #endif 1099 const PetscReal *PETSC_RESTRICT dpl = dphi[ll]; 1100 if (amode == THIASSEMBLY_TRIDIAGONAL && (l-ll)%4) continue; /* these entries would not be inserted */ 1101 #if !USE_SSE2_KERNELS 1102 /* The analytic Jacobian in nice, easy-to-read form */ 1103 { 1104 PetscScalar dgdu,dgdv; 1105 dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1] + 0.5*du[2]*dpl[2]; 1106 dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0] + 0.5*dv[2]*dpl[2]; 1107 /* Picard part */ 1108 Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + dp[2]*jw*eta*dpl[2]; 1109 Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0]; 1110 Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1]; 1111 Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + dp[2]*jw*eta*dpl[2]; 1112 /* extra Newton terms */ 1113 Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*du[2]; 1114 Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*du[2]; 1115 Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*dv[2]; 1116 Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*dv[2]; 1117 } 1118 #else 1119 /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed 1120 * benefit. On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost 1121 * by 25 to 30 percent. */ 1122 { 1123 __m128d 1124 keu = _mm_loadu_pd(&Ke[l*2+0][ll*2+0]), 1125 kev = _mm_loadu_pd(&Ke[l*2+1][ll*2+0]), 1126 dpl01 = _mm_loadu_pd(&dpl[0]),dpl10 = _mm_shuffle_pd(dpl01,dpl01,_MM_SHUFFLE2(0,1)),dpl2 = _mm_set_sd(dpl[2]), 1127 t0,t3,pdgduv; 1128 keu = _mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta,p42),dpl01), 1129 _mm_add_pd(_mm_mul_pd(dp1jweta,dpl10), 1130 _mm_mul_pd(dp2jweta,dpl2)))); 1131 kev = _mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta,p24),dpl01), 1132 _mm_add_pd(_mm_mul_pd(dp0jweta,dpl10), 1133 _mm_mul_pd(dp2jweta,_mm_shuffle_pd(dpl2,dpl2,_MM_SHUFFLE2(0,1)))))); 1134 pdgduv = _mm_mul_pd(p05,_mm_add_pd(_mm_add_pd(_mm_mul_pd(p42,_mm_mul_pd(du0,dpl01)), 1135 _mm_mul_pd(p24,_mm_mul_pd(dv1,dpl01))), 1136 _mm_add_pd(_mm_mul_pd(du1pdv0,dpl10), 1137 _mm_mul_pd(pdu2dv2,_mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */ 1138 t0 = _mm_mul_pd(jwdeta,pdgduv); /* jw deta [dgdu, dgdv] */ 1139 t3 = _mm_mul_pd(t0,du1pdv0); /* t0 (du1 + dv0) */ 1140 _mm_storeu_pd(&Ke[l*2+0][ll*2+0],_mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(t1,t0), 1141 _mm_add_pd(_mm_mul_pd(dp1,t3), 1142 _mm_mul_pd(t0,_mm_mul_pd(dp2,du2)))))); 1143 _mm_storeu_pd(&Ke[l*2+1][ll*2+0],_mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(t2,t0), 1144 _mm_add_pd(_mm_mul_pd(dp0,t3), 1145 _mm_mul_pd(t0,_mm_mul_pd(dp2,dv2)))))); 1146 } 1147 #endif 1148 } 1149 } 1150 } 1151 if (k == 0) { /* on a bottom face */ 1152 if (thi->no_slip) { 1153 const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1); 1154 const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx); 1155 Ke[0][0] = thi->dirichlet_scale*diagu; 1156 Ke[1][1] = thi->dirichlet_scale*diagv; 1157 } else { 1158 for (q=0; q<4; q++) { 1159 const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q]; 1160 PetscScalar u =0,v=0,rbeta2=0; 1161 PetscReal beta2,dbeta2; 1162 for (l=0; l<4; l++) { 1163 u += phi[l]*n[l].u; 1164 v += phi[l]*n[l].v; 1165 rbeta2 += phi[l]*pn[l].beta2; 1166 } 1167 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2); 1168 for (l=0; l<4; l++) { 1169 const PetscReal pp = phi[l]; 1170 for (ll=0; ll<4; ll++) { 1171 const PetscReal ppl = phi[ll]; 1172 Ke[l*2+0][ll*2+0] += pp*jw*beta2*ppl + pp*jw*dbeta2*u*u*ppl; 1173 Ke[l*2+0][ll*2+1] += pp*jw*dbeta2*u*v*ppl; 1174 Ke[l*2+1][ll*2+0] += pp*jw*dbeta2*v*u*ppl; 1175 Ke[l*2+1][ll*2+1] += pp*jw*beta2*ppl + pp*jw*dbeta2*v*v*ppl; 1176 } 1177 } 1178 } 1179 } 1180 } 1181 { 1182 const MatStencil rc[8] = {{i,j,k,0},{i+1,j,k,0},{i+1,j+1,k,0},{i,j+1,k,0},{i,j,k+1,0},{i+1,j,k+1,0},{i+1,j+1,k+1,0},{i,j+1,k+1,0}}; 1183 if (amode == THIASSEMBLY_TRIDIAGONAL) { 1184 for (l=0; l<4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */ 1185 const PetscInt l4 = l+4; 1186 const MatStencil rcl[2] = {{rc[l].k,rc[l].j,rc[l].i,0},{rc[l4].k,rc[l4].j,rc[l4].i,0}}; 1187 #if defined COMPUTE_LOWER_TRIANGULAR 1188 const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]}, 1189 {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]}, 1190 {Ke[2*l4+0][2*l+0],Ke[2*l4+0][2*l+1],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]}, 1191 {Ke[2*l4+1][2*l+0],Ke[2*l4+1][2*l+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}}; 1192 #else 1193 /* Same as above except for the lower-left block */ 1194 const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]}, 1195 {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]}, 1196 {Ke[2*l+0][2*l4+0],Ke[2*l+1][2*l4+0],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]}, 1197 {Ke[2*l+0][2*l4+1],Ke[2*l+1][2*l4+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}}; 1198 #endif 1199 PetscCall(MatSetValuesBlockedStencil(B,2,rcl,2,rcl,&Kel[0][0],ADD_VALUES)); 1200 } 1201 } else { 1202 #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */ 1203 for (l=0; l<8; l++) { 1204 for (ll=l+1; ll<8; ll++) { 1205 Ke[ll*2+0][l*2+0] = Ke[l*2+0][ll*2+0]; 1206 Ke[ll*2+1][l*2+0] = Ke[l*2+0][ll*2+1]; 1207 Ke[ll*2+0][l*2+1] = Ke[l*2+1][ll*2+0]; 1208 Ke[ll*2+1][l*2+1] = Ke[l*2+1][ll*2+1]; 1209 } 1210 } 1211 #endif 1212 PetscCall(MatSetValuesBlockedStencil(B,8,rc,8,rc,&Ke[0][0],ADD_VALUES)); 1213 } 1214 } 1215 } 1216 } 1217 } 1218 PetscCall(THIDARestorePrm(info->da,&prm)); 1219 1220 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1221 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1222 PetscCall(MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE)); 1223 if (thi->verbose) PetscCall(THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD)); 1224 PetscFunctionReturn(0); 1225 } 1226 1227 static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi) 1228 { 1229 PetscFunctionBeginUser; 1230 PetscCall(THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_FULL)); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi) 1235 { 1236 PetscFunctionBeginUser; 1237 PetscCall(THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_TRIDIAGONAL)); 1238 PetscFunctionReturn(0); 1239 } 1240 1241 static PetscErrorCode DMRefineHierarchy_THI(DM dac0,PetscInt nlevels,DM hierarchy[]) 1242 { 1243 THI thi; 1244 PetscInt dim,M,N,m,n,s,dof; 1245 DM dac,daf; 1246 DMDAStencilType st; 1247 DM_DA *ddf,*ddc; 1248 1249 PetscFunctionBeginUser; 1250 PetscCall(PetscObjectQuery((PetscObject)dac0,"THI",(PetscObject*)&thi)); 1251 PetscCheck(thi,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot refine this DMDA, missing composed THI instance"); 1252 if (nlevels > 1) { 1253 PetscCall(DMRefineHierarchy(dac0,nlevels-1,hierarchy)); 1254 dac = hierarchy[nlevels-2]; 1255 } else { 1256 dac = dac0; 1257 } 1258 PetscCall(DMDAGetInfo(dac,&dim, &N,&M,0, &n,&m,0, &dof,&s,0,0,0,&st)); 1259 PetscCheck(dim == 2,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"This function can only refine 2D DMDAs"); 1260 1261 /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */ 1262 PetscCall(DMDACreate3d(PetscObjectComm((PetscObject)dac),DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,thi->zlevels,N,M,1,n,m,dof,s,NULL,NULL,NULL,&daf)); 1263 PetscCall(DMSetUp(daf)); 1264 1265 daf->ops->creatematrix = dac->ops->creatematrix; 1266 daf->ops->createinterpolation = dac->ops->createinterpolation; 1267 daf->ops->getcoloring = dac->ops->getcoloring; 1268 ddf = (DM_DA*)daf->data; 1269 ddc = (DM_DA*)dac->data; 1270 ddf->interptype = ddc->interptype; 1271 1272 PetscCall(DMDASetFieldName(daf,0,"x-velocity")); 1273 PetscCall(DMDASetFieldName(daf,1,"y-velocity")); 1274 1275 hierarchy[nlevels-1] = daf; 1276 PetscFunctionReturn(0); 1277 } 1278 1279 static PetscErrorCode DMCreateInterpolation_DA_THI(DM dac,DM daf,Mat *A,Vec *scale) 1280 { 1281 PetscInt dim; 1282 1283 PetscFunctionBeginUser; 1284 PetscValidHeaderSpecific(dac,DM_CLASSID,1); 1285 PetscValidHeaderSpecific(daf,DM_CLASSID,2); 1286 PetscValidPointer(A,3); 1287 if (scale) PetscValidPointer(scale,4); 1288 PetscCall(DMDAGetInfo(daf,&dim,0,0,0,0,0,0,0,0,0,0,0,0)); 1289 if (dim == 2) { 1290 /* We are in the 2D problem and use normal DMDA interpolation */ 1291 PetscCall(DMCreateInterpolation(dac,daf,A,scale)); 1292 } else { 1293 PetscInt i,j,k,xs,ys,zs,xm,ym,zm,mx,my,mz,rstart,cstart; 1294 Mat B; 1295 1296 PetscCall(DMDAGetInfo(daf,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0)); 1297 PetscCall(DMDAGetCorners(daf,&zs,&ys,&xs,&zm,&ym,&xm)); 1298 PetscCheck(!zs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"unexpected"); 1299 PetscCall(MatCreate(PetscObjectComm((PetscObject)daf),&B)); 1300 PetscCall(MatSetSizes(B,xm*ym*zm,xm*ym,mx*my*mz,mx*my)); 1301 1302 PetscCall(MatSetType(B,MATAIJ)); 1303 PetscCall(MatSeqAIJSetPreallocation(B,1,NULL)); 1304 PetscCall(MatMPIAIJSetPreallocation(B,1,NULL,0,NULL)); 1305 PetscCall(MatGetOwnershipRange(B,&rstart,NULL)); 1306 PetscCall(MatGetOwnershipRangeColumn(B,&cstart,NULL)); 1307 for (i=xs; i<xs+xm; i++) { 1308 for (j=ys; j<ys+ym; j++) { 1309 for (k=zs; k<zs+zm; k++) { 1310 PetscInt i2 = i*ym+j,i3 = i2*zm+k; 1311 PetscScalar val = ((k == 0 || k == mz-1) ? 0.5 : 1.) / (mz-1.); /* Integration using trapezoid rule */ 1312 PetscCall(MatSetValue(B,cstart+i3,rstart+i2,val,INSERT_VALUES)); 1313 } 1314 } 1315 } 1316 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1317 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1318 PetscCall(MatCreateMAIJ(B,sizeof(Node)/sizeof(PetscScalar),A)); 1319 PetscCall(MatDestroy(&B)); 1320 } 1321 PetscFunctionReturn(0); 1322 } 1323 1324 static PetscErrorCode DMCreateMatrix_THI_Tridiagonal(DM da,Mat *J) 1325 { 1326 Mat A; 1327 PetscInt xm,ym,zm,dim,dof = 2,starts[3],dims[3]; 1328 ISLocalToGlobalMapping ltog; 1329 1330 PetscFunctionBeginUser; 1331 PetscCall(DMDAGetInfo(da,&dim, 0,0,0, 0,0,0, 0,0,0,0,0,0)); 1332 PetscCheck(dim == 3,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Expected DMDA to be 3D"); 1333 PetscCall(DMDAGetCorners(da,0,0,0,&zm,&ym,&xm)); 1334 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 1335 PetscCall(MatCreate(PetscObjectComm((PetscObject)da),&A)); 1336 PetscCall(MatSetSizes(A,dof*xm*ym*zm,dof*xm*ym*zm,PETSC_DETERMINE,PETSC_DETERMINE)); 1337 PetscCall(MatSetType(A,da->mattype)); 1338 PetscCall(MatSetFromOptions(A)); 1339 PetscCall(MatSeqAIJSetPreallocation(A,3*2,NULL)); 1340 PetscCall(MatMPIAIJSetPreallocation(A,3*2,NULL,0,NULL)); 1341 PetscCall(MatSeqBAIJSetPreallocation(A,2,3,NULL)); 1342 PetscCall(MatMPIBAIJSetPreallocation(A,2,3,NULL,0,NULL)); 1343 PetscCall(MatSeqSBAIJSetPreallocation(A,2,2,NULL)); 1344 PetscCall(MatMPISBAIJSetPreallocation(A,2,2,NULL,0,NULL)); 1345 PetscCall(MatSetLocalToGlobalMapping(A,ltog,ltog)); 1346 PetscCall(DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2])); 1347 PetscCall(MatSetStencil(A,dim,dims,starts,dof)); 1348 *J = A; 1349 PetscFunctionReturn(0); 1350 } 1351 1352 static PetscErrorCode THIDAVecView_VTK_XML(THI thi,DM da,Vec X,const char filename[]) 1353 { 1354 const PetscInt dof = 2; 1355 Units units = thi->units; 1356 MPI_Comm comm; 1357 PetscViewer viewer; 1358 PetscMPIInt rank,size,tag,nn,nmax; 1359 PetscInt mx,my,mz,r,range[6]; 1360 const PetscScalar *x; 1361 1362 PetscFunctionBeginUser; 1363 PetscCall(PetscObjectGetComm((PetscObject)thi,&comm)); 1364 PetscCall(DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0)); 1365 PetscCallMPI(MPI_Comm_size(comm,&size)); 1366 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 1367 PetscCall(PetscViewerASCIIOpen(comm,filename,&viewer)); 1368 PetscCall(PetscViewerASCIIPrintf(viewer,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n")); 1369 PetscCall(PetscViewerASCIIPrintf(viewer," <StructuredGrid WholeExtent=\"%d %D %d %D %d %D\">\n",0,mz-1,0,my-1,0,mx-1)); 1370 1371 PetscCall(DMDAGetCorners(da,range,range+1,range+2,range+3,range+4,range+5)); 1372 PetscCall(PetscMPIIntCast(range[3]*range[4]*range[5]*dof,&nn)); 1373 PetscCallMPI(MPI_Reduce(&nn,&nmax,1,MPI_INT,MPI_MAX,0,comm)); 1374 tag = ((PetscObject) viewer)->tag; 1375 PetscCall(VecGetArrayRead(X,&x)); 1376 if (rank == 0) { 1377 PetscScalar *array; 1378 PetscCall(PetscMalloc1(nmax,&array)); 1379 for (r=0; r<size; r++) { 1380 PetscInt i,j,k,xs,xm,ys,ym,zs,zm; 1381 const PetscScalar *ptr; 1382 MPI_Status status; 1383 if (r) { 1384 PetscCallMPI(MPI_Recv(range,6,MPIU_INT,r,tag,comm,MPI_STATUS_IGNORE)); 1385 } 1386 zs = range[0];ys = range[1];xs = range[2];zm = range[3];ym = range[4];xm = range[5]; 1387 PetscCheck(xm*ym*zm*dof <= nmax,PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen"); 1388 if (r) { 1389 PetscCallMPI(MPI_Recv(array,nmax,MPIU_SCALAR,r,tag,comm,&status)); 1390 PetscCallMPI(MPI_Get_count(&status,MPIU_SCALAR,&nn)); 1391 PetscCheck(nn == xm*ym*zm*dof,PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen"); 1392 ptr = array; 1393 } else ptr = x; 1394 PetscCall(PetscViewerASCIIPrintf(viewer," <Piece Extent=\"%D %D %D %D %D %D\">\n",zs,zs+zm-1,ys,ys+ym-1,xs,xs+xm-1)); 1395 1396 PetscCall(PetscViewerASCIIPrintf(viewer," <Points>\n")); 1397 PetscCall(PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n")); 1398 for (i=xs; i<xs+xm; i++) { 1399 for (j=ys; j<ys+ym; j++) { 1400 for (k=zs; k<zs+zm; k++) { 1401 PrmNode p; 1402 PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my,zz; 1403 thi->initialize(thi,xx,yy,&p); 1404 zz = PetscRealPart(p.b) + PetscRealPart(p.h)*k/(mz-1); 1405 PetscCall(PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)xx,(double)yy,(double)zz)); 1406 } 1407 } 1408 } 1409 PetscCall(PetscViewerASCIIPrintf(viewer," </DataArray>\n")); 1410 PetscCall(PetscViewerASCIIPrintf(viewer," </Points>\n")); 1411 1412 PetscCall(PetscViewerASCIIPrintf(viewer," <PointData>\n")); 1413 PetscCall(PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n")); 1414 for (i=0; i<nn; i+=dof) { 1415 PetscCall(PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)(PetscRealPart(ptr[i])*units->year/units->meter),(double)(PetscRealPart(ptr[i+1])*units->year/units->meter),0.0)); 1416 } 1417 PetscCall(PetscViewerASCIIPrintf(viewer," </DataArray>\n")); 1418 1419 PetscCall(PetscViewerASCIIPrintf(viewer," <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n")); 1420 for (i=0; i<nn; i+=dof) { 1421 PetscCall(PetscViewerASCIIPrintf(viewer,"%D\n",r)); 1422 } 1423 PetscCall(PetscViewerASCIIPrintf(viewer," </DataArray>\n")); 1424 PetscCall(PetscViewerASCIIPrintf(viewer," </PointData>\n")); 1425 1426 PetscCall(PetscViewerASCIIPrintf(viewer," </Piece>\n")); 1427 } 1428 PetscCall(PetscFree(array)); 1429 } else { 1430 PetscCallMPI(MPI_Send(range,6,MPIU_INT,0,tag,comm)); 1431 PetscCallMPI(MPI_Send((PetscScalar*)x,nn,MPIU_SCALAR,0,tag,comm)); 1432 } 1433 PetscCall(VecRestoreArrayRead(X,&x)); 1434 PetscCall(PetscViewerASCIIPrintf(viewer," </StructuredGrid>\n")); 1435 PetscCall(PetscViewerASCIIPrintf(viewer,"</VTKFile>\n")); 1436 PetscCall(PetscViewerDestroy(&viewer)); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 int main(int argc,char *argv[]) 1441 { 1442 MPI_Comm comm; 1443 THI thi; 1444 DM da; 1445 SNES snes; 1446 1447 PetscCall(PetscInitialize(&argc,&argv,0,help)); 1448 comm = PETSC_COMM_WORLD; 1449 1450 PetscCall(THICreate(comm,&thi)); 1451 { 1452 PetscInt M = 3,N = 3,P = 2; 1453 PetscOptionsBegin(comm,NULL,"Grid resolution options",""); 1454 { 1455 PetscCall(PetscOptionsInt("-M","Number of elements in x-direction on coarse level","",M,&M,NULL)); 1456 N = M; 1457 PetscCall(PetscOptionsInt("-N","Number of elements in y-direction on coarse level (if different from M)","",N,&N,NULL)); 1458 if (thi->coarse2d) { 1459 PetscCall(PetscOptionsInt("-zlevels","Number of elements in z-direction on fine level","",thi->zlevels,&thi->zlevels,NULL)); 1460 } else { 1461 PetscCall(PetscOptionsInt("-P","Number of elements in z-direction on coarse level","",P,&P,NULL)); 1462 } 1463 } 1464 PetscOptionsEnd(); 1465 if (thi->coarse2d) { 1466 PetscCall(DMDACreate2d(comm,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_BOX,N,M,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,&da)); 1467 PetscCall(DMSetFromOptions(da)); 1468 PetscCall(DMSetUp(da)); 1469 da->ops->refinehierarchy = DMRefineHierarchy_THI; 1470 da->ops->createinterpolation = DMCreateInterpolation_DA_THI; 1471 1472 PetscCall(PetscObjectCompose((PetscObject)da,"THI",(PetscObject)thi)); 1473 } else { 1474 PetscCall(DMDACreate3d(comm,DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX,P,N,M,1,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,0,&da)); 1475 PetscCall(DMSetFromOptions(da)); 1476 PetscCall(DMSetUp(da)); 1477 } 1478 PetscCall(DMDASetFieldName(da,0,"x-velocity")); 1479 PetscCall(DMDASetFieldName(da,1,"y-velocity")); 1480 } 1481 PetscCall(THISetUpDM(thi,da)); 1482 if (thi->tridiagonal) da->ops->creatematrix = DMCreateMatrix_THI_Tridiagonal; 1483 1484 { /* Set the fine level matrix type if -da_refine */ 1485 PetscInt rlevel,clevel; 1486 PetscCall(DMGetRefineLevel(da,&rlevel)); 1487 PetscCall(DMGetCoarsenLevel(da,&clevel)); 1488 if (rlevel - clevel > 0) PetscCall(DMSetMatType(da,thi->mattype)); 1489 } 1490 1491 PetscCall(DMDASNESSetFunctionLocal(da,ADD_VALUES,(DMDASNESFunction)THIFunctionLocal,thi)); 1492 if (thi->tridiagonal) { 1493 PetscCall(DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Tridiagonal,thi)); 1494 } else { 1495 PetscCall(DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi)); 1496 } 1497 PetscCall(DMCoarsenHookAdd(da,DMCoarsenHook_THI,NULL,thi)); 1498 PetscCall(DMRefineHookAdd(da,DMRefineHook_THI,NULL,thi)); 1499 1500 PetscCall(DMSetApplicationContext(da,thi)); 1501 1502 PetscCall(SNESCreate(comm,&snes)); 1503 PetscCall(SNESSetDM(snes,da)); 1504 PetscCall(DMDestroy(&da)); 1505 PetscCall(SNESSetComputeInitialGuess(snes,THIInitial,NULL)); 1506 PetscCall(SNESSetFromOptions(snes)); 1507 1508 PetscCall(SNESSolve(snes,NULL,NULL)); 1509 1510 PetscCall(THISolveStatistics(thi,snes,0,"Full")); 1511 1512 { 1513 PetscBool flg; 1514 char filename[PETSC_MAX_PATH_LEN] = ""; 1515 PetscCall(PetscOptionsGetString(NULL,NULL,"-o",filename,sizeof(filename),&flg)); 1516 if (flg) { 1517 Vec X; 1518 DM dm; 1519 PetscCall(SNESGetSolution(snes,&X)); 1520 PetscCall(SNESGetDM(snes,&dm)); 1521 PetscCall(THIDAVecView_VTK_XML(thi,dm,X,filename)); 1522 } 1523 } 1524 1525 PetscCall(DMDestroy(&da)); 1526 PetscCall(SNESDestroy(&snes)); 1527 PetscCall(THIDestroy(&thi)); 1528 PetscCall(PetscFinalize()); 1529 return 0; 1530 } 1531 1532 /*TEST 1533 1534 build: 1535 requires: !single 1536 1537 test: 1538 args: -M 6 -P 4 -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type icc 1539 1540 test: 1541 suffix: 2 1542 nsize: 2 1543 args: -M 6 -P 4 -thi_hom z -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 6 -mg_levels_0_pc_type redundant -snes_grid_sequence 1 -mat_partitioning_type current -ksp_atol -1 1544 1545 test: 1546 suffix: 3 1547 nsize: 3 1548 args: -M 7 -P 4 -thi_hom z -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type baij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_pc_asm_type restrict -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 9 -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mat_partitioning_type current 1549 1550 test: 1551 suffix: 4 1552 nsize: 6 1553 args: -M 4 -P 2 -da_refine_hierarchy_x 1,1,3 -da_refine_hierarchy_y 2,2,1 -da_refine_hierarchy_z 2,2,1 -snes_grid_sequence 3 -ksp_converged_reason -ksp_type fgmres -ksp_rtol 1e-2 -pc_type mg -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type bjacobi -mg_levels_1_sub_pc_type cholesky -pc_mg_type multiplicative -snes_converged_reason -snes_stol 1e-12 -thi_L 80e3 -thi_alpha 0.05 -thi_friction_m 1 -thi_hom x -snes_view -mg_levels_0_pc_type redundant -mg_levels_0_ksp_type preonly -ksp_atol -1 1554 1555 test: 1556 suffix: 5 1557 nsize: 6 1558 args: -M 12 -P 5 -snes_monitor_short -ksp_converged_reason -pc_type asm -pc_asm_type restrict -dm_mat_type {{aij baij sbaij}} 1559 1560 TEST*/ 1561