1 static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D.\n\ 2 \n\ 3 Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\ 4 using multigrid. The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\ 5 to p=4/3 in a p-Laplacian). The focus is on ISMIP-HOM experiments which assume periodic\n\ 6 boundary conditions in the x- and y-directions.\n\ 7 \n\ 8 Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\ 9 can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\ 10 \n\ 11 A VTK StructuredGrid output file can be written using the option -o filename.vts\n\ 12 \n\n"; 13 14 /* 15 The equations for horizontal velocity (u,v) are 16 17 - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0 18 - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0 19 20 where 21 22 eta = B/2 (epsilon + gamma)^((p-2)/2) 23 24 is the nonlinear effective viscosity with regularization epsilon and hardness parameter B, 25 written in terms of the second invariant 26 27 gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2 28 29 The surface boundary conditions are the natural conditions. The basal boundary conditions 30 are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2. 31 32 In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1). 33 34 The discretization is Q1 finite elements, managed by a DMDA. The grid is never distorted in the 35 map (x,y) plane, but the bed and surface may be bumpy. This is handled as usual in FEM, through 36 the Jacobian of the coordinate transformation from a reference element to the physical element. 37 38 Since ice-flow is tightly coupled in the z-direction (within columns), the DMDA is managed 39 specially so that columns are never distributed, and are always contiguous in memory. 40 This amounts to reversing the meaning of X,Y,Z compared to the DMDA's internal interpretation, 41 and then indexing as vec[i][j][k]. The exotic coarse spaces require 2D DMDAs which are made to 42 use compatible domain decomposition relative to the 3D DMDAs. 43 44 There are two compile-time options: 45 46 NO_SSE2: 47 If the host supports SSE2, we use integration code that has been vectorized with SSE2 48 intrinsics, unless this macro is defined. The intrinsics speed up integration by about 49 30% on my architecture (P8700, gcc-4.5 snapshot). 50 51 COMPUTE_LOWER_TRIANGULAR: 52 The element matrices we assemble are lower-triangular so it is not necessary to compute 53 all entries explicitly. If this macro is defined, the lower-triangular entries are 54 computed explicitly. 55 56 */ 57 58 #if defined(PETSC_APPLE_FRAMEWORK) 59 #import <PETSc/petscsnes.h> 60 #import <PETSc/petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */ 61 #else 62 63 #include <petscsnes.h> 64 #include <petsc/private/dmdaimpl.h> /* There is not yet a public interface to manipulate dm->ops */ 65 #endif 66 #include <ctype.h> /* toupper() */ 67 68 #if defined(__cplusplus) || defined (PETSC_HAVE_WINDOWS_COMPILERS) || defined (__PGI) 69 /* c++ cannot handle [_restrict_] notation like C does */ 70 #undef PETSC_RESTRICT 71 #define PETSC_RESTRICT 72 #endif 73 74 #if defined __SSE2__ 75 # include <emmintrin.h> 76 #endif 77 78 /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */ 79 #if !defined NO_SSE2 \ 80 && !defined PETSC_USE_COMPLEX \ 81 && !defined PETSC_USE_REAL_SINGLE \ 82 && !defined PETSC_USE_REAL___FLOAT128 \ 83 && !defined PETSC_USE_REAL___FP16 \ 84 && defined __SSE2__ 85 #define USE_SSE2_KERNELS 1 86 #else 87 #define USE_SSE2_KERNELS 0 88 #endif 89 90 static PetscClassId THI_CLASSID; 91 92 typedef enum {QUAD_GAUSS,QUAD_LOBATTO} QuadratureType; 93 static const char *QuadratureTypes[] = {"gauss","lobatto","QuadratureType","QUAD_",0}; 94 PETSC_UNUSED static const PetscReal HexQWeights[8] = {1,1,1,1,1,1,1,1}; 95 PETSC_UNUSED static const PetscReal HexQNodes[] = {-0.57735026918962573, 0.57735026918962573}; 96 #define G 0.57735026918962573 97 #define H (0.5*(1.+G)) 98 #define L (0.5*(1.-G)) 99 #define M (-0.5) 100 #define P (0.5) 101 /* Special quadrature: Lobatto in horizontal, Gauss in vertical */ 102 static const PetscReal HexQInterp_Lobatto[8][8] = {{H,0,0,0,L,0,0,0}, 103 {0,H,0,0,0,L,0,0}, 104 {0,0,H,0,0,0,L,0}, 105 {0,0,0,H,0,0,0,L}, 106 {L,0,0,0,H,0,0,0}, 107 {0,L,0,0,0,H,0,0}, 108 {0,0,L,0,0,0,H,0}, 109 {0,0,0,L,0,0,0,H}}; 110 static const PetscReal HexQDeriv_Lobatto[8][8][3] = { 111 {{M*H,M*H,M},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} ,{M*L,M*L,P},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} }, 112 {{M*H,0,0} ,{P*H,M*H,M},{0,P*H,0} ,{0,0,0} ,{M*L,0,0} ,{P*L,M*L,P},{0,P*L,0} ,{0,0,0} }, 113 {{0,0,0} ,{0,M*H,0} ,{P*H,P*H,M},{M*H,0,0} ,{0,0,0} ,{0,M*L,0} ,{P*L,P*L,P},{M*L,0,0} }, 114 {{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,M},{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,P}}, 115 {{M*L,M*L,M},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} ,{M*H,M*H,P},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} }, 116 {{M*L,0,0} ,{P*L,M*L,M},{0,P*L,0} ,{0,0,0} ,{M*H,0,0} ,{P*H,M*H,P},{0,P*H,0} ,{0,0,0} }, 117 {{0,0,0} ,{0,M*L,0} ,{P*L,P*L,M},{M*L,0,0} ,{0,0,0} ,{0,M*H,0} ,{P*H,P*H,P},{M*H,0,0} }, 118 {{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,M},{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,P}}}; 119 /* Stanndard Gauss */ 120 static const PetscReal HexQInterp_Gauss[8][8] = {{H*H*H,L*H*H,L*L*H,H*L*H, H*H*L,L*H*L,L*L*L,H*L*L}, 121 {L*H*H,H*H*H,H*L*H,L*L*H, L*H*L,H*H*L,H*L*L,L*L*L}, 122 {L*L*H,H*L*H,H*H*H,L*H*H, L*L*L,H*L*L,H*H*L,L*H*L}, 123 {H*L*H,L*L*H,L*H*H,H*H*H, H*L*L,L*L*L,L*H*L,H*H*L}, 124 {H*H*L,L*H*L,L*L*L,H*L*L, H*H*H,L*H*H,L*L*H,H*L*H}, 125 {L*H*L,H*H*L,H*L*L,L*L*L, L*H*H,H*H*H,H*L*H,L*L*H}, 126 {L*L*L,H*L*L,H*H*L,L*H*L, L*L*H,H*L*H,H*H*H,L*H*H}, 127 {H*L*L,L*L*L,L*H*L,H*H*L, H*L*H,L*L*H,L*H*H,H*H*H}}; 128 static const PetscReal HexQDeriv_Gauss[8][8][3] = { 129 {{M*H*H,H*M*H,H*H*M},{P*H*H,L*M*H,L*H*M},{P*L*H,L*P*H,L*L*M},{M*L*H,H*P*H,H*L*M}, {M*H*L,H*M*L,H*H*P},{P*H*L,L*M*L,L*H*P},{P*L*L,L*P*L,L*L*P},{M*L*L,H*P*L,H*L*P}}, 130 {{M*H*H,L*M*H,L*H*M},{P*H*H,H*M*H,H*H*M},{P*L*H,H*P*H,H*L*M},{M*L*H,L*P*H,L*L*M}, {M*H*L,L*M*L,L*H*P},{P*H*L,H*M*L,H*H*P},{P*L*L,H*P*L,H*L*P},{M*L*L,L*P*L,L*L*P}}, 131 {{M*L*H,L*M*H,L*L*M},{P*L*H,H*M*H,H*L*M},{P*H*H,H*P*H,H*H*M},{M*H*H,L*P*H,L*H*M}, {M*L*L,L*M*L,L*L*P},{P*L*L,H*M*L,H*L*P},{P*H*L,H*P*L,H*H*P},{M*H*L,L*P*L,L*H*P}}, 132 {{M*L*H,H*M*H,H*L*M},{P*L*H,L*M*H,L*L*M},{P*H*H,L*P*H,L*H*M},{M*H*H,H*P*H,H*H*M}, {M*L*L,H*M*L,H*L*P},{P*L*L,L*M*L,L*L*P},{P*H*L,L*P*L,L*H*P},{M*H*L,H*P*L,H*H*P}}, 133 {{M*H*L,H*M*L,H*H*M},{P*H*L,L*M*L,L*H*M},{P*L*L,L*P*L,L*L*M},{M*L*L,H*P*L,H*L*M}, {M*H*H,H*M*H,H*H*P},{P*H*H,L*M*H,L*H*P},{P*L*H,L*P*H,L*L*P},{M*L*H,H*P*H,H*L*P}}, 134 {{M*H*L,L*M*L,L*H*M},{P*H*L,H*M*L,H*H*M},{P*L*L,H*P*L,H*L*M},{M*L*L,L*P*L,L*L*M}, {M*H*H,L*M*H,L*H*P},{P*H*H,H*M*H,H*H*P},{P*L*H,H*P*H,H*L*P},{M*L*H,L*P*H,L*L*P}}, 135 {{M*L*L,L*M*L,L*L*M},{P*L*L,H*M*L,H*L*M},{P*H*L,H*P*L,H*H*M},{M*H*L,L*P*L,L*H*M}, {M*L*H,L*M*H,L*L*P},{P*L*H,H*M*H,H*L*P},{P*H*H,H*P*H,H*H*P},{M*H*H,L*P*H,L*H*P}}, 136 {{M*L*L,H*M*L,H*L*M},{P*L*L,L*M*L,L*L*M},{P*H*L,L*P*L,L*H*M},{M*H*L,H*P*L,H*H*M}, {M*L*H,H*M*H,H*L*P},{P*L*H,L*M*H,L*L*P},{P*H*H,L*P*H,L*H*P},{M*H*H,H*P*H,H*H*P}}}; 137 static const PetscReal (*HexQInterp)[8],(*HexQDeriv)[8][3]; 138 /* Standard 2x2 Gauss quadrature for the bottom layer. */ 139 static const PetscReal QuadQInterp[4][4] = {{H*H,L*H,L*L,H*L}, 140 {L*H,H*H,H*L,L*L}, 141 {L*L,H*L,H*H,L*H}, 142 {H*L,L*L,L*H,H*H}}; 143 static const PetscReal QuadQDeriv[4][4][2] = { 144 {{M*H,M*H},{P*H,M*L},{P*L,P*L},{M*L,P*H}}, 145 {{M*H,M*L},{P*H,M*H},{P*L,P*H},{M*L,P*L}}, 146 {{M*L,M*L},{P*L,M*H},{P*H,P*H},{M*H,P*L}}, 147 {{M*L,M*H},{P*L,M*L},{P*H,P*L},{M*H,P*H}}}; 148 #undef G 149 #undef H 150 #undef L 151 #undef M 152 #undef P 153 154 #define HexExtract(x,i,j,k,n) do { \ 155 (n)[0] = (x)[i][j][k]; \ 156 (n)[1] = (x)[i+1][j][k]; \ 157 (n)[2] = (x)[i+1][j+1][k]; \ 158 (n)[3] = (x)[i][j+1][k]; \ 159 (n)[4] = (x)[i][j][k+1]; \ 160 (n)[5] = (x)[i+1][j][k+1]; \ 161 (n)[6] = (x)[i+1][j+1][k+1]; \ 162 (n)[7] = (x)[i][j+1][k+1]; \ 163 } while (0) 164 165 #define HexExtractRef(x,i,j,k,n) do { \ 166 (n)[0] = &(x)[i][j][k]; \ 167 (n)[1] = &(x)[i+1][j][k]; \ 168 (n)[2] = &(x)[i+1][j+1][k]; \ 169 (n)[3] = &(x)[i][j+1][k]; \ 170 (n)[4] = &(x)[i][j][k+1]; \ 171 (n)[5] = &(x)[i+1][j][k+1]; \ 172 (n)[6] = &(x)[i+1][j+1][k+1]; \ 173 (n)[7] = &(x)[i][j+1][k+1]; \ 174 } while (0) 175 176 #define QuadExtract(x,i,j,n) do { \ 177 (n)[0] = (x)[i][j]; \ 178 (n)[1] = (x)[i+1][j]; \ 179 (n)[2] = (x)[i+1][j+1]; \ 180 (n)[3] = (x)[i][j+1]; \ 181 } while (0) 182 183 static void HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[]) 184 { 185 PetscInt i; 186 dz[0] = dz[1] = dz[2] = 0; 187 for (i=0; i<8; i++) { 188 dz[0] += dphi[i][0] * zn[i]; 189 dz[1] += dphi[i][1] * zn[i]; 190 dz[2] += dphi[i][2] * zn[i]; 191 } 192 } 193 194 static void HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[PETSC_RESTRICT],PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscReal *PETSC_RESTRICT jw) 195 { 196 const PetscReal jac[3][3] = {{hx/2,0,0}, {0,hy/2,0}, {dz[0],dz[1],dz[2]}}; 197 const PetscReal ijac[3][3] = {{1/jac[0][0],0,0}, {0,1/jac[1][1],0}, {-jac[2][0]/(jac[0][0]*jac[2][2]),-jac[2][1]/(jac[1][1]*jac[2][2]),1/jac[2][2]}}; 198 const PetscReal jdet = jac[0][0]*jac[1][1]*jac[2][2]; 199 PetscInt i; 200 201 for (i=0; i<8; i++) { 202 const PetscReal *dphir = HexQDeriv[q][i]; 203 phi[i] = HexQInterp[q][i]; 204 dphi[i][0] = dphir[0]*ijac[0][0] + dphir[1]*ijac[1][0] + dphir[2]*ijac[2][0]; 205 dphi[i][1] = dphir[0]*ijac[0][1] + dphir[1]*ijac[1][1] + dphir[2]*ijac[2][1]; 206 dphi[i][2] = dphir[0]*ijac[0][2] + dphir[1]*ijac[1][2] + dphir[2]*ijac[2][2]; 207 } 208 *jw = 1.0 * jdet; 209 } 210 211 typedef struct _p_THI *THI; 212 typedef struct _n_Units *Units; 213 214 typedef struct { 215 PetscScalar u,v; 216 } Node; 217 218 typedef struct { 219 PetscScalar b; /* bed */ 220 PetscScalar h; /* thickness */ 221 PetscScalar beta2; /* friction */ 222 } PrmNode; 223 224 typedef struct { 225 PetscReal min,max,cmin,cmax; 226 } PRange; 227 228 typedef enum {THIASSEMBLY_TRIDIAGONAL,THIASSEMBLY_FULL} THIAssemblyMode; 229 230 struct _p_THI { 231 PETSCHEADER(int); 232 void (*initialize)(THI,PetscReal x,PetscReal y,PrmNode *p); 233 PetscInt zlevels; 234 PetscReal Lx,Ly,Lz; /* Model domain */ 235 PetscReal alpha; /* Bed angle */ 236 Units units; 237 PetscReal dirichlet_scale; 238 PetscReal ssa_friction_scale; 239 PRange eta; 240 PRange beta2; 241 struct { 242 PetscReal Bd2,eps,exponent; 243 } viscosity; 244 struct { 245 PetscReal irefgam,eps2,exponent,refvel,epsvel; 246 } friction; 247 PetscReal rhog; 248 PetscBool no_slip; 249 PetscBool tridiagonal; 250 PetscBool coarse2d; 251 PetscBool verbose; 252 MatType mattype; 253 }; 254 255 struct _n_Units { 256 /* fundamental */ 257 PetscReal meter; 258 PetscReal kilogram; 259 PetscReal second; 260 /* derived */ 261 PetscReal Pascal; 262 PetscReal year; 263 }; 264 265 static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo*,Node***,Mat,Mat,THI); 266 static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo*,Node***,Mat,Mat,THI); 267 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo*,Node**,Mat,Mat,THI); 268 269 static void PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[]) 270 { 271 const PetscScalar zm1 = zm-1, 272 znl[8] = {pn[0].b + pn[0].h*(PetscScalar)k/zm1, 273 pn[1].b + pn[1].h*(PetscScalar)k/zm1, 274 pn[2].b + pn[2].h*(PetscScalar)k/zm1, 275 pn[3].b + pn[3].h*(PetscScalar)k/zm1, 276 pn[0].b + pn[0].h*(PetscScalar)(k+1)/zm1, 277 pn[1].b + pn[1].h*(PetscScalar)(k+1)/zm1, 278 pn[2].b + pn[2].h*(PetscScalar)(k+1)/zm1, 279 pn[3].b + pn[3].h*(PetscScalar)(k+1)/zm1}; 280 PetscInt i; 281 for (i=0; i<8; i++) zn[i] = PetscRealPart(znl[i]); 282 } 283 284 /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */ 285 static void THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode *p) 286 { 287 Units units = thi->units; 288 PetscReal s = -x*PetscSinReal(thi->alpha); 289 290 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x*2*PETSC_PI/thi->Lx) * PetscSinReal(y*2*PETSC_PI/thi->Ly); 291 p->h = s - p->b; 292 p->beta2 = 1e30; 293 } 294 295 static void THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode *p) 296 { 297 Units units = thi->units; 298 PetscReal s = -x*PetscSinReal(thi->alpha); 299 300 p->b = s - 1000*units->meter; 301 p->h = s - p->b; 302 /* tau_b = beta2 v is a stress (Pa) */ 303 p->beta2 = 1000 * (1 + PetscSinReal(x*2*PETSC_PI/thi->Lx)*PetscSinReal(y*2*PETSC_PI/thi->Ly)) * units->Pascal * units->year / units->meter; 304 } 305 306 /* These are just toys */ 307 308 /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */ 309 static void THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode *p) 310 { 311 Units units = thi->units; 312 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */ 313 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha); 314 p->b = s - 1000*units->meter + 500*units->meter*PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 315 p->h = s - p->b; 316 p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter; 317 } 318 319 /* Like Z, but with 200 meter cliffs */ 320 static void THIInitialize_HOM_Y(THI thi,PetscReal xx,PetscReal yy,PrmNode *p) 321 { 322 Units units = thi->units; 323 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */ 324 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha); 325 326 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 327 if (PetscRealPart(p->b) > -700*units->meter) p->b += 200*units->meter; 328 p->h = s - p->b; 329 p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter; 330 } 331 332 /* Same bed as A, smoothly varying slipperiness, similar to MATLAB's "sombrero" (uncorrelated with bathymetry) */ 333 static void THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode *p) 334 { 335 Units units = thi->units; 336 PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */ 337 PetscReal r = PetscSqrtReal(x*x + y*y),s = -x*PetscSinReal(thi->alpha); 338 339 p->b = s - 1000*units->meter + 500*units->meter * PetscSinReal(x + PETSC_PI) * PetscSinReal(y + PETSC_PI); 340 p->h = s - p->b; 341 p->beta2 = 1000 * (1. + PetscSinReal(PetscSqrtReal(16*r))/PetscSqrtReal(1e-2 + 16*r)*PetscCosReal(x*3/2)*PetscCosReal(y*3/2)) * units->Pascal * units->year / units->meter; 342 } 343 344 static void THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal *beta2,PetscReal *dbeta2) 345 { 346 if (thi->friction.irefgam == 0) { 347 Units units = thi->units; 348 thi->friction.irefgam = 1./(0.5*PetscSqr(thi->friction.refvel * units->meter / units->year)); 349 thi->friction.eps2 = 0.5*PetscSqr(thi->friction.epsvel * units->meter / units->year) * thi->friction.irefgam; 350 } 351 if (thi->friction.exponent == 0) { 352 *beta2 = rbeta2; 353 *dbeta2 = 0; 354 } else { 355 *beta2 = rbeta2 * PetscPowReal(thi->friction.eps2 + gam*thi->friction.irefgam,thi->friction.exponent); 356 *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam*thi->friction.irefgam) * thi->friction.irefgam; 357 } 358 } 359 360 static void THIViscosity(THI thi,PetscReal gam,PetscReal *eta,PetscReal *deta) 361 { 362 PetscReal Bd2,eps,exponent; 363 if (thi->viscosity.Bd2 == 0) { 364 Units units = thi->units; 365 const PetscReal 366 n = 3., /* Glen exponent */ 367 p = 1. + 1./n, /* for Stokes */ 368 A = 1.e-16 * PetscPowReal(units->Pascal,-n) / units->year, /* softness parameter (Pa^{-n}/s) */ 369 B = PetscPowReal(A,-1./n); /* hardness parameter */ 370 thi->viscosity.Bd2 = B/2; 371 thi->viscosity.exponent = (p-2)/2; 372 thi->viscosity.eps = 0.5*PetscSqr(1e-5 / units->year); 373 } 374 Bd2 = thi->viscosity.Bd2; 375 exponent = thi->viscosity.exponent; 376 eps = thi->viscosity.eps; 377 *eta = Bd2 * PetscPowReal(eps + gam,exponent); 378 *deta = exponent * (*eta) / (eps + gam); 379 } 380 381 static void RangeUpdate(PetscReal *min,PetscReal *max,PetscReal x) 382 { 383 if (x < *min) *min = x; 384 if (x > *max) *max = x; 385 } 386 387 static void PRangeClear(PRange *p) 388 { 389 p->cmin = p->min = 1e100; 390 p->cmax = p->max = -1e100; 391 } 392 393 static PetscErrorCode PRangeMinMax(PRange *p,PetscReal min,PetscReal max) 394 { 395 PetscFunctionBeginUser; 396 p->cmin = min; 397 p->cmax = max; 398 if (min < p->min) p->min = min; 399 if (max > p->max) p->max = max; 400 PetscFunctionReturn(0); 401 } 402 403 static PetscErrorCode THIDestroy(THI *thi) 404 { 405 PetscFunctionBeginUser; 406 if (!*thi) PetscFunctionReturn(0); 407 if (--((PetscObject)(*thi))->refct > 0) {*thi = 0; PetscFunctionReturn(0);} 408 PetscCall(PetscFree((*thi)->units)); 409 PetscCall(PetscFree((*thi)->mattype)); 410 PetscCall(PetscHeaderDestroy(thi)); 411 PetscFunctionReturn(0); 412 } 413 414 static PetscErrorCode THICreate(MPI_Comm comm,THI *inthi) 415 { 416 static PetscBool registered = PETSC_FALSE; 417 THI thi; 418 Units units; 419 PetscErrorCode ierr; 420 421 PetscFunctionBeginUser; 422 *inthi = 0; 423 if (!registered) { 424 PetscCall(PetscClassIdRegister("Toy Hydrostatic Ice",&THI_CLASSID)); 425 registered = PETSC_TRUE; 426 } 427 PetscCall(PetscHeaderCreate(thi,THI_CLASSID,"THI","Toy Hydrostatic Ice","",comm,THIDestroy,0)); 428 429 PetscCall(PetscNew(&thi->units)); 430 units = thi->units; 431 units->meter = 1e-2; 432 units->second = 1e-7; 433 units->kilogram = 1e-12; 434 435 ierr = PetscOptionsBegin(comm,NULL,"Scaled units options","");PetscCall(ierr); 436 { 437 PetscCall(PetscOptionsReal("-units_meter","1 meter in scaled length units","",units->meter,&units->meter,NULL)); 438 PetscCall(PetscOptionsReal("-units_second","1 second in scaled time units","",units->second,&units->second,NULL)); 439 PetscCall(PetscOptionsReal("-units_kilogram","1 kilogram in scaled mass units","",units->kilogram,&units->kilogram,NULL)); 440 } 441 ierr = PetscOptionsEnd();PetscCall(ierr); 442 units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second)); 443 units->year = 31556926. * units->second; /* seconds per year */ 444 445 thi->Lx = 10.e3; 446 thi->Ly = 10.e3; 447 thi->Lz = 1000; 448 thi->dirichlet_scale = 1; 449 thi->verbose = PETSC_FALSE; 450 451 ierr = PetscOptionsBegin(comm,NULL,"Toy Hydrostatic Ice options","");PetscCall(ierr); 452 { 453 QuadratureType quad = QUAD_GAUSS; 454 char homexp[] = "A"; 455 char mtype[256] = MATSBAIJ; 456 PetscReal L,m = 1.0; 457 PetscBool flg; 458 L = thi->Lx; 459 PetscCall(PetscOptionsReal("-thi_L","Domain size (m)","",L,&L,&flg)); 460 if (flg) thi->Lx = thi->Ly = L; 461 PetscCall(PetscOptionsReal("-thi_Lx","X Domain size (m)","",thi->Lx,&thi->Lx,NULL)); 462 PetscCall(PetscOptionsReal("-thi_Ly","Y Domain size (m)","",thi->Ly,&thi->Ly,NULL)); 463 PetscCall(PetscOptionsReal("-thi_Lz","Z Domain size (m)","",thi->Lz,&thi->Lz,NULL)); 464 PetscCall(PetscOptionsString("-thi_hom","ISMIP-HOM experiment (A or C)","",homexp,homexp,sizeof(homexp),NULL)); 465 switch (homexp[0] = toupper(homexp[0])) { 466 case 'A': 467 thi->initialize = THIInitialize_HOM_A; 468 thi->no_slip = PETSC_TRUE; 469 thi->alpha = 0.5; 470 break; 471 case 'C': 472 thi->initialize = THIInitialize_HOM_C; 473 thi->no_slip = PETSC_FALSE; 474 thi->alpha = 0.1; 475 break; 476 case 'X': 477 thi->initialize = THIInitialize_HOM_X; 478 thi->no_slip = PETSC_FALSE; 479 thi->alpha = 0.3; 480 break; 481 case 'Y': 482 thi->initialize = THIInitialize_HOM_Y; 483 thi->no_slip = PETSC_FALSE; 484 thi->alpha = 0.5; 485 break; 486 case 'Z': 487 thi->initialize = THIInitialize_HOM_Z; 488 thi->no_slip = PETSC_FALSE; 489 thi->alpha = 0.5; 490 break; 491 default: 492 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HOM experiment '%c' not implemented",homexp[0]); 493 } 494 PetscCall(PetscOptionsEnum("-thi_quadrature","Quadrature to use for 3D elements","",QuadratureTypes,(PetscEnum)quad,(PetscEnum*)&quad,NULL)); 495 switch (quad) { 496 case QUAD_GAUSS: 497 HexQInterp = HexQInterp_Gauss; 498 HexQDeriv = HexQDeriv_Gauss; 499 break; 500 case QUAD_LOBATTO: 501 HexQInterp = HexQInterp_Lobatto; 502 HexQDeriv = HexQDeriv_Lobatto; 503 break; 504 } 505 PetscCall(PetscOptionsReal("-thi_alpha","Bed angle (degrees)","",thi->alpha,&thi->alpha,NULL)); 506 507 thi->friction.refvel = 100.; 508 thi->friction.epsvel = 1.; 509 510 PetscCall(PetscOptionsReal("-thi_friction_refvel","Reference velocity for sliding","",thi->friction.refvel,&thi->friction.refvel,NULL)); 511 PetscCall(PetscOptionsReal("-thi_friction_epsvel","Regularization velocity for sliding","",thi->friction.epsvel,&thi->friction.epsvel,NULL)); 512 PetscCall(PetscOptionsReal("-thi_friction_m","Friction exponent, 0=Coulomb, 1=Navier","",m,&m,NULL)); 513 514 thi->friction.exponent = (m-1)/2; 515 516 PetscCall(PetscOptionsReal("-thi_dirichlet_scale","Scale Dirichlet boundary conditions by this factor","",thi->dirichlet_scale,&thi->dirichlet_scale,NULL)); 517 PetscCall(PetscOptionsReal("-thi_ssa_friction_scale","Scale slip boundary conditions by this factor in SSA (2D) assembly","",thi->ssa_friction_scale,&thi->ssa_friction_scale,NULL)); 518 PetscCall(PetscOptionsBool("-thi_coarse2d","Use a 2D coarse space corresponding to SSA","",thi->coarse2d,&thi->coarse2d,NULL)); 519 PetscCall(PetscOptionsBool("-thi_tridiagonal","Assemble a tridiagonal system (column coupling only) on the finest level","",thi->tridiagonal,&thi->tridiagonal,NULL)); 520 PetscCall(PetscOptionsFList("-thi_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)mtype,sizeof(mtype),NULL)); 521 PetscCall(PetscStrallocpy(mtype,(char**)&thi->mattype)); 522 PetscCall(PetscOptionsBool("-thi_verbose","Enable verbose output (like matrix sizes and statistics)","",thi->verbose,&thi->verbose,NULL)); 523 } 524 ierr = PetscOptionsEnd();PetscCall(ierr); 525 526 /* dimensionalize */ 527 thi->Lx *= units->meter; 528 thi->Ly *= units->meter; 529 thi->Lz *= units->meter; 530 thi->alpha *= PETSC_PI / 180; 531 532 PRangeClear(&thi->eta); 533 PRangeClear(&thi->beta2); 534 535 { 536 PetscReal u = 1000*units->meter/(3e7*units->second), 537 gradu = u / (100*units->meter),eta,deta, 538 rho = 910 * units->kilogram/PetscPowReal(units->meter,3), 539 grav = 9.81 * units->meter/PetscSqr(units->second), 540 driving = rho * grav * PetscSinReal(thi->alpha) * 1000*units->meter; 541 THIViscosity(thi,0.5*gradu*gradu,&eta,&deta); 542 thi->rhog = rho * grav; 543 if (thi->verbose) { 544 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Units: meter %8.2g second %8.2g kg %8.2g Pa %8.2g\n",(double)units->meter,(double)units->second,(double)units->kilogram,(double)units->Pascal)); 545 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n",(double)thi->Lx,(double)thi->Ly,(double)thi->Lz,(double)(rho*grav*1e3*units->meter),(double)driving)); 546 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)u,(double)gradu,(double)eta,(double)(2*eta*gradu),(double)(2*eta*gradu/driving))); 547 THIViscosity(thi,0.5*PetscSqr(1e-3*gradu),&eta,&deta); 548 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Small velocity 1m/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",(double)(1e-3*u),(double)(1e-3*gradu),(double)eta,(double)(2*eta*1e-3*gradu),(double)(2*eta*1e-3*gradu/driving))); 549 } 550 } 551 552 *inthi = thi; 553 PetscFunctionReturn(0); 554 } 555 556 static PetscErrorCode THIInitializePrm(THI thi,DM da2prm,Vec prm) 557 { 558 PrmNode **p; 559 PetscInt i,j,xs,xm,ys,ym,mx,my; 560 561 PetscFunctionBeginUser; 562 PetscCall(DMDAGetGhostCorners(da2prm,&ys,&xs,0,&ym,&xm,0)); 563 PetscCall(DMDAGetInfo(da2prm,0, &my,&mx,0, 0,0,0, 0,0,0,0,0,0)); 564 PetscCall(DMDAVecGetArray(da2prm,prm,&p)); 565 for (i=xs; i<xs+xm; i++) { 566 for (j=ys; j<ys+ym; j++) { 567 PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my; 568 thi->initialize(thi,xx,yy,&p[i][j]); 569 } 570 } 571 PetscCall(DMDAVecRestoreArray(da2prm,prm,&p)); 572 PetscFunctionReturn(0); 573 } 574 575 static PetscErrorCode THISetUpDM(THI thi,DM dm) 576 { 577 PetscInt refinelevel,coarsenlevel,level,dim,Mx,My,Mz,mx,my,s; 578 DMDAStencilType st; 579 DM da2prm; 580 Vec X; 581 582 PetscFunctionBeginUser; 583 PetscCall(DMDAGetInfo(dm,&dim, &Mz,&My,&Mx, 0,&my,&mx, 0,&s,0,0,0,&st)); 584 if (dim == 2) { 585 PetscCall(DMDAGetInfo(dm,&dim, &My,&Mx,0, &my,&mx,0, 0,&s,0,0,0,&st)); 586 } 587 PetscCall(DMGetRefineLevel(dm,&refinelevel)); 588 PetscCall(DMGetCoarsenLevel(dm,&coarsenlevel)); 589 level = refinelevel - coarsenlevel; 590 PetscCall(DMDACreate2d(PetscObjectComm((PetscObject)thi),DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,My,Mx,my,mx,sizeof(PrmNode)/sizeof(PetscScalar),s,0,0,&da2prm)); 591 PetscCall(DMSetUp(da2prm)); 592 PetscCall(DMCreateLocalVector(da2prm,&X)); 593 { 594 PetscReal Lx = thi->Lx / thi->units->meter,Ly = thi->Ly / thi->units->meter,Lz = thi->Lz / thi->units->meter; 595 if (dim == 2) { 596 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g, num elements %D x %D (%D), size (m) %g x %g\n",level,(double)Lx,(double)Ly,Mx,My,Mx*My,(double)(Lx/Mx),(double)(Ly/My))); 597 } else { 598 PetscCall(PetscPrintf(PetscObjectComm((PetscObject)thi),"Level %D domain size (m) %8.2g x %8.2g x %8.2g, num elements %D x %D x %D (%D), size (m) %g x %g x %g\n",level,(double)Lx,(double)Ly,(double)Lz,Mx,My,Mz,Mx*My*Mz,(double)(Lx/Mx),(double)(Ly/My),(double)(1000./(Mz-1)))); 599 } 600 } 601 PetscCall(THIInitializePrm(thi,da2prm,X)); 602 if (thi->tridiagonal) { /* Reset coarse Jacobian evaluation */ 603 PetscCall(DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi)); 604 } 605 if (thi->coarse2d) { 606 PetscCall(DMDASNESSetJacobianLocal(dm,(DMDASNESJacobian)THIJacobianLocal_2D,thi)); 607 } 608 PetscCall(PetscObjectCompose((PetscObject)dm,"DMDA2Prm",(PetscObject)da2prm)); 609 PetscCall(PetscObjectCompose((PetscObject)dm,"DMDA2Prm_Vec",(PetscObject)X)); 610 PetscCall(DMDestroy(&da2prm)); 611 PetscCall(VecDestroy(&X)); 612 PetscFunctionReturn(0); 613 } 614 615 static PetscErrorCode DMCoarsenHook_THI(DM dmf,DM dmc,void *ctx) 616 { 617 THI thi = (THI)ctx; 618 PetscInt rlevel,clevel; 619 620 PetscFunctionBeginUser; 621 PetscCall(THISetUpDM(thi,dmc)); 622 PetscCall(DMGetRefineLevel(dmc,&rlevel)); 623 PetscCall(DMGetCoarsenLevel(dmc,&clevel)); 624 if (rlevel-clevel == 0) PetscCall(DMSetMatType(dmc,MATAIJ)); 625 PetscCall(DMCoarsenHookAdd(dmc,DMCoarsenHook_THI,NULL,thi)); 626 PetscFunctionReturn(0); 627 } 628 629 static PetscErrorCode DMRefineHook_THI(DM dmc,DM dmf,void *ctx) 630 { 631 THI thi = (THI)ctx; 632 633 PetscFunctionBeginUser; 634 PetscCall(THISetUpDM(thi,dmf)); 635 PetscCall(DMSetMatType(dmf,thi->mattype)); 636 PetscCall(DMRefineHookAdd(dmf,DMRefineHook_THI,NULL,thi)); 637 /* With grid sequencing, a formerly-refined DM will later be coarsened by PCSetUp_MG */ 638 PetscCall(DMCoarsenHookAdd(dmf,DMCoarsenHook_THI,NULL,thi)); 639 PetscFunctionReturn(0); 640 } 641 642 static PetscErrorCode THIDAGetPrm(DM da,PrmNode ***prm) 643 { 644 DM da2prm; 645 Vec X; 646 647 PetscFunctionBeginUser; 648 PetscCall(PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm)); 649 PetscCheck(da2prm,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA"); 650 PetscCall(PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X)); 651 PetscCheck(X,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA"); 652 PetscCall(DMDAVecGetArray(da2prm,X,prm)); 653 PetscFunctionReturn(0); 654 } 655 656 static PetscErrorCode THIDARestorePrm(DM da,PrmNode ***prm) 657 { 658 DM da2prm; 659 Vec X; 660 661 PetscFunctionBeginUser; 662 PetscCall(PetscObjectQuery((PetscObject)da,"DMDA2Prm",(PetscObject*)&da2prm)); 663 PetscCheck(da2prm,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm composed with given DMDA"); 664 PetscCall(PetscObjectQuery((PetscObject)da,"DMDA2Prm_Vec",(PetscObject*)&X)); 665 PetscCheck(X,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"No DMDA2Prm_Vec composed with given DMDA"); 666 PetscCall(DMDAVecRestoreArray(da2prm,X,prm)); 667 PetscFunctionReturn(0); 668 } 669 670 static PetscErrorCode THIInitial(SNES snes,Vec X,void *ctx) 671 { 672 THI thi; 673 PetscInt i,j,k,xs,xm,ys,ym,zs,zm,mx,my; 674 PetscReal hx,hy; 675 PrmNode **prm; 676 Node ***x; 677 DM da; 678 679 PetscFunctionBeginUser; 680 PetscCall(SNESGetDM(snes,&da)); 681 PetscCall(DMGetApplicationContext(da,&thi)); 682 PetscCall(DMDAGetInfo(da,0, 0,&my,&mx, 0,0,0, 0,0,0,0,0,0)); 683 PetscCall(DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm)); 684 PetscCall(DMDAVecGetArray(da,X,&x)); 685 PetscCall(THIDAGetPrm(da,&prm)); 686 hx = thi->Lx / mx; 687 hy = thi->Ly / my; 688 for (i=xs; i<xs+xm; i++) { 689 for (j=ys; j<ys+ym; j++) { 690 for (k=zs; k<zs+zm; k++) { 691 const PetscScalar zm1 = zm-1, 692 drivingx = thi->rhog * (prm[i+1][j].b+prm[i+1][j].h - prm[i-1][j].b-prm[i-1][j].h) / (2*hx), 693 drivingy = thi->rhog * (prm[i][j+1].b+prm[i][j+1].h - prm[i][j-1].b-prm[i][j-1].h) / (2*hy); 694 x[i][j][k].u = 0. * drivingx * prm[i][j].h*(PetscScalar)k/zm1; 695 x[i][j][k].v = 0. * drivingy * prm[i][j].h*(PetscScalar)k/zm1; 696 } 697 } 698 } 699 PetscCall(DMDAVecRestoreArray(da,X,&x)); 700 PetscCall(THIDARestorePrm(da,&prm)); 701 PetscFunctionReturn(0); 702 } 703 704 static void PointwiseNonlinearity(THI thi,const Node n[PETSC_RESTRICT],const PetscReal phi[PETSC_RESTRICT],PetscReal dphi[PETSC_RESTRICT][3],PetscScalar *PETSC_RESTRICT u,PetscScalar *PETSC_RESTRICT v,PetscScalar du[PETSC_RESTRICT],PetscScalar dv[PETSC_RESTRICT],PetscReal *eta,PetscReal *deta) 705 { 706 PetscInt l,ll; 707 PetscScalar gam; 708 709 du[0] = du[1] = du[2] = 0; 710 dv[0] = dv[1] = dv[2] = 0; 711 *u = 0; 712 *v = 0; 713 for (l=0; l<8; l++) { 714 *u += phi[l] * n[l].u; 715 *v += phi[l] * n[l].v; 716 for (ll=0; ll<3; ll++) { 717 du[ll] += dphi[l][ll] * n[l].u; 718 dv[ll] += dphi[l][ll] * n[l].v; 719 } 720 } 721 gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]) + 0.25*PetscSqr(du[2]) + 0.25*PetscSqr(dv[2]); 722 THIViscosity(thi,PetscRealPart(gam),eta,deta); 723 } 724 725 static void PointwiseNonlinearity2D(THI thi,Node n[],PetscReal phi[],PetscReal dphi[4][2],PetscScalar *u,PetscScalar *v,PetscScalar du[],PetscScalar dv[],PetscReal *eta,PetscReal *deta) 726 { 727 PetscInt l,ll; 728 PetscScalar gam; 729 730 du[0] = du[1] = 0; 731 dv[0] = dv[1] = 0; 732 *u = 0; 733 *v = 0; 734 for (l=0; l<4; l++) { 735 *u += phi[l] * n[l].u; 736 *v += phi[l] * n[l].v; 737 for (ll=0; ll<2; ll++) { 738 du[ll] += dphi[l][ll] * n[l].u; 739 dv[ll] += dphi[l][ll] * n[l].v; 740 } 741 } 742 gam = PetscSqr(du[0]) + PetscSqr(dv[1]) + du[0]*dv[1] + 0.25*PetscSqr(du[1]+dv[0]); 743 THIViscosity(thi,PetscRealPart(gam),eta,deta); 744 } 745 746 static PetscErrorCode THIFunctionLocal(DMDALocalInfo *info,Node ***x,Node ***f,THI thi) 747 { 748 PetscInt xs,ys,xm,ym,zm,i,j,k,q,l; 749 PetscReal hx,hy,etamin,etamax,beta2min,beta2max; 750 PrmNode **prm; 751 752 PetscFunctionBeginUser; 753 xs = info->zs; 754 ys = info->ys; 755 xm = info->zm; 756 ym = info->ym; 757 zm = info->xm; 758 hx = thi->Lx / info->mz; 759 hy = thi->Ly / info->my; 760 761 etamin = 1e100; 762 etamax = 0; 763 beta2min = 1e100; 764 beta2max = 0; 765 766 PetscCall(THIDAGetPrm(info->da,&prm)); 767 768 for (i=xs; i<xs+xm; i++) { 769 for (j=ys; j<ys+ym; j++) { 770 PrmNode pn[4]; 771 QuadExtract(prm,i,j,pn); 772 for (k=0; k<zm-1; k++) { 773 PetscInt ls = 0; 774 Node n[8],*fn[8]; 775 PetscReal zn[8],etabase = 0; 776 PrmHexGetZ(pn,k,zm,zn); 777 HexExtract(x,i,j,k,n); 778 HexExtractRef(f,i,j,k,fn); 779 if (thi->no_slip && k == 0) { 780 for (l=0; l<4; l++) n[l].u = n[l].v = 0; 781 /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */ 782 ls = 4; 783 } 784 for (q=0; q<8; q++) { 785 PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta; 786 PetscScalar du[3],dv[3],u,v; 787 HexGrad(HexQDeriv[q],zn,dz); 788 HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw); 789 PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta); 790 jw /= thi->rhog; /* scales residuals to be O(1) */ 791 if (q == 0) etabase = eta; 792 RangeUpdate(&etamin,&etamax,eta); 793 for (l=ls; l<8; l++) { /* test functions */ 794 const PetscReal ds[2] = {-PetscSinReal(thi->alpha),0}; 795 const PetscReal pp = phi[l],*dp = dphi[l]; 796 fn[l]->u += dp[0]*jw*eta*(4.*du[0]+2.*dv[1]) + dp[1]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*du[2] + pp*jw*thi->rhog*ds[0]; 797 fn[l]->v += dp[1]*jw*eta*(2.*du[0]+4.*dv[1]) + dp[0]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*dv[2] + pp*jw*thi->rhog*ds[1]; 798 } 799 } 800 if (k == 0) { /* we are on a bottom face */ 801 if (thi->no_slip) { 802 /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary 803 * conditions. After shenanigans above, etabase contains the effective viscosity at the closest quadrature 804 * point to the bed. We want the diagonal entry in the Dirichlet condition to have similar magnitude to the 805 * diagonal entry corresponding to the adjacent node. The fundamental scaling of the viscous part is in 806 * diagu, diagv below. This scaling is easy to recognize by considering the finite difference operator after 807 * scaling by element size. The no-slip Dirichlet condition is scaled by this factor, and also in the 808 * assembled matrix (see the similar block in THIJacobianLocal). 809 * 810 * Note that the residual at this Dirichlet node is linear in the state at this node, but also depends 811 * (nonlinearly in general) on the neighboring interior nodes through the local viscosity. This will make 812 * a matrix-free Jacobian have extra entries in the corresponding row. We assemble only the diagonal part, 813 * so the solution will exactly satisfy the boundary condition after the first linear iteration. 814 */ 815 const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1.); 816 const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx); 817 fn[0]->u = thi->dirichlet_scale*diagu*x[i][j][k].u; 818 fn[0]->v = thi->dirichlet_scale*diagv*x[i][j][k].v; 819 } else { /* Integrate over bottom face to apply boundary condition */ 820 for (q=0; q<4; q++) { 821 const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q]; 822 PetscScalar u =0,v=0,rbeta2=0; 823 PetscReal beta2,dbeta2; 824 for (l=0; l<4; l++) { 825 u += phi[l]*n[l].u; 826 v += phi[l]*n[l].v; 827 rbeta2 += phi[l]*pn[l].beta2; 828 } 829 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2); 830 RangeUpdate(&beta2min,&beta2max,beta2); 831 for (l=0; l<4; l++) { 832 const PetscReal pp = phi[l]; 833 fn[ls+l]->u += pp*jw*beta2*u; 834 fn[ls+l]->v += pp*jw*beta2*v; 835 } 836 } 837 } 838 } 839 } 840 } 841 } 842 843 PetscCall(THIDARestorePrm(info->da,&prm)); 844 845 PetscCall(PRangeMinMax(&thi->eta,etamin,etamax)); 846 PetscCall(PRangeMinMax(&thi->beta2,beta2min,beta2max)); 847 PetscFunctionReturn(0); 848 } 849 850 static PetscErrorCode THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer) 851 { 852 PetscReal nrm; 853 PetscInt m; 854 PetscMPIInt rank; 855 856 PetscFunctionBeginUser; 857 PetscCall(MatNorm(B,NORM_FROBENIUS,&nrm)); 858 PetscCall(MatGetSize(B,&m,0)); 859 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&rank)); 860 if (rank == 0) { 861 PetscScalar val0,val2; 862 PetscCall(MatGetValue(B,0,0,&val0)); 863 PetscCall(MatGetValue(B,2,2,&val2)); 864 PetscCall(PetscViewerASCIIPrintf(viewer,"Matrix dim %D norm %8.2e (0,0) %8.2e (2,2) %8.2e %8.2e <= eta <= %8.2e %8.2e <= beta2 <= %8.2e\n",m,(double)nrm,(double)PetscRealPart(val0),(double)PetscRealPart(val2),(double)thi->eta.cmin,(double)thi->eta.cmax,(double)thi->beta2.cmin,(double)thi->beta2.cmax)); 865 } 866 PetscFunctionReturn(0); 867 } 868 869 static PetscErrorCode THISurfaceStatistics(DM da,Vec X,PetscReal *min,PetscReal *max,PetscReal *mean) 870 { 871 Node ***x; 872 PetscInt i,j,xs,ys,zs,xm,ym,zm,mx,my,mz; 873 PetscReal umin = 1e100,umax=-1e100; 874 PetscScalar usum = 0.0,gusum; 875 876 PetscFunctionBeginUser; 877 *min = *max = *mean = 0; 878 PetscCall(DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0)); 879 PetscCall(DMDAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm)); 880 PetscCheck(zs == 0 && zm == mz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected decomposition"); 881 PetscCall(DMDAVecGetArray(da,X,&x)); 882 for (i=xs; i<xs+xm; i++) { 883 for (j=ys; j<ys+ym; j++) { 884 PetscReal u = PetscRealPart(x[i][j][zm-1].u); 885 RangeUpdate(&umin,&umax,u); 886 usum += u; 887 } 888 } 889 PetscCall(DMDAVecRestoreArray(da,X,&x)); 890 PetscCallMPI(MPI_Allreduce(&umin,min,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)da))); 891 PetscCallMPI(MPI_Allreduce(&umax,max,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)da))); 892 PetscCallMPI(MPI_Allreduce(&usum,&gusum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)da))); 893 *mean = PetscRealPart(gusum) / (mx*my); 894 PetscFunctionReturn(0); 895 } 896 897 static PetscErrorCode THISolveStatistics(THI thi,SNES snes,PetscInt coarsened,const char name[]) 898 { 899 MPI_Comm comm; 900 Vec X; 901 DM dm; 902 903 PetscFunctionBeginUser; 904 PetscCall(PetscObjectGetComm((PetscObject)thi,&comm)); 905 PetscCall(SNESGetSolution(snes,&X)); 906 PetscCall(SNESGetDM(snes,&dm)); 907 PetscCall(PetscPrintf(comm,"Solution statistics after solve: %s\n",name)); 908 { 909 PetscInt its,lits; 910 SNESConvergedReason reason; 911 PetscCall(SNESGetIterationNumber(snes,&its)); 912 PetscCall(SNESGetConvergedReason(snes,&reason)); 913 PetscCall(SNESGetLinearSolveIterations(snes,&lits)); 914 PetscCall(PetscPrintf(comm,"%s: Number of SNES iterations = %D, total linear iterations = %D\n",SNESConvergedReasons[reason],its,lits)); 915 } 916 { 917 PetscReal nrm2,tmin[3]={1e100,1e100,1e100},tmax[3]={-1e100,-1e100,-1e100},min[3],max[3]; 918 PetscInt i,j,m; 919 const PetscScalar *x; 920 PetscCall(VecNorm(X,NORM_2,&nrm2)); 921 PetscCall(VecGetLocalSize(X,&m)); 922 PetscCall(VecGetArrayRead(X,&x)); 923 for (i=0; i<m; i+=2) { 924 PetscReal u = PetscRealPart(x[i]),v = PetscRealPart(x[i+1]),c = PetscSqrtReal(u*u+v*v); 925 tmin[0] = PetscMin(u,tmin[0]); 926 tmin[1] = PetscMin(v,tmin[1]); 927 tmin[2] = PetscMin(c,tmin[2]); 928 tmax[0] = PetscMax(u,tmax[0]); 929 tmax[1] = PetscMax(v,tmax[1]); 930 tmax[2] = PetscMax(c,tmax[2]); 931 } 932 PetscCall(VecRestoreArrayRead(X,&x)); 933 PetscCallMPI(MPI_Allreduce(tmin,min,3,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)thi))); 934 PetscCallMPI(MPI_Allreduce(tmax,max,3,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)thi))); 935 /* Dimensionalize to meters/year */ 936 nrm2 *= thi->units->year / thi->units->meter; 937 for (j=0; j<3; j++) { 938 min[j] *= thi->units->year / thi->units->meter; 939 max[j] *= thi->units->year / thi->units->meter; 940 } 941 if (min[0] == 0.0) min[0] = 0.0; 942 PetscCall(PetscPrintf(comm,"|X|_2 %g %g <= u <= %g %g <= v <= %g %g <= c <= %g \n",(double)nrm2,(double)min[0],(double)max[0],(double)min[1],(double)max[1],(double)min[2],(double)max[2])); 943 { 944 PetscReal umin,umax,umean; 945 PetscCall(THISurfaceStatistics(dm,X,&umin,&umax,&umean)); 946 umin *= thi->units->year / thi->units->meter; 947 umax *= thi->units->year / thi->units->meter; 948 umean *= thi->units->year / thi->units->meter; 949 PetscCall(PetscPrintf(comm,"Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n",(double)umin,(double)umax,(double)umean)); 950 } 951 /* These values stay nondimensional */ 952 PetscCall(PetscPrintf(comm,"Global eta range %g to %g converged range %g to %g\n",(double)thi->eta.min,(double)thi->eta.max,(double)thi->eta.cmin,(double)thi->eta.cmax)); 953 PetscCall(PetscPrintf(comm,"Global beta2 range %g to %g converged range %g to %g\n",(double)thi->beta2.min,(double)thi->beta2.max,(double)thi->beta2.cmin,(double)thi->beta2.cmax)); 954 } 955 PetscFunctionReturn(0); 956 } 957 958 static PetscErrorCode THIJacobianLocal_2D(DMDALocalInfo *info,Node **x,Mat J,Mat B,THI thi) 959 { 960 PetscInt xs,ys,xm,ym,i,j,q,l,ll; 961 PetscReal hx,hy; 962 PrmNode **prm; 963 964 PetscFunctionBeginUser; 965 xs = info->ys; 966 ys = info->xs; 967 xm = info->ym; 968 ym = info->xm; 969 hx = thi->Lx / info->my; 970 hy = thi->Ly / info->mx; 971 972 PetscCall(MatZeroEntries(B)); 973 PetscCall(THIDAGetPrm(info->da,&prm)); 974 975 for (i=xs; i<xs+xm; i++) { 976 for (j=ys; j<ys+ym; j++) { 977 Node n[4]; 978 PrmNode pn[4]; 979 PetscScalar Ke[4*2][4*2]; 980 QuadExtract(prm,i,j,pn); 981 QuadExtract(x,i,j,n); 982 PetscCall(PetscMemzero(Ke,sizeof(Ke))); 983 for (q=0; q<4; q++) { 984 PetscReal phi[4],dphi[4][2],jw,eta,deta,beta2,dbeta2; 985 PetscScalar u,v,du[2],dv[2],h = 0,rbeta2 = 0; 986 for (l=0; l<4; l++) { 987 phi[l] = QuadQInterp[q][l]; 988 dphi[l][0] = QuadQDeriv[q][l][0]*2./hx; 989 dphi[l][1] = QuadQDeriv[q][l][1]*2./hy; 990 h += phi[l] * pn[l].h; 991 rbeta2 += phi[l] * pn[l].beta2; 992 } 993 jw = 0.25*hx*hy / thi->rhog; /* rhog is only scaling */ 994 PointwiseNonlinearity2D(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta); 995 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2); 996 for (l=0; l<4; l++) { 997 const PetscReal pp = phi[l],*dp = dphi[l]; 998 for (ll=0; ll<4; ll++) { 999 const PetscReal ppl = phi[ll],*dpl = dphi[ll]; 1000 PetscScalar dgdu,dgdv; 1001 dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1]; 1002 dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0]; 1003 /* Picard part */ 1004 Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale; 1005 Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0]; 1006 Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1]; 1007 Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale; 1008 /* extra Newton terms */ 1009 Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*u*ppl*thi->ssa_friction_scale; 1010 Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*v*ppl*thi->ssa_friction_scale; 1011 Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*u*ppl*thi->ssa_friction_scale; 1012 Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*v*ppl*thi->ssa_friction_scale; 1013 } 1014 } 1015 } 1016 { 1017 const MatStencil rc[4] = {{0,i,j,0},{0,i+1,j,0},{0,i+1,j+1,0},{0,i,j+1,0}}; 1018 PetscCall(MatSetValuesBlockedStencil(B,4,rc,4,rc,&Ke[0][0],ADD_VALUES)); 1019 } 1020 } 1021 } 1022 PetscCall(THIDARestorePrm(info->da,&prm)); 1023 1024 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1025 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1026 PetscCall(MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE)); 1027 if (thi->verbose) PetscCall(THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD)); 1028 PetscFunctionReturn(0); 1029 } 1030 1031 static PetscErrorCode THIJacobianLocal_3D(DMDALocalInfo *info,Node ***x,Mat B,THI thi,THIAssemblyMode amode) 1032 { 1033 PetscInt xs,ys,xm,ym,zm,i,j,k,q,l,ll; 1034 PetscReal hx,hy; 1035 PrmNode **prm; 1036 1037 PetscFunctionBeginUser; 1038 xs = info->zs; 1039 ys = info->ys; 1040 xm = info->zm; 1041 ym = info->ym; 1042 zm = info->xm; 1043 hx = thi->Lx / info->mz; 1044 hy = thi->Ly / info->my; 1045 1046 PetscCall(MatZeroEntries(B)); 1047 PetscCall(MatSetOption(B,MAT_SUBSET_OFF_PROC_ENTRIES,PETSC_TRUE)); 1048 PetscCall(THIDAGetPrm(info->da,&prm)); 1049 1050 for (i=xs; i<xs+xm; i++) { 1051 for (j=ys; j<ys+ym; j++) { 1052 PrmNode pn[4]; 1053 QuadExtract(prm,i,j,pn); 1054 for (k=0; k<zm-1; k++) { 1055 Node n[8]; 1056 PetscReal zn[8],etabase = 0; 1057 PetscScalar Ke[8*2][8*2]; 1058 PetscInt ls = 0; 1059 1060 PrmHexGetZ(pn,k,zm,zn); 1061 HexExtract(x,i,j,k,n); 1062 PetscCall(PetscMemzero(Ke,sizeof(Ke))); 1063 if (thi->no_slip && k == 0) { 1064 for (l=0; l<4; l++) n[l].u = n[l].v = 0; 1065 ls = 4; 1066 } 1067 for (q=0; q<8; q++) { 1068 PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta; 1069 PetscScalar du[3],dv[3],u,v; 1070 HexGrad(HexQDeriv[q],zn,dz); 1071 HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw); 1072 PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta); 1073 jw /= thi->rhog; /* residuals are scaled by this factor */ 1074 if (q == 0) etabase = eta; 1075 for (l=ls; l<8; l++) { /* test functions */ 1076 const PetscReal *PETSC_RESTRICT dp = dphi[l]; 1077 #if USE_SSE2_KERNELS 1078 /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */ 1079 __m128d 1080 p4 = _mm_set1_pd(4),p2 = _mm_set1_pd(2),p05 = _mm_set1_pd(0.5), 1081 p42 = _mm_setr_pd(4,2),p24 = _mm_shuffle_pd(p42,p42,_MM_SHUFFLE2(0,1)), 1082 du0 = _mm_set1_pd(du[0]),du1 = _mm_set1_pd(du[1]),du2 = _mm_set1_pd(du[2]), 1083 dv0 = _mm_set1_pd(dv[0]),dv1 = _mm_set1_pd(dv[1]),dv2 = _mm_set1_pd(dv[2]), 1084 jweta = _mm_set1_pd(jw*eta),jwdeta = _mm_set1_pd(jw*deta), 1085 dp0 = _mm_set1_pd(dp[0]),dp1 = _mm_set1_pd(dp[1]),dp2 = _mm_set1_pd(dp[2]), 1086 dp0jweta = _mm_mul_pd(dp0,jweta),dp1jweta = _mm_mul_pd(dp1,jweta),dp2jweta = _mm_mul_pd(dp2,jweta), 1087 p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4,du0),_mm_mul_pd(p2,dv1)), /* 4 du0 + 2 dv1 */ 1088 p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4,dv1),_mm_mul_pd(p2,du0)), /* 4 dv1 + 2 du0 */ 1089 pdu2dv2 = _mm_unpacklo_pd(du2,dv2), /* [du2, dv2] */ 1090 du1pdv0 = _mm_add_pd(du1,dv0), /* du1 + dv0 */ 1091 t1 = _mm_mul_pd(dp0,p4du0p2dv1), /* dp0 (4 du0 + 2 dv1) */ 1092 t2 = _mm_mul_pd(dp1,p4dv1p2du0); /* dp1 (4 dv1 + 2 du0) */ 1093 1094 #endif 1095 #if defined COMPUTE_LOWER_TRIANGULAR /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */ 1096 for (ll=ls; ll<8; ll++) { /* trial functions */ 1097 #else 1098 for (ll=l; ll<8; ll++) { 1099 #endif 1100 const PetscReal *PETSC_RESTRICT dpl = dphi[ll]; 1101 if (amode == THIASSEMBLY_TRIDIAGONAL && (l-ll)%4) continue; /* these entries would not be inserted */ 1102 #if !USE_SSE2_KERNELS 1103 /* The analytic Jacobian in nice, easy-to-read form */ 1104 { 1105 PetscScalar dgdu,dgdv; 1106 dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1] + 0.5*du[2]*dpl[2]; 1107 dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0] + 0.5*dv[2]*dpl[2]; 1108 /* Picard part */ 1109 Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + dp[2]*jw*eta*dpl[2]; 1110 Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0]; 1111 Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1]; 1112 Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + dp[2]*jw*eta*dpl[2]; 1113 /* extra Newton terms */ 1114 Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*du[2]; 1115 Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*du[2]; 1116 Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*dv[2]; 1117 Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*dv[2]; 1118 } 1119 #else 1120 /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed 1121 * benefit. On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost 1122 * by 25 to 30 percent. */ 1123 { 1124 __m128d 1125 keu = _mm_loadu_pd(&Ke[l*2+0][ll*2+0]), 1126 kev = _mm_loadu_pd(&Ke[l*2+1][ll*2+0]), 1127 dpl01 = _mm_loadu_pd(&dpl[0]),dpl10 = _mm_shuffle_pd(dpl01,dpl01,_MM_SHUFFLE2(0,1)),dpl2 = _mm_set_sd(dpl[2]), 1128 t0,t3,pdgduv; 1129 keu = _mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta,p42),dpl01), 1130 _mm_add_pd(_mm_mul_pd(dp1jweta,dpl10), 1131 _mm_mul_pd(dp2jweta,dpl2)))); 1132 kev = _mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta,p24),dpl01), 1133 _mm_add_pd(_mm_mul_pd(dp0jweta,dpl10), 1134 _mm_mul_pd(dp2jweta,_mm_shuffle_pd(dpl2,dpl2,_MM_SHUFFLE2(0,1)))))); 1135 pdgduv = _mm_mul_pd(p05,_mm_add_pd(_mm_add_pd(_mm_mul_pd(p42,_mm_mul_pd(du0,dpl01)), 1136 _mm_mul_pd(p24,_mm_mul_pd(dv1,dpl01))), 1137 _mm_add_pd(_mm_mul_pd(du1pdv0,dpl10), 1138 _mm_mul_pd(pdu2dv2,_mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */ 1139 t0 = _mm_mul_pd(jwdeta,pdgduv); /* jw deta [dgdu, dgdv] */ 1140 t3 = _mm_mul_pd(t0,du1pdv0); /* t0 (du1 + dv0) */ 1141 _mm_storeu_pd(&Ke[l*2+0][ll*2+0],_mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(t1,t0), 1142 _mm_add_pd(_mm_mul_pd(dp1,t3), 1143 _mm_mul_pd(t0,_mm_mul_pd(dp2,du2)))))); 1144 _mm_storeu_pd(&Ke[l*2+1][ll*2+0],_mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(t2,t0), 1145 _mm_add_pd(_mm_mul_pd(dp0,t3), 1146 _mm_mul_pd(t0,_mm_mul_pd(dp2,dv2)))))); 1147 } 1148 #endif 1149 } 1150 } 1151 } 1152 if (k == 0) { /* on a bottom face */ 1153 if (thi->no_slip) { 1154 const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1); 1155 const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx); 1156 Ke[0][0] = thi->dirichlet_scale*diagu; 1157 Ke[1][1] = thi->dirichlet_scale*diagv; 1158 } else { 1159 for (q=0; q<4; q++) { 1160 const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q]; 1161 PetscScalar u =0,v=0,rbeta2=0; 1162 PetscReal beta2,dbeta2; 1163 for (l=0; l<4; l++) { 1164 u += phi[l]*n[l].u; 1165 v += phi[l]*n[l].v; 1166 rbeta2 += phi[l]*pn[l].beta2; 1167 } 1168 THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2); 1169 for (l=0; l<4; l++) { 1170 const PetscReal pp = phi[l]; 1171 for (ll=0; ll<4; ll++) { 1172 const PetscReal ppl = phi[ll]; 1173 Ke[l*2+0][ll*2+0] += pp*jw*beta2*ppl + pp*jw*dbeta2*u*u*ppl; 1174 Ke[l*2+0][ll*2+1] += pp*jw*dbeta2*u*v*ppl; 1175 Ke[l*2+1][ll*2+0] += pp*jw*dbeta2*v*u*ppl; 1176 Ke[l*2+1][ll*2+1] += pp*jw*beta2*ppl + pp*jw*dbeta2*v*v*ppl; 1177 } 1178 } 1179 } 1180 } 1181 } 1182 { 1183 const MatStencil rc[8] = {{i,j,k,0},{i+1,j,k,0},{i+1,j+1,k,0},{i,j+1,k,0},{i,j,k+1,0},{i+1,j,k+1,0},{i+1,j+1,k+1,0},{i,j+1,k+1,0}}; 1184 if (amode == THIASSEMBLY_TRIDIAGONAL) { 1185 for (l=0; l<4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */ 1186 const PetscInt l4 = l+4; 1187 const MatStencil rcl[2] = {{rc[l].k,rc[l].j,rc[l].i,0},{rc[l4].k,rc[l4].j,rc[l4].i,0}}; 1188 #if defined COMPUTE_LOWER_TRIANGULAR 1189 const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]}, 1190 {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]}, 1191 {Ke[2*l4+0][2*l+0],Ke[2*l4+0][2*l+1],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]}, 1192 {Ke[2*l4+1][2*l+0],Ke[2*l4+1][2*l+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}}; 1193 #else 1194 /* Same as above except for the lower-left block */ 1195 const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]}, 1196 {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]}, 1197 {Ke[2*l+0][2*l4+0],Ke[2*l+1][2*l4+0],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]}, 1198 {Ke[2*l+0][2*l4+1],Ke[2*l+1][2*l4+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}}; 1199 #endif 1200 PetscCall(MatSetValuesBlockedStencil(B,2,rcl,2,rcl,&Kel[0][0],ADD_VALUES)); 1201 } 1202 } else { 1203 #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */ 1204 for (l=0; l<8; l++) { 1205 for (ll=l+1; ll<8; ll++) { 1206 Ke[ll*2+0][l*2+0] = Ke[l*2+0][ll*2+0]; 1207 Ke[ll*2+1][l*2+0] = Ke[l*2+0][ll*2+1]; 1208 Ke[ll*2+0][l*2+1] = Ke[l*2+1][ll*2+0]; 1209 Ke[ll*2+1][l*2+1] = Ke[l*2+1][ll*2+1]; 1210 } 1211 } 1212 #endif 1213 PetscCall(MatSetValuesBlockedStencil(B,8,rc,8,rc,&Ke[0][0],ADD_VALUES)); 1214 } 1215 } 1216 } 1217 } 1218 } 1219 PetscCall(THIDARestorePrm(info->da,&prm)); 1220 1221 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1222 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1223 PetscCall(MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE)); 1224 if (thi->verbose) PetscCall(THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD)); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 static PetscErrorCode THIJacobianLocal_3D_Full(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi) 1229 { 1230 PetscFunctionBeginUser; 1231 PetscCall(THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_FULL)); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DMDALocalInfo *info,Node ***x,Mat A,Mat B,THI thi) 1236 { 1237 PetscFunctionBeginUser; 1238 PetscCall(THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_TRIDIAGONAL)); 1239 PetscFunctionReturn(0); 1240 } 1241 1242 static PetscErrorCode DMRefineHierarchy_THI(DM dac0,PetscInt nlevels,DM hierarchy[]) 1243 { 1244 THI thi; 1245 PetscInt dim,M,N,m,n,s,dof; 1246 DM dac,daf; 1247 DMDAStencilType st; 1248 DM_DA *ddf,*ddc; 1249 1250 PetscFunctionBeginUser; 1251 PetscCall(PetscObjectQuery((PetscObject)dac0,"THI",(PetscObject*)&thi)); 1252 PetscCheck(thi,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot refine this DMDA, missing composed THI instance"); 1253 if (nlevels > 1) { 1254 PetscCall(DMRefineHierarchy(dac0,nlevels-1,hierarchy)); 1255 dac = hierarchy[nlevels-2]; 1256 } else { 1257 dac = dac0; 1258 } 1259 PetscCall(DMDAGetInfo(dac,&dim, &N,&M,0, &n,&m,0, &dof,&s,0,0,0,&st)); 1260 PetscCheck(dim == 2,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"This function can only refine 2D DMDAs"); 1261 1262 /* Creates a 3D DMDA with the same map-plane layout as the 2D one, with contiguous columns */ 1263 PetscCall(DMDACreate3d(PetscObjectComm((PetscObject)dac),DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,st,thi->zlevels,N,M,1,n,m,dof,s,NULL,NULL,NULL,&daf)); 1264 PetscCall(DMSetUp(daf)); 1265 1266 daf->ops->creatematrix = dac->ops->creatematrix; 1267 daf->ops->createinterpolation = dac->ops->createinterpolation; 1268 daf->ops->getcoloring = dac->ops->getcoloring; 1269 ddf = (DM_DA*)daf->data; 1270 ddc = (DM_DA*)dac->data; 1271 ddf->interptype = ddc->interptype; 1272 1273 PetscCall(DMDASetFieldName(daf,0,"x-velocity")); 1274 PetscCall(DMDASetFieldName(daf,1,"y-velocity")); 1275 1276 hierarchy[nlevels-1] = daf; 1277 PetscFunctionReturn(0); 1278 } 1279 1280 static PetscErrorCode DMCreateInterpolation_DA_THI(DM dac,DM daf,Mat *A,Vec *scale) 1281 { 1282 PetscInt dim; 1283 1284 PetscFunctionBeginUser; 1285 PetscValidHeaderSpecific(dac,DM_CLASSID,1); 1286 PetscValidHeaderSpecific(daf,DM_CLASSID,2); 1287 PetscValidPointer(A,3); 1288 if (scale) PetscValidPointer(scale,4); 1289 PetscCall(DMDAGetInfo(daf,&dim,0,0,0,0,0,0,0,0,0,0,0,0)); 1290 if (dim == 2) { 1291 /* We are in the 2D problem and use normal DMDA interpolation */ 1292 PetscCall(DMCreateInterpolation(dac,daf,A,scale)); 1293 } else { 1294 PetscInt i,j,k,xs,ys,zs,xm,ym,zm,mx,my,mz,rstart,cstart; 1295 Mat B; 1296 1297 PetscCall(DMDAGetInfo(daf,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0)); 1298 PetscCall(DMDAGetCorners(daf,&zs,&ys,&xs,&zm,&ym,&xm)); 1299 PetscCheck(!zs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"unexpected"); 1300 PetscCall(MatCreate(PetscObjectComm((PetscObject)daf),&B)); 1301 PetscCall(MatSetSizes(B,xm*ym*zm,xm*ym,mx*my*mz,mx*my)); 1302 1303 PetscCall(MatSetType(B,MATAIJ)); 1304 PetscCall(MatSeqAIJSetPreallocation(B,1,NULL)); 1305 PetscCall(MatMPIAIJSetPreallocation(B,1,NULL,0,NULL)); 1306 PetscCall(MatGetOwnershipRange(B,&rstart,NULL)); 1307 PetscCall(MatGetOwnershipRangeColumn(B,&cstart,NULL)); 1308 for (i=xs; i<xs+xm; i++) { 1309 for (j=ys; j<ys+ym; j++) { 1310 for (k=zs; k<zs+zm; k++) { 1311 PetscInt i2 = i*ym+j,i3 = i2*zm+k; 1312 PetscScalar val = ((k == 0 || k == mz-1) ? 0.5 : 1.) / (mz-1.); /* Integration using trapezoid rule */ 1313 PetscCall(MatSetValue(B,cstart+i3,rstart+i2,val,INSERT_VALUES)); 1314 } 1315 } 1316 } 1317 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 1318 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 1319 PetscCall(MatCreateMAIJ(B,sizeof(Node)/sizeof(PetscScalar),A)); 1320 PetscCall(MatDestroy(&B)); 1321 } 1322 PetscFunctionReturn(0); 1323 } 1324 1325 static PetscErrorCode DMCreateMatrix_THI_Tridiagonal(DM da,Mat *J) 1326 { 1327 Mat A; 1328 PetscInt xm,ym,zm,dim,dof = 2,starts[3],dims[3]; 1329 ISLocalToGlobalMapping ltog; 1330 1331 PetscFunctionBeginUser; 1332 PetscCall(DMDAGetInfo(da,&dim, 0,0,0, 0,0,0, 0,0,0,0,0,0)); 1333 PetscCheck(dim == 3,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Expected DMDA to be 3D"); 1334 PetscCall(DMDAGetCorners(da,0,0,0,&zm,&ym,&xm)); 1335 PetscCall(DMGetLocalToGlobalMapping(da,<og)); 1336 PetscCall(MatCreate(PetscObjectComm((PetscObject)da),&A)); 1337 PetscCall(MatSetSizes(A,dof*xm*ym*zm,dof*xm*ym*zm,PETSC_DETERMINE,PETSC_DETERMINE)); 1338 PetscCall(MatSetType(A,da->mattype)); 1339 PetscCall(MatSetFromOptions(A)); 1340 PetscCall(MatSeqAIJSetPreallocation(A,3*2,NULL)); 1341 PetscCall(MatMPIAIJSetPreallocation(A,3*2,NULL,0,NULL)); 1342 PetscCall(MatSeqBAIJSetPreallocation(A,2,3,NULL)); 1343 PetscCall(MatMPIBAIJSetPreallocation(A,2,3,NULL,0,NULL)); 1344 PetscCall(MatSeqSBAIJSetPreallocation(A,2,2,NULL)); 1345 PetscCall(MatMPISBAIJSetPreallocation(A,2,2,NULL,0,NULL)); 1346 PetscCall(MatSetLocalToGlobalMapping(A,ltog,ltog)); 1347 PetscCall(DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2])); 1348 PetscCall(MatSetStencil(A,dim,dims,starts,dof)); 1349 *J = A; 1350 PetscFunctionReturn(0); 1351 } 1352 1353 static PetscErrorCode THIDAVecView_VTK_XML(THI thi,DM da,Vec X,const char filename[]) 1354 { 1355 const PetscInt dof = 2; 1356 Units units = thi->units; 1357 MPI_Comm comm; 1358 PetscViewer viewer; 1359 PetscMPIInt rank,size,tag,nn,nmax; 1360 PetscInt mx,my,mz,r,range[6]; 1361 const PetscScalar *x; 1362 1363 PetscFunctionBeginUser; 1364 PetscCall(PetscObjectGetComm((PetscObject)thi,&comm)); 1365 PetscCall(DMDAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0,0,0)); 1366 PetscCallMPI(MPI_Comm_size(comm,&size)); 1367 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 1368 PetscCall(PetscViewerASCIIOpen(comm,filename,&viewer)); 1369 PetscCall(PetscViewerASCIIPrintf(viewer,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n")); 1370 PetscCall(PetscViewerASCIIPrintf(viewer," <StructuredGrid WholeExtent=\"%d %D %d %D %d %D\">\n",0,mz-1,0,my-1,0,mx-1)); 1371 1372 PetscCall(DMDAGetCorners(da,range,range+1,range+2,range+3,range+4,range+5)); 1373 PetscCall(PetscMPIIntCast(range[3]*range[4]*range[5]*dof,&nn)); 1374 PetscCallMPI(MPI_Reduce(&nn,&nmax,1,MPI_INT,MPI_MAX,0,comm)); 1375 tag = ((PetscObject) viewer)->tag; 1376 PetscCall(VecGetArrayRead(X,&x)); 1377 if (rank == 0) { 1378 PetscScalar *array; 1379 PetscCall(PetscMalloc1(nmax,&array)); 1380 for (r=0; r<size; r++) { 1381 PetscInt i,j,k,xs,xm,ys,ym,zs,zm; 1382 const PetscScalar *ptr; 1383 MPI_Status status; 1384 if (r) { 1385 PetscCallMPI(MPI_Recv(range,6,MPIU_INT,r,tag,comm,MPI_STATUS_IGNORE)); 1386 } 1387 zs = range[0];ys = range[1];xs = range[2];zm = range[3];ym = range[4];xm = range[5]; 1388 PetscCheck(xm*ym*zm*dof <= nmax,PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen"); 1389 if (r) { 1390 PetscCallMPI(MPI_Recv(array,nmax,MPIU_SCALAR,r,tag,comm,&status)); 1391 PetscCallMPI(MPI_Get_count(&status,MPIU_SCALAR,&nn)); 1392 PetscCheck(nn == xm*ym*zm*dof,PETSC_COMM_SELF,PETSC_ERR_PLIB,"should not happen"); 1393 ptr = array; 1394 } else ptr = x; 1395 PetscCall(PetscViewerASCIIPrintf(viewer," <Piece Extent=\"%D %D %D %D %D %D\">\n",zs,zs+zm-1,ys,ys+ym-1,xs,xs+xm-1)); 1396 1397 PetscCall(PetscViewerASCIIPrintf(viewer," <Points>\n")); 1398 PetscCall(PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n")); 1399 for (i=xs; i<xs+xm; i++) { 1400 for (j=ys; j<ys+ym; j++) { 1401 for (k=zs; k<zs+zm; k++) { 1402 PrmNode p; 1403 PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my,zz; 1404 thi->initialize(thi,xx,yy,&p); 1405 zz = PetscRealPart(p.b) + PetscRealPart(p.h)*k/(mz-1); 1406 PetscCall(PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)xx,(double)yy,(double)zz)); 1407 } 1408 } 1409 } 1410 PetscCall(PetscViewerASCIIPrintf(viewer," </DataArray>\n")); 1411 PetscCall(PetscViewerASCIIPrintf(viewer," </Points>\n")); 1412 1413 PetscCall(PetscViewerASCIIPrintf(viewer," <PointData>\n")); 1414 PetscCall(PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n")); 1415 for (i=0; i<nn; i+=dof) { 1416 PetscCall(PetscViewerASCIIPrintf(viewer,"%f %f %f\n",(double)(PetscRealPart(ptr[i])*units->year/units->meter),(double)(PetscRealPart(ptr[i+1])*units->year/units->meter),0.0)); 1417 } 1418 PetscCall(PetscViewerASCIIPrintf(viewer," </DataArray>\n")); 1419 1420 PetscCall(PetscViewerASCIIPrintf(viewer," <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n")); 1421 for (i=0; i<nn; i+=dof) { 1422 PetscCall(PetscViewerASCIIPrintf(viewer,"%D\n",r)); 1423 } 1424 PetscCall(PetscViewerASCIIPrintf(viewer," </DataArray>\n")); 1425 PetscCall(PetscViewerASCIIPrintf(viewer," </PointData>\n")); 1426 1427 PetscCall(PetscViewerASCIIPrintf(viewer," </Piece>\n")); 1428 } 1429 PetscCall(PetscFree(array)); 1430 } else { 1431 PetscCallMPI(MPI_Send(range,6,MPIU_INT,0,tag,comm)); 1432 PetscCallMPI(MPI_Send((PetscScalar*)x,nn,MPIU_SCALAR,0,tag,comm)); 1433 } 1434 PetscCall(VecRestoreArrayRead(X,&x)); 1435 PetscCall(PetscViewerASCIIPrintf(viewer," </StructuredGrid>\n")); 1436 PetscCall(PetscViewerASCIIPrintf(viewer,"</VTKFile>\n")); 1437 PetscCall(PetscViewerDestroy(&viewer)); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 int main(int argc,char *argv[]) 1442 { 1443 MPI_Comm comm; 1444 THI thi; 1445 PetscErrorCode ierr; 1446 DM da; 1447 SNES snes; 1448 1449 PetscCall(PetscInitialize(&argc,&argv,0,help)); 1450 comm = PETSC_COMM_WORLD; 1451 1452 PetscCall(THICreate(comm,&thi)); 1453 { 1454 PetscInt M = 3,N = 3,P = 2; 1455 ierr = PetscOptionsBegin(comm,NULL,"Grid resolution options","");PetscCall(ierr); 1456 { 1457 PetscCall(PetscOptionsInt("-M","Number of elements in x-direction on coarse level","",M,&M,NULL)); 1458 N = M; 1459 PetscCall(PetscOptionsInt("-N","Number of elements in y-direction on coarse level (if different from M)","",N,&N,NULL)); 1460 if (thi->coarse2d) { 1461 PetscCall(PetscOptionsInt("-zlevels","Number of elements in z-direction on fine level","",thi->zlevels,&thi->zlevels,NULL)); 1462 } else { 1463 PetscCall(PetscOptionsInt("-P","Number of elements in z-direction on coarse level","",P,&P,NULL)); 1464 } 1465 } 1466 ierr = PetscOptionsEnd();PetscCall(ierr); 1467 if (thi->coarse2d) { 1468 PetscCall(DMDACreate2d(comm,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC,DMDA_STENCIL_BOX,N,M,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,&da)); 1469 PetscCall(DMSetFromOptions(da)); 1470 PetscCall(DMSetUp(da)); 1471 da->ops->refinehierarchy = DMRefineHierarchy_THI; 1472 da->ops->createinterpolation = DMCreateInterpolation_DA_THI; 1473 1474 PetscCall(PetscObjectCompose((PetscObject)da,"THI",(PetscObject)thi)); 1475 } else { 1476 PetscCall(DMDACreate3d(comm,DM_BOUNDARY_NONE,DM_BOUNDARY_PERIODIC,DM_BOUNDARY_PERIODIC, DMDA_STENCIL_BOX,P,N,M,1,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,0,&da)); 1477 PetscCall(DMSetFromOptions(da)); 1478 PetscCall(DMSetUp(da)); 1479 } 1480 PetscCall(DMDASetFieldName(da,0,"x-velocity")); 1481 PetscCall(DMDASetFieldName(da,1,"y-velocity")); 1482 } 1483 PetscCall(THISetUpDM(thi,da)); 1484 if (thi->tridiagonal) da->ops->creatematrix = DMCreateMatrix_THI_Tridiagonal; 1485 1486 { /* Set the fine level matrix type if -da_refine */ 1487 PetscInt rlevel,clevel; 1488 PetscCall(DMGetRefineLevel(da,&rlevel)); 1489 PetscCall(DMGetCoarsenLevel(da,&clevel)); 1490 if (rlevel - clevel > 0) PetscCall(DMSetMatType(da,thi->mattype)); 1491 } 1492 1493 PetscCall(DMDASNESSetFunctionLocal(da,ADD_VALUES,(DMDASNESFunction)THIFunctionLocal,thi)); 1494 if (thi->tridiagonal) { 1495 PetscCall(DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Tridiagonal,thi)); 1496 } else { 1497 PetscCall(DMDASNESSetJacobianLocal(da,(DMDASNESJacobian)THIJacobianLocal_3D_Full,thi)); 1498 } 1499 PetscCall(DMCoarsenHookAdd(da,DMCoarsenHook_THI,NULL,thi)); 1500 PetscCall(DMRefineHookAdd(da,DMRefineHook_THI,NULL,thi)); 1501 1502 PetscCall(DMSetApplicationContext(da,thi)); 1503 1504 PetscCall(SNESCreate(comm,&snes)); 1505 PetscCall(SNESSetDM(snes,da)); 1506 PetscCall(DMDestroy(&da)); 1507 PetscCall(SNESSetComputeInitialGuess(snes,THIInitial,NULL)); 1508 PetscCall(SNESSetFromOptions(snes)); 1509 1510 PetscCall(SNESSolve(snes,NULL,NULL)); 1511 1512 PetscCall(THISolveStatistics(thi,snes,0,"Full")); 1513 1514 { 1515 PetscBool flg; 1516 char filename[PETSC_MAX_PATH_LEN] = ""; 1517 PetscCall(PetscOptionsGetString(NULL,NULL,"-o",filename,sizeof(filename),&flg)); 1518 if (flg) { 1519 Vec X; 1520 DM dm; 1521 PetscCall(SNESGetSolution(snes,&X)); 1522 PetscCall(SNESGetDM(snes,&dm)); 1523 PetscCall(THIDAVecView_VTK_XML(thi,dm,X,filename)); 1524 } 1525 } 1526 1527 PetscCall(DMDestroy(&da)); 1528 PetscCall(SNESDestroy(&snes)); 1529 PetscCall(THIDestroy(&thi)); 1530 PetscCall(PetscFinalize()); 1531 return 0; 1532 } 1533 1534 /*TEST 1535 1536 build: 1537 requires: !single 1538 1539 test: 1540 args: -M 6 -P 4 -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type icc 1541 1542 test: 1543 suffix: 2 1544 nsize: 2 1545 args: -M 6 -P 4 -thi_hom z -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type sbaij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 6 -mg_levels_0_pc_type redundant -snes_grid_sequence 1 -mat_partitioning_type current -ksp_atol -1 1546 1547 test: 1548 suffix: 3 1549 nsize: 3 1550 args: -M 7 -P 4 -thi_hom z -da_refine 1 -snes_monitor_short -snes_converged_reason -ksp_monitor_short -ksp_converged_reason -thi_mat_type baij -ksp_type fgmres -pc_type mg -pc_mg_type full -mg_levels_pc_asm_type restrict -mg_levels_pc_type asm -mg_levels_pc_asm_blocks 9 -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mat_partitioning_type current 1551 1552 test: 1553 suffix: 4 1554 nsize: 6 1555 args: -M 4 -P 2 -da_refine_hierarchy_x 1,1,3 -da_refine_hierarchy_y 2,2,1 -da_refine_hierarchy_z 2,2,1 -snes_grid_sequence 3 -ksp_converged_reason -ksp_type fgmres -ksp_rtol 1e-2 -pc_type mg -mg_levels_ksp_type gmres -mg_levels_ksp_max_it 1 -mg_levels_pc_type bjacobi -mg_levels_1_sub_pc_type cholesky -pc_mg_type multiplicative -snes_converged_reason -snes_stol 1e-12 -thi_L 80e3 -thi_alpha 0.05 -thi_friction_m 1 -thi_hom x -snes_view -mg_levels_0_pc_type redundant -mg_levels_0_ksp_type preonly -ksp_atol -1 1556 1557 test: 1558 suffix: 5 1559 nsize: 6 1560 args: -M 12 -P 5 -snes_monitor_short -ksp_converged_reason -pc_type asm -pc_asm_type restrict -dm_mat_type {{aij baij sbaij}} 1561 1562 TEST*/ 1563