1 /* Portions of this code are under: 2 Copyright (C) 2022 Advanced Micro Devices, Inc. All rights reserved. 3 */ 4 static char help[] = "3D tensor hexahedra & 3D Laplacian displacement finite element formulation\n\ 5 of linear elasticity. E=1.0, nu=1/3.\n\ 6 Unit cube domain with Dirichlet boundary\n\n"; 7 8 #include <petscdmplex.h> 9 #include <petscsnes.h> 10 #include <petscds.h> 11 #include <petscdmforest.h> 12 13 static PetscReal s_soft_alpha = 0.01; 14 static PetscReal s_mu = 0.4; 15 static PetscReal s_lambda = 0.4; 16 17 static void f0_bd_u_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], const PetscReal n[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) 18 { 19 f0[0] = 1; /* x direction pull */ 20 f0[1] = -x[2]; /* add a twist around x-axis */ 21 f0[2] = x[1]; 22 } 23 24 static void f1_bd_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], const PetscReal n[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[]) 25 { 26 const PetscInt Ncomp = dim; 27 PetscInt comp, d; 28 for (comp = 0; comp < Ncomp; ++comp) { 29 for (d = 0; d < dim; ++d) f1[comp * dim + d] = 0.0; 30 } 31 } 32 33 /* gradU[comp*dim+d] = {u_x, u_y} or {u_x, u_y, u_z} */ 34 static void f1_u_3d_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[]) 35 { 36 PetscReal trace, mu = s_mu, lambda = s_lambda, rad; 37 PetscInt i, j; 38 for (i = 0, rad = 0.; i < dim; i++) { 39 PetscReal t = x[i]; 40 rad += t * t; 41 } 42 rad = PetscSqrtReal(rad); 43 if (rad > 0.25) { 44 mu *= s_soft_alpha; 45 lambda *= s_soft_alpha; /* we could keep the bulk the same like rubberish */ 46 } 47 for (i = 0, trace = 0; i < dim; ++i) trace += PetscRealPart(u_x[i * dim + i]); 48 for (i = 0; i < dim; ++i) { 49 for (j = 0; j < dim; ++j) f1[i * dim + j] = mu * (u_x[i * dim + j] + u_x[j * dim + i]); 50 f1[i * dim + i] += lambda * trace; 51 } 52 } 53 54 /* gradU[comp*dim+d] = {u_x, u_y} or {u_x, u_y, u_z} */ 55 static void f1_u_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[]) 56 { 57 PetscReal trace, mu = s_mu, lambda = s_lambda; 58 PetscInt i, j; 59 for (i = 0, trace = 0; i < dim; ++i) trace += PetscRealPart(u_x[i * dim + i]); 60 for (i = 0; i < dim; ++i) { 61 for (j = 0; j < dim; ++j) f1[i * dim + j] = mu * (u_x[i * dim + j] + u_x[j * dim + i]); 62 f1[i * dim + i] += lambda * trace; 63 } 64 } 65 66 static void f1_u_lap(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[]) 67 { 68 PetscInt d; 69 for (d = 0; d < dim; ++d) f1[d] = u_x[d]; 70 } 71 72 /* 3D elasticity */ 73 #define IDX(ii, jj, kk, ll) (27 * ii + 9 * jj + 3 * kk + ll) 74 75 void g3_uu_3d_private(PetscScalar g3[], const PetscReal mu, const PetscReal lambda) 76 { 77 if (1) { 78 g3[0] += lambda; 79 g3[0] += mu; 80 g3[0] += mu; 81 g3[4] += lambda; 82 g3[8] += lambda; 83 g3[10] += mu; 84 g3[12] += mu; 85 g3[20] += mu; 86 g3[24] += mu; 87 g3[28] += mu; 88 g3[30] += mu; 89 g3[36] += lambda; 90 g3[40] += lambda; 91 g3[40] += mu; 92 g3[40] += mu; 93 g3[44] += lambda; 94 g3[50] += mu; 95 g3[52] += mu; 96 g3[56] += mu; 97 g3[60] += mu; 98 g3[68] += mu; 99 g3[70] += mu; 100 g3[72] += lambda; 101 g3[76] += lambda; 102 g3[80] += lambda; 103 g3[80] += mu; 104 g3[80] += mu; 105 } else { 106 int i, j, k, l; 107 static int cc = -1; 108 cc++; 109 for (i = 0; i < 3; ++i) { 110 for (j = 0; j < 3; ++j) { 111 for (k = 0; k < 3; ++k) { 112 for (l = 0; l < 3; ++l) { 113 if (k == l && i == j) g3[IDX(i, j, k, l)] += lambda; 114 if (i == k && j == l) g3[IDX(i, j, k, l)] += mu; 115 if (i == l && j == k) g3[IDX(i, j, k, l)] += mu; 116 if (k == l && i == j && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += lambda;\n", IDX(i, j, k, l)); 117 if (i == k && j == l && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += mu;\n", IDX(i, j, k, l)); 118 if (i == l && j == k && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += mu;\n", IDX(i, j, k, l)); 119 } 120 } 121 } 122 } 123 } 124 } 125 126 static void g3_uu_3d_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[]) 127 { 128 PetscReal mu = s_mu, lambda = s_lambda, rad; 129 PetscInt i; 130 for (i = 0, rad = 0.; i < dim; i++) { 131 PetscReal t = x[i]; 132 rad += t * t; 133 } 134 rad = PetscSqrtReal(rad); 135 if (rad > 0.25) { 136 mu *= s_soft_alpha; 137 lambda *= s_soft_alpha; /* we could keep the bulk the same like rubberish */ 138 } 139 g3_uu_3d_private(g3, mu, lambda); 140 } 141 142 static void g3_uu_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[]) 143 { 144 g3_uu_3d_private(g3, s_mu, s_lambda); 145 } 146 147 static void g3_lap(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[]) 148 { 149 PetscInt d; 150 for (d = 0; d < dim; ++d) g3[d * dim + d] = 1.0; 151 } 152 153 static void g3_lap_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[]) 154 { 155 PetscReal lambda = 1, rad; 156 PetscInt i; 157 for (i = 0, rad = 0.; i < dim; i++) { 158 PetscReal t = x[i]; 159 rad += t * t; 160 } 161 rad = PetscSqrtReal(rad); 162 if (rad > 0.25) { lambda *= s_soft_alpha; /* we could keep the bulk the same like rubberish */ } 163 for (int d = 0; d < dim; ++d) g3[d * dim + d] = lambda; 164 } 165 166 static void f0_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) 167 { 168 const PetscInt Ncomp = dim; 169 PetscInt comp; 170 171 for (comp = 0; comp < Ncomp; ++comp) f0[comp] = 0.0; 172 } 173 174 /* PI_i (x_i^4 - x_i^2) */ 175 static void f0_u_x4(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) 176 { 177 for (int comp = 0; comp < Nf; ++comp) { 178 f0[comp] = 1e5; 179 for (int i = 0; i < dim; ++i) { f0[comp] *= /* (comp+1)* */ (x[i] * x[i] * x[i] * x[i] - x[i] * x[i]); /* assumes (0,1]^D domain */ } 180 } 181 } 182 183 PetscErrorCode zero(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nf, PetscScalar *u, void *ctx) 184 { 185 const PetscInt Ncomp = dim; 186 PetscInt comp; 187 188 for (comp = 0; comp < Ncomp; ++comp) u[comp] = 0; 189 return PETSC_SUCCESS; 190 } 191 192 int main(int argc, char **args) 193 { 194 Mat Amat; 195 SNES snes; 196 KSP ksp; 197 MPI_Comm comm; 198 PetscMPIInt rank; 199 PetscLogStage stage[17]; 200 PetscBool test_nonzero_cols = PETSC_FALSE, use_nearnullspace = PETSC_TRUE, attach_nearnullspace = PETSC_FALSE; 201 Vec xx, bb; 202 PetscInt iter, i, N, dim = 3, max_conv_its, sizes[7], run_type = 1, Ncomp = dim; 203 DM dm; 204 PetscBool flg; 205 PetscReal Lx, mdisp[10], err[10]; 206 207 PetscFunctionBeginUser; 208 PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 209 comm = PETSC_COMM_WORLD; 210 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 211 /* options */ 212 PetscOptionsBegin(comm, NULL, "3D bilinear Q1 elasticity options", ""); 213 { 214 Lx = 1.; /* or ne for rod */ 215 max_conv_its = 3; 216 PetscCall(PetscOptionsInt("-max_conv_its", "Number of iterations in convergence study", "", max_conv_its, &max_conv_its, NULL)); 217 PetscCheck(max_conv_its > 0 && max_conv_its < 8, PETSC_COMM_WORLD, PETSC_ERR_USER, "Bad number of iterations for convergence test (%" PetscInt_FMT ")", max_conv_its); 218 PetscCall(PetscOptionsReal("-lx", "Length of domain", "", Lx, &Lx, NULL)); 219 PetscCall(PetscOptionsReal("-alpha", "material coefficient inside circle", "", s_soft_alpha, &s_soft_alpha, NULL)); 220 PetscCall(PetscOptionsBool("-test_nonzero_cols", "nonzero test", "", test_nonzero_cols, &test_nonzero_cols, NULL)); 221 PetscCall(PetscOptionsBool("-use_mat_nearnullspace", "MatNearNullSpace API test", "", use_nearnullspace, &use_nearnullspace, NULL)); 222 PetscCall(PetscOptionsBool("-attach_mat_nearnullspace", "MatNearNullSpace API test (via MatSetNearNullSpace)", "", attach_nearnullspace, &attach_nearnullspace, NULL)); 223 PetscCall(PetscOptionsInt("-run_type", "0: twisting load on cantalever, 1: Elasticty convergence test on cube, 2: Laplacian, 3: soft core Laplacian", "", run_type, &run_type, NULL)); 224 } 225 PetscOptionsEnd(); 226 PetscCall(PetscLogStageRegister("Mesh Setup", &stage[16])); 227 for (iter = 0; iter < max_conv_its; iter++) { 228 char str[] = "Solve 0"; 229 str[6] += iter; 230 PetscCall(PetscLogStageRegister(str, &stage[iter])); 231 } 232 /* create DM, Plex calls DMSetup */ 233 PetscCall(PetscLogStagePush(stage[16])); 234 PetscCall(DMCreate(comm, &dm)); 235 PetscCall(DMSetType(dm, DMPLEX)); 236 PetscCall(PetscObjectSetName((PetscObject)dm, "Mesh")); 237 PetscCall(DMSetFromOptions(dm)); 238 PetscCall(DMPlexDistributeSetDefault(dm, PETSC_FALSE)); 239 PetscCall(DMGetDimension(dm, &dim)); 240 { 241 DMLabel label; 242 IS is; 243 PetscCall(DMCreateLabel(dm, "boundary")); 244 PetscCall(DMGetLabel(dm, "boundary", &label)); 245 PetscCall(DMPlexMarkBoundaryFaces(dm, 1, label)); 246 if (run_type == 0) { 247 PetscCall(DMGetStratumIS(dm, "boundary", 1, &is)); 248 PetscCall(DMCreateLabel(dm, "Faces")); 249 if (is) { 250 PetscInt d, f, Nf; 251 const PetscInt *faces; 252 PetscInt csize; 253 PetscSection cs; 254 Vec coordinates; 255 DM cdm; 256 PetscCall(ISGetLocalSize(is, &Nf)); 257 PetscCall(ISGetIndices(is, &faces)); 258 PetscCall(DMGetCoordinatesLocal(dm, &coordinates)); 259 PetscCall(DMGetCoordinateDM(dm, &cdm)); 260 PetscCall(DMGetLocalSection(cdm, &cs)); 261 /* Check for each boundary face if any component of its centroid is either 0.0 or 1.0 */ 262 for (f = 0; f < Nf; ++f) { 263 PetscReal faceCoord; 264 PetscInt b, v; 265 PetscScalar *coords = NULL; 266 PetscInt Nv; 267 PetscCall(DMPlexVecGetClosure(cdm, cs, coordinates, faces[f], &csize, &coords)); 268 Nv = csize / dim; /* Calculate mean coordinate vector */ 269 for (d = 0; d < dim; ++d) { 270 faceCoord = 0.0; 271 for (v = 0; v < Nv; ++v) faceCoord += PetscRealPart(coords[v * dim + d]); 272 faceCoord /= Nv; 273 for (b = 0; b < 2; ++b) { 274 if (PetscAbs(faceCoord - b) < PETSC_SMALL) { /* domain have not been set yet, still [0,1]^3 */ 275 PetscCall(DMSetLabelValue(dm, "Faces", faces[f], d * 2 + b + 1)); 276 } 277 } 278 } 279 PetscCall(DMPlexVecRestoreClosure(cdm, cs, coordinates, faces[f], &csize, &coords)); 280 } 281 PetscCall(ISRestoreIndices(is, &faces)); 282 } 283 PetscCall(ISDestroy(&is)); 284 PetscCall(DMGetLabel(dm, "Faces", &label)); 285 PetscCall(DMPlexLabelComplete(dm, label)); 286 } 287 } 288 PetscCall(PetscLogStagePop()); 289 for (iter = 0; iter < max_conv_its; iter++) { 290 PetscCall(PetscLogStagePush(stage[16])); 291 /* snes */ 292 PetscCall(SNESCreate(comm, &snes)); 293 PetscCall(SNESSetDM(snes, dm)); 294 PetscCall(DMViewFromOptions(dm, NULL, "-dm_view")); 295 /* fem */ 296 { 297 const PetscInt components[] = {0, 1, 2}; 298 const PetscInt Nfid = 1, Npid = 1; 299 PetscInt fid[] = {1}; /* The fixed faces (x=0) */ 300 const PetscInt pid[] = {2}; /* The faces with loading (x=L_x) */ 301 PetscFE fe; 302 PetscDS prob; 303 DMLabel label; 304 305 if (run_type == 2 || run_type == 3) Ncomp = 1; 306 else Ncomp = dim; 307 PetscCall(PetscFECreateDefault(PETSC_COMM_SELF, dim, Ncomp, PETSC_FALSE, NULL, PETSC_DECIDE, &fe)); 308 PetscCall(PetscObjectSetName((PetscObject)fe, "deformation")); 309 /* FEM prob */ 310 PetscCall(DMSetField(dm, 0, NULL, (PetscObject)fe)); 311 PetscCall(DMCreateDS(dm)); 312 PetscCall(DMGetDS(dm, &prob)); 313 /* setup problem */ 314 if (run_type == 1) { // elast 315 PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_uu_3d)); 316 PetscCall(PetscDSSetResidual(prob, 0, f0_u_x4, f1_u_3d)); 317 } else if (run_type == 0) { //twisted not maintained 318 PetscWeakForm wf; 319 PetscInt bd, i; 320 PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_uu_3d_alpha)); 321 PetscCall(PetscDSSetResidual(prob, 0, f0_u, f1_u_3d_alpha)); 322 PetscCall(DMGetLabel(dm, "Faces", &label)); 323 PetscCall(DMAddBoundary(dm, DM_BC_NATURAL, "traction", label, Npid, pid, 0, Ncomp, components, NULL, NULL, NULL, &bd)); 324 PetscCall(PetscDSGetBoundary(prob, bd, &wf, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)); 325 for (i = 0; i < Npid; ++i) PetscCall(PetscWeakFormSetIndexBdResidual(wf, label, pid[i], 0, 0, 0, f0_bd_u_3d, 0, f1_bd_u)); 326 } else if (run_type == 2) { // Laplacian 327 PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_lap)); 328 PetscCall(PetscDSSetResidual(prob, 0, f0_u_x4, f1_u_lap)); 329 } else if (run_type == 3) { // soft core Laplacian 330 PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_lap_alpha)); 331 PetscCall(PetscDSSetResidual(prob, 0, f0_u_x4, f1_u_lap)); 332 } 333 /* bcs */ 334 if (run_type != 0) { 335 PetscInt id = 1; 336 PetscCall(DMGetLabel(dm, "boundary", &label)); 337 PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))zero, NULL, NULL, NULL)); 338 } else { 339 PetscCall(DMGetLabel(dm, "Faces", &label)); 340 PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "fixed", label, Nfid, fid, 0, Ncomp, components, (void (*)(void))zero, NULL, NULL, NULL)); 341 } 342 PetscCall(PetscFEDestroy(&fe)); 343 } 344 /* vecs & mat */ 345 PetscCall(DMCreateGlobalVector(dm, &xx)); 346 PetscCall(VecDuplicate(xx, &bb)); 347 PetscCall(PetscObjectSetName((PetscObject)bb, "b")); 348 PetscCall(PetscObjectSetName((PetscObject)xx, "u")); 349 PetscCall(DMCreateMatrix(dm, &Amat)); 350 PetscCall(MatSetOption(Amat, MAT_SYMMETRIC, PETSC_TRUE)); /* Some matrix kernels can take advantage of symmetry if we set this. */ 351 PetscCall(MatSetOption(Amat, MAT_SYMMETRY_ETERNAL, PETSC_TRUE)); /* Inform PETSc that Amat is always symmetric, so info set above isn't lost. */ 352 PetscCall(MatSetBlockSize(Amat, Ncomp)); 353 PetscCall(MatSetOption(Amat, MAT_SPD, PETSC_TRUE)); 354 PetscCall(MatSetOption(Amat, MAT_SPD_ETERNAL, PETSC_TRUE)); 355 PetscCall(VecGetSize(bb, &N)); 356 sizes[iter] = N; 357 PetscCall(PetscInfo(snes, "%" PetscInt_FMT " global equations, %" PetscInt_FMT " vertices\n", N, N / dim)); 358 if ((use_nearnullspace || attach_nearnullspace) && N / dim > 1 && Ncomp > 1) { 359 /* Set up the near null space (a.k.a. rigid body modes) that will be used by the multigrid preconditioner */ 360 DM subdm; 361 MatNullSpace nearNullSpace; 362 PetscInt fields = 0; 363 PetscObject deformation; 364 PetscCall(DMCreateSubDM(dm, 1, &fields, NULL, &subdm)); 365 PetscCall(DMPlexCreateRigidBody(subdm, 0, &nearNullSpace)); 366 PetscCall(DMGetField(dm, 0, NULL, &deformation)); 367 PetscCall(PetscObjectCompose(deformation, "nearnullspace", (PetscObject)nearNullSpace)); 368 PetscCall(DMDestroy(&subdm)); 369 if (attach_nearnullspace) PetscCall(MatSetNearNullSpace(Amat, nearNullSpace)); 370 PetscCall(MatNullSpaceDestroy(&nearNullSpace)); /* created by DM and destroyed by Mat */ 371 } 372 PetscCall(DMPlexSetSNESLocalFEM(dm, PETSC_FALSE, NULL)); 373 PetscCall(SNESSetJacobian(snes, Amat, Amat, NULL, NULL)); 374 PetscCall(SNESSetFromOptions(snes)); 375 PetscCall(DMSetUp(dm)); 376 PetscCall(PetscLogStagePop()); 377 PetscCall(PetscLogStagePush(stage[16])); 378 /* ksp */ 379 PetscCall(SNESGetKSP(snes, &ksp)); 380 PetscCall(KSPSetComputeSingularValues(ksp, PETSC_TRUE)); 381 if (!use_nearnullspace) { 382 PC pc; 383 PetscCall(KSPGetPC(ksp, &pc)); 384 PetscCall(PCGAMGASMSetHEM(pc, 3)); // code coverage 385 } 386 /* test BCs */ 387 PetscCall(VecZeroEntries(xx)); 388 if (test_nonzero_cols) { 389 if (rank == 0) PetscCall(VecSetValue(xx, 0, 1.0, INSERT_VALUES)); 390 PetscCall(VecAssemblyBegin(xx)); 391 PetscCall(VecAssemblyEnd(xx)); 392 } 393 PetscCall(VecZeroEntries(bb)); 394 PetscCall(VecGetSize(bb, &i)); 395 sizes[iter] = i; 396 PetscCall(PetscInfo(snes, "%" PetscInt_FMT " equations in vector, %" PetscInt_FMT " vertices\n", i, i / dim)); 397 PetscCall(PetscLogStagePop()); 398 /* solve */ 399 PetscCall(SNESComputeJacobian(snes, xx, Amat, Amat)); 400 PetscCall(MatViewFromOptions(Amat, NULL, "-my_mat_view")); 401 PetscCall(PetscLogStagePush(stage[iter])); 402 PetscCall(SNESSolve(snes, bb, xx)); 403 PetscCall(PetscLogStagePop()); 404 PetscCall(VecNorm(xx, NORM_INFINITY, &mdisp[iter])); 405 { 406 PetscViewer viewer = NULL; 407 PetscViewerFormat fmt; 408 PetscCall(PetscOptionsGetViewer(comm, NULL, "", "-vec_view", &viewer, &fmt, &flg)); 409 if (flg) { 410 PetscCall(PetscViewerPushFormat(viewer, fmt)); 411 PetscCall(VecView(xx, viewer)); 412 PetscCall(VecView(bb, viewer)); 413 PetscCall(PetscViewerPopFormat(viewer)); 414 } 415 PetscCall(PetscOptionsRestoreViewer(&viewer)); 416 } 417 /* Free work space */ 418 PetscCall(SNESDestroy(&snes)); 419 PetscCall(VecDestroy(&xx)); 420 PetscCall(VecDestroy(&bb)); 421 PetscCall(MatDestroy(&Amat)); 422 if (iter + 1 < max_conv_its) { 423 DM newdm; 424 PetscCall(DMViewFromOptions(dm, NULL, "-my_dm_view")); 425 PetscCall(DMRefine(dm, comm, &newdm)); 426 if (rank == -1) { 427 PetscDS prob; 428 PetscCall(DMGetDS(dm, &prob)); 429 PetscCall(PetscDSViewFromOptions(prob, NULL, "-ds_view")); 430 PetscCall(DMGetDS(newdm, &prob)); 431 PetscCall(PetscDSViewFromOptions(prob, NULL, "-ds_view")); 432 } 433 PetscCall(DMDestroy(&dm)); 434 dm = newdm; 435 PetscCall(PetscObjectSetName((PetscObject)dm, "Mesh")); 436 PetscCall(DMViewFromOptions(dm, NULL, "-my_dm_view")); 437 PetscCall(DMSetFromOptions(dm)); 438 } 439 } 440 PetscCall(DMDestroy(&dm)); 441 if (run_type == 1) err[0] = 5.97537599375e+01 - mdisp[0]; /* error with what I think is the exact solution */ 442 else if (run_type == 0) err[0] = 0; 443 else if (run_type == 2) err[0] = 3.527795e+01 - mdisp[0]; 444 else err[0] = 0; 445 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "[%d] %d) N=%12" PetscInt_FMT ", max displ=%9.7e, error=%4.3e\n", rank, 0, sizes[0], (double)mdisp[0], (double)err[0])); 446 for (iter = 1; iter < max_conv_its; iter++) { 447 if (run_type == 1) err[iter] = 5.97537599375e+01 - mdisp[iter]; 448 else if (run_type == 0) err[iter] = 0; 449 else if (run_type == 2) err[iter] = 3.527795e+01 - mdisp[iter]; 450 else err[iter] = 0; 451 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "[%d] %" PetscInt_FMT ") N=%12" PetscInt_FMT ", max displ=%9.7e, disp diff=%9.2e, error=%4.3e, rate=%3.2g\n", rank, iter, sizes[iter], (double)mdisp[iter], (double)(mdisp[iter] - mdisp[iter - 1]), (double)err[iter], (double)(PetscLogReal(PetscAbs(err[iter - 1] / err[iter])) / PetscLogReal(2.)))); 452 } 453 454 PetscCall(PetscFinalize()); 455 return 0; 456 } 457 458 /*TEST 459 460 testset: 461 nsize: 4 462 requires: !single 463 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -petscspace_degree 2 -snes_max_it 1 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-10 -ksp_norm_type unpreconditioned -pc_type gamg -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.001 -ksp_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -my_dm_view -snes_lag_jacobian -2 -snes_type ksponly -pc_gamg_mis_k_minimum_degree_ordering true -pc_gamg_low_memory_threshold_filter 464 timeoutfactor: 2 465 test: 466 suffix: 0 467 args: -run_type 1 -max_conv_its 3 -mat_coarsen_type hem -mat_coarsen_max_it 5 -pc_gamg_asm_hem_aggs 4 -ksp_rtol 1.e-6 468 filter: sed -e "s/Linear solve converged due to CONVERGED_RTOL iterations 7/Linear solve converged due to CONVERGED_RTOL iterations 8/g" 469 test: 470 suffix: 1 471 filter: grep -v HERMITIAN 472 args: -run_type 2 -max_conv_its 2 -use_mat_nearnullspace false -snes_view 473 474 test: 475 nsize: 1 476 requires: !single 477 suffix: 2 478 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 1 -ksp_type cg -ksp_norm_type unpreconditioned -pc_type gamg -pc_gamg_coarse_eq_limit 10 -pc_gamg_aggressive_coarsening 1 -ksp_converged_reason -use_mat_nearnullspace true -my_dm_view -snes_type ksponly 479 timeoutfactor: 2 480 481 # HYPRE PtAP broken with complex numbers 482 test: 483 suffix: hypre 484 requires: hypre !single !complex !defined(PETSC_HAVE_HYPRE_DEVICE) 485 nsize: 4 486 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -pc_type hypre -pc_hypre_type boomeramg -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -ksp_converged_reason -use_mat_nearnullspace true -petscpartitioner_type simple 487 488 test: 489 suffix: ml 490 requires: ml !single 491 nsize: 4 492 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_converged_reason -ksp_rtol 1.e-8 -pc_type ml -mg_levels_ksp_type chebyshev -mg_levels_ksp_max_it 3 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type sor -petscpartitioner_type simple -use_mat_nearnullspace 493 494 test: 495 suffix: hpddm 496 requires: hpddm slepc !single defined(PETSC_HAVE_DYNAMIC_LIBRARIES) defined(PETSC_USE_SHARED_LIBRARIES) 497 nsize: 4 498 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fgmres -ksp_monitor_short -ksp_converged_reason -ksp_rtol 1.e-8 -pc_type hpddm -petscpartitioner_type simple -pc_hpddm_levels_1_sub_pc_type lu -pc_hpddm_levels_1_eps_nev 6 -pc_hpddm_coarse_p 1 -pc_hpddm_coarse_pc_type svd 499 500 test: 501 suffix: repart 502 nsize: 4 503 requires: parmetis !single 504 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 4 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-2 -ksp_norm_type unpreconditioned -snes_rtol 1.e-3 -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type jacobi -pc_gamg_mat_partitioning_type parmetis -pc_gamg_repartition true -pc_gamg_process_eq_limit 20 -pc_gamg_coarse_eq_limit 10 -ksp_converged_reason -pc_gamg_reuse_interpolation true -petscpartitioner_type simple 505 506 test: 507 suffix: bddc 508 nsize: 4 509 requires: !single 510 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -matis_localmat_type {{sbaij baij aij}} -pc_type bddc 511 512 testset: 513 nsize: 4 514 requires: !single 515 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-10 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -matis_localmat_type aij -pc_type bddc -attach_mat_nearnullspace {{0 1}separate output} 516 test: 517 suffix: bddc_approx_gamg 518 args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -approximate -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop -prefix_push pc_bddc_neumann_ -approximate -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop 519 # HYPRE PtAP broken with complex numbers 520 test: 521 requires: hypre !complex !defined(PETSC_HAVE_HYPRE_DEVICE) 522 suffix: bddc_approx_hypre 523 args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -pc_type hypre -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_strong_threshold 0.75 -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -prefix_pop -prefix_push pc_bddc_neumann_ -pc_type hypre -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_strong_threshold 0.75 -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -prefix_pop 524 test: 525 requires: ml 526 suffix: bddc_approx_ml 527 args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -approximate -pc_type ml -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop -prefix_push pc_bddc_neumann_ -approximate -pc_type ml -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop 528 529 test: 530 suffix: fetidp 531 nsize: 4 532 requires: !single 533 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fetidp -fetidp_ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -matis_localmat_type {{sbaij baij aij}} 534 535 test: 536 suffix: bddc_elast 537 nsize: 4 538 requires: !single 539 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -matis_localmat_type sbaij -pc_type bddc -pc_bddc_monolithic -attach_mat_nearnullspace 540 541 test: 542 suffix: fetidp_elast 543 nsize: 4 544 requires: !single 545 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fetidp -fetidp_ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -matis_localmat_type sbaij -fetidp_bddc_pc_bddc_monolithic -attach_mat_nearnullspace 546 547 test: 548 suffix: gdsw 549 nsize: 4 550 requires: !single 551 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -dm_mat_type is -attach_mat_nearnullspace \ 552 -pc_type mg -pc_mg_galerkin -pc_mg_adapt_interp_coarse_space gdsw -pc_mg_levels 2 -mg_levels_pc_type bjacobi -mg_levels_sub_pc_type icc 553 554 testset: 555 nsize: 4 556 requires: !single 557 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-10 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 0 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type jacobi -ksp_monitor_short -ksp_converged_reason -snes_monitor_short -dm_view -petscpartitioner_type simple -pc_gamg_process_eq_limit 20 -pc_gamg_coarse_eq_limit 40 558 output_file: output/ex56_cuda.out 559 560 test: 561 suffix: cuda 562 requires: cuda 563 args: -dm_mat_type aijcusparse -dm_vec_type cuda 564 565 test: 566 suffix: hip 567 requires: hip 568 args: -dm_mat_type aijhipsparse -dm_vec_type hip 569 570 test: 571 suffix: viennacl 572 requires: viennacl 573 args: -dm_mat_type aijviennacl -dm_vec_type viennacl 574 575 test: 576 suffix: kokkos 577 requires: kokkos_kernels 578 args: -dm_mat_type aijkokkos -dm_vec_type kokkos 579 # Don't run AIJMKL caes with complex scalars because of convergence issues. 580 # Note that we need to test both single and multiple MPI rank cases, because these use different sparse MKL routines to implement the PtAP operation. 581 test: 582 suffix: seqaijmkl 583 nsize: 1 584 requires: defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) !single !complex 585 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-11 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 1000 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -ksp_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -mat_block_size 3 -dm_view -mat_seqaij_type seqaijmkl 586 timeoutfactor: 2 587 588 test: 589 suffix: mpiaijmkl 590 nsize: 4 591 requires: defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) !single !complex 592 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_lower 0,0,0 -dm_plex_box_upper 1,1,1 -run_type 1 -dm_plex_box_faces 2,2,1 -petscpartitioner_simple_process_grid 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-11 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 1000 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -ksp_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -mat_block_size 3 -dm_view -mat_seqaij_type seqaijmkl 593 timeoutfactor: 2 594 595 TEST*/ 596