1 /* Portions of this code are under: 2 Copyright (C) 2022 Advanced Micro Devices, Inc. All rights reserved. 3 */ 4 static char help[] = "3D, tensor hexahedra (Q1-K), displacement finite element formulation\n\ 5 of linear elasticity. E=1.0, nu=1/3.\n\ 6 Unit cube domain with Dirichlet boundary\n\n"; 7 8 #include <petscdmplex.h> 9 #include <petscsnes.h> 10 #include <petscds.h> 11 #include <petscdmforest.h> 12 13 static PetscReal s_soft_alpha = 1.e-3; 14 static PetscReal s_mu = 0.4; 15 static PetscReal s_lambda = 0.4; 16 17 static void f0_bd_u_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], const PetscReal n[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) 18 { 19 f0[0] = 1; /* x direction pull */ 20 f0[1] = -x[2]; /* add a twist around x-axis */ 21 f0[2] = x[1]; 22 } 23 24 static void f1_bd_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], const PetscReal n[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[]) 25 { 26 const PetscInt Ncomp = dim; 27 PetscInt comp, d; 28 for (comp = 0; comp < Ncomp; ++comp) { 29 for (d = 0; d < dim; ++d) f1[comp * dim + d] = 0.0; 30 } 31 } 32 33 /* gradU[comp*dim+d] = {u_x, u_y} or {u_x, u_y, u_z} */ 34 static void f1_u_3d_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[]) 35 { 36 PetscReal trace, mu = s_mu, lambda = s_lambda, rad; 37 PetscInt i, j; 38 for (i = 0, rad = 0.; i < dim; i++) { 39 PetscReal t = x[i]; 40 rad += t * t; 41 } 42 rad = PetscSqrtReal(rad); 43 if (rad > 0.25) { 44 mu *= s_soft_alpha; 45 lambda *= s_soft_alpha; /* we could keep the bulk the same like rubberish */ 46 } 47 for (i = 0, trace = 0; i < dim; ++i) trace += PetscRealPart(u_x[i * dim + i]); 48 for (i = 0; i < dim; ++i) { 49 for (j = 0; j < dim; ++j) f1[i * dim + j] = mu * (u_x[i * dim + j] + u_x[j * dim + i]); 50 f1[i * dim + i] += lambda * trace; 51 } 52 } 53 54 /* gradU[comp*dim+d] = {u_x, u_y} or {u_x, u_y, u_z} */ 55 static void f1_u_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[]) 56 { 57 PetscReal trace, mu = s_mu, lambda = s_lambda; 58 PetscInt i, j; 59 for (i = 0, trace = 0; i < dim; ++i) trace += PetscRealPart(u_x[i * dim + i]); 60 for (i = 0; i < dim; ++i) { 61 for (j = 0; j < dim; ++j) f1[i * dim + j] = mu * (u_x[i * dim + j] + u_x[j * dim + i]); 62 f1[i * dim + i] += lambda * trace; 63 } 64 } 65 66 /* 3D elasticity */ 67 #define IDX(ii, jj, kk, ll) (27 * ii + 9 * jj + 3 * kk + ll) 68 69 void g3_uu_3d_private(PetscScalar g3[], const PetscReal mu, const PetscReal lambda) 70 { 71 if (1) { 72 g3[0] += lambda; 73 g3[0] += mu; 74 g3[0] += mu; 75 g3[4] += lambda; 76 g3[8] += lambda; 77 g3[10] += mu; 78 g3[12] += mu; 79 g3[20] += mu; 80 g3[24] += mu; 81 g3[28] += mu; 82 g3[30] += mu; 83 g3[36] += lambda; 84 g3[40] += lambda; 85 g3[40] += mu; 86 g3[40] += mu; 87 g3[44] += lambda; 88 g3[50] += mu; 89 g3[52] += mu; 90 g3[56] += mu; 91 g3[60] += mu; 92 g3[68] += mu; 93 g3[70] += mu; 94 g3[72] += lambda; 95 g3[76] += lambda; 96 g3[80] += lambda; 97 g3[80] += mu; 98 g3[80] += mu; 99 } else { 100 int i, j, k, l; 101 static int cc = -1; 102 cc++; 103 for (i = 0; i < 3; ++i) { 104 for (j = 0; j < 3; ++j) { 105 for (k = 0; k < 3; ++k) { 106 for (l = 0; l < 3; ++l) { 107 if (k == l && i == j) g3[IDX(i, j, k, l)] += lambda; 108 if (i == k && j == l) g3[IDX(i, j, k, l)] += mu; 109 if (i == l && j == k) g3[IDX(i, j, k, l)] += mu; 110 if (k == l && i == j && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += lambda;\n", IDX(i, j, k, l)); 111 if (i == k && j == l && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += mu;\n", IDX(i, j, k, l)); 112 if (i == l && j == k && !cc) (void)PetscPrintf(PETSC_COMM_WORLD, "g3[%d] += mu;\n", IDX(i, j, k, l)); 113 } 114 } 115 } 116 } 117 } 118 } 119 120 static void g3_uu_3d_alpha(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[]) 121 { 122 PetscReal mu = s_mu, lambda = s_lambda, rad; 123 PetscInt i; 124 for (i = 0, rad = 0.; i < dim; i++) { 125 PetscReal t = x[i]; 126 rad += t * t; 127 } 128 rad = PetscSqrtReal(rad); 129 if (rad > 0.25) { 130 mu *= s_soft_alpha; 131 lambda *= s_soft_alpha; /* we could keep the bulk the same like rubberish */ 132 } 133 g3_uu_3d_private(g3, mu, lambda); 134 } 135 136 static void g3_uu_3d(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[]) 137 { 138 g3_uu_3d_private(g3, s_mu, s_lambda); 139 } 140 141 static void f0_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) 142 { 143 const PetscInt Ncomp = dim; 144 PetscInt comp; 145 146 for (comp = 0; comp < Ncomp; ++comp) f0[comp] = 0.0; 147 } 148 149 /* PI_i (x_i^4 - x_i^2) */ 150 static void f0_u_x4(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) 151 { 152 const PetscInt Ncomp = dim; 153 PetscInt comp, i; 154 155 for (comp = 0; comp < Ncomp; ++comp) { 156 f0[comp] = 1e5; 157 for (i = 0; i < Ncomp; ++i) { f0[comp] *= /* (comp+1)* */ (x[i] * x[i] * x[i] * x[i] - x[i] * x[i]); /* assumes (0,1]^D domain */ } 158 } 159 } 160 161 PetscErrorCode zero(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nf, PetscScalar *u, void *ctx) 162 { 163 const PetscInt Ncomp = dim; 164 PetscInt comp; 165 166 for (comp = 0; comp < Ncomp; ++comp) u[comp] = 0; 167 return 0; 168 } 169 170 int main(int argc, char **args) 171 { 172 Mat Amat; 173 SNES snes; 174 KSP ksp; 175 MPI_Comm comm; 176 PetscMPIInt rank; 177 #if defined(PETSC_USE_LOG) 178 PetscLogStage stage[17]; 179 #endif 180 PetscBool test_nonzero_cols = PETSC_FALSE, use_nearnullspace = PETSC_TRUE, attach_nearnullspace = PETSC_FALSE; 181 Vec xx, bb; 182 PetscInt iter, i, N, dim = 3, cells[3] = {1, 1, 1}, max_conv_its, local_sizes[7], run_type = 1; 183 DM dm, distdm, basedm; 184 PetscBool flg; 185 char convType[256]; 186 PetscReal Lx, mdisp[10], err[10]; 187 const char *const options[10] = {"-ex56_dm_refine 0", "-ex56_dm_refine 1", "-ex56_dm_refine 2", "-ex56_dm_refine 3", "-ex56_dm_refine 4", "-ex56_dm_refine 5", "-ex56_dm_refine 6", "-ex56_dm_refine 7", "-ex56_dm_refine 8", "-ex56_dm_refine 9"}; 188 PetscFunctionBeginUser; 189 PetscFunctionBeginUser; 190 PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 191 comm = PETSC_COMM_WORLD; 192 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 193 /* options */ 194 PetscOptionsBegin(comm, NULL, "3D bilinear Q1 elasticity options", ""); 195 { 196 i = 3; 197 PetscCall(PetscOptionsIntArray("-cells", "Number of (flux tube) processor in each dimension", "ex56.c", cells, &i, NULL)); 198 199 Lx = 1.; /* or ne for rod */ 200 max_conv_its = 3; 201 PetscCall(PetscOptionsInt("-max_conv_its", "Number of iterations in convergence study", "", max_conv_its, &max_conv_its, NULL)); 202 PetscCheck(max_conv_its > 0 && max_conv_its < 7, PETSC_COMM_WORLD, PETSC_ERR_USER, "Bad number of iterations for convergence test (%" PetscInt_FMT ")", max_conv_its); 203 PetscCall(PetscOptionsReal("-lx", "Length of domain", "", Lx, &Lx, NULL)); 204 PetscCall(PetscOptionsReal("-alpha", "material coefficient inside circle", "", s_soft_alpha, &s_soft_alpha, NULL)); 205 PetscCall(PetscOptionsBool("-test_nonzero_cols", "nonzero test", "", test_nonzero_cols, &test_nonzero_cols, NULL)); 206 PetscCall(PetscOptionsBool("-use_mat_nearnullspace", "MatNearNullSpace API test", "", use_nearnullspace, &use_nearnullspace, NULL)); 207 PetscCall(PetscOptionsBool("-attach_mat_nearnullspace", "MatNearNullSpace API test (via MatSetNearNullSpace)", "", attach_nearnullspace, &attach_nearnullspace, NULL)); 208 PetscCall(PetscOptionsInt("-run_type", "0: twisting load on cantalever, 1: 3rd order accurate convergence test", "", run_type, &run_type, NULL)); 209 } 210 PetscOptionsEnd(); 211 PetscCall(PetscLogStageRegister("Mesh Setup", &stage[16])); 212 for (iter = 0; iter < max_conv_its; iter++) { 213 char str[] = "Solve 0"; 214 str[6] += iter; 215 PetscCall(PetscLogStageRegister(str, &stage[iter])); 216 } 217 /* create DM, Plex calls DMSetup */ 218 PetscCall(PetscLogStagePush(stage[16])); 219 PetscCall(DMPlexCreateBoxMesh(comm, dim, PETSC_FALSE, cells, NULL, NULL, NULL, PETSC_TRUE, &dm)); 220 { 221 DMLabel label; 222 IS is; 223 PetscCall(DMCreateLabel(dm, "boundary")); 224 PetscCall(DMGetLabel(dm, "boundary", &label)); 225 PetscCall(DMPlexMarkBoundaryFaces(dm, 1, label)); 226 if (run_type == 0) { 227 PetscCall(DMGetStratumIS(dm, "boundary", 1, &is)); 228 PetscCall(DMCreateLabel(dm, "Faces")); 229 if (is) { 230 PetscInt d, f, Nf; 231 const PetscInt *faces; 232 PetscInt csize; 233 PetscSection cs; 234 Vec coordinates; 235 DM cdm; 236 PetscCall(ISGetLocalSize(is, &Nf)); 237 PetscCall(ISGetIndices(is, &faces)); 238 PetscCall(DMGetCoordinatesLocal(dm, &coordinates)); 239 PetscCall(DMGetCoordinateDM(dm, &cdm)); 240 PetscCall(DMGetLocalSection(cdm, &cs)); 241 /* Check for each boundary face if any component of its centroid is either 0.0 or 1.0 */ 242 for (f = 0; f < Nf; ++f) { 243 PetscReal faceCoord; 244 PetscInt b, v; 245 PetscScalar *coords = NULL; 246 PetscInt Nv; 247 PetscCall(DMPlexVecGetClosure(cdm, cs, coordinates, faces[f], &csize, &coords)); 248 Nv = csize / dim; /* Calculate mean coordinate vector */ 249 for (d = 0; d < dim; ++d) { 250 faceCoord = 0.0; 251 for (v = 0; v < Nv; ++v) faceCoord += PetscRealPart(coords[v * dim + d]); 252 faceCoord /= Nv; 253 for (b = 0; b < 2; ++b) { 254 if (PetscAbs(faceCoord - b) < PETSC_SMALL) { /* domain have not been set yet, still [0,1]^3 */ 255 PetscCall(DMSetLabelValue(dm, "Faces", faces[f], d * 2 + b + 1)); 256 } 257 } 258 } 259 PetscCall(DMPlexVecRestoreClosure(cdm, cs, coordinates, faces[f], &csize, &coords)); 260 } 261 PetscCall(ISRestoreIndices(is, &faces)); 262 } 263 PetscCall(ISDestroy(&is)); 264 PetscCall(DMGetLabel(dm, "Faces", &label)); 265 PetscCall(DMPlexLabelComplete(dm, label)); 266 } 267 } 268 { 269 PetscInt dimEmbed, i; 270 PetscInt nCoords; 271 PetscScalar *coords, bounds[] = { 272 0, 1, -.5, .5, -.5, .5, 273 }; /* x_min,x_max,y_min,y_max */ 274 Vec coordinates; 275 bounds[1] = Lx; 276 if (run_type == 1) { 277 for (i = 0; i < 2 * dim; i++) bounds[i] = (i % 2) ? 1 : 0; 278 } 279 PetscCall(DMGetCoordinatesLocal(dm, &coordinates)); 280 PetscCall(DMGetCoordinateDim(dm, &dimEmbed)); 281 PetscCheck(dimEmbed == dim, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "dimEmbed != dim %" PetscInt_FMT, dimEmbed); 282 PetscCall(VecGetLocalSize(coordinates, &nCoords)); 283 PetscCheck((nCoords % dimEmbed) == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Coordinate vector the wrong size"); 284 PetscCall(VecGetArray(coordinates, &coords)); 285 for (i = 0; i < nCoords; i += dimEmbed) { 286 PetscInt j; 287 PetscScalar *coord = &coords[i]; 288 for (j = 0; j < dimEmbed; j++) coord[j] = bounds[2 * j] + coord[j] * (bounds[2 * j + 1] - bounds[2 * j]); 289 } 290 PetscCall(VecRestoreArray(coordinates, &coords)); 291 PetscCall(DMSetCoordinatesLocal(dm, coordinates)); 292 } 293 294 /* convert to p4est, and distribute */ 295 PetscOptionsBegin(comm, "", "Mesh conversion options", "DMPLEX"); 296 PetscCall(PetscOptionsFList("-dm_type", "Convert DMPlex to another format (should not be Plex!)", "ex56.c", DMList, DMPLEX, convType, 256, &flg)); 297 PetscOptionsEnd(); 298 if (flg) { 299 DM newdm; 300 PetscCall(DMConvert(dm, convType, &newdm)); 301 if (newdm) { 302 const char *prefix; 303 PetscBool isForest; 304 PetscCall(PetscObjectGetOptionsPrefix((PetscObject)dm, &prefix)); 305 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)newdm, prefix)); 306 PetscCall(DMIsForest(newdm, &isForest)); 307 PetscCheck(isForest, PETSC_COMM_WORLD, PETSC_ERR_USER, "Converted to non Forest?"); 308 PetscCall(DMDestroy(&dm)); 309 dm = newdm; 310 } else SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_USER, "Convert failed?"); 311 } else { 312 PetscPartitioner part; 313 /* Plex Distribute mesh over processes */ 314 PetscCall(DMPlexGetPartitioner(dm, &part)); 315 PetscCall(PetscPartitionerSetFromOptions(part)); 316 PetscCall(DMPlexDistribute(dm, 0, NULL, &distdm)); 317 if (distdm) { 318 const char *prefix; 319 PetscCall(PetscObjectGetOptionsPrefix((PetscObject)dm, &prefix)); 320 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)distdm, prefix)); 321 PetscCall(DMDestroy(&dm)); 322 dm = distdm; 323 } 324 } 325 PetscCall(PetscLogStagePop()); 326 basedm = dm; 327 dm = NULL; 328 329 for (iter = 0; iter < max_conv_its; iter++) { 330 PetscCall(PetscLogStagePush(stage[16])); 331 /* make new DM */ 332 PetscCall(DMClone(basedm, &dm)); 333 PetscCall(PetscObjectSetOptionsPrefix((PetscObject)dm, "ex56_")); 334 PetscCall(PetscObjectSetName((PetscObject)dm, "Mesh")); 335 if (max_conv_its > 1) { 336 /* If max_conv_its == 1, then we are not doing a convergence study. */ 337 PetscCall(PetscOptionsInsertString(NULL, options[iter])); 338 } 339 PetscCall(DMSetFromOptions(dm)); /* refinement done here in Plex, p4est */ 340 /* snes */ 341 PetscCall(SNESCreate(comm, &snes)); 342 PetscCall(SNESSetDM(snes, dm)); 343 /* fem */ 344 { 345 const PetscInt Ncomp = dim; 346 const PetscInt components[] = {0, 1, 2}; 347 const PetscInt Nfid = 1, Npid = 1; 348 const PetscInt fid[] = {1}; /* The fixed faces (x=0) */ 349 const PetscInt pid[] = {2}; /* The faces with loading (x=L_x) */ 350 PetscFE fe; 351 PetscDS prob; 352 DMLabel label; 353 DM cdm = dm; 354 355 PetscCall(PetscFECreateDefault(PetscObjectComm((PetscObject)dm), dim, dim, PETSC_FALSE, NULL, PETSC_DECIDE, &fe)); /* elasticity */ 356 PetscCall(PetscObjectSetName((PetscObject)fe, "deformation")); 357 /* FEM prob */ 358 PetscCall(DMSetField(dm, 0, NULL, (PetscObject)fe)); 359 PetscCall(DMCreateDS(dm)); 360 PetscCall(DMGetDS(dm, &prob)); 361 /* setup problem */ 362 if (run_type == 1) { 363 PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_uu_3d)); 364 PetscCall(PetscDSSetResidual(prob, 0, f0_u_x4, f1_u_3d)); 365 } else { 366 PetscWeakForm wf; 367 PetscInt bd, i; 368 369 PetscCall(PetscDSSetJacobian(prob, 0, 0, NULL, NULL, NULL, g3_uu_3d_alpha)); 370 PetscCall(PetscDSSetResidual(prob, 0, f0_u, f1_u_3d_alpha)); 371 372 PetscCall(DMGetLabel(dm, "Faces", &label)); 373 PetscCall(DMAddBoundary(dm, DM_BC_NATURAL, "traction", label, Npid, pid, 0, Ncomp, components, NULL, NULL, NULL, &bd)); 374 PetscCall(PetscDSGetBoundary(prob, bd, &wf, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)); 375 for (i = 0; i < Npid; ++i) PetscCall(PetscWeakFormSetIndexBdResidual(wf, label, pid[i], 0, 0, 0, f0_bd_u_3d, 0, f1_bd_u)); 376 } 377 /* bcs */ 378 if (run_type == 1) { 379 PetscInt id = 1; 380 PetscCall(DMGetLabel(dm, "boundary", &label)); 381 PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))zero, NULL, NULL, NULL)); 382 } else { 383 PetscCall(DMGetLabel(dm, "Faces", &label)); 384 PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "fixed", label, Nfid, fid, 0, Ncomp, components, (void (*)(void))zero, NULL, NULL, NULL)); 385 } 386 while (cdm) { 387 PetscCall(DMCopyDisc(dm, cdm)); 388 PetscCall(DMGetCoarseDM(cdm, &cdm)); 389 } 390 PetscCall(PetscFEDestroy(&fe)); 391 } 392 /* vecs & mat */ 393 PetscCall(DMCreateGlobalVector(dm, &xx)); 394 PetscCall(VecDuplicate(xx, &bb)); 395 PetscCall(PetscObjectSetName((PetscObject)bb, "b")); 396 PetscCall(PetscObjectSetName((PetscObject)xx, "u")); 397 PetscCall(DMCreateMatrix(dm, &Amat)); 398 PetscCall(MatSetOption(Amat, MAT_SYMMETRIC, PETSC_TRUE)); /* Some matrix kernels can take advantage of symmetry if we set this. */ 399 PetscCall(MatSetOption(Amat, MAT_SYMMETRY_ETERNAL, PETSC_TRUE)); /* Inform PETSc that Amat is always symmetric, so info set above isn't lost. */ 400 PetscCall(MatSetBlockSize(Amat, 3)); 401 PetscCall(MatSetOption(Amat, MAT_SPD, PETSC_TRUE)); 402 PetscCall(MatSetOption(Amat, MAT_SPD_ETERNAL, PETSC_TRUE)); 403 PetscCall(VecGetSize(bb, &N)); 404 local_sizes[iter] = N; 405 PetscCall(PetscInfo(snes, "%" PetscInt_FMT " global equations, %" PetscInt_FMT " vertices\n", N, N / dim)); 406 if ((use_nearnullspace || attach_nearnullspace) && N / dim > 1) { 407 /* Set up the near null space (a.k.a. rigid body modes) that will be used by the multigrid preconditioner */ 408 DM subdm; 409 MatNullSpace nearNullSpace; 410 PetscInt fields = 0; 411 PetscObject deformation; 412 PetscCall(DMCreateSubDM(dm, 1, &fields, NULL, &subdm)); 413 PetscCall(DMPlexCreateRigidBody(subdm, 0, &nearNullSpace)); 414 PetscCall(DMGetField(dm, 0, NULL, &deformation)); 415 PetscCall(PetscObjectCompose(deformation, "nearnullspace", (PetscObject)nearNullSpace)); 416 PetscCall(DMDestroy(&subdm)); 417 if (attach_nearnullspace) PetscCall(MatSetNearNullSpace(Amat, nearNullSpace)); 418 PetscCall(MatNullSpaceDestroy(&nearNullSpace)); /* created by DM and destroyed by Mat */ 419 } 420 PetscCall(DMPlexSetSNESLocalFEM(dm, NULL, NULL, NULL)); 421 PetscCall(SNESSetJacobian(snes, Amat, Amat, NULL, NULL)); 422 PetscCall(SNESSetFromOptions(snes)); 423 PetscCall(DMSetUp(dm)); 424 PetscCall(PetscLogStagePop()); 425 PetscCall(PetscLogStagePush(stage[16])); 426 /* ksp */ 427 PetscCall(SNESGetKSP(snes, &ksp)); 428 PetscCall(KSPSetComputeSingularValues(ksp, PETSC_TRUE)); 429 /* test BCs */ 430 PetscCall(VecZeroEntries(xx)); 431 if (test_nonzero_cols) { 432 if (rank == 0) PetscCall(VecSetValue(xx, 0, 1.0, INSERT_VALUES)); 433 PetscCall(VecAssemblyBegin(xx)); 434 PetscCall(VecAssemblyEnd(xx)); 435 } 436 PetscCall(VecZeroEntries(bb)); 437 PetscCall(VecGetSize(bb, &i)); 438 local_sizes[iter] = i; 439 PetscCall(PetscInfo(snes, "%" PetscInt_FMT " equations in vector, %" PetscInt_FMT " vertices\n", i, i / dim)); 440 PetscCall(PetscLogStagePop()); 441 /* solve */ 442 PetscCall(PetscLogStagePush(stage[iter])); 443 PetscCall(SNESSolve(snes, bb, xx)); 444 PetscCall(PetscLogStagePop()); 445 PetscCall(VecNorm(xx, NORM_INFINITY, &mdisp[iter])); 446 PetscCall(DMViewFromOptions(dm, NULL, "-dm_view")); 447 { 448 PetscViewer viewer = NULL; 449 PetscViewerFormat fmt; 450 PetscCall(PetscOptionsGetViewer(comm, NULL, "ex56_", "-vec_view", &viewer, &fmt, &flg)); 451 if (flg) { 452 PetscCall(PetscViewerPushFormat(viewer, fmt)); 453 PetscCall(VecView(xx, viewer)); 454 PetscCall(VecView(bb, viewer)); 455 PetscCall(PetscViewerPopFormat(viewer)); 456 } 457 PetscCall(PetscViewerDestroy(&viewer)); 458 } 459 /* Free work space */ 460 PetscCall(DMDestroy(&dm)); 461 PetscCall(SNESDestroy(&snes)); 462 PetscCall(VecDestroy(&xx)); 463 PetscCall(VecDestroy(&bb)); 464 PetscCall(MatDestroy(&Amat)); 465 } 466 PetscCall(DMDestroy(&basedm)); 467 if (run_type == 1) err[0] = 59.975208 - mdisp[0]; /* error with what I think is the exact solution */ 468 else err[0] = 171.038 - mdisp[0]; 469 for (iter = 1; iter < max_conv_its; iter++) { 470 if (run_type == 1) err[iter] = 59.975208 - mdisp[iter]; 471 else err[iter] = 171.038 - mdisp[iter]; 472 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "[%d] %" PetscInt_FMT ") N=%12" PetscInt_FMT ", max displ=%9.7e, disp diff=%9.2e, error=%4.3e, rate=%3.2g\n", rank, iter, local_sizes[iter], (double)mdisp[iter], (double)(mdisp[iter] - mdisp[iter - 1]), (double)err[iter], (double)(PetscLogReal(err[iter - 1] / err[iter]) / PetscLogReal(2.)))); 473 } 474 475 PetscCall(PetscFinalize()); 476 return 0; 477 } 478 479 /*TEST 480 481 test: 482 suffix: 0 483 nsize: 4 484 requires: !single 485 args: -cells 2,2,1 -max_conv_its 2 -petscspace_degree 3 -snes_max_it 1 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-10 -ksp_norm_type unpreconditioned -pc_type gamg -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 0 -pc_gamg_threshold 0.001 -ksp_converged_reason -snes_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -ex56_dm_view -snes_lag_jacobian -2 -snes_type ksponly -use_gpu_aware_mpi true 486 timeoutfactor: 2 487 488 # HYPRE PtAP broken with complex numbers 489 test: 490 suffix: hypre 491 requires: hypre !single !complex !defined(PETSC_HAVE_HYPRE_DEVICE) 492 nsize: 4 493 args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -pc_type hypre -pc_hypre_type boomeramg -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -ksp_converged_reason -use_mat_nearnullspace true -petscpartitioner_type simple 494 495 test: 496 suffix: ml 497 requires: ml !single 498 nsize: 4 499 args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_converged_reason -ksp_rtol 1.e-8 -pc_type ml -mg_levels_ksp_type chebyshev -mg_levels_ksp_max_it 3 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type sor -petscpartitioner_type simple -use_mat_nearnullspace 500 501 test: 502 suffix: hpddm 503 requires: hpddm slepc !single defined(PETSC_HAVE_DYNAMIC_LIBRARIES) defined(PETSC_USE_SHARED_LIBRARIES) 504 nsize: 4 505 args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fgmres -ksp_monitor_short -ksp_converged_reason -ksp_rtol 1.e-8 -pc_type hpddm -petscpartitioner_type simple -pc_hpddm_levels_1_sub_pc_type lu -pc_hpddm_levels_1_eps_nev 6 -pc_hpddm_coarse_p 1 -pc_hpddm_coarse_pc_type svd 506 507 test: 508 suffix: repart 509 nsize: 4 510 requires: parmetis !single 511 args: -cells 8,2,2 -max_conv_its 1 -petscspace_degree 2 -snes_max_it 4 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-2 -ksp_norm_type unpreconditioned -snes_rtol 1.e-3 -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type jacobi -pc_gamg_mat_partitioning_type parmetis -pc_gamg_repartition true -snes_converged_reason -pc_gamg_process_eq_limit 20 -pc_gamg_coarse_eq_limit 10 -ksp_converged_reason -snes_converged_reason -pc_gamg_reuse_interpolation true 512 513 test: 514 suffix: bddc 515 nsize: 4 516 requires: !single 517 args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -matis_localmat_type {{sbaij baij aij}} -pc_type bddc 518 519 testset: 520 nsize: 4 521 requires: !single 522 args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-10 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -matis_localmat_type aij -pc_type bddc -attach_mat_nearnullspace {{0 1}separate output} 523 test: 524 suffix: bddc_approx_gamg 525 args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -approximate -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop -prefix_push pc_bddc_neumann_ -approximate -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop 526 # HYPRE PtAP broken with complex numbers 527 test: 528 requires: hypre !complex !defined(PETSC_HAVE_HYPRE_DEVICE) 529 suffix: bddc_approx_hypre 530 args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -pc_type hypre -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_strong_threshold 0.75 -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -prefix_pop -prefix_push pc_bddc_neumann_ -pc_type hypre -pc_hypre_boomeramg_no_CF true -pc_hypre_boomeramg_strong_threshold 0.75 -pc_hypre_boomeramg_agg_nl 1 -pc_hypre_boomeramg_coarsen_type HMIS -pc_hypre_boomeramg_interp_type ext+i -prefix_pop 531 test: 532 requires: ml 533 suffix: bddc_approx_ml 534 args: -pc_bddc_switch_static -prefix_push pc_bddc_dirichlet_ -approximate -pc_type ml -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop -prefix_push pc_bddc_neumann_ -approximate -pc_type ml -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -prefix_pop 535 536 test: 537 suffix: fetidp 538 nsize: 4 539 requires: !single 540 args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fetidp -fetidp_ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -matis_localmat_type {{sbaij baij aij}} 541 542 test: 543 suffix: bddc_elast 544 nsize: 4 545 requires: !single 546 args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -matis_localmat_type sbaij -pc_type bddc -pc_bddc_monolithic -attach_mat_nearnullspace 547 548 test: 549 suffix: fetidp_elast 550 nsize: 4 551 requires: !single 552 args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type fetidp -fetidp_ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -matis_localmat_type sbaij -fetidp_bddc_pc_bddc_monolithic -attach_mat_nearnullspace 553 554 test: 555 suffix: gdsw 556 nsize: 4 557 requires: !single 558 args: -cells 2,2,1 -max_conv_its 2 -lx 1. -alpha .01 -petscspace_degree 2 -ksp_type cg -ksp_monitor_short -ksp_rtol 1.e-8 -ksp_converged_reason -petscpartitioner_type simple -ex56_dm_mat_type is -attach_mat_nearnullspace \ 559 -pc_type mg -pc_mg_galerkin -pc_mg_adapt_interp_coarse_space gdsw -pc_mg_levels 2 -mg_levels_pc_type bjacobi -mg_levels_sub_pc_type icc 560 561 testset: 562 nsize: 4 563 requires: !single 564 args: -cells 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-10 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 10 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -use_mat_nearnullspace true -mg_levels_ksp_max_it 2 -mg_levels_ksp_type chebyshev -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.05 -mg_levels_pc_type jacobi -ksp_monitor_short -ksp_converged_reason -snes_converged_reason -snes_monitor_short -ex56_dm_view -petscpartitioner_type simple -pc_gamg_process_eq_limit 20 565 output_file: output/ex56_cuda.out 566 567 test: 568 suffix: cuda 569 requires: cuda 570 args: -ex56_dm_mat_type aijcusparse -ex56_dm_vec_type cuda 571 572 test: 573 suffix: hip 574 requires: hip 575 args: -ex56_dm_mat_type aijhipsparse -ex56_dm_vec_type hip 576 577 test: 578 suffix: viennacl 579 requires: viennacl 580 args: -ex56_dm_mat_type aijviennacl -ex56_dm_vec_type viennacl 581 582 test: 583 suffix: kokkos 584 requires: kokkos_kernels 585 args: -ex56_dm_mat_type aijkokkos -ex56_dm_vec_type kokkos 586 # Don't run AIJMKL caes with complex scalars because of convergence issues. 587 # Note that we need to test both single and multiple MPI rank cases, because these use different sparse MKL routines to implement the PtAP operation. 588 test: 589 suffix: seqaijmkl 590 nsize: 1 591 requires: defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) !single !complex 592 args: -cells 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-11 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 1000 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -ksp_converged_reason -snes_monitor_short -ksp_monitor_short -snes_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -mat_block_size 3 -ex56_dm_view -run_type 1 -mat_seqaij_type seqaijmkl 593 timeoutfactor: 2 594 595 test: 596 suffix: mpiaijmkl 597 nsize: 2 598 requires: defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) !single !complex 599 args: -cells 2,2,1 -max_conv_its 2 -petscspace_degree 2 -snes_max_it 2 -ksp_max_it 100 -ksp_type cg -ksp_rtol 1.e-11 -ksp_norm_type unpreconditioned -snes_rtol 1.e-10 -pc_type gamg -pc_gamg_type agg -pc_gamg_agg_nsmooths 1 -pc_gamg_coarse_eq_limit 1000 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 1 -pc_gamg_threshold 0.05 -pc_gamg_threshold_scale .0 -ksp_converged_reason -snes_monitor_short -ksp_monitor_short -snes_converged_reason -use_mat_nearnullspace true -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -mg_levels_ksp_chebyshev_esteig 0,0.05,0,1.1 -mg_levels_pc_type jacobi -petscpartitioner_type simple -mat_block_size 3 -ex56_dm_view -run_type 1 -mat_seqaij_type seqaijmkl 600 timeoutfactor: 2 601 602 TEST*/ 603