1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 // libCEED + PETSc Example: Elasticity 18 // 19 // This example demonstrates a simple usage of libCEED with PETSc to solve 20 // elasticity problems. 21 // 22 // The code uses higher level communication protocols in DMPlex. 23 // 24 // Build with: 25 // 26 // make elasticity [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>] 27 // 28 // Sample runs: 29 // 30 // ./elasticity -mesh [.exo file] -degree 2 -E 1 -nu 0.3 -problem linElas -forcing mms 31 // ./elasticity -mesh [.exo file] -degree 2 -E 1 -nu 0.3 -bc_clamp 998,999 -bc_clamp_998_translate 0.1,0.2,0.3 -problem hyperSS -forcing none -ceed /cpu/self 32 // ./elasticity -mesh [.exo file] -degree 2 -E 1 -nu 0.3 -bc_clamp 998,999 -bc_clamp_998_rotate 1,0,0,0.2 -problem hyperFS -forcing none -ceed /gpu/occa 33 // 34 // Sample meshes can be found at https://github.com/jeremylt/ceedSampleMeshes 35 // 36 //TESTARGS -ceed {ceed_resource} -test -degree 3 -nu 0.3 -E 1 -dm_plex_box_faces 3,3,3 37 38 /// @file 39 /// CEED elasticity example using PETSc with DMPlex 40 41 const char help[] = "Solve solid Problems with CEED and PETSc DMPlex\n"; 42 43 #include "elasticity.h" 44 45 int main(int argc, char **argv) { 46 PetscInt ierr; 47 MPI_Comm comm; 48 // Context structs 49 AppCtx appCtx; // Contains problem options 50 Physics phys; // Contains physical constants 51 Physics physSmoother = NULL; // Separate context if nuSmoother set 52 Units units; // Contains units scaling 53 // PETSc objects 54 PetscLogStage stageDMSetup, stageLibceedSetup, 55 stageSnesSetup, stageSnesSolve; 56 DM dmOrig; // Distributed DM to clone 57 DM dmEnergy, dmDiagnostic; // DMs for postprocessing 58 DM *levelDMs; 59 Vec U, *Ug, *Uloc; // U: solution, R: residual, F: forcing 60 Vec R, Rloc, F, Floc; // g: global, loc: local 61 SNES snes, snesCoarse = NULL; 62 Mat *jacobMat, jacobMatCoarse, *prolongRestrMat; 63 // PETSc data 64 UserMult resCtx, jacobCoarseCtx = NULL, *jacobCtx; 65 FormJacobCtx formJacobCtx; 66 UserMultProlongRestr *prolongRestrCtx; 67 PCMGCycleType pcmgCycleType = PC_MG_CYCLE_V; 68 // libCEED objects 69 Ceed ceed, ceedFine = NULL; 70 CeedData *ceedData; 71 CeedQFunction qfRestrict = NULL, qfProlong = NULL; 72 // Parameters 73 PetscInt ncompu = 3; // 3 DoFs in 3D 74 PetscInt ncompe = 1, ncompd = 5; // 1 energy output, 5 diagnostic 75 PetscInt numLevels = 1, fineLevel = 0; 76 PetscInt *Ugsz, *Ulsz, *Ulocsz; // sz: size 77 PetscInt snesIts = 0, kspIts = 0; 78 // Timing 79 double startTime, elapsedTime, minTime, maxTime; 80 81 ierr = PetscInitialize(&argc, &argv, NULL, help); 82 if (ierr) 83 return ierr; 84 85 // --------------------------------------------------------------------------- 86 // Process command line options 87 // --------------------------------------------------------------------------- 88 comm = PETSC_COMM_WORLD; 89 90 // -- Set mesh file, polynomial degree, problem type 91 ierr = PetscCalloc1(1, &appCtx); CHKERRQ(ierr); 92 ierr = ProcessCommandLineOptions(comm, appCtx); CHKERRQ(ierr); 93 numLevels = appCtx->numLevels; 94 fineLevel = numLevels - 1; 95 96 // -- Set Poison's ratio, Young's Modulus 97 ierr = PetscMalloc1(1, &phys); CHKERRQ(ierr); 98 ierr = PetscMalloc1(1, &units); CHKERRQ(ierr); 99 ierr = ProcessPhysics(comm, phys, units); CHKERRQ(ierr); 100 if (fabs(appCtx->nuSmoother) > 1E-14) { 101 ierr = PetscMalloc1(1, &physSmoother); CHKERRQ(ierr); 102 ierr = PetscMemcpy(physSmoother, phys, sizeof(*phys)); CHKERRQ(ierr); 103 physSmoother->nu = appCtx->nuSmoother; 104 } 105 106 // --------------------------------------------------------------------------- 107 // Initalize libCEED 108 // --------------------------------------------------------------------------- 109 // Initalize backend 110 CeedInit(appCtx->ceedResource, &ceed); 111 if (appCtx->degree > 4 && appCtx->ceedResourceFine[0]) 112 CeedInit(appCtx->ceedResourceFine, &ceedFine); 113 114 // Check preferred MemType 115 CeedMemType memTypeBackend; 116 CeedGetPreferredMemType(ceed, &memTypeBackend); 117 if (!appCtx->setMemTypeRequest) 118 appCtx->memTypeRequested = memTypeBackend; 119 else if (!appCtx->petscHaveCuda && appCtx->memTypeRequested == CEED_MEM_DEVICE) 120 SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_SUP_SYS, 121 "PETSc was not built with CUDA. " 122 "Requested MemType CEED_MEM_DEVICE is not supported.", NULL); 123 124 // --------------------------------------------------------------------------- 125 // Setup DM 126 // --------------------------------------------------------------------------- 127 // Performance logging 128 ierr = PetscLogStageRegister("DM and Vector Setup Stage", &stageDMSetup); 129 CHKERRQ(ierr); 130 ierr = PetscLogStagePush(stageDMSetup); CHKERRQ(ierr); 131 132 // -- Create distributed DM from mesh file 133 ierr = CreateDistributedDM(comm, appCtx, &dmOrig); CHKERRQ(ierr); 134 135 // -- Setup DM by polynomial degree 136 ierr = PetscMalloc1(numLevels, &levelDMs); CHKERRQ(ierr); 137 for (PetscInt level = 0; level < numLevels; level++) { 138 ierr = DMClone(dmOrig, &levelDMs[level]); CHKERRQ(ierr); 139 if (appCtx->memTypeRequested == CEED_MEM_DEVICE) { 140 ierr = DMSetVecType(levelDMs[level], VECCUDA); CHKERRQ(ierr); 141 } 142 ierr = SetupDMByDegree(levelDMs[level], appCtx, appCtx->levelDegrees[level], 143 PETSC_TRUE, ncompu); CHKERRQ(ierr); 144 // -- Label field components for viewing 145 // Empty name for conserved field (because there is only one field) 146 PetscSection section; 147 ierr = DMGetLocalSection(levelDMs[level], §ion); CHKERRQ(ierr); 148 ierr = PetscSectionSetFieldName(section, 0, "Displacement"); CHKERRQ(ierr); 149 ierr = PetscSectionSetComponentName(section, 0, 0, "DisplacementX"); 150 CHKERRQ(ierr); 151 ierr = PetscSectionSetComponentName(section, 0, 1, "DisplacementY"); 152 CHKERRQ(ierr); 153 ierr = PetscSectionSetComponentName(section, 0, 2, "DisplacementZ"); 154 CHKERRQ(ierr); 155 } 156 157 // -- Setup postprocessing DMs 158 ierr = DMClone(dmOrig, &dmEnergy); CHKERRQ(ierr); 159 ierr = SetupDMByDegree(dmEnergy, appCtx, appCtx->levelDegrees[fineLevel], 160 PETSC_FALSE, ncompe); CHKERRQ(ierr); 161 ierr = DMClone(dmOrig, &dmDiagnostic); CHKERRQ(ierr); 162 ierr = SetupDMByDegree(dmDiagnostic, appCtx, appCtx->levelDegrees[fineLevel], 163 PETSC_FALSE, ncompu + ncompd); CHKERRQ(ierr); 164 if (appCtx->memTypeRequested == CEED_MEM_DEVICE) { 165 ierr = DMSetVecType(dmEnergy, VECCUDA); CHKERRQ(ierr); 166 ierr = DMSetVecType(dmDiagnostic, VECCUDA); CHKERRQ(ierr); 167 } 168 { 169 // -- Label field components for viewing 170 // Empty name for conserved field (because there is only one field) 171 PetscSection section; 172 ierr = DMGetLocalSection(dmDiagnostic, §ion); CHKERRQ(ierr); 173 ierr = PetscSectionSetFieldName(section, 0, "Diagnostics"); CHKERRQ(ierr); 174 ierr = PetscSectionSetComponentName(section, 0, 0, "DisplacementX"); 175 CHKERRQ(ierr); 176 ierr = PetscSectionSetComponentName(section, 0, 1, "DisplacementY"); 177 CHKERRQ(ierr); 178 ierr = PetscSectionSetComponentName(section, 0, 2, "DisplacementZ"); 179 CHKERRQ(ierr); 180 ierr = PetscSectionSetComponentName(section, 0, 3, "Pressure"); 181 CHKERRQ(ierr); 182 ierr = PetscSectionSetComponentName(section, 0, 4, "VolumentricStrain"); 183 CHKERRQ(ierr); 184 ierr = PetscSectionSetComponentName(section, 0, 5, "TraceE2"); 185 CHKERRQ(ierr); 186 ierr = PetscSectionSetComponentName(section, 0, 6, "detJ"); 187 CHKERRQ(ierr); 188 ierr = PetscSectionSetComponentName(section, 0, 7, "StrainEnergyDensity"); 189 CHKERRQ(ierr); 190 } 191 192 // --------------------------------------------------------------------------- 193 // Setup solution and work vectors 194 // --------------------------------------------------------------------------- 195 // Allocate arrays 196 ierr = PetscMalloc1(numLevels, &Ug); CHKERRQ(ierr); 197 ierr = PetscMalloc1(numLevels, &Uloc); CHKERRQ(ierr); 198 ierr = PetscMalloc1(numLevels, &Ugsz); CHKERRQ(ierr); 199 ierr = PetscMalloc1(numLevels, &Ulsz); CHKERRQ(ierr); 200 ierr = PetscMalloc1(numLevels, &Ulocsz); CHKERRQ(ierr); 201 202 // -- Setup solution vectors for each level 203 for (PetscInt level = 0; level < numLevels; level++) { 204 // -- Create global unknown vector U 205 ierr = DMCreateGlobalVector(levelDMs[level], &Ug[level]); CHKERRQ(ierr); 206 ierr = VecGetSize(Ug[level], &Ugsz[level]); CHKERRQ(ierr); 207 // Note: Local size for matShell 208 ierr = VecGetLocalSize(Ug[level], &Ulsz[level]); CHKERRQ(ierr); 209 210 // -- Create local unknown vector Uloc 211 ierr = DMCreateLocalVector(levelDMs[level], &Uloc[level]); CHKERRQ(ierr); 212 // Note: local size for libCEED 213 ierr = VecGetSize(Uloc[level], &Ulocsz[level]); CHKERRQ(ierr); 214 } 215 216 // -- Create residual and forcing vectors 217 ierr = VecDuplicate(Ug[fineLevel], &U); CHKERRQ(ierr); 218 ierr = VecDuplicate(Ug[fineLevel], &R); CHKERRQ(ierr); 219 ierr = VecDuplicate(Ug[fineLevel], &F); CHKERRQ(ierr); 220 ierr = VecDuplicate(Uloc[fineLevel], &Rloc); CHKERRQ(ierr); 221 ierr = VecDuplicate(Uloc[fineLevel], &Floc); CHKERRQ(ierr); 222 223 // Performance logging 224 ierr = PetscLogStagePop(); 225 226 // --------------------------------------------------------------------------- 227 // Set up libCEED 228 // --------------------------------------------------------------------------- 229 // Performance logging 230 ierr = PetscLogStageRegister("libCEED Setup Stage", &stageLibceedSetup); 231 CHKERRQ(ierr); 232 ierr = PetscLogStagePush(stageLibceedSetup); CHKERRQ(ierr); 233 234 // -- Create libCEED local forcing vector 235 CeedVector forceCeed; 236 CeedScalar *f; 237 if (appCtx->forcingChoice != FORCE_NONE) { 238 if (appCtx->memTypeRequested == CEED_MEM_HOST) { 239 ierr = VecGetArray(Floc, &f); CHKERRQ(ierr); 240 } else { 241 ierr = VecCUDAGetArray(Floc, &f); CHKERRQ(ierr); 242 } 243 CeedVectorCreate(ceed, Ulocsz[fineLevel], &forceCeed); 244 CeedVectorSetArray(forceCeed, appCtx->memTypeRequested, CEED_USE_POINTER, f); 245 } 246 247 // -- Restriction and prolongation QFunction 248 if (appCtx->multigridChoice != MULTIGRID_NONE) { 249 CeedQFunctionCreateIdentity(ceed, ncompu, CEED_EVAL_NONE, CEED_EVAL_INTERP, 250 &qfRestrict); 251 CeedQFunctionCreateIdentity(ceed, ncompu, CEED_EVAL_INTERP, CEED_EVAL_NONE, 252 &qfProlong); 253 } 254 255 // -- Setup libCEED objects 256 ierr = PetscMalloc1(numLevels, &ceedData); CHKERRQ(ierr); 257 // ---- Setup residual evaluator and geometric information 258 ierr = PetscCalloc1(1, &ceedData[fineLevel]); CHKERRQ(ierr); 259 { 260 bool highOrder = (appCtx->levelDegrees[fineLevel] > 4 && ceedFine); 261 Ceed levelCeed = highOrder ? ceedFine : ceed; 262 ierr = SetupLibceedFineLevel(levelDMs[fineLevel], dmEnergy, dmDiagnostic, 263 levelCeed, appCtx, phys, ceedData, fineLevel, 264 ncompu, Ugsz[fineLevel], Ulocsz[fineLevel], 265 forceCeed, qfRestrict, qfProlong); 266 CHKERRQ(ierr); 267 } 268 // ---- Setup Jacobian evaluator and prolongation/restriction 269 for (PetscInt level = 0; level < numLevels; level++) { 270 if (level != fineLevel) { 271 ierr = PetscCalloc1(1, &ceedData[level]); CHKERRQ(ierr); 272 } 273 274 // Note: use high order ceed, if specified and degree > 4 275 bool highOrder = (appCtx->levelDegrees[level] > 4 && ceedFine); 276 Ceed levelCeed = highOrder ? ceedFine : ceed; 277 ierr = SetupLibceedLevel(levelDMs[level], levelCeed, appCtx, phys, 278 ceedData, level, ncompu, Ugsz[level], 279 Ulocsz[level], forceCeed, qfRestrict, 280 qfProlong); CHKERRQ(ierr); 281 } 282 283 // Performance logging 284 ierr = PetscLogStagePop(); 285 286 // --------------------------------------------------------------------------- 287 // Setup global forcing vector 288 // --------------------------------------------------------------------------- 289 ierr = VecZeroEntries(F); CHKERRQ(ierr); 290 291 if (appCtx->forcingChoice != FORCE_NONE) { 292 CeedVectorTakeArray(forceCeed, appCtx->memTypeRequested, NULL); 293 if (appCtx->memTypeRequested == CEED_MEM_HOST) { 294 ierr = VecRestoreArray(Floc, &f); CHKERRQ(ierr); 295 } else { 296 ierr = VecCUDARestoreArray(Floc, &f); CHKERRQ(ierr); 297 } 298 ierr = DMLocalToGlobal(levelDMs[fineLevel], Floc, ADD_VALUES, F); 299 CHKERRQ(ierr); 300 CeedVectorDestroy(&forceCeed); 301 } 302 303 // --------------------------------------------------------------------------- 304 // Print problem summary 305 // --------------------------------------------------------------------------- 306 if (!appCtx->testMode) { 307 const char *usedresource; 308 CeedGetResource(ceed, &usedresource); 309 310 ierr = PetscPrintf(comm, 311 "\n-- Elasticity Example - libCEED + PETSc --\n" 312 " libCEED:\n" 313 " libCEED Backend : %s\n" 314 " libCEED Backend MemType : %s\n" 315 " libCEED User Requested MemType : %s\n", 316 usedresource, CeedMemTypes[memTypeBackend], 317 (appCtx->setMemTypeRequest) ? 318 CeedMemTypes[appCtx->memTypeRequested] : "none"); 319 CHKERRQ(ierr); 320 321 if (ceedFine) { 322 ierr = PetscPrintf(comm, 323 " libCEED Backend - high order : %s\n", 324 appCtx->ceedResourceFine); CHKERRQ(ierr); 325 } 326 327 VecType vecType; 328 ierr = VecGetType(U, &vecType); CHKERRQ(ierr); 329 ierr = PetscPrintf(comm, 330 " PETSc:\n" 331 " PETSc Vec Type : %s\n", 332 vecType); CHKERRQ(ierr); 333 334 ierr = PetscPrintf(comm, 335 " Problem:\n" 336 " Problem Name : %s\n" 337 " Forcing Function : %s\n" 338 " Mesh:\n" 339 " File : %s\n" 340 " Number of 1D Basis Nodes (p) : %d\n" 341 " Number of 1D Quadrature Points (q) : %d\n" 342 " Global nodes : %D\n" 343 " Owned nodes : %D\n" 344 " DoF per node : %D\n" 345 " Multigrid:\n" 346 " Type : %s\n" 347 " Number of Levels : %d\n", 348 problemTypesForDisp[appCtx->problemChoice], 349 forcingTypesForDisp[appCtx->forcingChoice], 350 appCtx->meshFile[0] ? appCtx->meshFile : "Box Mesh", 351 appCtx->degree + 1, appCtx->degree + 1, 352 Ugsz[fineLevel]/ncompu, Ulsz[fineLevel]/ncompu, ncompu, 353 (appCtx->degree == 1 && 354 appCtx->multigridChoice != MULTIGRID_NONE) ? 355 "Algebraic multigrid" : 356 multigridTypesForDisp[appCtx->multigridChoice], 357 (appCtx->degree == 1 || 358 appCtx->multigridChoice == MULTIGRID_NONE) ? 359 0 : numLevels); CHKERRQ(ierr); 360 361 if (appCtx->multigridChoice != MULTIGRID_NONE) { 362 for (PetscInt i = 0; i < 2; i++) { 363 CeedInt level = i ? fineLevel : 0; 364 ierr = PetscPrintf(comm, 365 " Level %D (%s):\n" 366 " Number of 1D Basis Nodes (p) : %d\n" 367 " Global Nodes : %D\n" 368 " Owned Nodes : %D\n", 369 level, i ? "fine" : "coarse", 370 appCtx->levelDegrees[level] + 1, 371 Ugsz[level]/ncompu, Ulsz[level]/ncompu); 372 CHKERRQ(ierr); 373 } 374 } 375 } 376 377 // --------------------------------------------------------------------------- 378 // Setup SNES 379 // --------------------------------------------------------------------------- 380 // Performance logging 381 ierr = PetscLogStageRegister("SNES Setup Stage", &stageSnesSetup); 382 CHKERRQ(ierr); 383 ierr = PetscLogStagePush(stageSnesSetup); CHKERRQ(ierr); 384 385 // Create SNES 386 ierr = SNESCreate(comm, &snes); CHKERRQ(ierr); 387 ierr = SNESSetDM(snes, levelDMs[fineLevel]); CHKERRQ(ierr); 388 389 // -- Jacobian evaluators 390 ierr = PetscMalloc1(numLevels, &jacobCtx); CHKERRQ(ierr); 391 ierr = PetscMalloc1(numLevels, &jacobMat); CHKERRQ(ierr); 392 for (PetscInt level = 0; level < numLevels; level++) { 393 // -- Jacobian context for level 394 ierr = PetscMalloc1(1, &jacobCtx[level]); CHKERRQ(ierr); 395 ierr = SetupJacobianCtx(comm, appCtx, levelDMs[level], Ug[level], 396 Uloc[level], ceedData[level], ceed, phys, 397 physSmoother, jacobCtx[level]); CHKERRQ(ierr); 398 399 // -- Form Action of Jacobian on delta_u 400 ierr = MatCreateShell(comm, Ulsz[level], Ulsz[level], Ugsz[level], 401 Ugsz[level], jacobCtx[level], &jacobMat[level]); 402 CHKERRQ(ierr); 403 ierr = MatShellSetOperation(jacobMat[level], MATOP_MULT, 404 (void (*)(void))ApplyJacobian_Ceed); 405 CHKERRQ(ierr); 406 ierr = MatShellSetOperation(jacobMat[level], MATOP_GET_DIAGONAL, 407 (void(*)(void))GetDiag_Ceed); 408 if (appCtx->memTypeRequested == CEED_MEM_DEVICE) { 409 ierr = MatShellSetVecType(jacobMat[level], VECCUDA); CHKERRQ(ierr); 410 } 411 } 412 // Note: FormJacobian updates Jacobian matrices on each level 413 // and assembles the Jpre matrix, if needed 414 ierr = PetscMalloc1(1, &formJacobCtx); CHKERRQ(ierr); 415 formJacobCtx->jacobCtx = jacobCtx; 416 formJacobCtx->numLevels = numLevels; 417 formJacobCtx->jacobMat = jacobMat; 418 419 // -- Residual evaluation function 420 ierr = PetscMalloc1(1, &resCtx); CHKERRQ(ierr); 421 ierr = PetscMemcpy(resCtx, jacobCtx[fineLevel], 422 sizeof(*jacobCtx[fineLevel])); CHKERRQ(ierr); 423 resCtx->op = ceedData[fineLevel]->opApply; 424 resCtx->qf = ceedData[fineLevel]->qfApply; 425 ierr = SNESSetFunction(snes, R, FormResidual_Ceed, resCtx); CHKERRQ(ierr); 426 427 // -- Prolongation/Restriction evaluation 428 ierr = PetscMalloc1(numLevels, &prolongRestrCtx); CHKERRQ(ierr); 429 ierr = PetscMalloc1(numLevels, &prolongRestrMat); CHKERRQ(ierr); 430 for (PetscInt level = 1; level < numLevels; level++) { 431 // ---- Prolongation/restriction context for level 432 ierr = PetscMalloc1(1, &prolongRestrCtx[level]); CHKERRQ(ierr); 433 ierr = SetupProlongRestrictCtx(comm, appCtx, levelDMs[level-1], 434 levelDMs[level], Ug[level], Uloc[level-1], 435 Uloc[level], ceedData[level-1], 436 ceedData[level], ceed, 437 prolongRestrCtx[level]); CHKERRQ(ierr); 438 439 // ---- Form Action of Jacobian on delta_u 440 ierr = MatCreateShell(comm, Ulsz[level], Ulsz[level-1], Ugsz[level], 441 Ugsz[level-1], prolongRestrCtx[level], 442 &prolongRestrMat[level]); CHKERRQ(ierr); 443 // Note: In PCMG, restriction is the transpose of prolongation 444 ierr = MatShellSetOperation(prolongRestrMat[level], MATOP_MULT, 445 (void (*)(void))Prolong_Ceed); 446 ierr = MatShellSetOperation(prolongRestrMat[level], MATOP_MULT_TRANSPOSE, 447 (void (*)(void))Restrict_Ceed); 448 CHKERRQ(ierr); 449 if (appCtx->memTypeRequested == CEED_MEM_DEVICE) { 450 ierr = MatShellSetVecType(prolongRestrMat[level], VECCUDA); CHKERRQ(ierr); 451 } 452 } 453 454 // --------------------------------------------------------------------------- 455 // Setup dummy SNES for AMG coarse solve 456 // --------------------------------------------------------------------------- 457 if (appCtx->multigridChoice != MULTIGRID_NONE) { 458 // -- Jacobian Matrix 459 ierr = DMSetMatType(levelDMs[0], MATAIJ); CHKERRQ(ierr); 460 ierr = DMCreateMatrix(levelDMs[0], &jacobMatCoarse); CHKERRQ(ierr); 461 462 if (appCtx->degree > 1) { 463 ierr = SNESCreate(comm, &snesCoarse); CHKERRQ(ierr); 464 ierr = SNESSetDM(snesCoarse, levelDMs[0]); CHKERRQ(ierr); 465 ierr = SNESSetSolution(snesCoarse, Ug[0]); CHKERRQ(ierr); 466 467 // -- Jacobian function 468 ierr = SNESSetJacobian(snesCoarse, jacobMatCoarse, jacobMatCoarse, NULL, 469 NULL); CHKERRQ(ierr); 470 471 // -- Residual evaluation function 472 ierr = PetscMalloc1(1, &jacobCoarseCtx); CHKERRQ(ierr); 473 ierr = PetscMemcpy(jacobCoarseCtx, jacobCtx[0], sizeof(*jacobCtx[0])); 474 CHKERRQ(ierr); 475 ierr = SNESSetFunction(snesCoarse, Ug[0], ApplyJacobianCoarse_Ceed, 476 jacobCoarseCtx); CHKERRQ(ierr); 477 478 // -- Update formJacobCtx 479 formJacobCtx->Ucoarse = Ug[0]; 480 formJacobCtx->snesCoarse = snesCoarse; 481 formJacobCtx->jacobMatCoarse = jacobMatCoarse; 482 } 483 } 484 485 // Set Jacobian function 486 if (appCtx->degree > 1) { 487 ierr = SNESSetJacobian(snes, jacobMat[fineLevel], jacobMat[fineLevel], 488 FormJacobian, formJacobCtx); CHKERRQ(ierr); 489 } else { 490 ierr = SNESSetJacobian(snes, jacobMat[0], jacobMatCoarse, 491 SNESComputeJacobianDefaultColor, NULL); 492 CHKERRQ(ierr); 493 } 494 495 // --------------------------------------------------------------------------- 496 // Setup KSP 497 // --------------------------------------------------------------------------- 498 { 499 PC pc; 500 KSP ksp; 501 502 // -- KSP 503 ierr = SNESGetKSP(snes, &ksp); CHKERRQ(ierr); 504 ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr); 505 ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL); CHKERRQ(ierr); 506 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 507 PETSC_DEFAULT); CHKERRQ(ierr); 508 ierr = KSPSetOptionsPrefix(ksp, "outer_"); CHKERRQ(ierr); 509 510 // -- Preconditioning 511 ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); 512 ierr = PCSetDM(pc, levelDMs[fineLevel]); CHKERRQ(ierr); 513 ierr = PCSetOptionsPrefix(pc, "outer_"); CHKERRQ(ierr); 514 515 if (appCtx->multigridChoice == MULTIGRID_NONE) { 516 // ---- No Multigrid 517 ierr = PCSetType(pc, PCJACOBI); CHKERRQ(ierr); 518 ierr = PCJacobiSetType(pc, PC_JACOBI_DIAGONAL); CHKERRQ(ierr); 519 } else if (appCtx->degree == 1) { 520 // ---- AMG for degree 1 521 ierr = PCSetType(pc, PCGAMG); CHKERRQ(ierr); 522 } else { 523 // ---- PCMG 524 ierr = PCSetType(pc, PCMG); CHKERRQ(ierr); 525 526 // ------ PCMG levels 527 ierr = PCMGSetLevels(pc, numLevels, NULL); CHKERRQ(ierr); 528 for (PetscInt level = 0; level < numLevels; level++) { 529 // -------- Smoother 530 KSP kspSmoother, kspEst; 531 PC pcSmoother; 532 533 // ---------- Smoother KSP 534 ierr = PCMGGetSmoother(pc, level, &kspSmoother); CHKERRQ(ierr); 535 ierr = KSPSetDM(kspSmoother, levelDMs[level]); CHKERRQ(ierr); 536 ierr = KSPSetDMActive(kspSmoother, PETSC_FALSE); CHKERRQ(ierr); 537 538 // ---------- Chebyshev options 539 ierr = KSPSetType(kspSmoother, KSPCHEBYSHEV); CHKERRQ(ierr); 540 ierr = KSPChebyshevEstEigSet(kspSmoother, 0, 0.1, 0, 1.1); 541 CHKERRQ(ierr); 542 ierr = KSPChebyshevEstEigGetKSP(kspSmoother, &kspEst); CHKERRQ(ierr); 543 ierr = KSPSetType(kspEst, KSPCG); CHKERRQ(ierr); 544 ierr = KSPChebyshevEstEigSetUseNoisy(kspSmoother, PETSC_TRUE); 545 CHKERRQ(ierr); 546 ierr = KSPSetOperators(kspSmoother, jacobMat[level], jacobMat[level]); 547 CHKERRQ(ierr); 548 549 // ---------- Smoother preconditioner 550 ierr = KSPGetPC(kspSmoother, &pcSmoother); CHKERRQ(ierr); 551 ierr = PCSetType(pcSmoother, PCJACOBI); CHKERRQ(ierr); 552 ierr = PCJacobiSetType(pcSmoother, PC_JACOBI_DIAGONAL); CHKERRQ(ierr); 553 554 // -------- Work vector 555 if (level != fineLevel) { 556 ierr = PCMGSetX(pc, level, Ug[level]); CHKERRQ(ierr); 557 } 558 559 // -------- Level prolongation/restriction operator 560 if (level > 0) { 561 ierr = PCMGSetInterpolation(pc, level, prolongRestrMat[level]); 562 CHKERRQ(ierr); 563 ierr = PCMGSetRestriction(pc, level, prolongRestrMat[level]); 564 CHKERRQ(ierr); 565 } 566 } 567 568 // ------ PCMG coarse solve 569 KSP kspCoarse; 570 PC pcCoarse; 571 572 // -------- Coarse KSP 573 ierr = PCMGGetCoarseSolve(pc, &kspCoarse); CHKERRQ(ierr); 574 ierr = KSPSetType(kspCoarse, KSPPREONLY); CHKERRQ(ierr); 575 ierr = KSPSetOperators(kspCoarse, jacobMatCoarse, jacobMatCoarse); 576 CHKERRQ(ierr); 577 ierr = KSPSetOptionsPrefix(kspCoarse, "coarse_"); CHKERRQ(ierr); 578 579 // -------- Coarse preconditioner 580 ierr = KSPGetPC(kspCoarse, &pcCoarse); CHKERRQ(ierr); 581 ierr = PCSetType(pcCoarse, PCGAMG); CHKERRQ(ierr); 582 ierr = PCSetOptionsPrefix(pcCoarse, "coarse_"); CHKERRQ(ierr); 583 584 ierr = KSPSetFromOptions(kspCoarse); CHKERRQ(ierr); 585 ierr = PCSetFromOptions(pcCoarse); CHKERRQ(ierr); 586 587 // ------ PCMG options 588 ierr = PCMGSetType(pc, PC_MG_MULTIPLICATIVE); CHKERRQ(ierr); 589 ierr = PCMGSetNumberSmooth(pc, 3); CHKERRQ(ierr); 590 ierr = PCMGSetCycleType(pc, pcmgCycleType); CHKERRQ(ierr); 591 } 592 ierr = KSPSetFromOptions(ksp); 593 ierr = PCSetFromOptions(pc); 594 } 595 { 596 // Default to critical-point (CP) line search (related to Wolfe's curvature condition) 597 SNESLineSearch linesearch; 598 599 ierr = SNESGetLineSearch(snes, &linesearch); CHKERRQ(ierr); 600 ierr = SNESLineSearchSetType(linesearch, SNESLINESEARCHCP); CHKERRQ(ierr); 601 } 602 603 ierr = SNESSetFromOptions(snes); CHKERRQ(ierr); 604 605 // Performance logging 606 ierr = PetscLogStagePop(); 607 608 // --------------------------------------------------------------------------- 609 // Set initial guess 610 // --------------------------------------------------------------------------- 611 ierr = PetscObjectSetName((PetscObject)U, ""); CHKERRQ(ierr); 612 ierr = VecSet(U, 0.0); CHKERRQ(ierr); 613 614 // View solution 615 if (appCtx->viewSoln) { 616 ierr = ViewSolution(comm, U, 0, 0.0); CHKERRQ(ierr); 617 } 618 619 // --------------------------------------------------------------------------- 620 // Solve SNES 621 // --------------------------------------------------------------------------- 622 PetscBool snesMonitor = PETSC_FALSE; 623 ierr = PetscOptionsHasName(NULL, NULL, "-snes_monitor", &snesMonitor); 624 CHKERRQ(ierr); 625 626 // Performance logging 627 ierr = PetscLogStageRegister("SNES Solve Stage", &stageSnesSolve); 628 CHKERRQ(ierr); 629 ierr = PetscLogStagePush(stageSnesSolve); CHKERRQ(ierr); 630 631 // Timing 632 ierr = PetscBarrier((PetscObject)snes); CHKERRQ(ierr); 633 startTime = MPI_Wtime(); 634 635 // Solve for each load increment 636 PetscInt increment; 637 for (increment = 1; increment <= appCtx->numIncrements; increment++) { 638 // -- Log increment count 639 if (snesMonitor) { 640 ierr = PetscPrintf(comm, "%d Load Increment\n", increment - 1); 641 CHKERRQ(ierr); 642 } 643 644 // -- Scale the problem 645 PetscScalar loadIncrement = 1.0*increment / appCtx->numIncrements, 646 scalingFactor = loadIncrement / 647 (increment == 1 ? 1 : resCtx->loadIncrement); 648 resCtx->loadIncrement = loadIncrement; 649 if (appCtx->numIncrements > 1 && appCtx->forcingChoice != FORCE_NONE) { 650 ierr = VecScale(F, scalingFactor); CHKERRQ(ierr); 651 } 652 653 // -- Solve 654 ierr = SNESSolve(snes, F, U); CHKERRQ(ierr); 655 656 // -- View solution 657 if (appCtx->viewSoln) { 658 ierr = ViewSolution(comm, U, increment, loadIncrement); CHKERRQ(ierr); 659 } 660 661 // -- Update SNES iteration count 662 PetscInt its; 663 ierr = SNESGetIterationNumber(snes, &its); CHKERRQ(ierr); 664 snesIts += its; 665 ierr = SNESGetLinearSolveIterations(snes, &its); CHKERRQ(ierr); 666 kspIts += its; 667 668 // -- Check for divergence 669 SNESConvergedReason reason; 670 ierr = SNESGetConvergedReason(snes, &reason); CHKERRQ(ierr); 671 if (reason < 0) 672 break; 673 } 674 675 // Timing 676 elapsedTime = MPI_Wtime() - startTime; 677 678 // Performance logging 679 ierr = PetscLogStagePop(); 680 681 // --------------------------------------------------------------------------- 682 // Output summary 683 // --------------------------------------------------------------------------- 684 if (!appCtx->testMode) { 685 // -- SNES 686 SNESType snesType; 687 SNESConvergedReason reason; 688 PetscReal rnorm; 689 ierr = SNESGetType(snes, &snesType); CHKERRQ(ierr); 690 ierr = SNESGetConvergedReason(snes, &reason); CHKERRQ(ierr); 691 ierr = SNESGetFunctionNorm(snes, &rnorm); CHKERRQ(ierr); 692 ierr = PetscPrintf(comm, 693 " SNES:\n" 694 " SNES Type : %s\n" 695 " SNES Convergence : %s\n" 696 " Number of Load Increments : %d\n" 697 " Completed Load Increments : %d\n" 698 " Total SNES Iterations : %D\n" 699 " Final rnorm : %e\n", 700 snesType, SNESConvergedReasons[reason], 701 appCtx->numIncrements, increment - 1, 702 snesIts, (double)rnorm); CHKERRQ(ierr); 703 704 // -- KSP 705 KSP ksp; 706 KSPType kspType; 707 ierr = SNESGetKSP(snes, &ksp); CHKERRQ(ierr); 708 ierr = KSPGetType(ksp, &kspType); CHKERRQ(ierr); 709 ierr = PetscPrintf(comm, 710 " Linear Solver:\n" 711 " KSP Type : %s\n" 712 " Total KSP Iterations : %D\n", 713 kspType, kspIts); CHKERRQ(ierr); 714 715 // -- PC 716 PC pc; 717 PCType pcType; 718 ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); 719 ierr = PCGetType(pc, &pcType); CHKERRQ(ierr); 720 ierr = PetscPrintf(comm, 721 " PC Type : %s\n", 722 pcType); CHKERRQ(ierr); 723 724 if (!strcmp(pcType, PCMG)) { 725 PCMGType pcmgType; 726 ierr = PCMGGetType(pc, &pcmgType); CHKERRQ(ierr); 727 ierr = PetscPrintf(comm, 728 " P-Multigrid:\n" 729 " PCMG Type : %s\n" 730 " PCMG Cycle Type : %s\n", 731 PCMGTypes[pcmgType], 732 PCMGCycleTypes[pcmgCycleType]); CHKERRQ(ierr); 733 734 // -- Coarse Solve 735 KSP kspCoarse; 736 PC pcCoarse; 737 PCType pcType; 738 739 ierr = PCMGGetCoarseSolve(pc, &kspCoarse); CHKERRQ(ierr); 740 ierr = KSPGetType(kspCoarse, &kspType); CHKERRQ(ierr); 741 ierr = KSPGetPC(kspCoarse, &pcCoarse); CHKERRQ(ierr); 742 ierr = PCGetType(pcCoarse, &pcType); CHKERRQ(ierr); 743 ierr = PetscPrintf(comm, 744 " Coarse Solve:\n" 745 " KSP Type : %s\n" 746 " PC Type : %s\n", 747 kspType, pcType); CHKERRQ(ierr); 748 } 749 } 750 751 // --------------------------------------------------------------------------- 752 // Compute solve time 753 // --------------------------------------------------------------------------- 754 if (!appCtx->testMode) { 755 ierr = MPI_Allreduce(&elapsedTime, &minTime, 1, MPI_DOUBLE, MPI_MIN, comm); 756 CHKERRQ(ierr); 757 ierr = MPI_Allreduce(&elapsedTime, &maxTime, 1, MPI_DOUBLE, MPI_MAX, comm); 758 CHKERRQ(ierr); 759 ierr = PetscPrintf(comm, 760 " Performance:\n" 761 " SNES Solve Time : %g (%g) sec\n" 762 " DoFs/Sec in SNES : %g (%g) million\n", 763 maxTime, minTime, 1e-6*Ugsz[fineLevel]*kspIts/maxTime, 764 1e-6*Ugsz[fineLevel]*kspIts/minTime); CHKERRQ(ierr); 765 } 766 767 // --------------------------------------------------------------------------- 768 // Compute error 769 // --------------------------------------------------------------------------- 770 if (appCtx->forcingChoice == FORCE_MMS) { 771 CeedScalar l2Error = 1., l2Unorm = 1.; 772 const CeedScalar *truearray; 773 Vec errorVec, trueVec; 774 775 // -- Work vectors 776 ierr = VecDuplicate(U, &errorVec); CHKERRQ(ierr); 777 ierr = VecSet(errorVec, 0.0); CHKERRQ(ierr); 778 ierr = VecDuplicate(U, &trueVec); CHKERRQ(ierr); 779 ierr = VecSet(trueVec, 0.0); CHKERRQ(ierr); 780 781 // -- Assemble global true solution vector 782 CeedVectorGetArrayRead(ceedData[fineLevel]->truesoln, 783 appCtx->memTypeRequested, &truearray); 784 if (appCtx->memTypeRequested == CEED_MEM_HOST) { 785 ierr = VecPlaceArray(resCtx->Yloc, (PetscScalar *)truearray); 786 CHKERRQ(ierr); 787 } else { 788 ierr = VecCUDAPlaceArray(resCtx->Yloc, (PetscScalar *)truearray); 789 CHKERRQ(ierr); 790 } 791 ierr = DMLocalToGlobal(resCtx->dm, resCtx->Yloc, INSERT_VALUES, trueVec); 792 CHKERRQ(ierr); 793 if (appCtx->memTypeRequested == CEED_MEM_HOST) { 794 ierr = VecResetArray(resCtx->Yloc); CHKERRQ(ierr); 795 } else { 796 ierr = VecCUDAResetArray(resCtx->Yloc); CHKERRQ(ierr); 797 } 798 CeedVectorRestoreArrayRead(ceedData[fineLevel]->truesoln, &truearray); 799 800 // -- Compute L2 error 801 ierr = VecWAXPY(errorVec, -1.0, U, trueVec); CHKERRQ(ierr); 802 ierr = VecNorm(errorVec, NORM_2, &l2Error); CHKERRQ(ierr); 803 ierr = VecNorm(U, NORM_2, &l2Unorm); CHKERRQ(ierr); 804 l2Error /= l2Unorm; 805 806 // -- Output 807 if (!appCtx->testMode || l2Error > 0.05) { 808 ierr = PetscPrintf(comm, 809 " L2 Error : %e\n", 810 l2Error); CHKERRQ(ierr); 811 } 812 813 // -- Cleanup 814 ierr = VecDestroy(&errorVec); CHKERRQ(ierr); 815 ierr = VecDestroy(&trueVec); CHKERRQ(ierr); 816 } 817 818 // --------------------------------------------------------------------------- 819 // Compute energy 820 // --------------------------------------------------------------------------- 821 if (!appCtx->testMode) { 822 // -- Compute L2 error 823 CeedScalar energy; 824 ierr = ComputeStrainEnergy(dmEnergy, resCtx, ceedData[fineLevel]->opEnergy, 825 U, &energy); CHKERRQ(ierr); 826 827 // -- Output 828 ierr = PetscPrintf(comm, 829 " Strain Energy : %e\n", 830 energy); CHKERRQ(ierr); 831 } 832 833 // --------------------------------------------------------------------------- 834 // Output diagnostic quantities 835 // --------------------------------------------------------------------------- 836 if (appCtx->viewSoln || appCtx->viewFinalSoln) { 837 // -- Setup context 838 UserMult diagnosticCtx; 839 ierr = PetscMalloc1(1, &diagnosticCtx); CHKERRQ(ierr); 840 ierr = PetscMemcpy(diagnosticCtx, resCtx, sizeof(*resCtx)); CHKERRQ(ierr); 841 diagnosticCtx->dm = dmDiagnostic; 842 diagnosticCtx->op = ceedData[fineLevel]->opDiagnostic; 843 844 // -- Compute and output 845 ierr = ViewDiagnosticQuantities(comm, levelDMs[fineLevel], diagnosticCtx, U, 846 ceedData[fineLevel]->ErestrictDiagnostic); 847 CHKERRQ(ierr); 848 849 // -- Cleanup 850 ierr = PetscFree(diagnosticCtx); CHKERRQ(ierr); 851 } 852 853 // --------------------------------------------------------------------------- 854 // Free objects 855 // --------------------------------------------------------------------------- 856 // Data in arrays per level 857 for (PetscInt level = 0; level < numLevels; level++) { 858 // Vectors 859 ierr = VecDestroy(&Ug[level]); CHKERRQ(ierr); 860 ierr = VecDestroy(&Uloc[level]); CHKERRQ(ierr); 861 862 // Jacobian matrix and data 863 ierr = VecDestroy(&jacobCtx[level]->Yloc); CHKERRQ(ierr); 864 ierr = MatDestroy(&jacobMat[level]); CHKERRQ(ierr); 865 ierr = PetscFree(jacobCtx[level]); CHKERRQ(ierr); 866 867 // Prolongation/Restriction matrix and data 868 if (level > 0) { 869 ierr = VecDestroy(&prolongRestrCtx[level]->multVec); CHKERRQ(ierr); 870 ierr = PetscFree(prolongRestrCtx[level]); CHKERRQ(ierr); 871 ierr = MatDestroy(&prolongRestrMat[level]); CHKERRQ(ierr); 872 } 873 874 // DM 875 ierr = DMDestroy(&levelDMs[level]); CHKERRQ(ierr); 876 877 // libCEED objects 878 ierr = CeedDataDestroy(level, ceedData[level]); CHKERRQ(ierr); 879 } 880 881 // Arrays 882 ierr = PetscFree(Ug); CHKERRQ(ierr); 883 ierr = PetscFree(Uloc); CHKERRQ(ierr); 884 ierr = PetscFree(Ugsz); CHKERRQ(ierr); 885 ierr = PetscFree(Ulsz); CHKERRQ(ierr); 886 ierr = PetscFree(Ulocsz); CHKERRQ(ierr); 887 ierr = PetscFree(jacobCtx); CHKERRQ(ierr); 888 ierr = PetscFree(jacobMat); CHKERRQ(ierr); 889 ierr = PetscFree(prolongRestrCtx); CHKERRQ(ierr); 890 ierr = PetscFree(prolongRestrMat); CHKERRQ(ierr); 891 ierr = PetscFree(appCtx->levelDegrees); CHKERRQ(ierr); 892 ierr = PetscFree(ceedData); CHKERRQ(ierr); 893 894 // libCEED objects 895 CeedQFunctionDestroy(&qfRestrict); 896 CeedQFunctionDestroy(&qfProlong); 897 CeedDestroy(&ceed); 898 CeedDestroy(&ceedFine); 899 900 // PETSc objects 901 ierr = VecDestroy(&U); CHKERRQ(ierr); 902 ierr = VecDestroy(&R); CHKERRQ(ierr); 903 ierr = VecDestroy(&Rloc); CHKERRQ(ierr); 904 ierr = VecDestroy(&F); CHKERRQ(ierr); 905 ierr = VecDestroy(&Floc); CHKERRQ(ierr); 906 ierr = MatDestroy(&jacobMatCoarse); CHKERRQ(ierr); 907 ierr = SNESDestroy(&snes); CHKERRQ(ierr); 908 ierr = SNESDestroy(&snesCoarse); CHKERRQ(ierr); 909 ierr = DMDestroy(&dmOrig); CHKERRQ(ierr); 910 ierr = DMDestroy(&dmEnergy); CHKERRQ(ierr); 911 ierr = DMDestroy(&dmDiagnostic); CHKERRQ(ierr); 912 ierr = PetscFree(levelDMs); CHKERRQ(ierr); 913 914 // Structs 915 ierr = PetscFree(resCtx); CHKERRQ(ierr); 916 ierr = PetscFree(formJacobCtx); CHKERRQ(ierr); 917 ierr = PetscFree(jacobCoarseCtx); CHKERRQ(ierr); 918 ierr = PetscFree(appCtx); CHKERRQ(ierr); 919 ierr = PetscFree(phys); CHKERRQ(ierr); 920 ierr = PetscFree(physSmoother); CHKERRQ(ierr); 921 ierr = PetscFree(units); CHKERRQ(ierr); 922 923 return PetscFinalize(); 924 } 925