1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 // libCEED + PETSc Example: Elasticity 18 // 19 // This example demonstrates a simple usage of libCEED with PETSc to solve 20 // elasticity problems. 21 // 22 // The code uses higher level communication protocols in DMPlex. 23 // 24 // Build with: 25 // 26 // make elasticity [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>] 27 // 28 // Sample runs: 29 // 30 // ./elasticity -mesh [.exo file] -degree 2 -E 1 -nu 0.3 -problem Linear -forcing mms 31 // ./elasticity -mesh [.exo file] -degree 2 -E 1 -nu 0.3 -bc_clamp 998,999 -bc_clamp_998_translate 0.1,0.2,0.3 -problem SS-NH -forcing none -ceed /cpu/self 32 // ./elasticity -mesh [.exo file] -degree 2 -E 1 -nu 0.3 -bc_clamp 998,999 -bc_clamp_998_rotate 1,0,0,0.2 -problem FSInitial-NH1 -forcing none -ceed /gpu/cuda 33 // 34 // Sample meshes can be found at https://github.com/jeremylt/ceedSampleMeshes 35 // 36 //TESTARGS -ceed {ceed_resource} -test -degree 3 -nu 0.3 -E 1 -dm_plex_box_faces 3,3,3 37 38 /// @file 39 /// CEED elasticity example using PETSc with DMPlex 40 41 const char help[] = "Solve solid Problems with CEED and PETSc DMPlex\n"; 42 43 #include "elasticity.h" 44 45 int main(int argc, char **argv) { 46 PetscInt ierr; 47 MPI_Comm comm; 48 // Context structs 49 AppCtx appCtx; // Contains problem options 50 Physics phys; // Contains physical constants 51 Physics physSmoother = NULL; // Separate context if nuSmoother set 52 Units units; // Contains units scaling 53 // PETSc objects 54 PetscLogStage stageDMSetup, stageLibceedSetup, 55 stageSnesSetup, stageSnesSolve; 56 DM dmOrig; // Distributed DM to clone 57 DM dmEnergy, dmDiagnostic; // DMs for postprocessing 58 DM *levelDMs; 59 Vec U, *Ug, *Uloc; // U: solution, R: residual, F: forcing 60 Vec R, Rloc, F, Floc; // g: global, loc: local 61 Vec NBCs = NULL, NBCsloc = NULL; 62 SNES snes, snesCoarse = NULL; 63 Mat *jacobMat, jacobMatCoarse, *prolongRestrMat; 64 // PETSc data 65 UserMult resCtx, jacobCoarseCtx = NULL, *jacobCtx; 66 FormJacobCtx formJacobCtx; 67 UserMultProlongRestr *prolongRestrCtx; 68 PCMGCycleType pcmgCycleType = PC_MG_CYCLE_V; 69 // libCEED objects 70 Ceed ceed; 71 CeedData *ceedData; 72 CeedQFunctionContext ctxPhys, ctxPhysSmoother = NULL; 73 // Parameters 74 PetscInt ncompu = 3; // 3 DoFs in 3D 75 PetscInt ncompe = 1, ncompd = 5; // 1 energy output, 5 diagnostic 76 PetscInt numLevels = 1, fineLevel = 0; 77 PetscInt *Ugsz, *Ulsz, *Ulocsz; // sz: size 78 PetscInt snesIts = 0, kspIts = 0; 79 // Timing 80 double startTime, elapsedTime, minTime, maxTime; 81 82 ierr = PetscInitialize(&argc, &argv, NULL, help); 83 if (ierr) 84 return ierr; 85 86 // --------------------------------------------------------------------------- 87 // Process command line options 88 // --------------------------------------------------------------------------- 89 comm = PETSC_COMM_WORLD; 90 91 // -- Set mesh file, polynomial degree, problem type 92 ierr = PetscCalloc1(1, &appCtx); CHKERRQ(ierr); 93 ierr = ProcessCommandLineOptions(comm, appCtx); CHKERRQ(ierr); 94 numLevels = appCtx->numLevels; 95 fineLevel = numLevels - 1; 96 97 // -- Set Poison's ratio, Young's Modulus 98 ierr = PetscMalloc1(1, &phys); CHKERRQ(ierr); 99 ierr = PetscMalloc1(1, &units); CHKERRQ(ierr); 100 ierr = ProcessPhysics(comm, phys, units); CHKERRQ(ierr); 101 if (fabs(appCtx->nuSmoother) > 1E-14) { 102 ierr = PetscMalloc1(1, &physSmoother); CHKERRQ(ierr); 103 ierr = PetscMemcpy(physSmoother, phys, sizeof(*phys)); CHKERRQ(ierr); 104 physSmoother->nu = appCtx->nuSmoother; 105 } 106 107 // --------------------------------------------------------------------------- 108 // Initalize libCEED 109 // --------------------------------------------------------------------------- 110 // Initalize backend 111 CeedInit(appCtx->ceedResource, &ceed); 112 113 // Check preferred MemType 114 CeedMemType memTypeBackend; 115 CeedGetPreferredMemType(ceed, &memTypeBackend); 116 117 // Wrap context in libCEED objects 118 CeedQFunctionContextCreate(ceed, &ctxPhys); 119 CeedQFunctionContextSetData(ctxPhys, CEED_MEM_HOST, CEED_USE_POINTER, 120 sizeof(*phys), phys); 121 if (physSmoother) { 122 CeedQFunctionContextCreate(ceed, &ctxPhysSmoother); 123 CeedQFunctionContextSetData(ctxPhysSmoother, CEED_MEM_HOST, CEED_USE_POINTER, 124 sizeof(*physSmoother), physSmoother); 125 } 126 127 // --------------------------------------------------------------------------- 128 // Setup DM 129 // --------------------------------------------------------------------------- 130 // Performance logging 131 ierr = PetscLogStageRegister("DM and Vector Setup Stage", &stageDMSetup); 132 CHKERRQ(ierr); 133 ierr = PetscLogStagePush(stageDMSetup); CHKERRQ(ierr); 134 135 // -- Create distributed DM from mesh file 136 ierr = CreateDistributedDM(comm, appCtx, &dmOrig); CHKERRQ(ierr); 137 VecType vectype; 138 switch (memTypeBackend) { 139 case CEED_MEM_HOST: vectype = VECSTANDARD; break; 140 case CEED_MEM_DEVICE: { 141 const char *resolved; 142 CeedGetResource(ceed, &resolved); 143 if (strstr(resolved, "/gpu/cuda")) vectype = VECCUDA; 144 else if (strstr(resolved, "/gpu/hip")) vectype = VECHIP; 145 else vectype = VECSTANDARD; 146 } 147 } 148 ierr = DMSetVecType(dmOrig, vectype); CHKERRQ(ierr); 149 ierr = DMSetFromOptions(dmOrig); CHKERRQ(ierr); 150 151 // -- Setup DM by polynomial degree 152 ierr = PetscMalloc1(numLevels, &levelDMs); CHKERRQ(ierr); 153 for (PetscInt level = 0; level < numLevels; level++) { 154 ierr = DMClone(dmOrig, &levelDMs[level]); CHKERRQ(ierr); 155 ierr = DMGetVecType(dmOrig, &vectype); CHKERRQ(ierr); 156 ierr = DMSetVecType(levelDMs[level], vectype); CHKERRQ(ierr); 157 ierr = SetupDMByDegree(levelDMs[level], appCtx, appCtx->levelDegrees[level], 158 PETSC_TRUE, ncompu); CHKERRQ(ierr); 159 // -- Label field components for viewing 160 // Empty name for conserved field (because there is only one field) 161 PetscSection section; 162 ierr = DMGetLocalSection(levelDMs[level], §ion); CHKERRQ(ierr); 163 ierr = PetscSectionSetFieldName(section, 0, "Displacement"); CHKERRQ(ierr); 164 ierr = PetscSectionSetComponentName(section, 0, 0, "DisplacementX"); 165 CHKERRQ(ierr); 166 ierr = PetscSectionSetComponentName(section, 0, 1, "DisplacementY"); 167 CHKERRQ(ierr); 168 ierr = PetscSectionSetComponentName(section, 0, 2, "DisplacementZ"); 169 CHKERRQ(ierr); 170 } 171 172 // -- Setup postprocessing DMs 173 ierr = DMClone(dmOrig, &dmEnergy); CHKERRQ(ierr); 174 ierr = SetupDMByDegree(dmEnergy, appCtx, appCtx->levelDegrees[fineLevel], 175 PETSC_FALSE, ncompe); CHKERRQ(ierr); 176 ierr = DMClone(dmOrig, &dmDiagnostic); CHKERRQ(ierr); 177 ierr = SetupDMByDegree(dmDiagnostic, appCtx, appCtx->levelDegrees[fineLevel], 178 PETSC_FALSE, ncompu + ncompd); CHKERRQ(ierr); 179 ierr = DMSetVecType(dmEnergy, vectype); CHKERRQ(ierr); 180 ierr = DMSetVecType(dmDiagnostic, vectype); CHKERRQ(ierr); 181 { 182 // -- Label field components for viewing 183 // Empty name for conserved field (because there is only one field) 184 PetscSection section; 185 ierr = DMGetLocalSection(dmDiagnostic, §ion); CHKERRQ(ierr); 186 ierr = PetscSectionSetFieldName(section, 0, "Diagnostics"); CHKERRQ(ierr); 187 ierr = PetscSectionSetComponentName(section, 0, 0, "DisplacementX"); 188 CHKERRQ(ierr); 189 ierr = PetscSectionSetComponentName(section, 0, 1, "DisplacementY"); 190 CHKERRQ(ierr); 191 ierr = PetscSectionSetComponentName(section, 0, 2, "DisplacementZ"); 192 CHKERRQ(ierr); 193 ierr = PetscSectionSetComponentName(section, 0, 3, "Pressure"); 194 CHKERRQ(ierr); 195 ierr = PetscSectionSetComponentName(section, 0, 4, "VolumentricStrain"); 196 CHKERRQ(ierr); 197 ierr = PetscSectionSetComponentName(section, 0, 5, "TraceE2"); 198 CHKERRQ(ierr); 199 ierr = PetscSectionSetComponentName(section, 0, 6, "detJ"); 200 CHKERRQ(ierr); 201 ierr = PetscSectionSetComponentName(section, 0, 7, "StrainEnergyDensity"); 202 CHKERRQ(ierr); 203 } 204 205 // --------------------------------------------------------------------------- 206 // Setup solution and work vectors 207 // --------------------------------------------------------------------------- 208 // Allocate arrays 209 ierr = PetscMalloc1(numLevels, &Ug); CHKERRQ(ierr); 210 ierr = PetscMalloc1(numLevels, &Uloc); CHKERRQ(ierr); 211 ierr = PetscMalloc1(numLevels, &Ugsz); CHKERRQ(ierr); 212 ierr = PetscMalloc1(numLevels, &Ulsz); CHKERRQ(ierr); 213 ierr = PetscMalloc1(numLevels, &Ulocsz); CHKERRQ(ierr); 214 215 // -- Setup solution vectors for each level 216 for (PetscInt level = 0; level < numLevels; level++) { 217 // -- Create global unknown vector U 218 ierr = DMCreateGlobalVector(levelDMs[level], &Ug[level]); CHKERRQ(ierr); 219 ierr = VecGetSize(Ug[level], &Ugsz[level]); CHKERRQ(ierr); 220 // Note: Local size for matShell 221 ierr = VecGetLocalSize(Ug[level], &Ulsz[level]); CHKERRQ(ierr); 222 223 // -- Create local unknown vector Uloc 224 ierr = DMCreateLocalVector(levelDMs[level], &Uloc[level]); CHKERRQ(ierr); 225 // Note: local size for libCEED 226 ierr = VecGetSize(Uloc[level], &Ulocsz[level]); CHKERRQ(ierr); 227 } 228 229 // -- Create residual and forcing vectors 230 ierr = VecDuplicate(Ug[fineLevel], &U); CHKERRQ(ierr); 231 ierr = VecDuplicate(Ug[fineLevel], &R); CHKERRQ(ierr); 232 ierr = VecDuplicate(Ug[fineLevel], &F); CHKERRQ(ierr); 233 ierr = VecDuplicate(Uloc[fineLevel], &Rloc); CHKERRQ(ierr); 234 ierr = VecDuplicate(Uloc[fineLevel], &Floc); CHKERRQ(ierr); 235 236 // Performance logging 237 ierr = PetscLogStagePop(); 238 239 // --------------------------------------------------------------------------- 240 // Set up libCEED 241 // --------------------------------------------------------------------------- 242 // Performance logging 243 ierr = PetscLogStageRegister("libCEED Setup Stage", &stageLibceedSetup); 244 CHKERRQ(ierr); 245 ierr = PetscLogStagePush(stageLibceedSetup); CHKERRQ(ierr); 246 247 // -- Create libCEED local forcing vector 248 CeedVector forceCeed; 249 CeedScalar *f; 250 PetscMemType fmemtype; 251 if (appCtx->forcingChoice != FORCE_NONE) { 252 ierr = VecGetArrayAndMemType(Floc, &f, &fmemtype); CHKERRQ(ierr); 253 CeedVectorCreate(ceed, Ulocsz[fineLevel], &forceCeed); 254 CeedVectorSetArray(forceCeed, MemTypeP2C(fmemtype), CEED_USE_POINTER, f); 255 } 256 257 // -- Create libCEED local Neumann BCs vector 258 CeedVector neumannCeed; 259 CeedScalar *n; 260 PetscMemType nmemtype; 261 if (appCtx->bcTractionCount > 0) { 262 ierr = VecDuplicate(U, &NBCs); CHKERRQ(ierr); 263 ierr = VecDuplicate(Uloc[fineLevel], &NBCsloc); CHKERRQ(ierr); 264 ierr = VecGetArrayAndMemType(NBCsloc, &n, &nmemtype); CHKERRQ(ierr); 265 CeedVectorCreate(ceed, Ulocsz[fineLevel], &neumannCeed); 266 CeedVectorSetArray(neumannCeed, MemTypeP2C(nmemtype), 267 CEED_USE_POINTER, n); 268 } 269 270 // -- Setup libCEED objects 271 ierr = PetscMalloc1(numLevels, &ceedData); CHKERRQ(ierr); 272 // ---- Setup residual, Jacobian evaluator and geometric information 273 ierr = PetscCalloc1(1, &ceedData[fineLevel]); CHKERRQ(ierr); 274 { 275 ierr = SetupLibceedFineLevel(levelDMs[fineLevel], dmEnergy, dmDiagnostic, 276 ceed, appCtx, ctxPhys, ceedData, fineLevel, 277 ncompu, Ugsz[fineLevel], Ulocsz[fineLevel], 278 forceCeed, neumannCeed); 279 CHKERRQ(ierr); 280 } 281 // ---- Setup coarse Jacobian evaluator and prolongation/restriction 282 for (PetscInt level = numLevels - 2; level >= 0; level--) { 283 ierr = PetscCalloc1(1, &ceedData[level]); CHKERRQ(ierr); 284 285 // Get global communication restriction 286 ierr = VecZeroEntries(Ug[level+1]); CHKERRQ(ierr); 287 ierr = VecSet(Uloc[level+1], 1.0); CHKERRQ(ierr); 288 ierr = DMLocalToGlobal(levelDMs[level+1], Uloc[level+1], ADD_VALUES, 289 Ug[level+1]); CHKERRQ(ierr); 290 ierr = DMGlobalToLocal(levelDMs[level+1], Ug[level+1], INSERT_VALUES, 291 Uloc[level+1]); CHKERRQ(ierr); 292 293 // Place in libCEED array 294 const PetscScalar *m; 295 PetscMemType mmemtype; 296 ierr = VecGetArrayReadAndMemType(Uloc[level+1], &m, &mmemtype); CHKERRQ(ierr); 297 CeedVectorSetArray(ceedData[level+1]->xceed, MemTypeP2C(mmemtype), 298 CEED_USE_POINTER, (CeedScalar *)m); 299 300 // Note: use high order ceed, if specified and degree > 4 301 ierr = SetupLibceedLevel(levelDMs[level], ceed, appCtx, 302 ceedData, level, ncompu, Ugsz[level], 303 Ulocsz[level], ceedData[level+1]->xceed); 304 CHKERRQ(ierr); 305 306 // Restore PETSc vector 307 CeedVectorTakeArray(ceedData[level+1]->xceed, MemTypeP2C(mmemtype), 308 (CeedScalar **)&m); 309 ierr = VecRestoreArrayReadAndMemType(Uloc[level+1], &m); CHKERRQ(ierr); 310 ierr = VecZeroEntries(Ug[level+1]); CHKERRQ(ierr); 311 ierr = VecZeroEntries(Uloc[level+1]); CHKERRQ(ierr); 312 } 313 314 // Performance logging 315 ierr = PetscLogStagePop(); 316 317 // --------------------------------------------------------------------------- 318 // Setup global forcing and Neumann BC vectors 319 // --------------------------------------------------------------------------- 320 ierr = VecZeroEntries(F); CHKERRQ(ierr); 321 322 if (appCtx->forcingChoice != FORCE_NONE) { 323 CeedVectorTakeArray(forceCeed, MemTypeP2C(fmemtype), NULL); 324 ierr = VecRestoreArrayAndMemType(Floc, &f); CHKERRQ(ierr); 325 ierr = DMLocalToGlobal(levelDMs[fineLevel], Floc, ADD_VALUES, F); 326 CHKERRQ(ierr); 327 CeedVectorDestroy(&forceCeed); 328 } 329 330 if (appCtx->bcTractionCount > 0) { 331 ierr = VecZeroEntries(NBCs); CHKERRQ(ierr); 332 CeedVectorTakeArray(neumannCeed, MemTypeP2C(nmemtype), NULL); 333 ierr = VecRestoreArrayAndMemType(NBCsloc, &n); CHKERRQ(ierr); 334 ierr = DMLocalToGlobal(levelDMs[fineLevel], NBCsloc, ADD_VALUES, NBCs); 335 CHKERRQ(ierr); 336 CeedVectorDestroy(&neumannCeed); 337 } 338 339 // --------------------------------------------------------------------------- 340 // Print problem summary 341 // --------------------------------------------------------------------------- 342 if (!appCtx->testMode) { 343 const char *usedresource; 344 CeedGetResource(ceed, &usedresource); 345 346 ierr = PetscPrintf(comm, 347 "\n-- Elasticity Example - libCEED + PETSc --\n" 348 " libCEED:\n" 349 " libCEED Backend : %s\n" 350 " libCEED Backend MemType : %s\n", 351 usedresource, CeedMemTypes[memTypeBackend]); 352 CHKERRQ(ierr); 353 354 VecType vecType; 355 ierr = VecGetType(U, &vecType); CHKERRQ(ierr); 356 ierr = PetscPrintf(comm, 357 " PETSc:\n" 358 " PETSc Vec Type : %s\n", 359 vecType); CHKERRQ(ierr); 360 361 ierr = PetscPrintf(comm, 362 " Problem:\n" 363 " Problem Name : %s\n" 364 " Forcing Function : %s\n" 365 " Mesh:\n" 366 " File : %s\n" 367 " Number of 1D Basis Nodes (p) : %d\n" 368 " Number of 1D Quadrature Points (q) : %d\n" 369 " Global nodes : %D\n" 370 " Owned nodes : %D\n" 371 " DoF per node : %D\n" 372 " Multigrid:\n" 373 " Type : %s\n" 374 " Number of Levels : %d\n", 375 problemTypesForDisp[appCtx->problemChoice], 376 forcingTypesForDisp[appCtx->forcingChoice], 377 appCtx->meshFile[0] ? appCtx->meshFile : "Box Mesh", 378 appCtx->degree + 1, appCtx->degree + 1, 379 Ugsz[fineLevel]/ncompu, Ulsz[fineLevel]/ncompu, ncompu, 380 (appCtx->degree == 1 && 381 appCtx->multigridChoice != MULTIGRID_NONE) ? 382 "Algebraic multigrid" : 383 multigridTypesForDisp[appCtx->multigridChoice], 384 (appCtx->degree == 1 || 385 appCtx->multigridChoice == MULTIGRID_NONE) ? 386 0 : numLevels); CHKERRQ(ierr); 387 388 if (appCtx->multigridChoice != MULTIGRID_NONE) { 389 for (PetscInt i = 0; i < 2; i++) { 390 CeedInt level = i ? fineLevel : 0; 391 ierr = PetscPrintf(comm, 392 " Level %D (%s):\n" 393 " Number of 1D Basis Nodes (p) : %d\n" 394 " Global Nodes : %D\n" 395 " Owned Nodes : %D\n", 396 level, i ? "fine" : "coarse", 397 appCtx->levelDegrees[level] + 1, 398 Ugsz[level]/ncompu, Ulsz[level]/ncompu); 399 CHKERRQ(ierr); 400 } 401 } 402 } 403 404 // --------------------------------------------------------------------------- 405 // Setup SNES 406 // --------------------------------------------------------------------------- 407 // Performance logging 408 ierr = PetscLogStageRegister("SNES Setup Stage", &stageSnesSetup); 409 CHKERRQ(ierr); 410 ierr = PetscLogStagePush(stageSnesSetup); CHKERRQ(ierr); 411 412 // Create SNES 413 ierr = SNESCreate(comm, &snes); CHKERRQ(ierr); 414 ierr = SNESSetDM(snes, levelDMs[fineLevel]); CHKERRQ(ierr); 415 416 // -- Jacobian evaluators 417 ierr = PetscMalloc1(numLevels, &jacobCtx); CHKERRQ(ierr); 418 ierr = PetscMalloc1(numLevels, &jacobMat); CHKERRQ(ierr); 419 for (PetscInt level = 0; level < numLevels; level++) { 420 // -- Jacobian context for level 421 ierr = PetscMalloc1(1, &jacobCtx[level]); CHKERRQ(ierr); 422 ierr = SetupJacobianCtx(comm, appCtx, levelDMs[level], Ug[level], 423 Uloc[level], ceedData[level], ceed, ctxPhys, 424 ctxPhysSmoother, jacobCtx[level]); CHKERRQ(ierr); 425 426 // -- Form Action of Jacobian on delta_u 427 ierr = MatCreateShell(comm, Ulsz[level], Ulsz[level], Ugsz[level], 428 Ugsz[level], jacobCtx[level], &jacobMat[level]); 429 CHKERRQ(ierr); 430 ierr = MatShellSetOperation(jacobMat[level], MATOP_MULT, 431 (void (*)(void))ApplyJacobian_Ceed); 432 CHKERRQ(ierr); 433 ierr = MatShellSetOperation(jacobMat[level], MATOP_GET_DIAGONAL, 434 (void(*)(void))GetDiag_Ceed); 435 ierr = MatShellSetVecType(jacobMat[level], vectype); CHKERRQ(ierr); 436 } 437 // Note: FormJacobian updates Jacobian matrices on each level 438 // and assembles the Jpre matrix, if needed 439 ierr = PetscMalloc1(1, &formJacobCtx); CHKERRQ(ierr); 440 formJacobCtx->jacobCtx = jacobCtx; 441 formJacobCtx->numLevels = numLevels; 442 formJacobCtx->jacobMat = jacobMat; 443 444 // -- Residual evaluation function 445 ierr = PetscCalloc1(1, &resCtx); CHKERRQ(ierr); 446 ierr = PetscMemcpy(resCtx, jacobCtx[fineLevel], 447 sizeof(*jacobCtx[fineLevel])); CHKERRQ(ierr); 448 resCtx->op = ceedData[fineLevel]->opApply; 449 resCtx->qf = ceedData[fineLevel]->qfApply; 450 if (appCtx->bcTractionCount > 0) 451 resCtx->NBCs = NBCs; 452 else 453 resCtx->NBCs = NULL; 454 ierr = SNESSetFunction(snes, R, FormResidual_Ceed, resCtx); CHKERRQ(ierr); 455 456 // -- Prolongation/Restriction evaluation 457 ierr = PetscMalloc1(numLevels, &prolongRestrCtx); CHKERRQ(ierr); 458 ierr = PetscMalloc1(numLevels, &prolongRestrMat); CHKERRQ(ierr); 459 for (PetscInt level = 1; level < numLevels; level++) { 460 // ---- Prolongation/restriction context for level 461 ierr = PetscMalloc1(1, &prolongRestrCtx[level]); CHKERRQ(ierr); 462 ierr = SetupProlongRestrictCtx(comm, appCtx, levelDMs[level-1], 463 levelDMs[level], Ug[level], Uloc[level-1], 464 Uloc[level], ceedData[level-1], 465 ceedData[level], ceed, 466 prolongRestrCtx[level]); CHKERRQ(ierr); 467 468 // ---- Form Action of Jacobian on delta_u 469 ierr = MatCreateShell(comm, Ulsz[level], Ulsz[level-1], Ugsz[level], 470 Ugsz[level-1], prolongRestrCtx[level], 471 &prolongRestrMat[level]); CHKERRQ(ierr); 472 // Note: In PCMG, restriction is the transpose of prolongation 473 ierr = MatShellSetOperation(prolongRestrMat[level], MATOP_MULT, 474 (void (*)(void))Prolong_Ceed); 475 ierr = MatShellSetOperation(prolongRestrMat[level], MATOP_MULT_TRANSPOSE, 476 (void (*)(void))Restrict_Ceed); 477 CHKERRQ(ierr); 478 ierr = MatShellSetVecType(prolongRestrMat[level], vectype); CHKERRQ(ierr); 479 } 480 481 // --------------------------------------------------------------------------- 482 // Setup dummy SNES for AMG coarse solve 483 // --------------------------------------------------------------------------- 484 if (appCtx->multigridChoice != MULTIGRID_NONE) { 485 // -- Jacobian Matrix 486 ierr = DMSetMatType(levelDMs[0], MATAIJ); CHKERRQ(ierr); 487 ierr = DMCreateMatrix(levelDMs[0], &jacobMatCoarse); CHKERRQ(ierr); 488 489 if (appCtx->degree > 1) { 490 ierr = SNESCreate(comm, &snesCoarse); CHKERRQ(ierr); 491 ierr = SNESSetDM(snesCoarse, levelDMs[0]); CHKERRQ(ierr); 492 ierr = SNESSetSolution(snesCoarse, Ug[0]); CHKERRQ(ierr); 493 494 // -- Jacobian function 495 ierr = SNESSetJacobian(snesCoarse, jacobMatCoarse, jacobMatCoarse, NULL, 496 NULL); CHKERRQ(ierr); 497 498 // -- Residual evaluation function 499 ierr = PetscMalloc1(1, &jacobCoarseCtx); CHKERRQ(ierr); 500 ierr = PetscMemcpy(jacobCoarseCtx, jacobCtx[0], sizeof(*jacobCtx[0])); 501 CHKERRQ(ierr); 502 ierr = SNESSetFunction(snesCoarse, Ug[0], ApplyJacobianCoarse_Ceed, 503 jacobCoarseCtx); CHKERRQ(ierr); 504 505 // -- Update formJacobCtx 506 formJacobCtx->Ucoarse = Ug[0]; 507 formJacobCtx->snesCoarse = snesCoarse; 508 formJacobCtx->jacobMatCoarse = jacobMatCoarse; 509 } 510 } 511 512 // Set Jacobian function 513 if (appCtx->degree > 1) { 514 ierr = SNESSetJacobian(snes, jacobMat[fineLevel], jacobMat[fineLevel], 515 FormJacobian, formJacobCtx); CHKERRQ(ierr); 516 } else { 517 ierr = SNESSetJacobian(snes, jacobMat[0], jacobMatCoarse, 518 SNESComputeJacobianDefaultColor, NULL); 519 CHKERRQ(ierr); 520 } 521 522 // --------------------------------------------------------------------------- 523 // Setup KSP 524 // --------------------------------------------------------------------------- 525 { 526 PC pc; 527 KSP ksp; 528 529 // -- KSP 530 ierr = SNESGetKSP(snes, &ksp); CHKERRQ(ierr); 531 ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr); 532 ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL); CHKERRQ(ierr); 533 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 534 PETSC_DEFAULT); CHKERRQ(ierr); 535 ierr = KSPSetOptionsPrefix(ksp, "outer_"); CHKERRQ(ierr); 536 537 // -- Preconditioning 538 ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); 539 ierr = PCSetDM(pc, levelDMs[fineLevel]); CHKERRQ(ierr); 540 ierr = PCSetOptionsPrefix(pc, "outer_"); CHKERRQ(ierr); 541 542 if (appCtx->multigridChoice == MULTIGRID_NONE) { 543 // ---- No Multigrid 544 ierr = PCSetType(pc, PCJACOBI); CHKERRQ(ierr); 545 ierr = PCJacobiSetType(pc, PC_JACOBI_DIAGONAL); CHKERRQ(ierr); 546 } else if (appCtx->degree == 1) { 547 // ---- AMG for degree 1 548 ierr = PCSetType(pc, PCGAMG); CHKERRQ(ierr); 549 } else { 550 // ---- PCMG 551 ierr = PCSetType(pc, PCMG); CHKERRQ(ierr); 552 553 // ------ PCMG levels 554 ierr = PCMGSetLevels(pc, numLevels, NULL); CHKERRQ(ierr); 555 for (PetscInt level = 0; level < numLevels; level++) { 556 // -------- Smoother 557 KSP kspSmoother, kspEst; 558 PC pcSmoother; 559 560 // ---------- Smoother KSP 561 ierr = PCMGGetSmoother(pc, level, &kspSmoother); CHKERRQ(ierr); 562 ierr = KSPSetDM(kspSmoother, levelDMs[level]); CHKERRQ(ierr); 563 ierr = KSPSetDMActive(kspSmoother, PETSC_FALSE); CHKERRQ(ierr); 564 565 // ---------- Chebyshev options 566 ierr = KSPSetType(kspSmoother, KSPCHEBYSHEV); CHKERRQ(ierr); 567 ierr = KSPChebyshevEstEigSet(kspSmoother, 0, 0.1, 0, 1.1); 568 CHKERRQ(ierr); 569 ierr = KSPChebyshevEstEigGetKSP(kspSmoother, &kspEst); CHKERRQ(ierr); 570 ierr = KSPSetType(kspEst, KSPCG); CHKERRQ(ierr); 571 ierr = KSPChebyshevEstEigSetUseNoisy(kspSmoother, PETSC_TRUE); 572 CHKERRQ(ierr); 573 ierr = KSPSetOperators(kspSmoother, jacobMat[level], jacobMat[level]); 574 CHKERRQ(ierr); 575 576 // ---------- Smoother preconditioner 577 ierr = KSPGetPC(kspSmoother, &pcSmoother); CHKERRQ(ierr); 578 ierr = PCSetType(pcSmoother, PCJACOBI); CHKERRQ(ierr); 579 ierr = PCJacobiSetType(pcSmoother, PC_JACOBI_DIAGONAL); CHKERRQ(ierr); 580 581 // -------- Work vector 582 if (level != fineLevel) { 583 ierr = PCMGSetX(pc, level, Ug[level]); CHKERRQ(ierr); 584 } 585 586 // -------- Level prolongation/restriction operator 587 if (level > 0) { 588 ierr = PCMGSetInterpolation(pc, level, prolongRestrMat[level]); 589 CHKERRQ(ierr); 590 ierr = PCMGSetRestriction(pc, level, prolongRestrMat[level]); 591 CHKERRQ(ierr); 592 } 593 } 594 595 // ------ PCMG coarse solve 596 KSP kspCoarse; 597 PC pcCoarse; 598 599 // -------- Coarse KSP 600 ierr = PCMGGetCoarseSolve(pc, &kspCoarse); CHKERRQ(ierr); 601 ierr = KSPSetType(kspCoarse, KSPPREONLY); CHKERRQ(ierr); 602 ierr = KSPSetOperators(kspCoarse, jacobMatCoarse, jacobMatCoarse); 603 CHKERRQ(ierr); 604 ierr = KSPSetOptionsPrefix(kspCoarse, "coarse_"); CHKERRQ(ierr); 605 606 // -------- Coarse preconditioner 607 ierr = KSPGetPC(kspCoarse, &pcCoarse); CHKERRQ(ierr); 608 ierr = PCSetType(pcCoarse, PCGAMG); CHKERRQ(ierr); 609 ierr = PCSetOptionsPrefix(pcCoarse, "coarse_"); CHKERRQ(ierr); 610 611 ierr = KSPSetFromOptions(kspCoarse); CHKERRQ(ierr); 612 ierr = PCSetFromOptions(pcCoarse); CHKERRQ(ierr); 613 614 // ------ PCMG options 615 ierr = PCMGSetType(pc, PC_MG_MULTIPLICATIVE); CHKERRQ(ierr); 616 ierr = PCMGSetNumberSmooth(pc, 3); CHKERRQ(ierr); 617 ierr = PCMGSetCycleType(pc, pcmgCycleType); CHKERRQ(ierr); 618 } 619 ierr = KSPSetFromOptions(ksp); 620 ierr = PCSetFromOptions(pc); 621 } 622 { 623 // Default to critical-point (CP) line search (related to Wolfe's curvature condition) 624 SNESLineSearch linesearch; 625 626 ierr = SNESGetLineSearch(snes, &linesearch); CHKERRQ(ierr); 627 ierr = SNESLineSearchSetType(linesearch, SNESLINESEARCHCP); CHKERRQ(ierr); 628 } 629 630 ierr = SNESSetFromOptions(snes); CHKERRQ(ierr); 631 632 // Performance logging 633 ierr = PetscLogStagePop(); 634 635 // --------------------------------------------------------------------------- 636 // Set initial guess 637 // --------------------------------------------------------------------------- 638 ierr = PetscObjectSetName((PetscObject)U, ""); CHKERRQ(ierr); 639 ierr = VecSet(U, 0.0); CHKERRQ(ierr); 640 641 // View solution 642 if (appCtx->viewSoln) { 643 ierr = ViewSolution(comm, appCtx, U, 0, 0.0); CHKERRQ(ierr); 644 } 645 646 // --------------------------------------------------------------------------- 647 // Solve SNES 648 // --------------------------------------------------------------------------- 649 PetscBool snesMonitor = PETSC_FALSE; 650 ierr = PetscOptionsHasName(NULL, NULL, "-snes_monitor", &snesMonitor); 651 CHKERRQ(ierr); 652 653 // Performance logging 654 ierr = PetscLogStageRegister("SNES Solve Stage", &stageSnesSolve); 655 CHKERRQ(ierr); 656 ierr = PetscLogStagePush(stageSnesSolve); CHKERRQ(ierr); 657 658 // Timing 659 ierr = PetscBarrier((PetscObject)snes); CHKERRQ(ierr); 660 startTime = MPI_Wtime(); 661 662 // Solve for each load increment 663 PetscInt increment; 664 for (increment = 1; increment <= appCtx->numIncrements; increment++) { 665 // -- Log increment count 666 if (snesMonitor) { 667 ierr = PetscPrintf(comm, "%d Load Increment\n", increment - 1); 668 CHKERRQ(ierr); 669 } 670 671 // -- Scale the problem 672 PetscScalar loadIncrement = 1.0*increment / appCtx->numIncrements, 673 scalingFactor = loadIncrement / 674 (increment == 1 ? 1 : resCtx->loadIncrement); 675 resCtx->loadIncrement = loadIncrement; 676 if (appCtx->numIncrements > 1 && appCtx->forcingChoice != FORCE_NONE) { 677 ierr = VecScale(F, scalingFactor); CHKERRQ(ierr); 678 } 679 680 // -- Solve 681 ierr = SNESSolve(snes, F, U); CHKERRQ(ierr); 682 683 // -- View solution 684 if (appCtx->viewSoln) { 685 ierr = ViewSolution(comm, appCtx, U, increment, loadIncrement); CHKERRQ(ierr); 686 } 687 688 // -- Update SNES iteration count 689 PetscInt its; 690 ierr = SNESGetIterationNumber(snes, &its); CHKERRQ(ierr); 691 snesIts += its; 692 ierr = SNESGetLinearSolveIterations(snes, &its); CHKERRQ(ierr); 693 kspIts += its; 694 695 // -- Check for divergence 696 SNESConvergedReason reason; 697 ierr = SNESGetConvergedReason(snes, &reason); CHKERRQ(ierr); 698 if (reason < 0) 699 break; 700 } 701 702 // Timing 703 elapsedTime = MPI_Wtime() - startTime; 704 705 // Performance logging 706 ierr = PetscLogStagePop(); 707 708 // --------------------------------------------------------------------------- 709 // Output summary 710 // --------------------------------------------------------------------------- 711 if (!appCtx->testMode) { 712 // -- SNES 713 SNESType snesType; 714 SNESConvergedReason reason; 715 PetscReal rnorm; 716 ierr = SNESGetType(snes, &snesType); CHKERRQ(ierr); 717 ierr = SNESGetConvergedReason(snes, &reason); CHKERRQ(ierr); 718 ierr = SNESGetFunctionNorm(snes, &rnorm); CHKERRQ(ierr); 719 ierr = PetscPrintf(comm, 720 " SNES:\n" 721 " SNES Type : %s\n" 722 " SNES Convergence : %s\n" 723 " Number of Load Increments : %d\n" 724 " Completed Load Increments : %d\n" 725 " Total SNES Iterations : %D\n" 726 " Final rnorm : %e\n", 727 snesType, SNESConvergedReasons[reason], 728 appCtx->numIncrements, increment - 1, 729 snesIts, (double)rnorm); CHKERRQ(ierr); 730 731 // -- KSP 732 KSP ksp; 733 KSPType kspType; 734 ierr = SNESGetKSP(snes, &ksp); CHKERRQ(ierr); 735 ierr = KSPGetType(ksp, &kspType); CHKERRQ(ierr); 736 ierr = PetscPrintf(comm, 737 " Linear Solver:\n" 738 " KSP Type : %s\n" 739 " Total KSP Iterations : %D\n", 740 kspType, kspIts); CHKERRQ(ierr); 741 742 // -- PC 743 PC pc; 744 PCType pcType; 745 ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); 746 ierr = PCGetType(pc, &pcType); CHKERRQ(ierr); 747 ierr = PetscPrintf(comm, 748 " PC Type : %s\n", 749 pcType); CHKERRQ(ierr); 750 751 if (!strcmp(pcType, PCMG)) { 752 PCMGType pcmgType; 753 ierr = PCMGGetType(pc, &pcmgType); CHKERRQ(ierr); 754 ierr = PetscPrintf(comm, 755 " P-Multigrid:\n" 756 " PCMG Type : %s\n" 757 " PCMG Cycle Type : %s\n", 758 PCMGTypes[pcmgType], 759 PCMGCycleTypes[pcmgCycleType]); CHKERRQ(ierr); 760 761 // -- Coarse Solve 762 KSP kspCoarse; 763 PC pcCoarse; 764 PCType pcType; 765 766 ierr = PCMGGetCoarseSolve(pc, &kspCoarse); CHKERRQ(ierr); 767 ierr = KSPGetType(kspCoarse, &kspType); CHKERRQ(ierr); 768 ierr = KSPGetPC(kspCoarse, &pcCoarse); CHKERRQ(ierr); 769 ierr = PCGetType(pcCoarse, &pcType); CHKERRQ(ierr); 770 ierr = PetscPrintf(comm, 771 " Coarse Solve:\n" 772 " KSP Type : %s\n" 773 " PC Type : %s\n", 774 kspType, pcType); CHKERRQ(ierr); 775 } 776 } 777 778 // --------------------------------------------------------------------------- 779 // Compute solve time 780 // --------------------------------------------------------------------------- 781 if (!appCtx->testMode) { 782 ierr = MPI_Allreduce(&elapsedTime, &minTime, 1, MPI_DOUBLE, MPI_MIN, comm); 783 CHKERRQ(ierr); 784 ierr = MPI_Allreduce(&elapsedTime, &maxTime, 1, MPI_DOUBLE, MPI_MAX, comm); 785 CHKERRQ(ierr); 786 ierr = PetscPrintf(comm, 787 " Performance:\n" 788 " SNES Solve Time : %g (%g) sec\n" 789 " DoFs/Sec in SNES : %g (%g) million\n", 790 maxTime, minTime, 1e-6*Ugsz[fineLevel]*kspIts/maxTime, 791 1e-6*Ugsz[fineLevel]*kspIts/minTime); CHKERRQ(ierr); 792 } 793 794 // --------------------------------------------------------------------------- 795 // Compute error 796 // --------------------------------------------------------------------------- 797 if (appCtx->forcingChoice == FORCE_MMS) { 798 CeedScalar l2Error = 1., l2Unorm = 1.; 799 const CeedScalar *truearray; 800 Vec errorVec, trueVec; 801 802 // -- Work vectors 803 ierr = VecDuplicate(U, &errorVec); CHKERRQ(ierr); 804 ierr = VecSet(errorVec, 0.0); CHKERRQ(ierr); 805 ierr = VecDuplicate(U, &trueVec); CHKERRQ(ierr); 806 ierr = VecSet(trueVec, 0.0); CHKERRQ(ierr); 807 808 // -- Assemble global true solution vector 809 CeedVectorGetArrayRead(ceedData[fineLevel]->truesoln, 810 CEED_MEM_HOST, &truearray); 811 ierr = VecPlaceArray(resCtx->Yloc, (PetscScalar *)truearray); 812 CHKERRQ(ierr); 813 ierr = DMLocalToGlobal(resCtx->dm, resCtx->Yloc, INSERT_VALUES, trueVec); 814 CHKERRQ(ierr); 815 ierr = VecResetArray(resCtx->Yloc); CHKERRQ(ierr); 816 CeedVectorRestoreArrayRead(ceedData[fineLevel]->truesoln, &truearray); 817 818 // -- Compute L2 error 819 ierr = VecWAXPY(errorVec, -1.0, U, trueVec); CHKERRQ(ierr); 820 ierr = VecNorm(errorVec, NORM_2, &l2Error); CHKERRQ(ierr); 821 ierr = VecNorm(U, NORM_2, &l2Unorm); CHKERRQ(ierr); 822 l2Error /= l2Unorm; 823 824 // -- Output 825 if (!appCtx->testMode || l2Error > 0.05) { 826 ierr = PetscPrintf(comm, 827 " L2 Error : %e\n", 828 l2Error); CHKERRQ(ierr); 829 } 830 831 // -- Cleanup 832 ierr = VecDestroy(&errorVec); CHKERRQ(ierr); 833 ierr = VecDestroy(&trueVec); CHKERRQ(ierr); 834 } 835 836 // --------------------------------------------------------------------------- 837 // Compute energy 838 // --------------------------------------------------------------------------- 839 if (!appCtx->testMode) { 840 // -- Compute L2 error 841 CeedScalar energy; 842 ierr = ComputeStrainEnergy(dmEnergy, resCtx, ceedData[fineLevel]->opEnergy, 843 U, &energy); CHKERRQ(ierr); 844 845 // -- Output 846 ierr = PetscPrintf(comm, 847 " Strain Energy : %.12e\n", 848 energy); CHKERRQ(ierr); 849 } 850 851 // --------------------------------------------------------------------------- 852 // Output diagnostic quantities 853 // --------------------------------------------------------------------------- 854 if (appCtx->viewSoln || appCtx->viewFinalSoln) { 855 // -- Setup context 856 UserMult diagnosticCtx; 857 ierr = PetscMalloc1(1, &diagnosticCtx); CHKERRQ(ierr); 858 ierr = PetscMemcpy(diagnosticCtx, resCtx, sizeof(*resCtx)); CHKERRQ(ierr); 859 diagnosticCtx->dm = dmDiagnostic; 860 diagnosticCtx->op = ceedData[fineLevel]->opDiagnostic; 861 862 // -- Compute and output 863 ierr = ViewDiagnosticQuantities(comm, levelDMs[fineLevel], diagnosticCtx, 864 appCtx, U, 865 ceedData[fineLevel]->ErestrictDiagnostic); 866 CHKERRQ(ierr); 867 868 // -- Cleanup 869 ierr = PetscFree(diagnosticCtx); CHKERRQ(ierr); 870 } 871 872 // --------------------------------------------------------------------------- 873 // Free objects 874 // --------------------------------------------------------------------------- 875 // Data in arrays per level 876 for (PetscInt level = 0; level < numLevels; level++) { 877 // Vectors 878 ierr = VecDestroy(&Ug[level]); CHKERRQ(ierr); 879 ierr = VecDestroy(&Uloc[level]); CHKERRQ(ierr); 880 881 // Jacobian matrix and data 882 ierr = VecDestroy(&jacobCtx[level]->Yloc); CHKERRQ(ierr); 883 ierr = MatDestroy(&jacobMat[level]); CHKERRQ(ierr); 884 ierr = PetscFree(jacobCtx[level]); CHKERRQ(ierr); 885 886 // Prolongation/Restriction matrix and data 887 if (level > 0) { 888 ierr = PetscFree(prolongRestrCtx[level]); CHKERRQ(ierr); 889 ierr = MatDestroy(&prolongRestrMat[level]); CHKERRQ(ierr); 890 } 891 892 // DM 893 ierr = DMDestroy(&levelDMs[level]); CHKERRQ(ierr); 894 895 // libCEED objects 896 ierr = CeedDataDestroy(level, ceedData[level]); CHKERRQ(ierr); 897 } 898 899 // Arrays 900 ierr = PetscFree(Ug); CHKERRQ(ierr); 901 ierr = PetscFree(Uloc); CHKERRQ(ierr); 902 ierr = PetscFree(Ugsz); CHKERRQ(ierr); 903 ierr = PetscFree(Ulsz); CHKERRQ(ierr); 904 ierr = PetscFree(Ulocsz); CHKERRQ(ierr); 905 ierr = PetscFree(jacobCtx); CHKERRQ(ierr); 906 ierr = PetscFree(jacobMat); CHKERRQ(ierr); 907 ierr = PetscFree(prolongRestrCtx); CHKERRQ(ierr); 908 ierr = PetscFree(prolongRestrMat); CHKERRQ(ierr); 909 ierr = PetscFree(appCtx->levelDegrees); CHKERRQ(ierr); 910 ierr = PetscFree(ceedData); CHKERRQ(ierr); 911 912 // libCEED objects 913 CeedQFunctionContextDestroy(&ctxPhys); 914 CeedQFunctionContextDestroy(&ctxPhysSmoother); 915 CeedDestroy(&ceed); 916 917 // PETSc objects 918 ierr = VecDestroy(&U); CHKERRQ(ierr); 919 ierr = VecDestroy(&R); CHKERRQ(ierr); 920 ierr = VecDestroy(&Rloc); CHKERRQ(ierr); 921 ierr = VecDestroy(&F); CHKERRQ(ierr); 922 ierr = VecDestroy(&Floc); CHKERRQ(ierr); 923 ierr = VecDestroy(&NBCs); CHKERRQ(ierr); 924 ierr = VecDestroy(&NBCsloc); CHKERRQ(ierr); 925 ierr = MatDestroy(&jacobMatCoarse); CHKERRQ(ierr); 926 ierr = SNESDestroy(&snes); CHKERRQ(ierr); 927 ierr = SNESDestroy(&snesCoarse); CHKERRQ(ierr); 928 ierr = DMDestroy(&dmOrig); CHKERRQ(ierr); 929 ierr = DMDestroy(&dmEnergy); CHKERRQ(ierr); 930 ierr = DMDestroy(&dmDiagnostic); CHKERRQ(ierr); 931 ierr = PetscFree(levelDMs); CHKERRQ(ierr); 932 933 // Structs 934 ierr = PetscFree(resCtx); CHKERRQ(ierr); 935 ierr = PetscFree(formJacobCtx); CHKERRQ(ierr); 936 ierr = PetscFree(jacobCoarseCtx); CHKERRQ(ierr); 937 ierr = PetscFree(appCtx); CHKERRQ(ierr); 938 ierr = PetscFree(phys); CHKERRQ(ierr); 939 ierr = PetscFree(physSmoother); CHKERRQ(ierr); 940 ierr = PetscFree(units); CHKERRQ(ierr); 941 942 return PetscFinalize(); 943 } 944