1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 // libCEED + PETSc Example: Elasticity 18 // 19 // This example demonstrates a simple usage of libCEED with PETSc to solve 20 // elasticity problems. 21 // 22 // The code uses higher level communication protocols in DMPlex. 
23 // 24 // Build with: 25 // 26 // make elasticity [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>] 27 // 28 // Sample runs: 29 // 30 // ./elasticity -mesh [.exo file] -degree 2 -E 1 -nu 0.3 -problem linElas -forcing mms 31 // ./elasticity -mesh [.exo file] -degree 2 -E 1 -nu 0.3 -bc_clamp 998,999 -bc_clamp_998_translate 0.1,0.2,0.3 -problem hyperSS -forcing none -ceed /cpu/self 32 // ./elasticity -mesh [.exo file] -degree 2 -E 1 -nu 0.3 -bc_clamp 998,999 -bc_clamp_998_rotate 1,0,0,0.2 -problem hyperFS -forcing none -ceed /gpu/occa 33 // 34 // Sample meshes can be found at https://github.com/jeremylt/ceedSampleMeshes 35 // 36 //TESTARGS -ceed {ceed_resource} -test -degree 3 -nu 0.3 -E 1 -dm_plex_box_faces 3,3,3 37 38 /// @file 39 /// CEED elasticity example using PETSc with DMPlex 40 41 const char help[] = "Solve solid Problems with CEED and PETSc DMPlex\n"; 42 43 #include "elasticity.h" 44 45 int main(int argc, char **argv) { 46 PetscInt ierr; 47 MPI_Comm comm; 48 // Context structs 49 AppCtx appCtx; // Contains problem options 50 Physics phys; // Contains physical constants 51 Physics physSmoother = NULL; // Separate context if nuSmoother set 52 Units units; // Contains units scaling 53 // PETSc objects 54 PetscLogStage stageDMSetup, stageLibceedSetup, 55 stageSnesSetup, stageSnesSolve; 56 DM dmOrig; // Distributed DM to clone 57 DM dmEnergy, dmDiagnostic; // DMs for postprocessing 58 DM *levelDMs; 59 Vec U, *Ug, *Uloc; // U: solution, R: residual, F: forcing 60 Vec R, Rloc, F, Floc; // g: global, loc: local 61 SNES snes, snesCoarse = NULL; 62 Mat *jacobMat, jacobMatCoarse, *prolongRestrMat; 63 // PETSc data 64 UserMult resCtx, jacobCoarseCtx = NULL, *jacobCtx; 65 FormJacobCtx formJacobCtx; 66 UserMultProlongRestr *prolongRestrCtx; 67 PCMGCycleType pcmgCycleType = PC_MG_CYCLE_V; 68 // libCEED objects 69 Ceed ceed; 70 CeedData *ceedData; 71 CeedQFunctionContext ctxPhys, ctxPhysSmoother = NULL; 72 // Parameters 73 PetscInt ncompu = 3; // 3 DoFs in 3D 74 
PetscInt ncompe = 1, ncompd = 5; // 1 energy output, 5 diagnostic 75 PetscInt numLevels = 1, fineLevel = 0; 76 PetscInt *Ugsz, *Ulsz, *Ulocsz; // sz: size 77 PetscInt snesIts = 0, kspIts = 0; 78 // Timing 79 double startTime, elapsedTime, minTime, maxTime; 80 81 ierr = PetscInitialize(&argc, &argv, NULL, help); 82 if (ierr) 83 return ierr; 84 85 // --------------------------------------------------------------------------- 86 // Process command line options 87 // --------------------------------------------------------------------------- 88 comm = PETSC_COMM_WORLD; 89 90 // -- Set mesh file, polynomial degree, problem type 91 ierr = PetscCalloc1(1, &appCtx); CHKERRQ(ierr); 92 ierr = ProcessCommandLineOptions(comm, appCtx); CHKERRQ(ierr); 93 numLevels = appCtx->numLevels; 94 fineLevel = numLevels - 1; 95 96 // -- Set Poisson's ratio, Young's Modulus 97 ierr = PetscMalloc1(1, &phys); CHKERRQ(ierr); 98 ierr = PetscMalloc1(1, &units); CHKERRQ(ierr); 99 ierr = ProcessPhysics(comm, phys, units); CHKERRQ(ierr); 100 if (fabs(appCtx->nuSmoother) > 1E-14) { 101 ierr = PetscMalloc1(1, &physSmoother); CHKERRQ(ierr); 102 ierr = PetscMemcpy(physSmoother, phys, sizeof(*phys)); CHKERRQ(ierr); 103 physSmoother->nu = appCtx->nuSmoother; 104 } 105 106 // --------------------------------------------------------------------------- 107 // Initialize libCEED 108 // --------------------------------------------------------------------------- 109 // Initialize backend 110 CeedInit(appCtx->ceedResource, &ceed); 111 112 // Check preferred MemType 113 CeedMemType memTypeBackend; 114 CeedGetPreferredMemType(ceed, &memTypeBackend); 115 if (!appCtx->setMemTypeRequest) 116 appCtx->memTypeRequested = memTypeBackend; 117 else if (!appCtx->petscHaveCuda && appCtx->memTypeRequested == CEED_MEM_DEVICE) 118 SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_SUP_SYS, 119 "PETSc was not built with CUDA. 
" 120 "Requested MemType CEED_MEM_DEVICE is not supported.", NULL); 121 122 // Wrap context in libCEED objects 123 CeedQFunctionContextCreate(ceed, &ctxPhys); 124 CeedQFunctionContextSetData(ctxPhys, CEED_MEM_HOST, CEED_USE_POINTER, 125 sizeof(*phys), phys); 126 if (physSmoother) { 127 CeedQFunctionContextCreate(ceed, &ctxPhysSmoother); 128 CeedQFunctionContextSetData(ctxPhysSmoother, CEED_MEM_HOST, CEED_USE_POINTER, 129 sizeof(*physSmoother), physSmoother); 130 } 131 132 // --------------------------------------------------------------------------- 133 // Setup DM 134 // --------------------------------------------------------------------------- 135 // Performance logging 136 ierr = PetscLogStageRegister("DM and Vector Setup Stage", &stageDMSetup); 137 CHKERRQ(ierr); 138 ierr = PetscLogStagePush(stageDMSetup); CHKERRQ(ierr); 139 140 // -- Create distributed DM from mesh file 141 ierr = CreateDistributedDM(comm, appCtx, &dmOrig); CHKERRQ(ierr); 142 143 // -- Setup DM by polynomial degree 144 ierr = PetscMalloc1(numLevels, &levelDMs); CHKERRQ(ierr); 145 for (PetscInt level = 0; level < numLevels; level++) { 146 ierr = DMClone(dmOrig, &levelDMs[level]); CHKERRQ(ierr); 147 if (appCtx->memTypeRequested == CEED_MEM_DEVICE) { 148 ierr = DMSetVecType(levelDMs[level], VECCUDA); CHKERRQ(ierr); 149 } 150 ierr = SetupDMByDegree(levelDMs[level], appCtx, appCtx->levelDegrees[level], 151 PETSC_TRUE, ncompu); CHKERRQ(ierr); 152 // -- Label field components for viewing 153 // Empty name for conserved field (because there is only one field) 154 PetscSection section; 155 ierr = DMGetLocalSection(levelDMs[level], §ion); CHKERRQ(ierr); 156 ierr = PetscSectionSetFieldName(section, 0, "Displacement"); CHKERRQ(ierr); 157 ierr = PetscSectionSetComponentName(section, 0, 0, "DisplacementX"); 158 CHKERRQ(ierr); 159 ierr = PetscSectionSetComponentName(section, 0, 1, "DisplacementY"); 160 CHKERRQ(ierr); 161 ierr = PetscSectionSetComponentName(section, 0, 2, "DisplacementZ"); 162 CHKERRQ(ierr); 
163 } 164 165 // -- Setup postprocessing DMs 166 ierr = DMClone(dmOrig, &dmEnergy); CHKERRQ(ierr); 167 ierr = SetupDMByDegree(dmEnergy, appCtx, appCtx->levelDegrees[fineLevel], 168 PETSC_FALSE, ncompe); CHKERRQ(ierr); 169 ierr = DMClone(dmOrig, &dmDiagnostic); CHKERRQ(ierr); 170 ierr = SetupDMByDegree(dmDiagnostic, appCtx, appCtx->levelDegrees[fineLevel], 171 PETSC_FALSE, ncompu + ncompd); CHKERRQ(ierr); 172 if (appCtx->memTypeRequested == CEED_MEM_DEVICE) { 173 ierr = DMSetVecType(dmEnergy, VECCUDA); CHKERRQ(ierr); 174 ierr = DMSetVecType(dmDiagnostic, VECCUDA); CHKERRQ(ierr); 175 } 176 { 177 // -- Label field components for viewing 178 // Empty name for conserved field (because there is only one field) 179 PetscSection section; 180 ierr = DMGetLocalSection(dmDiagnostic, §ion); CHKERRQ(ierr); 181 ierr = PetscSectionSetFieldName(section, 0, "Diagnostics"); CHKERRQ(ierr); 182 ierr = PetscSectionSetComponentName(section, 0, 0, "DisplacementX"); 183 CHKERRQ(ierr); 184 ierr = PetscSectionSetComponentName(section, 0, 1, "DisplacementY"); 185 CHKERRQ(ierr); 186 ierr = PetscSectionSetComponentName(section, 0, 2, "DisplacementZ"); 187 CHKERRQ(ierr); 188 ierr = PetscSectionSetComponentName(section, 0, 3, "Pressure"); 189 CHKERRQ(ierr); 190 ierr = PetscSectionSetComponentName(section, 0, 4, "VolumentricStrain"); 191 CHKERRQ(ierr); 192 ierr = PetscSectionSetComponentName(section, 0, 5, "TraceE2"); 193 CHKERRQ(ierr); 194 ierr = PetscSectionSetComponentName(section, 0, 6, "detJ"); 195 CHKERRQ(ierr); 196 ierr = PetscSectionSetComponentName(section, 0, 7, "StrainEnergyDensity"); 197 CHKERRQ(ierr); 198 } 199 200 // --------------------------------------------------------------------------- 201 // Setup solution and work vectors 202 // --------------------------------------------------------------------------- 203 // Allocate arrays 204 ierr = PetscMalloc1(numLevels, &Ug); CHKERRQ(ierr); 205 ierr = PetscMalloc1(numLevels, &Uloc); CHKERRQ(ierr); 206 ierr = PetscMalloc1(numLevels, 
&Ugsz); CHKERRQ(ierr); 207 ierr = PetscMalloc1(numLevels, &Ulsz); CHKERRQ(ierr); 208 ierr = PetscMalloc1(numLevels, &Ulocsz); CHKERRQ(ierr); 209 210 // -- Setup solution vectors for each level 211 for (PetscInt level = 0; level < numLevels; level++) { 212 // -- Create global unknown vector U 213 ierr = DMCreateGlobalVector(levelDMs[level], &Ug[level]); CHKERRQ(ierr); 214 ierr = VecGetSize(Ug[level], &Ugsz[level]); CHKERRQ(ierr); 215 // Note: Local size for matShell 216 ierr = VecGetLocalSize(Ug[level], &Ulsz[level]); CHKERRQ(ierr); 217 218 // -- Create local unknown vector Uloc 219 ierr = DMCreateLocalVector(levelDMs[level], &Uloc[level]); CHKERRQ(ierr); 220 // Note: local size for libCEED 221 ierr = VecGetSize(Uloc[level], &Ulocsz[level]); CHKERRQ(ierr); 222 } 223 224 // -- Create residual and forcing vectors 225 ierr = VecDuplicate(Ug[fineLevel], &U); CHKERRQ(ierr); 226 ierr = VecDuplicate(Ug[fineLevel], &R); CHKERRQ(ierr); 227 ierr = VecDuplicate(Ug[fineLevel], &F); CHKERRQ(ierr); 228 ierr = VecDuplicate(Uloc[fineLevel], &Rloc); CHKERRQ(ierr); 229 ierr = VecDuplicate(Uloc[fineLevel], &Floc); CHKERRQ(ierr); 230 231 // Performance logging 232 ierr = PetscLogStagePop(); 233 234 // --------------------------------------------------------------------------- 235 // Set up libCEED 236 // --------------------------------------------------------------------------- 237 // Performance logging 238 ierr = PetscLogStageRegister("libCEED Setup Stage", &stageLibceedSetup); 239 CHKERRQ(ierr); 240 ierr = PetscLogStagePush(stageLibceedSetup); CHKERRQ(ierr); 241 242 // -- Create libCEED local forcing vector 243 CeedVector forceCeed; 244 CeedScalar *f; 245 if (appCtx->forcingChoice != FORCE_NONE) { 246 if (appCtx->memTypeRequested == CEED_MEM_HOST) { 247 ierr = VecGetArray(Floc, &f); CHKERRQ(ierr); 248 } else { 249 ierr = VecCUDAGetArray(Floc, &f); CHKERRQ(ierr); 250 } 251 CeedVectorCreate(ceed, Ulocsz[fineLevel], &forceCeed); 252 CeedVectorSetArray(forceCeed, 
appCtx->memTypeRequested, CEED_USE_POINTER, f); 253 } 254 255 // -- Setup libCEED objects 256 ierr = PetscMalloc1(numLevels, &ceedData); CHKERRQ(ierr); 257 // ---- Setup residual, Jacobian evaluator and geometric information 258 ierr = PetscCalloc1(1, &ceedData[fineLevel]); CHKERRQ(ierr); 259 { 260 ierr = SetupLibceedFineLevel(levelDMs[fineLevel], dmEnergy, dmDiagnostic, 261 ceed, appCtx, ctxPhys, ceedData, fineLevel, 262 ncompu, Ugsz[fineLevel], Ulocsz[fineLevel], 263 forceCeed); 264 CHKERRQ(ierr); 265 } 266 // ---- Setup coarse Jacobian evaluator and prolongation/restriction 267 for (PetscInt level = numLevels - 2; level >= 0; level--) { 268 ierr = PetscCalloc1(1, &ceedData[level]); CHKERRQ(ierr); 269 270 // Get global communication restriction 271 ierr = VecZeroEntries(Ug[level+1]); CHKERRQ(ierr); 272 ierr = VecSet(Uloc[level+1], 1.0); CHKERRQ(ierr); 273 ierr = DMLocalToGlobal(levelDMs[level+1], Uloc[level+1], ADD_VALUES, 274 Ug[level+1]); CHKERRQ(ierr); 275 ierr = DMGlobalToLocal(levelDMs[level+1], Ug[level+1], INSERT_VALUES, 276 Uloc[level+1]); CHKERRQ(ierr); 277 278 // Place in libCEED array 279 const PetscScalar *m; 280 if (appCtx->memTypeRequested == CEED_MEM_HOST) { 281 ierr = VecGetArrayRead(Uloc[level+1], &m); CHKERRQ(ierr); 282 } else { 283 ierr = VecCUDAGetArrayRead(Uloc[level+1], &m); CHKERRQ(ierr); 284 } 285 CeedVectorSetArray(ceedData[level+1]->xceed, appCtx->memTypeRequested, 286 CEED_USE_POINTER, (CeedScalar *)m); 287 288 // Note: use high order ceed, if specified and degree > 4 289 ierr = SetupLibceedLevel(levelDMs[level], ceed, appCtx, 290 ceedData, level, ncompu, Ugsz[level], 291 Ulocsz[level], ceedData[level+1]->xceed); 292 CHKERRQ(ierr); 293 294 // Restore PETSc vector 295 CeedVectorTakeArray(ceedData[level+1]->xceed, appCtx->memTypeRequested, 296 (CeedScalar **)&m); 297 if (appCtx->memTypeRequested == CEED_MEM_HOST) { 298 ierr = VecRestoreArrayRead(Uloc[level+1], &m); CHKERRQ(ierr); 299 } else { 300 ierr = 
VecCUDARestoreArrayRead(Uloc[level+1], &m); CHKERRQ(ierr); 301 } 302 ierr = VecZeroEntries(Ug[level+1]); CHKERRQ(ierr); 303 ierr = VecZeroEntries(Uloc[level+1]); CHKERRQ(ierr); 304 } 305 306 // Performance logging 307 ierr = PetscLogStagePop(); 308 309 // --------------------------------------------------------------------------- 310 // Setup global forcing vector 311 // --------------------------------------------------------------------------- 312 ierr = VecZeroEntries(F); CHKERRQ(ierr); 313 314 if (appCtx->forcingChoice != FORCE_NONE) { 315 CeedVectorTakeArray(forceCeed, appCtx->memTypeRequested, NULL); 316 if (appCtx->memTypeRequested == CEED_MEM_HOST) { 317 ierr = VecRestoreArray(Floc, &f); CHKERRQ(ierr); 318 } else { 319 ierr = VecCUDARestoreArray(Floc, &f); CHKERRQ(ierr); 320 } 321 ierr = DMLocalToGlobal(levelDMs[fineLevel], Floc, ADD_VALUES, F); 322 CHKERRQ(ierr); 323 CeedVectorDestroy(&forceCeed); 324 } 325 326 // --------------------------------------------------------------------------- 327 // Print problem summary 328 // --------------------------------------------------------------------------- 329 if (!appCtx->testMode) { 330 const char *usedresource; 331 CeedGetResource(ceed, &usedresource); 332 333 ierr = PetscPrintf(comm, 334 "\n-- Elasticity Example - libCEED + PETSc --\n" 335 " libCEED:\n" 336 " libCEED Backend : %s\n" 337 " libCEED Backend MemType : %s\n" 338 " libCEED User Requested MemType : %s\n", 339 usedresource, CeedMemTypes[memTypeBackend], 340 (appCtx->setMemTypeRequest) ? 
341 CeedMemTypes[appCtx->memTypeRequested] : "none"); 342 CHKERRQ(ierr); 343 344 VecType vecType; 345 ierr = VecGetType(U, &vecType); CHKERRQ(ierr); 346 ierr = PetscPrintf(comm, 347 " PETSc:\n" 348 " PETSc Vec Type : %s\n", 349 vecType); CHKERRQ(ierr); 350 351 ierr = PetscPrintf(comm, 352 " Problem:\n" 353 " Problem Name : %s\n" 354 " Forcing Function : %s\n" 355 " Mesh:\n" 356 " File : %s\n" 357 " Number of 1D Basis Nodes (p) : %d\n" 358 " Number of 1D Quadrature Points (q) : %d\n" 359 " Global nodes : %D\n" 360 " Owned nodes : %D\n" 361 " DoF per node : %D\n" 362 " Multigrid:\n" 363 " Type : %s\n" 364 " Number of Levels : %d\n", 365 problemTypesForDisp[appCtx->problemChoice], 366 forcingTypesForDisp[appCtx->forcingChoice], 367 appCtx->meshFile[0] ? appCtx->meshFile : "Box Mesh", 368 appCtx->degree + 1, appCtx->degree + 1, 369 Ugsz[fineLevel]/ncompu, Ulsz[fineLevel]/ncompu, ncompu, 370 (appCtx->degree == 1 && 371 appCtx->multigridChoice != MULTIGRID_NONE) ? 372 "Algebraic multigrid" : 373 multigridTypesForDisp[appCtx->multigridChoice], 374 (appCtx->degree == 1 || 375 appCtx->multigridChoice == MULTIGRID_NONE) ? 376 0 : numLevels); CHKERRQ(ierr); 377 378 if (appCtx->multigridChoice != MULTIGRID_NONE) { 379 for (PetscInt i = 0; i < 2; i++) { 380 CeedInt level = i ? fineLevel : 0; 381 ierr = PetscPrintf(comm, 382 " Level %D (%s):\n" 383 " Number of 1D Basis Nodes (p) : %d\n" 384 " Global Nodes : %D\n" 385 " Owned Nodes : %D\n", 386 level, i ? 
"fine" : "coarse", 387 appCtx->levelDegrees[level] + 1, 388 Ugsz[level]/ncompu, Ulsz[level]/ncompu); 389 CHKERRQ(ierr); 390 } 391 } 392 } 393 394 // --------------------------------------------------------------------------- 395 // Setup SNES 396 // --------------------------------------------------------------------------- 397 // Performance logging 398 ierr = PetscLogStageRegister("SNES Setup Stage", &stageSnesSetup); 399 CHKERRQ(ierr); 400 ierr = PetscLogStagePush(stageSnesSetup); CHKERRQ(ierr); 401 402 // Create SNES 403 ierr = SNESCreate(comm, &snes); CHKERRQ(ierr); 404 ierr = SNESSetDM(snes, levelDMs[fineLevel]); CHKERRQ(ierr); 405 406 // -- Jacobian evaluators 407 ierr = PetscMalloc1(numLevels, &jacobCtx); CHKERRQ(ierr); 408 ierr = PetscMalloc1(numLevels, &jacobMat); CHKERRQ(ierr); 409 for (PetscInt level = 0; level < numLevels; level++) { 410 // -- Jacobian context for level 411 ierr = PetscMalloc1(1, &jacobCtx[level]); CHKERRQ(ierr); 412 ierr = SetupJacobianCtx(comm, appCtx, levelDMs[level], Ug[level], 413 Uloc[level], ceedData[level], ceed, ctxPhys, 414 ctxPhysSmoother, jacobCtx[level]); CHKERRQ(ierr); 415 416 // -- Form Action of Jacobian on delta_u 417 ierr = MatCreateShell(comm, Ulsz[level], Ulsz[level], Ugsz[level], 418 Ugsz[level], jacobCtx[level], &jacobMat[level]); 419 CHKERRQ(ierr); 420 ierr = MatShellSetOperation(jacobMat[level], MATOP_MULT, 421 (void (*)(void))ApplyJacobian_Ceed); 422 CHKERRQ(ierr); 423 ierr = MatShellSetOperation(jacobMat[level], MATOP_GET_DIAGONAL, 424 (void(*)(void))GetDiag_Ceed); 425 if (appCtx->memTypeRequested == CEED_MEM_DEVICE) { 426 ierr = MatShellSetVecType(jacobMat[level], VECCUDA); CHKERRQ(ierr); 427 } 428 } 429 // Note: FormJacobian updates Jacobian matrices on each level 430 // and assembles the Jpre matrix, if needed 431 ierr = PetscMalloc1(1, &formJacobCtx); CHKERRQ(ierr); 432 formJacobCtx->jacobCtx = jacobCtx; 433 formJacobCtx->numLevels = numLevels; 434 formJacobCtx->jacobMat = jacobMat; 435 436 // -- Residual 
evaluation function 437 ierr = PetscMalloc1(1, &resCtx); CHKERRQ(ierr); 438 ierr = PetscMemcpy(resCtx, jacobCtx[fineLevel], 439 sizeof(*jacobCtx[fineLevel])); CHKERRQ(ierr); 440 resCtx->op = ceedData[fineLevel]->opApply; 441 resCtx->qf = ceedData[fineLevel]->qfApply; 442 ierr = SNESSetFunction(snes, R, FormResidual_Ceed, resCtx); CHKERRQ(ierr); 443 444 // -- Prolongation/Restriction evaluation 445 ierr = PetscMalloc1(numLevels, &prolongRestrCtx); CHKERRQ(ierr); 446 ierr = PetscMalloc1(numLevels, &prolongRestrMat); CHKERRQ(ierr); 447 for (PetscInt level = 1; level < numLevels; level++) { 448 // ---- Prolongation/restriction context for level 449 ierr = PetscMalloc1(1, &prolongRestrCtx[level]); CHKERRQ(ierr); 450 ierr = SetupProlongRestrictCtx(comm, appCtx, levelDMs[level-1], 451 levelDMs[level], Ug[level], Uloc[level-1], 452 Uloc[level], ceedData[level-1], 453 ceedData[level], ceed, 454 prolongRestrCtx[level]); CHKERRQ(ierr); 455 456 // ---- Form matrix-free prolongation/restriction operator for level 457 ierr = MatCreateShell(comm, Ulsz[level], Ulsz[level-1], Ugsz[level], 458 Ugsz[level-1], prolongRestrCtx[level], 459 &prolongRestrMat[level]); CHKERRQ(ierr); 460 // Note: In PCMG, restriction is the transpose of prolongation 461 ierr = MatShellSetOperation(prolongRestrMat[level], MATOP_MULT, 462 (void (*)(void))Prolong_Ceed); 463 ierr = MatShellSetOperation(prolongRestrMat[level], MATOP_MULT_TRANSPOSE, 464 (void (*)(void))Restrict_Ceed); 465 CHKERRQ(ierr); 466 if (appCtx->memTypeRequested == CEED_MEM_DEVICE) { 467 ierr = MatShellSetVecType(prolongRestrMat[level], VECCUDA); CHKERRQ(ierr); 468 } 469 } 470 471 // --------------------------------------------------------------------------- 472 // Setup dummy SNES for AMG coarse solve 473 // --------------------------------------------------------------------------- 474 if (appCtx->multigridChoice != MULTIGRID_NONE) { 475 // -- Jacobian Matrix 476 ierr = DMSetMatType(levelDMs[0], MATAIJ); CHKERRQ(ierr); 477 ierr = DMCreateMatrix(levelDMs[0], 
&jacobMatCoarse); CHKERRQ(ierr); 478 479 if (appCtx->degree > 1) { 480 ierr = SNESCreate(comm, &snesCoarse); CHKERRQ(ierr); 481 ierr = SNESSetDM(snesCoarse, levelDMs[0]); CHKERRQ(ierr); 482 ierr = SNESSetSolution(snesCoarse, Ug[0]); CHKERRQ(ierr); 483 484 // -- Jacobian function 485 ierr = SNESSetJacobian(snesCoarse, jacobMatCoarse, jacobMatCoarse, NULL, 486 NULL); CHKERRQ(ierr); 487 488 // -- Residual evaluation function 489 ierr = PetscMalloc1(1, &jacobCoarseCtx); CHKERRQ(ierr); 490 ierr = PetscMemcpy(jacobCoarseCtx, jacobCtx[0], sizeof(*jacobCtx[0])); 491 CHKERRQ(ierr); 492 ierr = SNESSetFunction(snesCoarse, Ug[0], ApplyJacobianCoarse_Ceed, 493 jacobCoarseCtx); CHKERRQ(ierr); 494 495 // -- Update formJacobCtx 496 formJacobCtx->Ucoarse = Ug[0]; 497 formJacobCtx->snesCoarse = snesCoarse; 498 formJacobCtx->jacobMatCoarse = jacobMatCoarse; 499 } 500 } 501 502 // Set Jacobian function 503 if (appCtx->degree > 1) { 504 ierr = SNESSetJacobian(snes, jacobMat[fineLevel], jacobMat[fineLevel], 505 FormJacobian, formJacobCtx); CHKERRQ(ierr); 506 } else { 507 ierr = SNESSetJacobian(snes, jacobMat[0], jacobMatCoarse, 508 SNESComputeJacobianDefaultColor, NULL); 509 CHKERRQ(ierr); 510 } 511 512 // --------------------------------------------------------------------------- 513 // Setup KSP 514 // --------------------------------------------------------------------------- 515 { 516 PC pc; 517 KSP ksp; 518 519 // -- KSP 520 ierr = SNESGetKSP(snes, &ksp); CHKERRQ(ierr); 521 ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr); 522 ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL); CHKERRQ(ierr); 523 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 524 PETSC_DEFAULT); CHKERRQ(ierr); 525 ierr = KSPSetOptionsPrefix(ksp, "outer_"); CHKERRQ(ierr); 526 527 // -- Preconditioning 528 ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); 529 ierr = PCSetDM(pc, levelDMs[fineLevel]); CHKERRQ(ierr); 530 ierr = PCSetOptionsPrefix(pc, "outer_"); CHKERRQ(ierr); 531 532 if (appCtx->multigridChoice == 
MULTIGRID_NONE) { 533 // ---- No Multigrid 534 ierr = PCSetType(pc, PCJACOBI); CHKERRQ(ierr); 535 ierr = PCJacobiSetType(pc, PC_JACOBI_DIAGONAL); CHKERRQ(ierr); 536 } else if (appCtx->degree == 1) { 537 // ---- AMG for degree 1 538 ierr = PCSetType(pc, PCGAMG); CHKERRQ(ierr); 539 } else { 540 // ---- PCMG 541 ierr = PCSetType(pc, PCMG); CHKERRQ(ierr); 542 543 // ------ PCMG levels 544 ierr = PCMGSetLevels(pc, numLevels, NULL); CHKERRQ(ierr); 545 for (PetscInt level = 0; level < numLevels; level++) { 546 // -------- Smoother 547 KSP kspSmoother, kspEst; 548 PC pcSmoother; 549 550 // ---------- Smoother KSP 551 ierr = PCMGGetSmoother(pc, level, &kspSmoother); CHKERRQ(ierr); 552 ierr = KSPSetDM(kspSmoother, levelDMs[level]); CHKERRQ(ierr); 553 ierr = KSPSetDMActive(kspSmoother, PETSC_FALSE); CHKERRQ(ierr); 554 555 // ---------- Chebyshev options 556 ierr = KSPSetType(kspSmoother, KSPCHEBYSHEV); CHKERRQ(ierr); 557 ierr = KSPChebyshevEstEigSet(kspSmoother, 0, 0.1, 0, 1.1); 558 CHKERRQ(ierr); 559 ierr = KSPChebyshevEstEigGetKSP(kspSmoother, &kspEst); CHKERRQ(ierr); 560 ierr = KSPSetType(kspEst, KSPCG); CHKERRQ(ierr); 561 ierr = KSPChebyshevEstEigSetUseNoisy(kspSmoother, PETSC_TRUE); 562 CHKERRQ(ierr); 563 ierr = KSPSetOperators(kspSmoother, jacobMat[level], jacobMat[level]); 564 CHKERRQ(ierr); 565 566 // ---------- Smoother preconditioner 567 ierr = KSPGetPC(kspSmoother, &pcSmoother); CHKERRQ(ierr); 568 ierr = PCSetType(pcSmoother, PCJACOBI); CHKERRQ(ierr); 569 ierr = PCJacobiSetType(pcSmoother, PC_JACOBI_DIAGONAL); CHKERRQ(ierr); 570 571 // -------- Work vector 572 if (level != fineLevel) { 573 ierr = PCMGSetX(pc, level, Ug[level]); CHKERRQ(ierr); 574 } 575 576 // -------- Level prolongation/restriction operator 577 if (level > 0) { 578 ierr = PCMGSetInterpolation(pc, level, prolongRestrMat[level]); 579 CHKERRQ(ierr); 580 ierr = PCMGSetRestriction(pc, level, prolongRestrMat[level]); 581 CHKERRQ(ierr); 582 } 583 } 584 585 // ------ PCMG coarse solve 586 KSP kspCoarse; 
587 PC pcCoarse; 588 589 // -------- Coarse KSP 590 ierr = PCMGGetCoarseSolve(pc, &kspCoarse); CHKERRQ(ierr); 591 ierr = KSPSetType(kspCoarse, KSPPREONLY); CHKERRQ(ierr); 592 ierr = KSPSetOperators(kspCoarse, jacobMatCoarse, jacobMatCoarse); 593 CHKERRQ(ierr); 594 ierr = KSPSetOptionsPrefix(kspCoarse, "coarse_"); CHKERRQ(ierr); 595 596 // -------- Coarse preconditioner 597 ierr = KSPGetPC(kspCoarse, &pcCoarse); CHKERRQ(ierr); 598 ierr = PCSetType(pcCoarse, PCGAMG); CHKERRQ(ierr); 599 ierr = PCSetOptionsPrefix(pcCoarse, "coarse_"); CHKERRQ(ierr); 600 601 ierr = KSPSetFromOptions(kspCoarse); CHKERRQ(ierr); 602 ierr = PCSetFromOptions(pcCoarse); CHKERRQ(ierr); 603 604 // ------ PCMG options 605 ierr = PCMGSetType(pc, PC_MG_MULTIPLICATIVE); CHKERRQ(ierr); 606 ierr = PCMGSetNumberSmooth(pc, 3); CHKERRQ(ierr); 607 ierr = PCMGSetCycleType(pc, pcmgCycleType); CHKERRQ(ierr); 608 } 609 ierr = KSPSetFromOptions(ksp); 610 ierr = PCSetFromOptions(pc); 611 } 612 { 613 // Default to critical-point (CP) line search (related to Wolfe's curvature condition) 614 SNESLineSearch linesearch; 615 616 ierr = SNESGetLineSearch(snes, &linesearch); CHKERRQ(ierr); 617 ierr = SNESLineSearchSetType(linesearch, SNESLINESEARCHCP); CHKERRQ(ierr); 618 } 619 620 ierr = SNESSetFromOptions(snes); CHKERRQ(ierr); 621 622 // Performance logging 623 ierr = PetscLogStagePop(); 624 625 // --------------------------------------------------------------------------- 626 // Set initial guess 627 // --------------------------------------------------------------------------- 628 ierr = PetscObjectSetName((PetscObject)U, ""); CHKERRQ(ierr); 629 ierr = VecSet(U, 0.0); CHKERRQ(ierr); 630 631 // View solution 632 if (appCtx->viewSoln) { 633 ierr = ViewSolution(comm, U, 0, 0.0); CHKERRQ(ierr); 634 } 635 636 // --------------------------------------------------------------------------- 637 // Solve SNES 638 // --------------------------------------------------------------------------- 639 PetscBool snesMonitor = 
PETSC_FALSE; 640 ierr = PetscOptionsHasName(NULL, NULL, "-snes_monitor", &snesMonitor); 641 CHKERRQ(ierr); 642 643 // Performance logging 644 ierr = PetscLogStageRegister("SNES Solve Stage", &stageSnesSolve); 645 CHKERRQ(ierr); 646 ierr = PetscLogStagePush(stageSnesSolve); CHKERRQ(ierr); 647 648 // Timing 649 ierr = PetscBarrier((PetscObject)snes); CHKERRQ(ierr); 650 startTime = MPI_Wtime(); 651 652 // Solve for each load increment 653 PetscInt increment; 654 for (increment = 1; increment <= appCtx->numIncrements; increment++) { 655 // -- Log increment count 656 if (snesMonitor) { 657 ierr = PetscPrintf(comm, "%d Load Increment\n", increment - 1); 658 CHKERRQ(ierr); 659 } 660 661 // -- Scale the problem 662 PetscScalar loadIncrement = 1.0*increment / appCtx->numIncrements, 663 scalingFactor = loadIncrement / 664 (increment == 1 ? 1 : resCtx->loadIncrement); 665 resCtx->loadIncrement = loadIncrement; 666 if (appCtx->numIncrements > 1 && appCtx->forcingChoice != FORCE_NONE) { 667 ierr = VecScale(F, scalingFactor); CHKERRQ(ierr); 668 } 669 670 // -- Solve 671 ierr = SNESSolve(snes, F, U); CHKERRQ(ierr); 672 673 // -- View solution 674 if (appCtx->viewSoln) { 675 ierr = ViewSolution(comm, U, increment, loadIncrement); CHKERRQ(ierr); 676 } 677 678 // -- Update SNES iteration count 679 PetscInt its; 680 ierr = SNESGetIterationNumber(snes, &its); CHKERRQ(ierr); 681 snesIts += its; 682 ierr = SNESGetLinearSolveIterations(snes, &its); CHKERRQ(ierr); 683 kspIts += its; 684 685 // -- Check for divergence 686 SNESConvergedReason reason; 687 ierr = SNESGetConvergedReason(snes, &reason); CHKERRQ(ierr); 688 if (reason < 0) 689 break; 690 } 691 692 // Timing 693 elapsedTime = MPI_Wtime() - startTime; 694 695 // Performance logging 696 ierr = PetscLogStagePop(); 697 698 // --------------------------------------------------------------------------- 699 // Output summary 700 // --------------------------------------------------------------------------- 701 if (!appCtx->testMode) { 
702 // -- SNES 703 SNESType snesType; 704 SNESConvergedReason reason; 705 PetscReal rnorm; 706 ierr = SNESGetType(snes, &snesType); CHKERRQ(ierr); 707 ierr = SNESGetConvergedReason(snes, &reason); CHKERRQ(ierr); 708 ierr = SNESGetFunctionNorm(snes, &rnorm); CHKERRQ(ierr); 709 ierr = PetscPrintf(comm, 710 " SNES:\n" 711 " SNES Type : %s\n" 712 " SNES Convergence : %s\n" 713 " Number of Load Increments : %d\n" 714 " Completed Load Increments : %d\n" 715 " Total SNES Iterations : %D\n" 716 " Final rnorm : %e\n", 717 snesType, SNESConvergedReasons[reason], 718 appCtx->numIncrements, increment - 1, 719 snesIts, (double)rnorm); CHKERRQ(ierr); 720 721 // -- KSP 722 KSP ksp; 723 KSPType kspType; 724 ierr = SNESGetKSP(snes, &ksp); CHKERRQ(ierr); 725 ierr = KSPGetType(ksp, &kspType); CHKERRQ(ierr); 726 ierr = PetscPrintf(comm, 727 " Linear Solver:\n" 728 " KSP Type : %s\n" 729 " Total KSP Iterations : %D\n", 730 kspType, kspIts); CHKERRQ(ierr); 731 732 // -- PC 733 PC pc; 734 PCType pcType; 735 ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); 736 ierr = PCGetType(pc, &pcType); CHKERRQ(ierr); 737 ierr = PetscPrintf(comm, 738 " PC Type : %s\n", 739 pcType); CHKERRQ(ierr); 740 741 if (!strcmp(pcType, PCMG)) { 742 PCMGType pcmgType; 743 ierr = PCMGGetType(pc, &pcmgType); CHKERRQ(ierr); 744 ierr = PetscPrintf(comm, 745 " P-Multigrid:\n" 746 " PCMG Type : %s\n" 747 " PCMG Cycle Type : %s\n", 748 PCMGTypes[pcmgType], 749 PCMGCycleTypes[pcmgCycleType]); CHKERRQ(ierr); 750 751 // -- Coarse Solve 752 KSP kspCoarse; 753 PC pcCoarse; 754 PCType pcType; 755 756 ierr = PCMGGetCoarseSolve(pc, &kspCoarse); CHKERRQ(ierr); 757 ierr = KSPGetType(kspCoarse, &kspType); CHKERRQ(ierr); 758 ierr = KSPGetPC(kspCoarse, &pcCoarse); CHKERRQ(ierr); 759 ierr = PCGetType(pcCoarse, &pcType); CHKERRQ(ierr); 760 ierr = PetscPrintf(comm, 761 " Coarse Solve:\n" 762 " KSP Type : %s\n" 763 " PC Type : %s\n", 764 kspType, pcType); CHKERRQ(ierr); 765 } 766 } 767 768 // 
  // ---------------------------------------------------------------------------
  // Compute solve time
  // ---------------------------------------------------------------------------
  if (!appCtx->testMode) {
    // Reduce per-rank wall time to min/max across the communicator so the
    // reported throughput reflects the slowest (and fastest) rank
    // NOTE(review): MPI_Allreduce returns an MPI error code, not a
    // PetscErrorCode; newer PETSc prefers CHKERRMPI here — confirm against the
    // PETSc version this example targets
    ierr = MPI_Allreduce(&elapsedTime, &minTime, 1, MPI_DOUBLE, MPI_MIN, comm);
    CHKERRQ(ierr);
    ierr = MPI_Allreduce(&elapsedTime, &maxTime, 1, MPI_DOUBLE, MPI_MAX, comm);
    CHKERRQ(ierr);
    ierr = PetscPrintf(comm,
                       " Performance:\n"
                       " SNES Solve Time : %g (%g) sec\n"
                       " DoFs/Sec in SNES : %g (%g) million\n",
                       maxTime, minTime, 1e-6*Ugsz[fineLevel]*kspIts/maxTime,
                       1e-6*Ugsz[fineLevel]*kspIts/minTime); CHKERRQ(ierr);
  }

  // ---------------------------------------------------------------------------
  // Compute error
  // ---------------------------------------------------------------------------
  // Only meaningful when the forcing was the method of manufactured solutions,
  // so an exact (true) solution is available in ceedData[fineLevel]->truesoln
  if (appCtx->forcingChoice == FORCE_MMS) {
    CeedScalar l2Error = 1., l2Unorm = 1.;
    const CeedScalar *truearray;
    Vec errorVec, trueVec;

    // -- Work vectors
    ierr = VecDuplicate(U, &errorVec); CHKERRQ(ierr);
    ierr = VecSet(errorVec, 0.0); CHKERRQ(ierr);
    ierr = VecDuplicate(U, &trueVec); CHKERRQ(ierr);
    ierr = VecSet(trueVec, 0.0); CHKERRQ(ierr);

    // -- Assemble global true solution vector
    // Borrow the libCEED true-solution array and alias it into the local PETSc
    // vector (host or CUDA variant, matching the requested memory type), then
    // scatter local -> global. Place/Reset calls must stay strictly paired.
    // NOTE(review): the CeedVectorGetArrayRead/RestoreArrayRead return codes
    // are unchecked, consistent with the rest of this example
    CeedVectorGetArrayRead(ceedData[fineLevel]->truesoln,
                           appCtx->memTypeRequested, &truearray);
    if (appCtx->memTypeRequested == CEED_MEM_HOST) {
      ierr = VecPlaceArray(resCtx->Yloc, (PetscScalar *)truearray);
      CHKERRQ(ierr);
    } else {
      ierr = VecCUDAPlaceArray(resCtx->Yloc, (PetscScalar *)truearray);
      CHKERRQ(ierr);
    }
    ierr = DMLocalToGlobal(resCtx->dm, resCtx->Yloc, INSERT_VALUES, trueVec);
    CHKERRQ(ierr);
    if (appCtx->memTypeRequested == CEED_MEM_HOST) {
      ierr = VecResetArray(resCtx->Yloc); CHKERRQ(ierr);
    } else {
      ierr = VecCUDAResetArray(resCtx->Yloc); CHKERRQ(ierr);
    }
    CeedVectorRestoreArrayRead(ceedData[fineLevel]->truesoln, &truearray);

    // -- Compute L2 error
    // errorVec = trueVec - U; report ||error|| / ||U|| (relative L2 error)
    ierr = VecWAXPY(errorVec, -1.0, U, trueVec); CHKERRQ(ierr);
    ierr = VecNorm(errorVec, NORM_2, &l2Error); CHKERRQ(ierr);
    ierr = VecNorm(U, NORM_2, &l2Unorm); CHKERRQ(ierr);
    l2Error /= l2Unorm;

    // -- Output
    // In test mode stay silent unless the error exceeds the 5% tolerance,
    // so a failing regression test produces visible output
    if (!appCtx->testMode || l2Error > 0.05) {
      ierr = PetscPrintf(comm,
                         " L2 Error : %e\n",
                         l2Error); CHKERRQ(ierr);
    }

    // -- Cleanup
    ierr = VecDestroy(&errorVec); CHKERRQ(ierr);
    ierr = VecDestroy(&trueVec); CHKERRQ(ierr);
  }

  // ---------------------------------------------------------------------------
  // Compute energy
  // ---------------------------------------------------------------------------
  if (!appCtx->testMode) {
    // -- Compute strain energy of the final solution
    CeedScalar energy;
    ierr = ComputeStrainEnergy(dmEnergy, resCtx, ceedData[fineLevel]->opEnergy,
                               U, &energy); CHKERRQ(ierr);

    // -- Output
    ierr = PetscPrintf(comm,
                       " Strain Energy : %e\n",
                       energy); CHKERRQ(ierr);
  }

  // ---------------------------------------------------------------------------
  // Output diagnostic quantities
  // ---------------------------------------------------------------------------
  if (appCtx->viewSoln || appCtx->viewFinalSoln) {
    // -- Setup context
    // Shallow copy of resCtx: diagnosticCtx shares Yloc/dm internals with
    // resCtx, so only the struct itself is freed below (no member destroys)
    UserMult diagnosticCtx;
    ierr = PetscMalloc1(1, &diagnosticCtx); CHKERRQ(ierr);
    ierr = PetscMemcpy(diagnosticCtx, resCtx, sizeof(*resCtx)); CHKERRQ(ierr);
    diagnosticCtx->dm = dmDiagnostic;
    diagnosticCtx->op = ceedData[fineLevel]->opDiagnostic;

    // -- Compute and output
    ierr = ViewDiagnosticQuantities(comm, levelDMs[fineLevel], diagnosticCtx, U,
                                    ceedData[fineLevel]->ErestrictDiagnostic);
    CHKERRQ(ierr);

    // -- Cleanup
    ierr = PetscFree(diagnosticCtx); CHKERRQ(ierr);
  }

  // ---------------------------------------------------------------------------
  // Free objects
  // ---------------------------------------------------------------------------
  // Data in arrays per level
  for (PetscInt level = 0; level < numLevels; level++) {
    // Vectors
    ierr = VecDestroy(&Ug[level]); CHKERRQ(ierr);
    ierr = VecDestroy(&Uloc[level]); CHKERRQ(ierr);

    // Jacobian matrix and data
    ierr = VecDestroy(&jacobCtx[level]->Yloc); CHKERRQ(ierr);
    ierr = MatDestroy(&jacobMat[level]); CHKERRQ(ierr);
    ierr = PetscFree(jacobCtx[level]); CHKERRQ(ierr);

    // Prolongation/Restriction matrix and data
    // Level 0 (coarsest) has no prolongation/restriction operator
    if (level > 0) {
      ierr = PetscFree(prolongRestrCtx[level]); CHKERRQ(ierr);
      ierr = MatDestroy(&prolongRestrMat[level]); CHKERRQ(ierr);
    }

    // DM
    ierr = DMDestroy(&levelDMs[level]); CHKERRQ(ierr);

    // libCEED objects
    ierr = CeedDataDestroy(level, ceedData[level]); CHKERRQ(ierr);
  }

  // Arrays
  ierr = PetscFree(Ug); CHKERRQ(ierr);
  ierr = PetscFree(Uloc); CHKERRQ(ierr);
  ierr = PetscFree(Ugsz); CHKERRQ(ierr);
  ierr = PetscFree(Ulsz); CHKERRQ(ierr);
  ierr = PetscFree(Ulocsz); CHKERRQ(ierr);
  ierr = PetscFree(jacobCtx); CHKERRQ(ierr);
  ierr = PetscFree(jacobMat); CHKERRQ(ierr);
  ierr = PetscFree(prolongRestrCtx); CHKERRQ(ierr);
  ierr = PetscFree(prolongRestrMat); CHKERRQ(ierr);
  ierr = PetscFree(appCtx->levelDegrees); CHKERRQ(ierr);
  ierr = PetscFree(ceedData); CHKERRQ(ierr);

  // libCEED objects
  // ctxPhysSmoother may still be NULL (only set when nuSmoother was given);
  // CeedQFunctionContextDestroy tolerates that
  CeedQFunctionContextDestroy(&ctxPhys);
  CeedQFunctionContextDestroy(&ctxPhysSmoother);
  CeedDestroy(&ceed);

  // PETSc objects
  ierr = VecDestroy(&U); CHKERRQ(ierr);
  ierr = VecDestroy(&R); CHKERRQ(ierr);
  ierr = VecDestroy(&Rloc); CHKERRQ(ierr);
  ierr = VecDestroy(&F); CHKERRQ(ierr);
  ierr = VecDestroy(&Floc); CHKERRQ(ierr);
  ierr = MatDestroy(&jacobMatCoarse); CHKERRQ(ierr);
  ierr = SNESDestroy(&snes); CHKERRQ(ierr);
  // snesCoarse may be NULL; SNESDestroy handles a NULL object
  ierr = SNESDestroy(&snesCoarse); CHKERRQ(ierr);
  ierr = DMDestroy(&dmOrig); CHKERRQ(ierr);
  ierr = DMDestroy(&dmEnergy); CHKERRQ(ierr);
  ierr = DMDestroy(&dmDiagnostic); CHKERRQ(ierr);
  ierr = PetscFree(levelDMs); CHKERRQ(ierr);

  // Structs
  // jacobCoarseCtx and physSmoother may be NULL; PetscFree(NULL) is a no-op
  ierr = PetscFree(resCtx); CHKERRQ(ierr);
  ierr = PetscFree(formJacobCtx); CHKERRQ(ierr);
  ierr = PetscFree(jacobCoarseCtx); CHKERRQ(ierr);
  ierr = PetscFree(appCtx); CHKERRQ(ierr);
  ierr = PetscFree(phys); CHKERRQ(ierr);
  ierr = PetscFree(physSmoother); CHKERRQ(ierr);
  ierr = PetscFree(units); CHKERRQ(ierr);

  // PetscFinalize returns the final error code for the process
  return PetscFinalize();
}