// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
// reserved. See files LICENSE and NOTICE for details.
//
// This file is part of CEED, a collection of benchmarks, miniapps, software
// libraries and APIs for efficient high-order finite element and spectral
// element discretizations for exascale applications. For more information and
// source code availability see http://github.com/ceed.
//
// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
// a collaborative effort of two U.S. Department of Energy organizations (Office
// of Science and the National Nuclear Security Administration) responsible for
// the planning and preparation of a capable exascale ecosystem, including
// software, applications, hardware, advanced system engineering and early
// testbed platforms, in support of the nation's exascale computing imperative.

// libCEED + PETSc Example: CEED BPs
//
// This example demonstrates a simple usage of libCEED with PETSc to solve the
// CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps.
//
// The code is intentionally "raw", using only low-level communication
// primitives.
//
// Build with:
//
//     make bpsraw [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>]
//
// Sample runs:
//
//     ./bpsraw -problem bp1
//     ./bpsraw -problem bp2
//     ./bpsraw -problem bp3
//     ./bpsraw -problem bp4
//     ./bpsraw -problem bp5 -ceed /cpu/self
//     ./bpsraw -problem bp6 -ceed /gpu/cuda
//
//TESTARGS -ceed {ceed_resource} -test -problem bp2 -degree 5 -qextra 1 -ksp_max_it_clip 15,15

/// @file
/// CEED BPs example using PETSc
/// See bps.c for an implementation using DMPlex unstructured grids.
43 const char help[] = "Solve CEED BPs using PETSc\n"; 44 45 #include <ceed.h> 46 #include <petscksp.h> 47 #include <petscsys.h> 48 #include <stdbool.h> 49 #include <string.h> 50 #include "qfunctions/bps/bp1.h" 51 #include "qfunctions/bps/bp2.h" 52 #include "qfunctions/bps/bp3.h" 53 #include "qfunctions/bps/bp4.h" 54 #include "qfunctions/bps/common.h" 55 56 #if PETSC_VERSION_LT(3,12,0) 57 #ifdef PETSC_HAVE_CUDA 58 #include <petsccuda.h> 59 // Note: With PETSc prior to version 3.12.0, providing the source path to 60 // include 'cublas_v2.h' will be needed to use 'petsccuda.h'. 61 #endif 62 #endif 63 64 static CeedMemType MemTypeP2C(PetscMemType mtype) { 65 return PetscMemTypeDevice(mtype) ? CEED_MEM_DEVICE : CEED_MEM_HOST; 66 } 67 68 static void Split3(PetscInt size, PetscInt m[3], bool reverse) { 69 for (PetscInt d=0,sizeleft=size; d<3; d++) { 70 PetscInt try = (PetscInt)PetscCeilReal(PetscPowReal(sizeleft, 1./(3 - d))); 71 while (try * (sizeleft / try) != sizeleft) try++; 72 m[reverse ? 
2-d : d] = try; 73 sizeleft /= try; 74 } 75 } 76 77 static PetscInt Max3(const PetscInt a[3]) { 78 return PetscMax(a[0], PetscMax(a[1], a[2])); 79 } 80 static PetscInt Min3(const PetscInt a[3]) { 81 return PetscMin(a[0], PetscMin(a[1], a[2])); 82 } 83 static void GlobalNodes(const PetscInt p[3], const PetscInt irank[3], 84 PetscInt degree, const PetscInt melem[3], 85 PetscInt mnodes[3]) { 86 for (int d=0; d<3; d++) 87 mnodes[d] = degree*melem[d] + (irank[d] == p[d]-1); 88 } 89 static PetscInt GlobalStart(const PetscInt p[3], const PetscInt irank[3], 90 PetscInt degree, const PetscInt melem[3]) { 91 PetscInt start = 0; 92 // Dumb brute-force is easier to read 93 for (PetscInt i=0; i<p[0]; i++) { 94 for (PetscInt j=0; j<p[1]; j++) { 95 for (PetscInt k=0; k<p[2]; k++) { 96 PetscInt mnodes[3], ijkrank[] = {i,j,k}; 97 if (i == irank[0] && j == irank[1] && k == irank[2]) return start; 98 GlobalNodes(p, ijkrank, degree, melem, mnodes); 99 start += mnodes[0] * mnodes[1] * mnodes[2]; 100 } 101 } 102 } 103 return -1; 104 } 105 static int CreateRestriction(Ceed ceed, const CeedInt melem[3], CeedInt P, 106 CeedInt ncomp, CeedElemRestriction *Erestrict) { 107 const PetscInt nelem = melem[0]*melem[1]*melem[2]; 108 PetscInt mnodes[3], *idx, *idxp; 109 110 // Get indicies 111 for (int d=0; d<3; d++) mnodes[d] = melem[d]*(P-1) + 1; 112 idxp = idx = malloc(nelem*P*P*P*sizeof idx[0]); 113 for (CeedInt i=0; i<melem[0]; i++) 114 for (CeedInt j=0; j<melem[1]; j++) 115 for (CeedInt k=0; k<melem[2]; k++,idxp += P*P*P) 116 for (CeedInt ii=0; ii<P; ii++) 117 for (CeedInt jj=0; jj<P; jj++) 118 for (CeedInt kk=0; kk<P; kk++) { 119 if (0) { // This is the C-style (i,j,k) ordering that I prefer 120 idxp[(ii*P+jj)*P+kk] = ncomp*(((i*(P-1)+ii)*mnodes[1] 121 + (j*(P-1)+jj))*mnodes[2] 122 + (k*(P-1)+kk)); 123 } else { // (k,j,i) ordering for consistency with MFEM example 124 idxp[ii+P*(jj+P*kk)] = ncomp*(((i*(P-1)+ii)*mnodes[1] 125 + (j*(P-1)+jj))*mnodes[2] 126 + (k*(P-1)+kk)); 127 } 128 } 129 130 
// Setup CEED restriction 131 CeedElemRestrictionCreate(ceed, nelem, P*P*P, ncomp, 1, 132 mnodes[0]*mnodes[1]*mnodes[2]*ncomp, 133 CEED_MEM_HOST, CEED_OWN_POINTER, idx, Erestrict); 134 135 PetscFunctionReturn(0); 136 } 137 138 // Data for PETSc 139 typedef struct User_ *User; 140 struct User_ { 141 MPI_Comm comm; 142 VecScatter ltog; // Scatter for all entries 143 VecScatter ltog0; // Skip Dirichlet values 144 VecScatter gtogD; // global-to-global; only Dirichlet values 145 Vec Xloc, Yloc; 146 CeedVector xceed, yceed; 147 CeedOperator op; 148 CeedVector qdata; 149 Ceed ceed; 150 }; 151 152 // BP Options 153 typedef enum { 154 CEED_BP1 = 0, CEED_BP2 = 1, CEED_BP3 = 2, 155 CEED_BP4 = 3, CEED_BP5 = 4, CEED_BP6 = 5 156 } bpType; 157 static const char *const bpTypes[] = {"bp1","bp2","bp3","bp4","bp5","bp6", 158 "bpType","CEED_BP",0 159 }; 160 161 // BP specific data 162 typedef struct { 163 CeedInt ncompu, qdatasize, qextra; 164 CeedQFunctionUser setupgeo, setuprhs, apply, error; 165 const char *setupgeofname, *setuprhsfname, *applyfname, *errorfname; 166 CeedEvalMode inmode, outmode; 167 CeedQuadMode qmode; 168 } bpData; 169 170 bpData bpOptions[6] = { 171 [CEED_BP1] = { 172 .ncompu = 1, 173 .qdatasize = 1, 174 .qextra = 1, 175 .setupgeo = SetupMassGeo, 176 .setuprhs = SetupMassRhs, 177 .apply = Mass, 178 .error = Error, 179 .setupgeofname = SetupMassGeo_loc, 180 .setuprhsfname = SetupMassRhs_loc, 181 .applyfname = Mass_loc, 182 .errorfname = Error_loc, 183 .inmode = CEED_EVAL_INTERP, 184 .outmode = CEED_EVAL_INTERP, 185 .qmode = CEED_GAUSS 186 }, 187 [CEED_BP2] = { 188 .ncompu = 3, 189 .qdatasize = 1, 190 .qextra = 1, 191 .setupgeo = SetupMassGeo, 192 .setuprhs = SetupMassRhs3, 193 .apply = Mass3, 194 .error = Error3, 195 .setupgeofname = SetupMassGeo_loc, 196 .setuprhsfname = SetupMassRhs3_loc, 197 .applyfname = Mass3_loc, 198 .errorfname = Error3_loc, 199 .inmode = CEED_EVAL_INTERP, 200 .outmode = CEED_EVAL_INTERP, 201 .qmode = CEED_GAUSS 202 }, 203 [CEED_BP3] = { 
204 .ncompu = 1, 205 .qdatasize = 6, 206 .qextra = 1, 207 .setupgeo = SetupDiffGeo, 208 .setuprhs = SetupDiffRhs, 209 .apply = Diff, 210 .error = Error, 211 .setupgeofname = SetupDiffGeo_loc, 212 .setuprhsfname = SetupDiffRhs_loc, 213 .applyfname = Diff_loc, 214 .errorfname = Error_loc, 215 .inmode = CEED_EVAL_GRAD, 216 .outmode = CEED_EVAL_GRAD, 217 .qmode = CEED_GAUSS 218 }, 219 [CEED_BP4] = { 220 .ncompu = 3, 221 .qdatasize = 6, 222 .qextra = 1, 223 .setupgeo = SetupDiffGeo, 224 .setuprhs = SetupDiffRhs3, 225 .apply = Diff3, 226 .error = Error3, 227 .setupgeofname = SetupDiffGeo_loc, 228 .setuprhsfname = SetupDiffRhs3_loc, 229 .applyfname = Diff3_loc, 230 .errorfname = Error3_loc, 231 .inmode = CEED_EVAL_GRAD, 232 .outmode = CEED_EVAL_GRAD, 233 .qmode = CEED_GAUSS 234 }, 235 [CEED_BP5] = { 236 .ncompu = 1, 237 .qdatasize = 6, 238 .qextra = 0, 239 .setupgeo = SetupDiffGeo, 240 .setuprhs = SetupDiffRhs, 241 .apply = Diff, 242 .error = Error, 243 .setupgeofname = SetupDiffGeo_loc, 244 .setuprhsfname = SetupDiffRhs_loc, 245 .applyfname = Diff_loc, 246 .errorfname = Error_loc, 247 .inmode = CEED_EVAL_GRAD, 248 .outmode = CEED_EVAL_GRAD, 249 .qmode = CEED_GAUSS_LOBATTO 250 }, 251 [CEED_BP6] = { 252 .ncompu = 3, 253 .qdatasize = 6, 254 .qextra = 0, 255 .setupgeo = SetupDiffGeo, 256 .setuprhs = SetupDiffRhs3, 257 .apply = Diff3, 258 .error = Error3, 259 .setupgeofname = SetupDiffGeo_loc, 260 .setuprhsfname = SetupDiffRhs3_loc, 261 .applyfname = Diff3_loc, 262 .errorfname = Error3_loc, 263 .inmode = CEED_EVAL_GRAD, 264 .outmode = CEED_EVAL_GRAD, 265 .qmode = CEED_GAUSS_LOBATTO 266 } 267 }; 268 269 // This function uses libCEED to compute the action of the mass matrix 270 static PetscErrorCode MatMult_Mass(Mat A, Vec X, Vec Y) { 271 PetscErrorCode ierr; 272 User user; 273 PetscScalar *x, *y; 274 PetscMemType xmemtype, ymemtype; 275 276 PetscFunctionBeginUser; 277 278 ierr = MatShellGetContext(A, &user); CHKERRQ(ierr); 279 280 // Global-to-local 281 ierr = 
VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES, 282 SCATTER_REVERSE); CHKERRQ(ierr); 283 ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES, 284 SCATTER_REVERSE); CHKERRQ(ierr); 285 286 // Setup libCEED vectors 287 ierr = VecGetArrayReadAndMemType(user->Xloc, (const PetscScalar **)&x, 288 &xmemtype); CHKERRQ(ierr); 289 ierr = VecGetArrayAndMemType(user->Yloc, &y, &ymemtype); CHKERRQ(ierr); 290 CeedVectorSetArray(user->xceed, MemTypeP2C(xmemtype), CEED_USE_POINTER, x); 291 CeedVectorSetArray(user->yceed, MemTypeP2C(ymemtype), CEED_USE_POINTER, y); 292 293 // Apply libCEED operator 294 CeedOperatorApply(user->op, user->xceed, user->yceed, 295 CEED_REQUEST_IMMEDIATE); 296 297 // Restore PETSc vectors 298 CeedVectorTakeArray(user->xceed, MemTypeP2C(xmemtype), NULL); 299 CeedVectorTakeArray(user->yceed, MemTypeP2C(ymemtype), NULL); 300 ierr = VecRestoreArrayReadAndMemType(user->Xloc, (const PetscScalar **)&x); 301 CHKERRQ(ierr); 302 ierr = VecRestoreArrayAndMemType(user->Yloc, &y); CHKERRQ(ierr); 303 304 // Local-to-global 305 if (Y) { 306 ierr = VecZeroEntries(Y); CHKERRQ(ierr); 307 ierr = VecScatterBegin(user->ltog, user->Yloc, Y, ADD_VALUES, 308 SCATTER_FORWARD); CHKERRQ(ierr); 309 ierr = VecScatterEnd(user->ltog, user->Yloc, Y, ADD_VALUES, 310 SCATTER_FORWARD); CHKERRQ(ierr); 311 } 312 PetscFunctionReturn(0); 313 } 314 315 // This function uses libCEED to compute the action of the Laplacian with 316 // Dirichlet boundary conditions 317 static PetscErrorCode MatMult_Diff(Mat A, Vec X, Vec Y) { 318 PetscErrorCode ierr; 319 User user; 320 PetscScalar *x, *y; 321 PetscMemType xmemtype, ymemtype; 322 323 PetscFunctionBeginUser; 324 325 ierr = MatShellGetContext(A, &user); CHKERRQ(ierr); 326 327 // Global-to-local 328 ierr = VecScatterBegin(user->ltog0, X, user->Xloc, INSERT_VALUES, 329 SCATTER_REVERSE); CHKERRQ(ierr); 330 ierr = VecScatterEnd(user->ltog0, X, user->Xloc, INSERT_VALUES, 331 SCATTER_REVERSE); 332 CHKERRQ(ierr); 333 334 // Setup libCEED 
vectors 335 ierr = VecGetArrayReadAndMemType(user->Xloc, (const PetscScalar **)&x, 336 &xmemtype); CHKERRQ(ierr); 337 ierr = VecGetArrayAndMemType(user->Yloc, &y, &ymemtype); CHKERRQ(ierr); 338 CeedVectorSetArray(user->xceed, MemTypeP2C(xmemtype), CEED_USE_POINTER, x); 339 CeedVectorSetArray(user->yceed, MemTypeP2C(ymemtype), CEED_USE_POINTER, y); 340 341 // Apply libCEED operator 342 CeedOperatorApply(user->op, user->xceed, user->yceed, 343 CEED_REQUEST_IMMEDIATE); 344 345 // Restore PETSc vectors 346 CeedVectorTakeArray(user->xceed, MemTypeP2C(xmemtype), NULL); 347 CeedVectorTakeArray(user->yceed, MemTypeP2C(ymemtype), NULL); 348 ierr = VecRestoreArrayReadAndMemType(user->Xloc, (const PetscScalar **)&x); 349 CHKERRQ(ierr); 350 ierr = VecRestoreArrayAndMemType(user->Yloc, &y); CHKERRQ(ierr); 351 352 // Local-to-global 353 ierr = VecZeroEntries(Y); CHKERRQ(ierr); 354 ierr = VecScatterBegin(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD); 355 CHKERRQ(ierr); 356 ierr = VecScatterEnd(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD); 357 CHKERRQ(ierr); 358 ierr = VecScatterBegin(user->ltog0, user->Yloc, Y, ADD_VALUES, 359 SCATTER_FORWARD); CHKERRQ(ierr); 360 ierr = VecScatterEnd(user->ltog0, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 361 CHKERRQ(ierr); 362 363 PetscFunctionReturn(0); 364 } 365 366 // This function calculates the error in the final solution 367 static PetscErrorCode ComputeErrorMax(User user, CeedOperator operror, Vec X, 368 CeedVector target, PetscReal *maxerror) { 369 PetscErrorCode ierr; 370 PetscScalar *x; 371 PetscMemType memtype; 372 CeedVector collocated_error; 373 CeedInt length; 374 375 PetscFunctionBeginUser; 376 377 CeedVectorGetLength(target, &length); 378 CeedVectorCreate(user->ceed, length, &collocated_error); 379 380 // Global-to-local 381 ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES, 382 SCATTER_REVERSE); CHKERRQ(ierr); 383 ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES, 384 SCATTER_REVERSE); 
CHKERRQ(ierr); 385 386 // Setup libCEED vector 387 ierr = VecGetArrayReadAndMemType(user->Xloc, (const PetscScalar **)&x, 388 &memtype); CHKERRQ(ierr); 389 CeedVectorSetArray(user->xceed, MemTypeP2C(memtype), CEED_USE_POINTER, x); 390 391 // Apply libCEED operator 392 CeedOperatorApply(operror, user->xceed, collocated_error, 393 CEED_REQUEST_IMMEDIATE); 394 395 // Restore PETSc vector 396 CeedVectorTakeArray(user->xceed, MemTypeP2C(memtype), NULL); 397 ierr = VecRestoreArrayReadAndMemType(user->Xloc, (const PetscScalar **)&x); 398 CHKERRQ(ierr); 399 400 // Reduce max error 401 *maxerror = 0; 402 const CeedScalar *e; 403 CeedVectorGetArrayRead(collocated_error, CEED_MEM_HOST, &e); 404 for (CeedInt i=0; i<length; i++) { 405 *maxerror = PetscMax(*maxerror, PetscAbsScalar(e[i])); 406 } 407 CeedVectorRestoreArrayRead(collocated_error, &e); 408 ierr = MPI_Allreduce(MPI_IN_PLACE, maxerror, 1, MPIU_REAL, MPIU_MAX, 409 user->comm); CHKERRQ(ierr); 410 411 // Cleanup 412 CeedVectorDestroy(&collocated_error); 413 414 PetscFunctionReturn(0); 415 } 416 417 int main(int argc, char **argv) { 418 PetscInt ierr; 419 MPI_Comm comm; 420 char ceedresource[PETSC_MAX_PATH_LEN] = "/cpu/self"; 421 double my_rt_start, my_rt, rt_min, rt_max; 422 PetscInt degree, qextra, localnodes, localelem, melem[3], mnodes[3], p[3], 423 irank[3], lnodes[3], lsize, ncompu = 1, ksp_max_it_clip[2]; 424 PetscScalar *r; 425 PetscBool test_mode, benchmark_mode, write_solution; 426 PetscMPIInt size, rank; 427 PetscLogStage solvestage; 428 Vec X, Xloc, rhs, rhsloc; 429 Mat mat; 430 KSP ksp; 431 VecScatter ltog, ltog0, gtogD; 432 PetscMemType memtype; 433 User user; 434 Ceed ceed; 435 CeedBasis basisx, basisu; 436 CeedElemRestriction Erestrictx, Erestrictu, Erestrictui, Erestrictqdi; 437 CeedQFunction qfsetupgeo, qfsetuprhs, qfapply, qferror; 438 CeedOperator opsetupgeo, opsetuprhs, opapply, operror; 439 CeedVector xcoord, qdata, rhsceed, target; 440 CeedInt P, Q; 441 const CeedInt dim = 3, ncompx = 3; 442 bpType 
bpchoice; 443 444 ierr = PetscInitialize(&argc, &argv, NULL, help); 445 if (ierr) return ierr; 446 comm = PETSC_COMM_WORLD; 447 448 // Read command line options 449 ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); CHKERRQ(ierr); 450 bpchoice = CEED_BP1; 451 ierr = PetscOptionsEnum("-problem", 452 "CEED benchmark problem to solve", NULL, 453 bpTypes, (PetscEnum)bpchoice, (PetscEnum *)&bpchoice, 454 NULL); CHKERRQ(ierr); 455 ncompu = bpOptions[bpchoice].ncompu; 456 test_mode = PETSC_FALSE; 457 ierr = PetscOptionsBool("-test", 458 "Testing mode (do not print unless error is large)", 459 NULL, test_mode, &test_mode, NULL); CHKERRQ(ierr); 460 benchmark_mode = PETSC_FALSE; 461 ierr = PetscOptionsBool("-benchmark", 462 "Benchmarking mode (prints benchmark statistics)", 463 NULL, benchmark_mode, &benchmark_mode, NULL); 464 CHKERRQ(ierr); 465 write_solution = PETSC_FALSE; 466 ierr = PetscOptionsBool("-write_solution", 467 "Write solution for visualization", 468 NULL, write_solution, &write_solution, NULL); 469 CHKERRQ(ierr); 470 degree = test_mode ? 
3 : 1; 471 ierr = PetscOptionsInt("-degree", "Polynomial degree of tensor product basis", 472 NULL, degree, °ree, NULL); CHKERRQ(ierr); 473 qextra = bpOptions[bpchoice].qextra; 474 ierr = PetscOptionsInt("-qextra", "Number of extra quadrature points", 475 NULL, qextra, &qextra, NULL); CHKERRQ(ierr); 476 ierr = PetscOptionsString("-ceed", "CEED resource specifier", 477 NULL, ceedresource, ceedresource, 478 sizeof(ceedresource), NULL); CHKERRQ(ierr); 479 localnodes = 1000; 480 ierr = PetscOptionsInt("-local", 481 "Target number of locally owned nodes per process", 482 NULL, localnodes, &localnodes, NULL); CHKERRQ(ierr); 483 PetscInt two = 2; 484 ksp_max_it_clip[0] = 5; 485 ksp_max_it_clip[1] = 20; 486 ierr = PetscOptionsIntArray("-ksp_max_it_clip", 487 "Min and max number of iterations to use during benchmarking", 488 NULL, ksp_max_it_clip, &two, NULL); 489 CHKERRQ(ierr); 490 ierr = PetscOptionsEnd(); CHKERRQ(ierr); 491 P = degree + 1; 492 Q = P + qextra; 493 494 // Set up libCEED 495 CeedInit(ceedresource, &ceed); 496 CeedMemType memtypebackend; 497 CeedGetPreferredMemType(ceed, &memtypebackend); 498 499 VecType default_vectype = NULL, vectype; 500 switch (memtypebackend) { 501 case CEED_MEM_HOST: default_vectype = VECSTANDARD; break; 502 case CEED_MEM_DEVICE: { 503 const char *resolved; 504 CeedGetResource(ceed, &resolved); 505 if (strstr(resolved, "/gpu/cuda")) default_vectype = VECCUDA; 506 else if (strstr(resolved, "/gpu/hip/occa")) 507 default_vectype = VECSTANDARD; // https://github.com/CEED/libCEED/issues/678 508 else if (strstr(resolved, "/gpu/hip")) default_vectype = VECHIP; 509 else default_vectype = VECSTANDARD; 510 } 511 } 512 513 // Determine size of process grid 514 ierr = MPI_Comm_size(comm, &size); CHKERRQ(ierr); 515 Split3(size, p, false); 516 517 // Find a nicely composite number of elements no less than localnodes 518 for (localelem = PetscMax(1, localnodes / (degree*degree*degree)); ; 519 localelem++) { 520 Split3(localelem, melem, true); 521 if 
(Max3(melem) / Min3(melem) <= 2) break; 522 } 523 524 // Find my location in the process grid 525 ierr = MPI_Comm_rank(comm, &rank); CHKERRQ(ierr); 526 for (int d=0,rankleft=rank; d<dim; d++) { 527 const int pstride[3] = {p[1] *p[2], p[2], 1}; 528 irank[d] = rankleft / pstride[d]; 529 rankleft -= irank[d] * pstride[d]; 530 } 531 532 GlobalNodes(p, irank, degree, melem, mnodes); 533 534 // Setup global vector 535 ierr = VecCreate(comm, &X); CHKERRQ(ierr); 536 ierr = VecSetType(X, default_vectype); CHKERRQ(ierr); 537 ierr = VecSetSizes(X, mnodes[0]*mnodes[1]*mnodes[2]*ncompu, PETSC_DECIDE); 538 CHKERRQ(ierr); 539 ierr = VecSetFromOptions(X); CHKERRQ(ierr); 540 ierr = VecSetUp(X); CHKERRQ(ierr); 541 542 // Set up libCEED 543 CeedInit(ceedresource, &ceed); 544 545 // Print summary 546 CeedInt gsize; 547 ierr = VecGetSize(X, &gsize); CHKERRQ(ierr); 548 if (!test_mode) { 549 const char *usedresource; 550 CeedGetResource(ceed, &usedresource); 551 552 ierr = VecGetType(X, &vectype); CHKERRQ(ierr); 553 554 ierr = PetscPrintf(comm, 555 "\n-- CEED Benchmark Problem %d -- libCEED + PETSc --\n" 556 " PETSc:\n" 557 " PETSc Vec Type : %s\n" 558 " libCEED:\n" 559 " libCEED Backend : %s\n" 560 " libCEED Backend MemType : %s\n" 561 " Mesh:\n" 562 " Number of 1D Basis Nodes (P) : %d\n" 563 " Number of 1D Quadrature Points (Q) : %d\n" 564 " Global nodes : %D\n" 565 " Process Decomposition : %D %D %D\n" 566 " Local Elements : %D = %D %D %D\n" 567 " Owned nodes : %D = %D %D %D\n" 568 " DoF per node : %D\n", 569 bpchoice+1, vectype, usedresource, 570 CeedMemTypes[memtypebackend], 571 P, Q, gsize/ncompu, p[0], p[1], p[2], localelem, 572 melem[0], melem[1], melem[2], 573 mnodes[0]*mnodes[1]*mnodes[2], mnodes[0], mnodes[1], 574 mnodes[2], ncompu); CHKERRQ(ierr); 575 } 576 577 { 578 lsize = 1; 579 for (int d=0; d<dim; d++) { 580 lnodes[d] = melem[d]*degree + 1; 581 lsize *= lnodes[d]; 582 } 583 ierr = VecCreate(PETSC_COMM_SELF, &Xloc); CHKERRQ(ierr); 584 ierr = VecSetType(Xloc, 
default_vectype); CHKERRQ(ierr); 585 ierr = VecSetSizes(Xloc, lsize*ncompu, PETSC_DECIDE); CHKERRQ(ierr); 586 ierr = VecSetFromOptions(Xloc); CHKERRQ(ierr); 587 ierr = VecSetUp(Xloc); CHKERRQ(ierr); 588 589 // Create local-to-global scatter 590 PetscInt *ltogind, *ltogind0, *locind, l0count; 591 IS ltogis, ltogis0, locis; 592 PetscInt gstart[2][2][2], gmnodes[2][2][2][dim]; 593 594 for (int i=0; i<2; i++) { 595 for (int j=0; j<2; j++) { 596 for (int k=0; k<2; k++) { 597 PetscInt ijkrank[3] = {irank[0]+i, irank[1]+j, irank[2]+k}; 598 gstart[i][j][k] = GlobalStart(p, ijkrank, degree, melem); 599 GlobalNodes(p, ijkrank, degree, melem, gmnodes[i][j][k]); 600 } 601 } 602 } 603 604 ierr = PetscMalloc1(lsize, <ogind); CHKERRQ(ierr); 605 ierr = PetscMalloc1(lsize, <ogind0); CHKERRQ(ierr); 606 ierr = PetscMalloc1(lsize, &locind); CHKERRQ(ierr); 607 l0count = 0; 608 for (PetscInt i=0,ir,ii; ir=i>=mnodes[0], ii=i-ir*mnodes[0], i<lnodes[0]; i++) 609 for (PetscInt j=0,jr,jj; jr=j>=mnodes[1], jj=j-jr*mnodes[1], j<lnodes[1]; j++) 610 for (PetscInt k=0,kr,kk; kr=k>=mnodes[2], kk=k-kr*mnodes[2], k<lnodes[2]; k++) { 611 PetscInt here = (i*lnodes[1]+j)*lnodes[2]+k; 612 ltogind[here] = 613 gstart[ir][jr][kr] + (ii*gmnodes[ir][jr][kr][1]+jj)*gmnodes[ir][jr][kr][2]+kk; 614 if ((irank[0] == 0 && i == 0) 615 || (irank[1] == 0 && j == 0) 616 || (irank[2] == 0 && k == 0) 617 || (irank[0]+1 == p[0] && i+1 == lnodes[0]) 618 || (irank[1]+1 == p[1] && j+1 == lnodes[1]) 619 || (irank[2]+1 == p[2] && k+1 == lnodes[2])) 620 continue; 621 ltogind0[l0count] = ltogind[here]; 622 locind[l0count++] = here; 623 } 624 ierr = ISCreateBlock(comm, ncompu, lsize, ltogind, PETSC_OWN_POINTER, 625 <ogis); CHKERRQ(ierr); 626 ierr = VecScatterCreate(Xloc, NULL, X, ltogis, <og); CHKERRQ(ierr); 627 CHKERRQ(ierr); 628 ierr = ISCreateBlock(comm, ncompu, l0count, ltogind0, PETSC_OWN_POINTER, 629 <ogis0); CHKERRQ(ierr); 630 ierr = ISCreateBlock(comm, ncompu, l0count, locind, PETSC_OWN_POINTER, 631 &locis); 
CHKERRQ(ierr); 632 ierr = VecScatterCreate(Xloc, locis, X, ltogis0, <og0); CHKERRQ(ierr); 633 { 634 // Create global-to-global scatter for Dirichlet values (everything not in 635 // ltogis0, which is the range of ltog0) 636 PetscInt xstart, xend, *indD, countD = 0; 637 IS isD; 638 const PetscScalar *x; 639 ierr = VecZeroEntries(Xloc); CHKERRQ(ierr); 640 ierr = VecSet(X, 1.0); CHKERRQ(ierr); 641 ierr = VecScatterBegin(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD); 642 CHKERRQ(ierr); 643 ierr = VecScatterEnd(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD); 644 CHKERRQ(ierr); 645 ierr = VecGetOwnershipRange(X, &xstart, &xend); CHKERRQ(ierr); 646 ierr = PetscMalloc1(xend-xstart, &indD); CHKERRQ(ierr); 647 ierr = VecGetArrayRead(X, &x); CHKERRQ(ierr); 648 for (PetscInt i=0; i<xend-xstart; i++) { 649 if (x[i] == 1.) indD[countD++] = xstart + i; 650 } 651 ierr = VecRestoreArrayRead(X, &x); CHKERRQ(ierr); 652 ierr = ISCreateGeneral(comm, countD, indD, PETSC_COPY_VALUES, &isD); 653 CHKERRQ(ierr); 654 ierr = PetscFree(indD); CHKERRQ(ierr); 655 ierr = VecScatterCreate(X, isD, X, isD, >ogD); CHKERRQ(ierr); 656 ierr = ISDestroy(&isD); CHKERRQ(ierr); 657 } 658 ierr = ISDestroy(<ogis); CHKERRQ(ierr); 659 ierr = ISDestroy(<ogis0); CHKERRQ(ierr); 660 ierr = ISDestroy(&locis); CHKERRQ(ierr); 661 } 662 663 // CEED bases 664 CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompu, P, Q, 665 bpOptions[bpchoice].qmode, &basisu); 666 CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompx, 2, Q, 667 bpOptions[bpchoice].qmode, &basisx); 668 669 // CEED restrictions 670 CreateRestriction(ceed, melem, P, ncompu, &Erestrictu); 671 CreateRestriction(ceed, melem, 2, dim, &Erestrictx); 672 CeedInt nelem = melem[0]*melem[1]*melem[2]; 673 CeedElemRestrictionCreateStrided(ceed, nelem, Q*Q*Q, ncompu, 674 ncompu*nelem*Q*Q*Q, 675 CEED_STRIDES_BACKEND, &Erestrictui); 676 CeedElemRestrictionCreateStrided(ceed, nelem, Q*Q*Q, 677 bpOptions[bpchoice].qdatasize, 678 bpOptions[bpchoice].qdatasize*nelem*Q*Q*Q, 679 
CEED_STRIDES_BACKEND, &Erestrictqdi); 680 { 681 CeedScalar *xloc; 682 CeedInt shape[3] = {melem[0]+1, melem[1]+1, melem[2]+1}, len = 683 shape[0]*shape[1]*shape[2]; 684 xloc = malloc(len*ncompx*sizeof xloc[0]); 685 for (CeedInt i=0; i<shape[0]; i++) { 686 for (CeedInt j=0; j<shape[1]; j++) { 687 for (CeedInt k=0; k<shape[2]; k++) { 688 xloc[dim*((i*shape[1]+j)*shape[2]+k) + 0] = 1.*(irank[0]*melem[0]+i) / 689 (p[0]*melem[0]); 690 xloc[dim*((i*shape[1]+j)*shape[2]+k) + 1] = 1.*(irank[1]*melem[1]+j) / 691 (p[1]*melem[1]); 692 xloc[dim*((i*shape[1]+j)*shape[2]+k) + 2] = 1.*(irank[2]*melem[2]+k) / 693 (p[2]*melem[2]); 694 } 695 } 696 } 697 CeedVectorCreate(ceed, len*ncompx, &xcoord); 698 CeedVectorSetArray(xcoord, CEED_MEM_HOST, CEED_OWN_POINTER, xloc); 699 } 700 701 // Create the Qfunction that builds the operator quadrature data 702 CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpchoice].setupgeo, 703 bpOptions[bpchoice].setupgeofname, &qfsetupgeo); 704 CeedQFunctionAddInput(qfsetupgeo, "dx", ncompx*dim, CEED_EVAL_GRAD); 705 CeedQFunctionAddInput(qfsetupgeo, "weight", 1, CEED_EVAL_WEIGHT); 706 CeedQFunctionAddOutput(qfsetupgeo, "qdata", bpOptions[bpchoice].qdatasize, 707 CEED_EVAL_NONE); 708 709 // Create the Qfunction that sets up the RHS and true solution 710 CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpchoice].setuprhs, 711 bpOptions[bpchoice].setuprhsfname, &qfsetuprhs); 712 CeedQFunctionAddInput(qfsetuprhs, "x", ncompx, CEED_EVAL_INTERP); 713 CeedQFunctionAddInput(qfsetuprhs, "dx", ncompx*dim, CEED_EVAL_GRAD); 714 CeedQFunctionAddInput(qfsetuprhs, "weight", 1, CEED_EVAL_WEIGHT); 715 CeedQFunctionAddOutput(qfsetuprhs, "true_soln", ncompu, CEED_EVAL_NONE); 716 CeedQFunctionAddOutput(qfsetuprhs, "rhs", ncompu, CEED_EVAL_INTERP); 717 718 // Set up PDE operator 719 CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpchoice].apply, 720 bpOptions[bpchoice].applyfname, &qfapply); 721 // Add inputs and outputs 722 CeedInt inscale = 
bpOptions[bpchoice].inmode==CEED_EVAL_GRAD ? 3 : 1; 723 CeedInt outscale = bpOptions[bpchoice].outmode==CEED_EVAL_GRAD ? 3 : 1; 724 CeedQFunctionAddInput(qfapply, "u", ncompu*inscale, 725 bpOptions[bpchoice].inmode); 726 CeedQFunctionAddInput(qfapply, "qdata", bpOptions[bpchoice].qdatasize, 727 CEED_EVAL_NONE); 728 CeedQFunctionAddOutput(qfapply, "v", ncompu*outscale, 729 bpOptions[bpchoice].outmode); 730 731 // Create the error qfunction 732 CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpchoice].error, 733 bpOptions[bpchoice].errorfname, &qferror); 734 CeedQFunctionAddInput(qferror, "u", ncompu, CEED_EVAL_INTERP); 735 CeedQFunctionAddInput(qferror, "true_soln", ncompu, CEED_EVAL_NONE); 736 CeedQFunctionAddOutput(qferror, "error", ncompu, CEED_EVAL_NONE); 737 738 // Create the persistent vectors that will be needed in setup 739 CeedInt nqpts; 740 CeedBasisGetNumQuadraturePoints(basisu, &nqpts); 741 CeedVectorCreate(ceed, bpOptions[bpchoice].qdatasize*nelem*nqpts, &qdata); 742 CeedVectorCreate(ceed, nelem*nqpts*ncompu, &target); 743 CeedVectorCreate(ceed, lsize*ncompu, &rhsceed); 744 745 // Create the operator that builds the quadrature data for the ceed operator 746 CeedOperatorCreate(ceed, qfsetupgeo, CEED_QFUNCTION_NONE, 747 CEED_QFUNCTION_NONE, &opsetupgeo); 748 CeedOperatorSetField(opsetupgeo, "dx", Erestrictx, basisx, 749 CEED_VECTOR_ACTIVE); 750 CeedOperatorSetField(opsetupgeo, "weight", CEED_ELEMRESTRICTION_NONE, basisx, 751 CEED_VECTOR_NONE); 752 CeedOperatorSetField(opsetupgeo, "qdata", Erestrictqdi, 753 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 754 755 // Create the operator that builds the RHS and true solution 756 CeedOperatorCreate(ceed, qfsetuprhs, CEED_QFUNCTION_NONE, 757 CEED_QFUNCTION_NONE, &opsetuprhs); 758 CeedOperatorSetField(opsetuprhs, "x", Erestrictx, basisx, 759 CEED_VECTOR_ACTIVE); 760 CeedOperatorSetField(opsetuprhs, "dx", Erestrictx, basisx, 761 CEED_VECTOR_ACTIVE); 762 CeedOperatorSetField(opsetuprhs, "weight", 
CEED_ELEMRESTRICTION_NONE, basisx, 763 CEED_VECTOR_NONE); 764 CeedOperatorSetField(opsetuprhs, "true_soln", Erestrictui, 765 CEED_BASIS_COLLOCATED, target); 766 CeedOperatorSetField(opsetuprhs, "rhs", Erestrictu, basisu, 767 CEED_VECTOR_ACTIVE); 768 769 // Create the mass or diff operator 770 CeedOperatorCreate(ceed, qfapply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, 771 &opapply); 772 CeedOperatorSetField(opapply, "u", Erestrictu, basisu, CEED_VECTOR_ACTIVE); 773 CeedOperatorSetField(opapply, "qdata", Erestrictqdi, CEED_BASIS_COLLOCATED, 774 qdata); 775 CeedOperatorSetField(opapply, "v", Erestrictu, basisu, CEED_VECTOR_ACTIVE); 776 777 // Create the error operator 778 CeedOperatorCreate(ceed, qferror, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, 779 &operror); 780 CeedOperatorSetField(operror, "u", Erestrictu, basisu, CEED_VECTOR_ACTIVE); 781 CeedOperatorSetField(operror, "true_soln", Erestrictui, 782 CEED_BASIS_COLLOCATED, target); 783 CeedOperatorSetField(operror, "error", Erestrictui, CEED_BASIS_COLLOCATED, 784 CEED_VECTOR_ACTIVE); 785 786 // Set up Mat 787 ierr = PetscMalloc1(1, &user); CHKERRQ(ierr); 788 user->comm = comm; 789 user->ltog = ltog; 790 if (bpchoice != CEED_BP1 && bpchoice != CEED_BP2) { 791 user->ltog0 = ltog0; 792 user->gtogD = gtogD; 793 } 794 user->Xloc = Xloc; 795 ierr = VecDuplicate(Xloc, &user->Yloc); CHKERRQ(ierr); 796 CeedVectorCreate(ceed, lsize*ncompu, &user->xceed); 797 CeedVectorCreate(ceed, lsize*ncompu, &user->yceed); 798 user->op = opapply; 799 user->qdata = qdata; 800 user->ceed = ceed; 801 802 ierr = MatCreateShell(comm, mnodes[0]*mnodes[1]*mnodes[2]*ncompu, 803 mnodes[0]*mnodes[1]*mnodes[2]*ncompu, 804 PETSC_DECIDE, PETSC_DECIDE, user, &mat); CHKERRQ(ierr); 805 if (bpchoice == CEED_BP1 || bpchoice == CEED_BP2) { 806 ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Mass); 807 CHKERRQ(ierr); 808 } else { 809 ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Diff); 810 CHKERRQ(ierr); 811 } 812 
ierr = VecGetType(X, &vectype); CHKERRQ(ierr); 813 ierr = MatShellSetVecType(mat, vectype); CHKERRQ(ierr); 814 815 // Get RHS vector 816 ierr = VecDuplicate(X, &rhs); CHKERRQ(ierr); 817 ierr = VecDuplicate(Xloc, &rhsloc); CHKERRQ(ierr); 818 ierr = VecZeroEntries(rhsloc); CHKERRQ(ierr); 819 ierr = VecGetArrayAndMemType(rhsloc, &r, &memtype); CHKERRQ(ierr); 820 CeedVectorSetArray(rhsceed, MemTypeP2C(memtype), CEED_USE_POINTER, r); 821 822 // Setup qdata, rhs, and target 823 CeedOperatorApply(opsetupgeo, xcoord, qdata, CEED_REQUEST_IMMEDIATE); 824 CeedOperatorApply(opsetuprhs, xcoord, rhsceed, CEED_REQUEST_IMMEDIATE); 825 CeedVectorDestroy(&xcoord); 826 827 // Gather RHS 828 ierr = CeedVectorTakeArray(rhsceed, MemTypeP2C(memtype), NULL); CHKERRQ(ierr); 829 ierr = VecRestoreArrayAndMemType(rhsloc, &r); CHKERRQ(ierr); 830 ierr = VecZeroEntries(rhs); CHKERRQ(ierr); 831 ierr = VecScatterBegin(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD); 832 CHKERRQ(ierr); 833 ierr = VecScatterEnd(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD); 834 CHKERRQ(ierr); 835 CeedVectorDestroy(&rhsceed); 836 837 ierr = KSPCreate(comm, &ksp); CHKERRQ(ierr); 838 { 839 PC pc; 840 ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); 841 if (bpchoice == CEED_BP1 || bpchoice == CEED_BP2) { 842 ierr = PCSetType(pc, PCJACOBI); CHKERRQ(ierr); 843 ierr = PCJacobiSetType(pc, PC_JACOBI_ROWSUM); CHKERRQ(ierr); 844 } else { 845 ierr = PCSetType(pc, PCNONE); CHKERRQ(ierr); 846 } 847 ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr); 848 ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL); CHKERRQ(ierr); 849 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 850 PETSC_DEFAULT); CHKERRQ(ierr); 851 } 852 ierr = KSPSetOperators(ksp, mat, mat); CHKERRQ(ierr); 853 // First run's performance log is not considered for benchmarking purposes 854 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1); 855 CHKERRQ(ierr); 856 my_rt_start = MPI_Wtime(); 857 ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 858 my_rt = 
MPI_Wtime() - my_rt_start; 859 ierr = MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, comm); 860 CHKERRQ(ierr); 861 // Set maxits based on first iteration timing 862 if (my_rt > 0.02) { 863 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 864 ksp_max_it_clip[0]); CHKERRQ(ierr); 865 } else { 866 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 867 ksp_max_it_clip[1]); CHKERRQ(ierr); 868 } 869 ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); 870 871 // Timed solve 872 ierr = VecZeroEntries(X); CHKERRQ(ierr); 873 ierr = PetscBarrier((PetscObject)ksp); CHKERRQ(ierr); 874 875 // -- Performance logging 876 ierr = PetscLogStageRegister("Solve Stage", &solvestage); CHKERRQ(ierr); 877 ierr = PetscLogStagePush(solvestage); CHKERRQ(ierr); 878 879 // -- Solve 880 my_rt_start = MPI_Wtime(); 881 ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 882 my_rt = MPI_Wtime() - my_rt_start; 883 884 // -- Performance logging 885 ierr = PetscLogStagePop(); 886 887 // Output results 888 { 889 KSPType ksptype; 890 KSPConvergedReason reason; 891 PetscReal rnorm; 892 PetscInt its; 893 ierr = KSPGetType(ksp, &ksptype); CHKERRQ(ierr); 894 ierr = KSPGetConvergedReason(ksp, &reason); CHKERRQ(ierr); 895 ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); 896 ierr = KSPGetResidualNorm(ksp, &rnorm); CHKERRQ(ierr); 897 if (!test_mode || reason < 0 || rnorm > 1e-9) { 898 ierr = PetscPrintf(comm, 899 " KSP:\n" 900 " KSP Type : %s\n" 901 " KSP Convergence : %s\n" 902 " Total KSP Iterations : %D\n" 903 " Final rnorm : %e\n", 904 ksptype, KSPConvergedReasons[reason], its, 905 (double)rnorm); CHKERRQ(ierr); 906 } 907 if (!test_mode) { 908 ierr = PetscPrintf(comm," Performance:\n"); CHKERRQ(ierr); 909 } 910 { 911 PetscReal maxerror; 912 ierr = ComputeErrorMax(user, operror, X, target, &maxerror); 913 CHKERRQ(ierr); 914 PetscReal tol = 5e-2; 915 if (!test_mode || maxerror > tol) { 916 ierr = MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, comm); 917 
CHKERRQ(ierr); 918 ierr = MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, comm); 919 CHKERRQ(ierr); 920 ierr = PetscPrintf(comm, 921 " Pointwise Error (max) : %e\n" 922 " CG Solve Time : %g (%g) sec\n", 923 (double)maxerror, rt_max, rt_min); CHKERRQ(ierr); 924 } 925 } 926 if (!test_mode) { 927 ierr = PetscPrintf(comm, 928 " DoFs/Sec in CG : %g (%g) million\n", 929 1e-6*gsize*its/rt_max, 930 1e-6*gsize*its/rt_min); CHKERRQ(ierr); 931 } 932 } 933 934 if (write_solution) { 935 PetscViewer vtkviewersoln; 936 937 ierr = PetscViewerCreate(comm, &vtkviewersoln); CHKERRQ(ierr); 938 ierr = PetscViewerSetType(vtkviewersoln, PETSCVIEWERVTK); CHKERRQ(ierr); 939 ierr = PetscViewerFileSetName(vtkviewersoln, "solution.vtk"); CHKERRQ(ierr); 940 ierr = VecView(X, vtkviewersoln); CHKERRQ(ierr); 941 ierr = PetscViewerDestroy(&vtkviewersoln); CHKERRQ(ierr); 942 } 943 944 ierr = VecDestroy(&rhs); CHKERRQ(ierr); 945 ierr = VecDestroy(&rhsloc); CHKERRQ(ierr); 946 ierr = VecDestroy(&X); CHKERRQ(ierr); 947 ierr = VecDestroy(&user->Xloc); CHKERRQ(ierr); 948 ierr = VecDestroy(&user->Yloc); CHKERRQ(ierr); 949 ierr = VecScatterDestroy(<og); CHKERRQ(ierr); 950 ierr = VecScatterDestroy(<og0); CHKERRQ(ierr); 951 ierr = VecScatterDestroy(>ogD); CHKERRQ(ierr); 952 ierr = MatDestroy(&mat); CHKERRQ(ierr); 953 ierr = KSPDestroy(&ksp); CHKERRQ(ierr); 954 955 CeedVectorDestroy(&user->xceed); 956 CeedVectorDestroy(&user->yceed); 957 CeedVectorDestroy(&user->qdata); 958 CeedVectorDestroy(&target); 959 CeedOperatorDestroy(&opsetupgeo); 960 CeedOperatorDestroy(&opsetuprhs); 961 CeedOperatorDestroy(&opapply); 962 CeedOperatorDestroy(&operror); 963 CeedElemRestrictionDestroy(&Erestrictu); 964 CeedElemRestrictionDestroy(&Erestrictx); 965 CeedElemRestrictionDestroy(&Erestrictui); 966 CeedElemRestrictionDestroy(&Erestrictqdi); 967 CeedQFunctionDestroy(&qfsetupgeo); 968 CeedQFunctionDestroy(&qfsetuprhs); 969 CeedQFunctionDestroy(&qfapply); 970 CeedQFunctionDestroy(&qferror); 971 CeedBasisDestroy(&basisu); 
972 CeedBasisDestroy(&basisx); 973 CeedDestroy(&ceed); 974 ierr = PetscFree(user); CHKERRQ(ierr); 975 return PetscFinalize(); 976 } 977