1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 // libCEED + PETSc Example: CEED BPs 18 // 19 // This example demonstrates a simple usage of libCEED with PETSc to solve the 20 // CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps. 21 // 22 // The code is intentionally "raw", using only low-level communication 23 // primitives. 24 // 25 // Build with: 26 // 27 // make bpsraw [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>] 28 // 29 // Sample runs: 30 // 31 // ./bpsraw -problem bp1 32 // ./bpsraw -problem bp2 33 // ./bpsraw -problem bp3 34 // ./bpsraw -problem bp4 35 // ./bpsraw -problem bp5 -ceed /cpu/self 36 // ./bpsraw -problem bp6 -ceed /gpu/cuda 37 // 38 //TESTARGS -ceed {ceed_resource} -test -problem bp2 -degree 5 -qextra 1 -ksp_max_it_clip 15,15 39 40 /// @file 41 /// CEED BPs example using PETSc 42 /// See bps.c for an implementation using DMPlex unstructured grids. 
43 const char help[] = "Solve CEED BPs using PETSc\n"; 44 45 #include <stdbool.h> 46 #include <string.h> 47 #include <petscksp.h> 48 #include <ceed.h> 49 #include "qfunctions/bps/common.h" 50 #include "qfunctions/bps/bp1.h" 51 #include "qfunctions/bps/bp2.h" 52 #include "qfunctions/bps/bp3.h" 53 #include "qfunctions/bps/bp4.h" 54 55 #include <petscsys.h> 56 #if PETSC_VERSION_LT(3,12,0) 57 #ifdef PETSC_HAVE_CUDA 58 #include <petsccuda.h> 59 // Note: With PETSc prior to version 3.12.0, providing the source path to 60 // include 'cublas_v2.h' will be needed to use 'petsccuda.h'. 61 #endif 62 #endif 63 64 static void Split3(PetscInt size, PetscInt m[3], bool reverse) { 65 for (PetscInt d=0,sizeleft=size; d<3; d++) { 66 PetscInt try = (PetscInt)PetscCeilReal(PetscPowReal(sizeleft, 1./(3 - d))); 67 while (try * (sizeleft / try) != sizeleft) try++; 68 m[reverse ? 2-d : d] = try; 69 sizeleft /= try; 70 } 71 } 72 73 static PetscInt Max3(const PetscInt a[3]) { 74 return PetscMax(a[0], PetscMax(a[1], a[2])); 75 } 76 static PetscInt Min3(const PetscInt a[3]) { 77 return PetscMin(a[0], PetscMin(a[1], a[2])); 78 } 79 static void GlobalNodes(const PetscInt p[3], const PetscInt irank[3], 80 PetscInt degree, const PetscInt melem[3], 81 PetscInt mnodes[3]) { 82 for (int d=0; d<3; d++) 83 mnodes[d] = degree*melem[d] + (irank[d] == p[d]-1); 84 } 85 static PetscInt GlobalStart(const PetscInt p[3], const PetscInt irank[3], 86 PetscInt degree, const PetscInt melem[3]) { 87 PetscInt start = 0; 88 // Dumb brute-force is easier to read 89 for (PetscInt i=0; i<p[0]; i++) { 90 for (PetscInt j=0; j<p[1]; j++) { 91 for (PetscInt k=0; k<p[2]; k++) { 92 PetscInt mnodes[3], ijkrank[] = {i,j,k}; 93 if (i == irank[0] && j == irank[1] && k == irank[2]) return start; 94 GlobalNodes(p, ijkrank, degree, melem, mnodes); 95 start += mnodes[0] * mnodes[1] * mnodes[2]; 96 } 97 } 98 } 99 return -1; 100 } 101 static int CreateRestriction(Ceed ceed, const CeedInt melem[3], CeedInt P, 102 CeedInt ncomp, 
CeedElemRestriction *Erestrict) { 103 const PetscInt nelem = melem[0]*melem[1]*melem[2]; 104 PetscInt mnodes[3], *idx, *idxp; 105 106 // Get indicies 107 for (int d=0; d<3; d++) mnodes[d] = melem[d]*(P-1) + 1; 108 idxp = idx = malloc(nelem*P*P*P*sizeof idx[0]); 109 for (CeedInt i=0; i<melem[0]; i++) 110 for (CeedInt j=0; j<melem[1]; j++) 111 for (CeedInt k=0; k<melem[2]; k++,idxp += P*P*P) 112 for (CeedInt ii=0; ii<P; ii++) 113 for (CeedInt jj=0; jj<P; jj++) 114 for (CeedInt kk=0; kk<P; kk++) { 115 if (0) { // This is the C-style (i,j,k) ordering that I prefer 116 idxp[(ii*P+jj)*P+kk] = ncomp*(((i*(P-1)+ii)*mnodes[1] 117 + (j*(P-1)+jj))*mnodes[2] 118 + (k*(P-1)+kk)); 119 } else { // (k,j,i) ordering for consistency with MFEM example 120 idxp[ii+P*(jj+P*kk)] = ncomp*(((i*(P-1)+ii)*mnodes[1] 121 + (j*(P-1)+jj))*mnodes[2] 122 + (k*(P-1)+kk)); 123 } 124 } 125 126 // Setup CEED restriction 127 CeedElemRestrictionCreate(ceed, nelem, P*P*P, ncomp, 1, 128 mnodes[0]*mnodes[1]*mnodes[2]*ncomp, 129 CEED_MEM_HOST, CEED_OWN_POINTER, idx, Erestrict); 130 131 PetscFunctionReturn(0); 132 } 133 134 // Data for PETSc 135 typedef struct User_ *User; 136 struct User_ { 137 MPI_Comm comm; 138 VecScatter ltog; // Scatter for all entries 139 VecScatter ltog0; // Skip Dirichlet values 140 VecScatter gtogD; // global-to-global; only Dirichlet values 141 Vec Xloc, Yloc; 142 CeedVector xceed, yceed; 143 CeedOperator op; 144 CeedVector qdata; 145 Ceed ceed; 146 CeedMemType memtype; 147 int (*VecGetArray)(Vec, PetscScalar **); 148 int (*VecGetArrayRead)(Vec, const PetscScalar **); 149 int (*VecRestoreArray)(Vec, PetscScalar **); 150 int (*VecRestoreArrayRead)(Vec, const PetscScalar **); 151 }; 152 153 // MemType Options 154 static const char *const memTypes[] = {"host","device","memType", 155 "CEED_MEM_",0 156 }; 157 158 // BP Options 159 typedef enum { 160 CEED_BP1 = 0, CEED_BP2 = 1, CEED_BP3 = 2, 161 CEED_BP4 = 3, CEED_BP5 = 4, CEED_BP6 = 5 162 } bpType; 163 static const char *const 
bpTypes[] = {"bp1","bp2","bp3","bp4","bp5","bp6",
             "bpType","CEED_BP",0
            };

// BP specific data
typedef struct {
  CeedInt ncompu, qdatasize, qextra;  // solution components, qdata size per
                                      //   quadrature point, extra quad points
  CeedQFunctionUser setupgeo, setuprhs, apply, error;  // QFunction callbacks
  const char *setupgeofname, *setuprhsfname, *applyfname, *errorfname;
  CeedEvalMode inmode, outmode;       // basis eval mode for u in / v out
  CeedQuadMode qmode;                 // quadrature rule
} bpData;

// Per-problem configuration table, indexed by bpType
bpData bpOptions[6] = {
  [CEED_BP1] = {  // scalar mass operator, Gauss quadrature
    .ncompu = 1,
    .qdatasize = 1,
    .qextra = 1,
    .setupgeo = SetupMassGeo,
    .setuprhs = SetupMassRhs,
    .apply = Mass,
    .error = Error,
    .setupgeofname = SetupMassGeo_loc,
    .setuprhsfname = SetupMassRhs_loc,
    .applyfname = Mass_loc,
    .errorfname = Error_loc,
    .inmode = CEED_EVAL_INTERP,
    .outmode = CEED_EVAL_INTERP,
    .qmode = CEED_GAUSS
  },
  [CEED_BP2] = {  // 3-component mass operator
    .ncompu = 3,
    .qdatasize = 1,
    .qextra = 1,
    .setupgeo = SetupMassGeo,
    .setuprhs = SetupMassRhs3,
    .apply = Mass3,
    .error = Error3,
    .setupgeofname = SetupMassGeo_loc,
    .setuprhsfname = SetupMassRhs3_loc,
    .applyfname = Mass3_loc,
    .errorfname = Error3_loc,
    .inmode = CEED_EVAL_INTERP,
    .outmode = CEED_EVAL_INTERP,
    .qmode = CEED_GAUSS
  },
  [CEED_BP3] = {  // scalar diffusion operator (6 = symmetric 3x3 qdata)
    .ncompu = 1,
    .qdatasize = 6,
    .qextra = 1,
    .setupgeo = SetupDiffGeo,
    .setuprhs = SetupDiffRhs,
    .apply = Diff,
    .error = Error,
    .setupgeofname = SetupDiffGeo_loc,
    .setuprhsfname = SetupDiffRhs_loc,
    .applyfname = Diff_loc,
    .errorfname = Error_loc,
    .inmode = CEED_EVAL_GRAD,
    .outmode = CEED_EVAL_GRAD,
    .qmode = CEED_GAUSS
  },
  [CEED_BP4] = {  // 3-component diffusion operator
    .ncompu = 3,
    .qdatasize = 6,
    .qextra = 1,
    .setupgeo = SetupDiffGeo,
    .setuprhs = SetupDiffRhs3,
    .apply = Diff3,
    .error = Error3,
    .setupgeofname = SetupDiffGeo_loc,
    .setuprhsfname = SetupDiffRhs3_loc,
    .applyfname = Diff3_loc,
    .errorfname = Error3_loc,
    .inmode = CEED_EVAL_GRAD,
    .outmode = CEED_EVAL_GRAD,
    .qmode = CEED_GAUSS
  },
  [CEED_BP5] = {  // BP3 with collocated (Gauss-Lobatto) quadrature, qextra=0
    .ncompu = 1,
    .qdatasize = 6,
    .qextra = 0,
    .setupgeo = SetupDiffGeo,
    .setuprhs = SetupDiffRhs,
    .apply = Diff,
    .error = Error,
    .setupgeofname = SetupDiffGeo_loc,
    .setuprhsfname = SetupDiffRhs_loc,
    .applyfname = Diff_loc,
    .errorfname = Error_loc,
    .inmode = CEED_EVAL_GRAD,
    .outmode = CEED_EVAL_GRAD,
    .qmode = CEED_GAUSS_LOBATTO
  },
  [CEED_BP6] = {  // BP4 with collocated (Gauss-Lobatto) quadrature, qextra=0
    .ncompu = 3,
    .qdatasize = 6,
    .qextra = 0,
    .setupgeo = SetupDiffGeo,
    .setuprhs = SetupDiffRhs3,
    .apply = Diff3,
    .error = Error3,
    .setupgeofname = SetupDiffGeo_loc,
    .setuprhsfname = SetupDiffRhs3_loc,
    .applyfname = Diff3_loc,
    .errorfname = Error3_loc,
    .inmode = CEED_EVAL_GRAD,
    .outmode = CEED_EVAL_GRAD,
    .qmode = CEED_GAUSS_LOBATTO
  }
};

// This function uses libCEED to compute the action of the mass matrix
static PetscErrorCode MatMult_Mass(Mat A, Vec X, Vec Y) {
  PetscErrorCode ierr;
  User user;            // shell matrix context
  PetscScalar *x, *y;

  PetscFunctionBeginUser;

  ierr = MatShellGetContext(A, &user); CHKERRQ(ierr);

  // Global-to-local
  ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES,
                         SCATTER_REVERSE); CHKERRQ(ierr);
  ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES,
                       SCATTER_REVERSE); CHKERRQ(ierr);

  // Setup libCEED vectors: wrap the PETSc arrays without copying
  ierr = user->VecGetArrayRead(user->Xloc, (const PetscScalar **)&x);
  CHKERRQ(ierr);
  ierr = user->VecGetArray(user->Yloc, &y); CHKERRQ(ierr);
  CeedVectorSetArray(user->xceed, user->memtype, CEED_USE_POINTER, x);
  CeedVectorSetArray(user->yceed, user->memtype, CEED_USE_POINTER, y);

  // Apply libCEED operator
  CeedOperatorApply(user->op, user->xceed, user->yceed,
                    CEED_REQUEST_IMMEDIATE);

  // Restore PETSc vectors (take back ownership of the borrowed arrays)
  CeedVectorTakeArray(user->xceed, user->memtype, NULL);
  CeedVectorTakeArray(user->yceed, user->memtype, NULL);
  ierr =
user->VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x);
  CHKERRQ(ierr);
  ierr = user->VecRestoreArray(user->Yloc, &y); CHKERRQ(ierr);

  // Local-to-global: accumulate local contributions into the global output
  if (Y) {
    ierr = VecZeroEntries(Y); CHKERRQ(ierr);
    ierr = VecScatterBegin(user->ltog, user->Yloc, Y, ADD_VALUES,
                           SCATTER_FORWARD); CHKERRQ(ierr);
    ierr = VecScatterEnd(user->ltog, user->Yloc, Y, ADD_VALUES,
                         SCATTER_FORWARD); CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

// This function uses libCEED to compute the action of the Laplacian with
// Dirichlet boundary conditions
static PetscErrorCode MatMult_Diff(Mat A, Vec X, Vec Y) {
  PetscErrorCode ierr;
  User user;            // shell matrix context
  PetscScalar *x, *y;

  PetscFunctionBeginUser;

  ierr = MatShellGetContext(A, &user); CHKERRQ(ierr);

  // Global-to-local (ltog0 skips Dirichlet nodes; those stay zero locally)
  ierr = VecScatterBegin(user->ltog0, X, user->Xloc, INSERT_VALUES,
                         SCATTER_REVERSE); CHKERRQ(ierr);
  ierr = VecScatterEnd(user->ltog0, X, user->Xloc, INSERT_VALUES,
                       SCATTER_REVERSE);
  CHKERRQ(ierr);

  // Setup libCEED vectors: wrap the PETSc arrays without copying
  ierr = user->VecGetArrayRead(user->Xloc, (const PetscScalar **)&x);
  CHKERRQ(ierr);
  ierr = user->VecGetArray(user->Yloc, &y); CHKERRQ(ierr);
  CeedVectorSetArray(user->xceed, user->memtype, CEED_USE_POINTER, x);
  CeedVectorSetArray(user->yceed, user->memtype, CEED_USE_POINTER, y);

  // Apply libCEED operator
  CeedOperatorApply(user->op, user->xceed, user->yceed,
                    CEED_REQUEST_IMMEDIATE);

  // Restore PETSc vectors (take back ownership of the borrowed arrays)
  CeedVectorTakeArray(user->xceed, user->memtype, NULL);
  CeedVectorTakeArray(user->yceed, user->memtype, NULL);
  ierr = user->VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x);
  CHKERRQ(ierr);
  ierr = user->VecRestoreArray(user->Yloc, &y); CHKERRQ(ierr);

  // Local-to-global: first copy Dirichlet dofs straight from X via gtogD
  // (identity on the boundary), then ADD the operator action on the interior
  // dofs via ltog0 — this ordering is what keeps boundary values fixed.
  ierr = VecZeroEntries(Y); CHKERRQ(ierr);
  ierr = VecScatterBegin(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD);
  CHKERRQ(ierr);
  ierr = VecScatterEnd(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD);
  CHKERRQ(ierr);
  ierr = VecScatterBegin(user->ltog0, user->Yloc, Y, ADD_VALUES,
                         SCATTER_FORWARD); CHKERRQ(ierr);
  ierr = VecScatterEnd(user->ltog0, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD);
  CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

// This function calculates the error in the final solution, as the global
// maximum over quadrature points of |u - true_soln| (MPI_Allreduce of the
// local maxima).
static PetscErrorCode ComputeErrorMax(User user, CeedOperator operror, Vec X,
                                      CeedVector target, PetscReal *maxerror) {
  PetscErrorCode ierr;
  PetscScalar *x;
  CeedVector collocated_error;  // pointwise error at quadrature points
  CeedInt length;

  PetscFunctionBeginUser;

  CeedVectorGetLength(target, &length);
  CeedVectorCreate(user->ceed, length, &collocated_error);

  // Global-to-local
  ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES,
                         SCATTER_REVERSE); CHKERRQ(ierr);
  ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES,
                       SCATTER_REVERSE); CHKERRQ(ierr);

  // Setup libCEED vector
  ierr = user->VecGetArrayRead(user->Xloc, (const PetscScalar **)&x);
  CHKERRQ(ierr);
  CeedVectorSetArray(user->xceed, user->memtype, CEED_USE_POINTER, x);

  // Apply libCEED operator
  CeedOperatorApply(operror, user->xceed, collocated_error,
                    CEED_REQUEST_IMMEDIATE);

  // Restore PETSc vector
  ierr = user->VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x);
  CHKERRQ(ierr);

  // Reduce max error
  *maxerror = 0;
  const CeedScalar *e;
  CeedVectorGetArrayRead(collocated_error, CEED_MEM_HOST, &e);
  for (CeedInt i=0; i<length; i++) {
    *maxerror = PetscMax(*maxerror, PetscAbsScalar(e[i]));
  }
  CeedVectorRestoreArrayRead(collocated_error, &e);
  ierr = MPI_Allreduce(MPI_IN_PLACE, maxerror, 1, MPIU_REAL, MPIU_MAX,
                       user->comm); CHKERRQ(ierr);

  // Cleanup
  CeedVectorDestroy(&collocated_error);

  PetscFunctionReturn(0);
}

int main(int argc, char **argv) {
PetscInt ierr; 421 MPI_Comm comm; 422 char ceedresource[PETSC_MAX_PATH_LEN] = "/cpu/self"; 423 double my_rt_start, my_rt, rt_min, rt_max; 424 PetscInt degree, qextra, localnodes, localelem, melem[3], mnodes[3], p[3], 425 irank[3], lnodes[3], lsize, ncompu = 1, ksp_max_it_clip[2]; 426 PetscScalar *r; 427 PetscBool test_mode, benchmark_mode, write_solution; 428 PetscMPIInt size, rank; 429 PetscLogStage solvestage; 430 Vec X, Xloc, rhs, rhsloc; 431 Mat mat; 432 KSP ksp; 433 VecScatter ltog, ltog0, gtogD; 434 User user; 435 Ceed ceed; 436 CeedBasis basisx, basisu; 437 CeedElemRestriction Erestrictx, Erestrictu, Erestrictui, Erestrictqdi; 438 CeedQFunction qfsetupgeo, qfsetuprhs, qfapply, qferror; 439 CeedOperator opsetupgeo, opsetuprhs, opapply, operror; 440 CeedVector xcoord, qdata, rhsceed, target; 441 CeedMemType memtyperequested; 442 CeedInt P, Q; 443 const CeedInt dim = 3, ncompx = 3; 444 bpType bpchoice; 445 446 // Check for PETSc CUDA availability 447 PetscBool petschavecuda, setmemtyperequest = PETSC_FALSE; 448 // *INDENT-OFF* 449 #ifdef PETSC_HAVE_CUDA 450 petschavecuda = PETSC_TRUE; 451 #else 452 petschavecuda = PETSC_FALSE; 453 #endif 454 // *INDENT-ON* 455 456 ierr = PetscInitialize(&argc, &argv, NULL, help); 457 if (ierr) return ierr; 458 comm = PETSC_COMM_WORLD; 459 460 // Read command line options 461 ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); CHKERRQ(ierr); 462 bpchoice = CEED_BP1; 463 ierr = PetscOptionsEnum("-problem", 464 "CEED benchmark problem to solve", NULL, 465 bpTypes, (PetscEnum)bpchoice, (PetscEnum *)&bpchoice, 466 NULL); CHKERRQ(ierr); 467 ncompu = bpOptions[bpchoice].ncompu; 468 test_mode = PETSC_FALSE; 469 ierr = PetscOptionsBool("-test", 470 "Testing mode (do not print unless error is large)", 471 NULL, test_mode, &test_mode, NULL); CHKERRQ(ierr); 472 benchmark_mode = PETSC_FALSE; 473 ierr = PetscOptionsBool("-benchmark", 474 "Benchmarking mode (prints benchmark statistics)", 475 NULL, benchmark_mode, 
&benchmark_mode, NULL); 476 CHKERRQ(ierr); 477 write_solution = PETSC_FALSE; 478 ierr = PetscOptionsBool("-write_solution", 479 "Write solution for visualization", 480 NULL, write_solution, &write_solution, NULL); 481 CHKERRQ(ierr); 482 degree = test_mode ? 3 : 1; 483 ierr = PetscOptionsInt("-degree", "Polynomial degree of tensor product basis", 484 NULL, degree, °ree, NULL); CHKERRQ(ierr); 485 qextra = bpOptions[bpchoice].qextra; 486 ierr = PetscOptionsInt("-qextra", "Number of extra quadrature points", 487 NULL, qextra, &qextra, NULL); CHKERRQ(ierr); 488 ierr = PetscOptionsString("-ceed", "CEED resource specifier", 489 NULL, ceedresource, ceedresource, 490 sizeof(ceedresource), NULL); CHKERRQ(ierr); 491 localnodes = 1000; 492 ierr = PetscOptionsInt("-local", 493 "Target number of locally owned nodes per process", 494 NULL, localnodes, &localnodes, NULL); CHKERRQ(ierr); 495 PetscInt two = 2; 496 ksp_max_it_clip[0] = 5; 497 ksp_max_it_clip[1] = 20; 498 ierr = PetscOptionsIntArray("-ksp_max_it_clip", 499 "Min and max number of iterations to use during benchmarking", 500 NULL, ksp_max_it_clip, &two, NULL); 501 CHKERRQ(ierr); 502 memtyperequested = petschavecuda ? CEED_MEM_DEVICE : CEED_MEM_HOST; 503 ierr = PetscOptionsEnum("-memtype", 504 "CEED MemType requested", NULL, 505 memTypes, (PetscEnum)memtyperequested, 506 (PetscEnum *)&memtyperequested, &setmemtyperequest); 507 CHKERRQ(ierr); 508 ierr = PetscOptionsEnd(); CHKERRQ(ierr); 509 P = degree + 1; 510 Q = P + qextra; 511 512 // Set up libCEED 513 CeedInit(ceedresource, &ceed); 514 CeedMemType memtypebackend; 515 CeedGetPreferredMemType(ceed, &memtypebackend); 516 517 // Check memtype compatibility 518 if (!setmemtyperequest) 519 memtyperequested = memtypebackend; 520 else if (!petschavecuda && memtyperequested == CEED_MEM_DEVICE) 521 SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_SUP_SYS, 522 "PETSc was not built with CUDA. 
" 523 "Requested MemType CEED_MEM_DEVICE is not supported.", NULL); 524 525 // Determine size of process grid 526 ierr = MPI_Comm_size(comm, &size); CHKERRQ(ierr); 527 Split3(size, p, false); 528 529 // Find a nicely composite number of elements no less than localnodes 530 for (localelem = PetscMax(1, localnodes / (degree*degree*degree)); ; 531 localelem++) { 532 Split3(localelem, melem, true); 533 if (Max3(melem) / Min3(melem) <= 2) break; 534 } 535 536 // Find my location in the process grid 537 ierr = MPI_Comm_rank(comm, &rank); CHKERRQ(ierr); 538 for (int d=0,rankleft=rank; d<dim; d++) { 539 const int pstride[3] = {p[1] *p[2], p[2], 1}; 540 irank[d] = rankleft / pstride[d]; 541 rankleft -= irank[d] * pstride[d]; 542 } 543 544 GlobalNodes(p, irank, degree, melem, mnodes); 545 546 // Setup global vector 547 ierr = VecCreate(comm, &X); CHKERRQ(ierr); 548 if (memtyperequested == CEED_MEM_DEVICE) { 549 ierr = VecSetType(X, VECCUDA); CHKERRQ(ierr); 550 } 551 ierr = VecSetSizes(X, mnodes[0]*mnodes[1]*mnodes[2]*ncompu, PETSC_DECIDE); 552 CHKERRQ(ierr); 553 ierr = VecSetUp(X); CHKERRQ(ierr); 554 555 // Set up libCEED 556 CeedInit(ceedresource, &ceed); 557 558 // Print summary 559 CeedInt gsize; 560 ierr = VecGetSize(X, &gsize); CHKERRQ(ierr); 561 if (!test_mode) { 562 const char *usedresource; 563 CeedGetResource(ceed, &usedresource); 564 565 VecType vectype; 566 ierr = VecGetType(X, &vectype); CHKERRQ(ierr); 567 568 ierr = PetscPrintf(comm, 569 "\n-- CEED Benchmark Problem %d -- libCEED + PETSc --\n" 570 " PETSc:\n" 571 " PETSc Vec Type : %s\n" 572 " libCEED:\n" 573 " libCEED Backend : %s\n" 574 " libCEED Backend MemType : %s\n" 575 " libCEED User Requested MemType : %s\n" 576 " Mesh:\n" 577 " Number of 1D Basis Nodes (P) : %d\n" 578 " Number of 1D Quadrature Points (Q) : %d\n" 579 " Global nodes : %D\n" 580 " Process Decomposition : %D %D %D\n" 581 " Local Elements : %D = %D %D %D\n" 582 " Owned nodes : %D = %D %D %D\n" 583 " DoF per node : %D\n", 584 bpchoice+1, 
vectype, usedresource, 585 CeedMemTypes[memtypebackend], 586 (setmemtyperequest) ? 587 CeedMemTypes[memtyperequested] : "none", 588 P, Q, gsize/ncompu, p[0], p[1], p[2], localelem, 589 melem[0], melem[1], melem[2], 590 mnodes[0]*mnodes[1]*mnodes[2], mnodes[0], mnodes[1], 591 mnodes[2], ncompu); CHKERRQ(ierr); 592 } 593 594 { 595 lsize = 1; 596 for (int d=0; d<dim; d++) { 597 lnodes[d] = melem[d]*degree + 1; 598 lsize *= lnodes[d]; 599 } 600 ierr = VecCreate(PETSC_COMM_SELF, &Xloc); CHKERRQ(ierr); 601 if (memtyperequested == CEED_MEM_DEVICE) { 602 ierr = VecSetType(Xloc, VECCUDA); CHKERRQ(ierr); 603 } 604 ierr = VecSetSizes(Xloc, lsize*ncompu, PETSC_DECIDE); CHKERRQ(ierr); 605 ierr = VecSetUp(Xloc); CHKERRQ(ierr); 606 607 // Create local-to-global scatter 608 PetscInt *ltogind, *ltogind0, *locind, l0count; 609 IS ltogis, ltogis0, locis; 610 PetscInt gstart[2][2][2], gmnodes[2][2][2][dim]; 611 612 for (int i=0; i<2; i++) { 613 for (int j=0; j<2; j++) { 614 for (int k=0; k<2; k++) { 615 PetscInt ijkrank[3] = {irank[0]+i, irank[1]+j, irank[2]+k}; 616 gstart[i][j][k] = GlobalStart(p, ijkrank, degree, melem); 617 GlobalNodes(p, ijkrank, degree, melem, gmnodes[i][j][k]); 618 } 619 } 620 } 621 622 ierr = PetscMalloc1(lsize, <ogind); CHKERRQ(ierr); 623 ierr = PetscMalloc1(lsize, <ogind0); CHKERRQ(ierr); 624 ierr = PetscMalloc1(lsize, &locind); CHKERRQ(ierr); 625 l0count = 0; 626 for (PetscInt i=0,ir,ii; ir=i>=mnodes[0], ii=i-ir*mnodes[0], i<lnodes[0]; i++) 627 for (PetscInt j=0,jr,jj; jr=j>=mnodes[1], jj=j-jr*mnodes[1], j<lnodes[1]; j++) 628 for (PetscInt k=0,kr,kk; kr=k>=mnodes[2], kk=k-kr*mnodes[2], k<lnodes[2]; k++) { 629 PetscInt here = (i*lnodes[1]+j)*lnodes[2]+k; 630 ltogind[here] = 631 gstart[ir][jr][kr] + (ii*gmnodes[ir][jr][kr][1]+jj)*gmnodes[ir][jr][kr][2]+kk; 632 if ((irank[0] == 0 && i == 0) 633 || (irank[1] == 0 && j == 0) 634 || (irank[2] == 0 && k == 0) 635 || (irank[0]+1 == p[0] && i+1 == lnodes[0]) 636 || (irank[1]+1 == p[1] && j+1 == lnodes[1]) 637 || 
(irank[2]+1 == p[2] && k+1 == lnodes[2])) 638 continue; 639 ltogind0[l0count] = ltogind[here]; 640 locind[l0count++] = here; 641 } 642 ierr = ISCreateBlock(comm, ncompu, lsize, ltogind, PETSC_OWN_POINTER, 643 <ogis); CHKERRQ(ierr); 644 ierr = VecScatterCreate(Xloc, NULL, X, ltogis, <og); CHKERRQ(ierr); 645 CHKERRQ(ierr); 646 ierr = ISCreateBlock(comm, ncompu, l0count, ltogind0, PETSC_OWN_POINTER, 647 <ogis0); CHKERRQ(ierr); 648 ierr = ISCreateBlock(comm, ncompu, l0count, locind, PETSC_OWN_POINTER, 649 &locis); CHKERRQ(ierr); 650 ierr = VecScatterCreate(Xloc, locis, X, ltogis0, <og0); CHKERRQ(ierr); 651 { 652 // Create global-to-global scatter for Dirichlet values (everything not in 653 // ltogis0, which is the range of ltog0) 654 PetscInt xstart, xend, *indD, countD = 0; 655 IS isD; 656 const PetscScalar *x; 657 ierr = VecZeroEntries(Xloc); CHKERRQ(ierr); 658 ierr = VecSet(X, 1.0); CHKERRQ(ierr); 659 ierr = VecScatterBegin(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD); 660 CHKERRQ(ierr); 661 ierr = VecScatterEnd(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD); 662 CHKERRQ(ierr); 663 ierr = VecGetOwnershipRange(X, &xstart, &xend); CHKERRQ(ierr); 664 ierr = PetscMalloc1(xend-xstart, &indD); CHKERRQ(ierr); 665 ierr = VecGetArrayRead(X, &x); CHKERRQ(ierr); 666 for (PetscInt i=0; i<xend-xstart; i++) { 667 if (x[i] == 1.) 
indD[countD++] = xstart + i; 668 } 669 ierr = VecRestoreArrayRead(X, &x); CHKERRQ(ierr); 670 ierr = ISCreateGeneral(comm, countD, indD, PETSC_COPY_VALUES, &isD); 671 CHKERRQ(ierr); 672 ierr = PetscFree(indD); CHKERRQ(ierr); 673 ierr = VecScatterCreate(X, isD, X, isD, >ogD); CHKERRQ(ierr); 674 ierr = ISDestroy(&isD); CHKERRQ(ierr); 675 } 676 ierr = ISDestroy(<ogis); CHKERRQ(ierr); 677 ierr = ISDestroy(<ogis0); CHKERRQ(ierr); 678 ierr = ISDestroy(&locis); CHKERRQ(ierr); 679 } 680 681 // CEED bases 682 CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompu, P, Q, 683 bpOptions[bpchoice].qmode, &basisu); 684 CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompx, 2, Q, 685 bpOptions[bpchoice].qmode, &basisx); 686 687 // CEED restrictions 688 CreateRestriction(ceed, melem, P, ncompu, &Erestrictu); 689 CreateRestriction(ceed, melem, 2, dim, &Erestrictx); 690 CeedInt nelem = melem[0]*melem[1]*melem[2]; 691 CeedElemRestrictionCreateStrided(ceed, nelem, Q*Q*Q, ncompu, 692 ncompu*nelem*Q*Q*Q, 693 CEED_STRIDES_BACKEND, &Erestrictui); 694 CeedElemRestrictionCreateStrided(ceed, nelem, Q*Q*Q, 695 bpOptions[bpchoice].qdatasize, 696 bpOptions[bpchoice].qdatasize*nelem*Q*Q*Q, 697 CEED_STRIDES_BACKEND, &Erestrictqdi); 698 { 699 CeedScalar *xloc; 700 CeedInt shape[3] = {melem[0]+1, melem[1]+1, melem[2]+1}, len = 701 shape[0]*shape[1]*shape[2]; 702 xloc = malloc(len*ncompx*sizeof xloc[0]); 703 for (CeedInt i=0; i<shape[0]; i++) { 704 for (CeedInt j=0; j<shape[1]; j++) { 705 for (CeedInt k=0; k<shape[2]; k++) { 706 xloc[dim*((i*shape[1]+j)*shape[2]+k) + 0] = 1.*(irank[0]*melem[0]+i) / 707 (p[0]*melem[0]); 708 xloc[dim*((i*shape[1]+j)*shape[2]+k) + 1] = 1.*(irank[1]*melem[1]+j) / 709 (p[1]*melem[1]); 710 xloc[dim*((i*shape[1]+j)*shape[2]+k) + 2] = 1.*(irank[2]*melem[2]+k) / 711 (p[2]*melem[2]); 712 } 713 } 714 } 715 CeedVectorCreate(ceed, len*ncompx, &xcoord); 716 CeedVectorSetArray(xcoord, CEED_MEM_HOST, CEED_OWN_POINTER, xloc); 717 } 718 719 // Create the Qfunction that builds the operator 
quadrature data 720 CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpchoice].setupgeo, 721 bpOptions[bpchoice].setupgeofname, &qfsetupgeo); 722 CeedQFunctionAddInput(qfsetupgeo, "dx", ncompx*dim, CEED_EVAL_GRAD); 723 CeedQFunctionAddInput(qfsetupgeo, "weight", 1, CEED_EVAL_WEIGHT); 724 CeedQFunctionAddOutput(qfsetupgeo, "qdata", bpOptions[bpchoice].qdatasize, 725 CEED_EVAL_NONE); 726 727 // Create the Qfunction that sets up the RHS and true solution 728 CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpchoice].setuprhs, 729 bpOptions[bpchoice].setuprhsfname, &qfsetuprhs); 730 CeedQFunctionAddInput(qfsetuprhs, "x", ncompx, CEED_EVAL_INTERP); 731 CeedQFunctionAddInput(qfsetuprhs, "dx", ncompx*dim, CEED_EVAL_GRAD); 732 CeedQFunctionAddInput(qfsetuprhs, "weight", 1, CEED_EVAL_WEIGHT); 733 CeedQFunctionAddOutput(qfsetuprhs, "true_soln", ncompu, CEED_EVAL_NONE); 734 CeedQFunctionAddOutput(qfsetuprhs, "rhs", ncompu, CEED_EVAL_INTERP); 735 736 // Set up PDE operator 737 CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpchoice].apply, 738 bpOptions[bpchoice].applyfname, &qfapply); 739 // Add inputs and outputs 740 CeedInt inscale = bpOptions[bpchoice].inmode==CEED_EVAL_GRAD ? 3 : 1; 741 CeedInt outscale = bpOptions[bpchoice].outmode==CEED_EVAL_GRAD ? 
3 : 1; 742 CeedQFunctionAddInput(qfapply, "u", ncompu*inscale, 743 bpOptions[bpchoice].inmode); 744 CeedQFunctionAddInput(qfapply, "qdata", bpOptions[bpchoice].qdatasize, 745 CEED_EVAL_NONE); 746 CeedQFunctionAddOutput(qfapply, "v", ncompu*outscale, 747 bpOptions[bpchoice].outmode); 748 749 // Create the error qfunction 750 CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpchoice].error, 751 bpOptions[bpchoice].errorfname, &qferror); 752 CeedQFunctionAddInput(qferror, "u", ncompu, CEED_EVAL_INTERP); 753 CeedQFunctionAddInput(qferror, "true_soln", ncompu, CEED_EVAL_NONE); 754 CeedQFunctionAddOutput(qferror, "error", ncompu, CEED_EVAL_NONE); 755 756 // Create the persistent vectors that will be needed in setup 757 CeedInt nqpts; 758 CeedBasisGetNumQuadraturePoints(basisu, &nqpts); 759 CeedVectorCreate(ceed, bpOptions[bpchoice].qdatasize*nelem*nqpts, &qdata); 760 CeedVectorCreate(ceed, nelem*nqpts*ncompu, &target); 761 CeedVectorCreate(ceed, lsize*ncompu, &rhsceed); 762 763 // Create the operator that builds the quadrature data for the ceed operator 764 CeedOperatorCreate(ceed, qfsetupgeo, CEED_QFUNCTION_NONE, 765 CEED_QFUNCTION_NONE, &opsetupgeo); 766 CeedOperatorSetField(opsetupgeo, "dx", Erestrictx, basisx, 767 CEED_VECTOR_ACTIVE); 768 CeedOperatorSetField(opsetupgeo, "weight", CEED_ELEMRESTRICTION_NONE, basisx, 769 CEED_VECTOR_NONE); 770 CeedOperatorSetField(opsetupgeo, "qdata", Erestrictqdi, 771 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 772 773 // Create the operator that builds the RHS and true solution 774 CeedOperatorCreate(ceed, qfsetuprhs, CEED_QFUNCTION_NONE, 775 CEED_QFUNCTION_NONE, &opsetuprhs); 776 CeedOperatorSetField(opsetuprhs, "x", Erestrictx, basisx, 777 CEED_VECTOR_ACTIVE); 778 CeedOperatorSetField(opsetuprhs, "dx", Erestrictx, basisx, 779 CEED_VECTOR_ACTIVE); 780 CeedOperatorSetField(opsetuprhs, "weight", CEED_ELEMRESTRICTION_NONE, basisx, 781 CEED_VECTOR_NONE); 782 CeedOperatorSetField(opsetuprhs, "true_soln", Erestrictui, 783 
CEED_BASIS_COLLOCATED, target); 784 CeedOperatorSetField(opsetuprhs, "rhs", Erestrictu, basisu, 785 CEED_VECTOR_ACTIVE); 786 787 // Create the mass or diff operator 788 CeedOperatorCreate(ceed, qfapply, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, 789 &opapply); 790 CeedOperatorSetField(opapply, "u", Erestrictu, basisu, CEED_VECTOR_ACTIVE); 791 CeedOperatorSetField(opapply, "qdata", Erestrictqdi, CEED_BASIS_COLLOCATED, 792 qdata); 793 CeedOperatorSetField(opapply, "v", Erestrictu, basisu, CEED_VECTOR_ACTIVE); 794 795 // Create the error operator 796 CeedOperatorCreate(ceed, qferror, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, 797 &operror); 798 CeedOperatorSetField(operror, "u", Erestrictu, basisu, CEED_VECTOR_ACTIVE); 799 CeedOperatorSetField(operror, "true_soln", Erestrictui, 800 CEED_BASIS_COLLOCATED, target); 801 CeedOperatorSetField(operror, "error", Erestrictui, CEED_BASIS_COLLOCATED, 802 CEED_VECTOR_ACTIVE); 803 804 // Set up Mat 805 ierr = PetscMalloc1(1, &user); CHKERRQ(ierr); 806 user->comm = comm; 807 user->ltog = ltog; 808 if (bpchoice != CEED_BP1 && bpchoice != CEED_BP2) { 809 user->ltog0 = ltog0; 810 user->gtogD = gtogD; 811 } 812 user->Xloc = Xloc; 813 ierr = VecDuplicate(Xloc, &user->Yloc); CHKERRQ(ierr); 814 CeedVectorCreate(ceed, lsize*ncompu, &user->xceed); 815 CeedVectorCreate(ceed, lsize*ncompu, &user->yceed); 816 user->op = opapply; 817 user->qdata = qdata; 818 user->ceed = ceed; 819 user->memtype = memtyperequested; 820 if (memtyperequested == CEED_MEM_HOST) { 821 user->VecGetArray = VecGetArray; 822 user->VecGetArrayRead = VecGetArrayRead; 823 user->VecRestoreArray = VecRestoreArray; 824 user->VecRestoreArrayRead = VecRestoreArrayRead; 825 } else { 826 user->VecGetArray = VecCUDAGetArray; 827 user->VecGetArrayRead = VecCUDAGetArrayRead; 828 user->VecRestoreArray = VecCUDARestoreArray; 829 user->VecRestoreArrayRead = VecCUDARestoreArrayRead; 830 } 831 832 ierr = MatCreateShell(comm, mnodes[0]*mnodes[1]*mnodes[2]*ncompu, 833 
mnodes[0]*mnodes[1]*mnodes[2]*ncompu, 834 PETSC_DECIDE, PETSC_DECIDE, user, &mat); CHKERRQ(ierr); 835 if (bpchoice == CEED_BP1 || bpchoice == CEED_BP2) { 836 ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Mass); 837 CHKERRQ(ierr); 838 } else { 839 ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Diff); 840 CHKERRQ(ierr); 841 } 842 if (user->memtype == CEED_MEM_DEVICE) { 843 ierr = MatShellSetVecType(mat, VECCUDA); CHKERRQ(ierr); 844 } 845 846 // Get RHS vector 847 ierr = VecDuplicate(X, &rhs); CHKERRQ(ierr); 848 ierr = VecDuplicate(Xloc, &rhsloc); CHKERRQ(ierr); 849 ierr = VecZeroEntries(rhsloc); CHKERRQ(ierr); 850 ierr = user->VecGetArray(rhsloc, &r); CHKERRQ(ierr); 851 CeedVectorSetArray(rhsceed, user->memtype, CEED_USE_POINTER, r); 852 853 // Setup qdata, rhs, and target 854 CeedOperatorApply(opsetupgeo, xcoord, qdata, CEED_REQUEST_IMMEDIATE); 855 CeedOperatorApply(opsetuprhs, xcoord, rhsceed, CEED_REQUEST_IMMEDIATE); 856 CeedVectorDestroy(&xcoord); 857 858 // Gather RHS 859 ierr = CeedVectorTakeArray(rhsceed, user->memtype, NULL); CHKERRQ(ierr); 860 ierr = user->VecRestoreArray(rhsloc, &r); CHKERRQ(ierr); 861 ierr = VecZeroEntries(rhs); CHKERRQ(ierr); 862 ierr = VecScatterBegin(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD); 863 CHKERRQ(ierr); 864 ierr = VecScatterEnd(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD); 865 CHKERRQ(ierr); 866 CeedVectorDestroy(&rhsceed); 867 868 ierr = KSPCreate(comm, &ksp); CHKERRQ(ierr); 869 { 870 PC pc; 871 ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); 872 if (bpchoice == CEED_BP1 || bpchoice == CEED_BP2) { 873 ierr = PCSetType(pc, PCJACOBI); CHKERRQ(ierr); 874 ierr = PCJacobiSetType(pc, PC_JACOBI_ROWSUM); CHKERRQ(ierr); 875 } else { 876 ierr = PCSetType(pc, PCNONE); CHKERRQ(ierr); 877 } 878 ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr); 879 ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL); CHKERRQ(ierr); 880 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 881 PETSC_DEFAULT); 
CHKERRQ(ierr); 882 } 883 ierr = KSPSetOperators(ksp, mat, mat); CHKERRQ(ierr); 884 // First run's performance log is not considered for benchmarking purposes 885 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1); 886 CHKERRQ(ierr); 887 my_rt_start = MPI_Wtime(); 888 ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 889 my_rt = MPI_Wtime() - my_rt_start; 890 ierr = MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, comm); 891 CHKERRQ(ierr); 892 // Set maxits based on first iteration timing 893 if (my_rt > 0.02) { 894 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 895 ksp_max_it_clip[0]); CHKERRQ(ierr); 896 } else { 897 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 898 ksp_max_it_clip[1]); CHKERRQ(ierr); 899 } 900 ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); 901 902 // Timed solve 903 ierr = VecZeroEntries(X); CHKERRQ(ierr); 904 ierr = PetscBarrier((PetscObject)ksp); CHKERRQ(ierr); 905 906 // -- Performance logging 907 ierr = PetscLogStageRegister("Solve Stage", &solvestage); CHKERRQ(ierr); 908 ierr = PetscLogStagePush(solvestage); CHKERRQ(ierr); 909 910 // -- Solve 911 my_rt_start = MPI_Wtime(); 912 ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 913 my_rt = MPI_Wtime() - my_rt_start; 914 915 // -- Performance logging 916 ierr = PetscLogStagePop(); 917 918 // Output results 919 { 920 KSPType ksptype; 921 KSPConvergedReason reason; 922 PetscReal rnorm; 923 PetscInt its; 924 ierr = KSPGetType(ksp, &ksptype); CHKERRQ(ierr); 925 ierr = KSPGetConvergedReason(ksp, &reason); CHKERRQ(ierr); 926 ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); 927 ierr = KSPGetResidualNorm(ksp, &rnorm); CHKERRQ(ierr); 928 if (!test_mode || reason < 0 || rnorm > 1e-9) { 929 ierr = PetscPrintf(comm, 930 " KSP:\n" 931 " KSP Type : %s\n" 932 " KSP Convergence : %s\n" 933 " Total KSP Iterations : %D\n" 934 " Final rnorm : %e\n", 935 ksptype, KSPConvergedReasons[reason], its, 936 (double)rnorm); CHKERRQ(ierr); 937 } 938 if 
(!test_mode) { 939 ierr = PetscPrintf(comm," Performance:\n"); CHKERRQ(ierr); 940 } 941 { 942 PetscReal maxerror; 943 ierr = ComputeErrorMax(user, operror, X, target, &maxerror); 944 CHKERRQ(ierr); 945 PetscReal tol = 5e-2; 946 if (!test_mode || maxerror > tol) { 947 ierr = MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, comm); 948 CHKERRQ(ierr); 949 ierr = MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, comm); 950 CHKERRQ(ierr); 951 ierr = PetscPrintf(comm, 952 " Pointwise Error (max) : %e\n" 953 " CG Solve Time : %g (%g) sec\n", 954 (double)maxerror, rt_max, rt_min); CHKERRQ(ierr); 955 } 956 } 957 if (!test_mode) { 958 ierr = PetscPrintf(comm, 959 " DoFs/Sec in CG : %g (%g) million\n", 960 1e-6*gsize*its/rt_max, 961 1e-6*gsize*its/rt_min); CHKERRQ(ierr); 962 } 963 } 964 965 if (write_solution) { 966 PetscViewer vtkviewersoln; 967 968 ierr = PetscViewerCreate(comm, &vtkviewersoln); CHKERRQ(ierr); 969 ierr = PetscViewerSetType(vtkviewersoln, PETSCVIEWERVTK); CHKERRQ(ierr); 970 ierr = PetscViewerFileSetName(vtkviewersoln, "solution.vtk"); CHKERRQ(ierr); 971 ierr = VecView(X, vtkviewersoln); CHKERRQ(ierr); 972 ierr = PetscViewerDestroy(&vtkviewersoln); CHKERRQ(ierr); 973 } 974 975 ierr = VecDestroy(&rhs); CHKERRQ(ierr); 976 ierr = VecDestroy(&rhsloc); CHKERRQ(ierr); 977 ierr = VecDestroy(&X); CHKERRQ(ierr); 978 ierr = VecDestroy(&user->Xloc); CHKERRQ(ierr); 979 ierr = VecDestroy(&user->Yloc); CHKERRQ(ierr); 980 ierr = VecScatterDestroy(&ltog); CHKERRQ(ierr); 981 ierr = VecScatterDestroy(&ltog0); CHKERRQ(ierr); 982 ierr = VecScatterDestroy(&gtogD); CHKERRQ(ierr); 983 ierr = MatDestroy(&mat); CHKERRQ(ierr); 984 ierr = KSPDestroy(&ksp); CHKERRQ(ierr); 985 986 CeedVectorDestroy(&user->xceed); 987 CeedVectorDestroy(&user->yceed); 988 CeedVectorDestroy(&user->qdata); 989 CeedVectorDestroy(&target); 990 CeedOperatorDestroy(&opsetupgeo); 991 CeedOperatorDestroy(&opsetuprhs); 992 CeedOperatorDestroy(&opapply); 993 CeedOperatorDestroy(&operror); 994 
CeedElemRestrictionDestroy(&Erestrictu); 995 CeedElemRestrictionDestroy(&Erestrictx); 996 CeedElemRestrictionDestroy(&Erestrictui); 997 CeedElemRestrictionDestroy(&Erestrictqdi); 998 CeedQFunctionDestroy(&qfsetupgeo); 999 CeedQFunctionDestroy(&qfsetuprhs); 1000 CeedQFunctionDestroy(&qfapply); 1001 CeedQFunctionDestroy(&qferror); 1002 CeedBasisDestroy(&basisu); 1003 CeedBasisDestroy(&basisx); 1004 CeedDestroy(&ceed); 1005 ierr = PetscFree(user); CHKERRQ(ierr); 1006 return PetscFinalize(); 1007 } 1008