1 // Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2 // the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3 // reserved. See files LICENSE and NOTICE for details. 4 // 5 // This file is part of CEED, a collection of benchmarks, miniapps, software 6 // libraries and APIs for efficient high-order finite element and spectral 7 // element discretizations for exascale applications. For more information and 8 // source code availability see http://github.com/ceed. 9 // 10 // The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 11 // a collaborative effort of two U.S. Department of Energy organizations (Office 12 // of Science and the National Nuclear Security Administration) responsible for 13 // the planning and preparation of a capable exascale ecosystem, including 14 // software, applications, hardware, advanced system engineering and early 15 // testbed platforms, in support of the nation's exascale computing imperative. 16 17 // libCEED + PETSc Example: CEED BPs 18 // 19 // This example demonstrates a simple usage of libCEED with PETSc to solve the 20 // CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps. 21 // 22 // The code is intentionally "raw", using only low-level communication 23 // primitives. 24 // 25 // Build with: 26 // 27 // make bps [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>] 28 // 29 // Sample runs: 30 // 31 // bps -problem bp1 32 // bps -problem bp2 -ceed /cpu/self 33 // bps -problem bp3 -ceed /gpu/occa 34 // bps -problem bp4 -ceed /cpu/occa 35 // bps -problem bp5 -ceed /omp/occa 36 // bps -problem bp6 -ceed /ocl/occa 37 // 38 //TESTARGS -ceed {ceed_resource} -test -problem bp2 39 40 /// @file 41 /// CEED BPs example using PETSc 42 /// See bpsdmplex.c for an implementation using DMPlex unstructured grids. 43 const char help[] = "Solve CEED BPs using PETSc\n"; 44 45 #include <stdbool.h> 46 #include <string.h> 47 #include <petscksp.h> 48 #include <ceed.h> 49 #include "common.h" 50 #include "bp1.h" 51 #include "bp2.h" 52 #include "bp3.h" 53 #include "bp4.h" 54 55 static void Split3(PetscInt size, PetscInt m[3], bool reverse) { 56 for (PetscInt d=0,sizeleft=size; d<3; d++) { 57 PetscInt try = (PetscInt)PetscCeilReal(PetscPowReal(sizeleft, 1./(3 - d))); 58 while (try * (sizeleft / try) != sizeleft) try++; 59 m[reverse ? 2-d : d] = try; 60 sizeleft /= try; 61 } 62 } 63 64 static PetscInt Max3(const PetscInt a[3]) { 65 return PetscMax(a[0], PetscMax(a[1], a[2])); 66 } 67 static PetscInt Min3(const PetscInt a[3]) { 68 return PetscMin(a[0], PetscMin(a[1], a[2])); 69 } 70 static void GlobalNodes(const PetscInt p[3], const PetscInt irank[3], 71 PetscInt degree, const PetscInt melem[3], 72 PetscInt mnodes[3]) { 73 for (int d=0; d<3; d++) 74 mnodes[d] = degree*melem[d] + (irank[d] == p[d]-1); 75 } 76 static PetscInt GlobalStart(const PetscInt p[3], const PetscInt irank[3], 77 PetscInt degree, const PetscInt melem[3]) { 78 PetscInt start = 0; 79 // Dumb brute-force is easier to read 80 for (PetscInt i=0; i<p[0]; i++) { 81 for (PetscInt j=0; j<p[1]; j++) { 82 for (PetscInt k=0; k<p[2]; k++) { 83 PetscInt mnodes[3], ijkrank[] = {i,j,k}; 84 if (i == irank[0] && j == irank[1] && k == irank[2]) return start; 85 GlobalNodes(p, ijkrank, degree, melem, mnodes); 86 start += mnodes[0] * mnodes[1] * mnodes[2]; 87 } 88 } 89 } 90 return -1; 91 } 92 static int CreateRestriction(Ceed ceed, const CeedInt melem[3], 93 CeedInt P, CeedInt ncomp, 94 CeedElemRestriction *Erestrict) { 95 const PetscInt nelem = melem[0]*melem[1]*melem[2]; 96 PetscInt mnodes[3], *idx, *idxp; 97 98 // Get indicies 99 for (int d=0; d<3; d++) mnodes[d] = melem[d]*(P-1) + 1; 100 idxp = idx = malloc(nelem*P*P*P*sizeof idx[0]); 101 for (CeedInt i=0; i<melem[0]; i++) { 102 for (CeedInt j=0; j<melem[1]; j++) { 103 for (CeedInt k=0; k<melem[2]; k++,idxp += P*P*P) { 104 for (CeedInt ii=0; ii<P; ii++) { 105 for (CeedInt jj=0; jj<P; jj++) { 106 for (CeedInt kk=0; kk<P; kk++) { 107 if (0) { // This is the C-style (i,j,k) ordering that I prefer 108 idxp[(ii*P+jj)*P+kk] = (((i*(P-1)+ii)*mnodes[1] 109 + (j*(P-1)+jj))*mnodes[2] 110 + (k*(P-1)+kk)); 111 } else { // (k,j,i) ordering for consistency with MFEM example 112 idxp[ii+P*(jj+P*kk)] = (((i*(P-1)+ii)*mnodes[1] 113 + (j*(P-1)+jj))*mnodes[2] 114 + (k*(P-1)+kk)); 115 } 116 } 117 } 118 } 119 } 120 } 121 } 122 123 // Setup CEED restriction 124 CeedElemRestrictionCreate(ceed, nelem, P*P*P, mnodes[0]*mnodes[1]*mnodes[2], 125 ncomp, 126 CEED_MEM_HOST, CEED_OWN_POINTER, idx, Erestrict); 127 128 PetscFunctionReturn(0); 129 } 130 131 // Data for PETSc 132 typedef struct User_ *User; 133 struct User_ { 134 MPI_Comm comm; 135 VecScatter ltog; // Scatter for all entries 136 VecScatter ltog0; // Skip Dirichlet values 137 VecScatter gtogD; // global-to-global; only Dirichlet values 138 Vec Xloc, Yloc; 139 CeedVector xceed, yceed; 140 CeedOperator op; 141 CeedVector qdata; 142 Ceed ceed; 143 }; 144 145 // BP Options 146 typedef enum { 147 CEED_BP1 = 0, CEED_BP2 = 1, CEED_BP3 = 2, 148 CEED_BP4 = 3, CEED_BP5 = 4, CEED_BP6 = 5 149 } bpType; 150 static const char *const bpTypes[] = {"bp1","bp2","bp3","bp4","bp5","bp6", 151 "bpType","CEED_BP",0 152 }; 153 154 // BP specific data 155 typedef struct { 156 CeedInt ncompu, qdatasize, qextra; 157 CeedQFunctionUser setup, apply, error; 158 const char *setupfname, *applyfname, *errorfname; 159 CeedEvalMode inmode, outmode; 160 CeedQuadMode qmode; 161 } bpData; 162 163 bpData bpOptions[6] = { 164 [CEED_BP1] = { 165 .ncompu = 1, 166 .qdatasize = 1, 167 .qextra = 1, 168 .setup = SetupMass, 169 .apply = Mass, 170 .error = Error, 171 .setupfname = SetupMass_loc, 172 .applyfname = Mass_loc, 173 .errorfname = Error_loc, 174 .inmode = CEED_EVAL_INTERP, 175 .outmode = CEED_EVAL_INTERP, 176 .qmode = CEED_GAUSS 177 }, 178 [CEED_BP2] = { 179 .ncompu = 3, 180 .qdatasize = 1, 181 .qextra = 1, 182 .setup = SetupMass3, 183 .apply = Mass3, 184 .error = Error3, 185 .setupfname = SetupMass3_loc, 186 .applyfname = Mass3_loc, 187 .errorfname = Error3_loc, 188 .inmode = CEED_EVAL_INTERP, 189 .outmode = CEED_EVAL_INTERP, 190 .qmode = CEED_GAUSS 191 }, 192 [CEED_BP3] = { 193 .ncompu = 1, 194 .qdatasize = 6, 195 .qextra = 1, 196 .setup = SetupDiff, 197 .apply = Diff, 198 .error = Error, 199 .setupfname = SetupDiff_loc, 200 .applyfname = Diff_loc, 201 .errorfname = Error_loc, 202 .inmode = CEED_EVAL_GRAD, 203 .outmode = CEED_EVAL_GRAD, 204 .qmode = CEED_GAUSS 205 }, 206 [CEED_BP4] = { 207 .ncompu = 3, 208 .qdatasize = 6, 209 .qextra = 1, 210 .setup = SetupDiff3, 211 .apply = Diff3, 212 .error = Error3, 213 .setupfname = SetupDiff3_loc, 214 .applyfname = Diff3_loc, 215 .errorfname = Error3_loc, 216 .inmode = CEED_EVAL_GRAD, 217 .outmode = CEED_EVAL_GRAD, 218 .qmode = CEED_GAUSS 219 }, 220 [CEED_BP5] = { 221 .ncompu = 1, 222 .qdatasize = 6, 223 .qextra = 0, 224 .setup = SetupDiff, 225 .apply = Diff, 226 .error = Error, 227 .setupfname = SetupDiff_loc, 228 .applyfname = Diff_loc, 229 .errorfname = Error_loc, 230 .inmode = CEED_EVAL_GRAD, 231 .outmode = CEED_EVAL_GRAD, 232 .qmode = CEED_GAUSS_LOBATTO 233 }, 234 [CEED_BP6] = { 235 .ncompu = 3, 236 .qdatasize = 6, 237 .qextra = 0, 238 .setup = SetupDiff3, 239 .apply = Diff3, 240 .error = Error3, 241 .setupfname = SetupDiff3_loc, 242 .applyfname = Diff3_loc, 243 .errorfname = Error3_loc, 244 .inmode = CEED_EVAL_GRAD, 245 .outmode = CEED_EVAL_GRAD, 246 .qmode = CEED_GAUSS_LOBATTO 247 } 248 }; 249 250 // This function uses libCEED to compute the action of the mass matrix 251 static PetscErrorCode MatMult_Mass(Mat A, Vec X, Vec Y) { 252 PetscErrorCode ierr; 253 User user; 254 PetscScalar *x, *y; 255 256 PetscFunctionBeginUser; 257 ierr = MatShellGetContext(A, &user); CHKERRQ(ierr); 258 ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES, 259 SCATTER_REVERSE); CHKERRQ(ierr); 260 ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES, SCATTER_REVERSE); 261 CHKERRQ(ierr); 262 ierr = VecZeroEntries(user->Yloc); CHKERRQ(ierr); 263 264 ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 265 ierr = VecGetArray(user->Yloc, &y); CHKERRQ(ierr); 266 CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x); 267 CeedVectorSetArray(user->yceed, CEED_MEM_HOST, CEED_USE_POINTER, y); 268 269 CeedOperatorApply(user->op, user->xceed, user->yceed, 270 CEED_REQUEST_IMMEDIATE); 271 ierr = CeedVectorSyncArray(user->yceed, CEED_MEM_HOST); CHKERRQ(ierr); 272 273 ierr = VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 274 ierr = VecRestoreArray(user->Yloc, &y); CHKERRQ(ierr); 275 276 if (Y) { 277 ierr = VecZeroEntries(Y); CHKERRQ(ierr); 278 ierr = VecScatterBegin(user->ltog, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 279 CHKERRQ(ierr); 280 ierr = VecScatterEnd(user->ltog, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 281 CHKERRQ(ierr); 282 } 283 PetscFunctionReturn(0); 284 } 285 286 // This function uses libCEED to compute the action of the Laplacian with 287 // Dirichlet boundary conditions 288 static PetscErrorCode MatMult_Diff(Mat A, Vec X, Vec Y) { 289 PetscErrorCode ierr; 290 User user; 291 PetscScalar *x, *y; 292 293 PetscFunctionBeginUser; 294 ierr = MatShellGetContext(A, &user); CHKERRQ(ierr); 295 296 // Global-to-local 297 ierr = VecScatterBegin(user->ltog0, X, user->Xloc, INSERT_VALUES, 298 SCATTER_REVERSE); CHKERRQ(ierr); 299 ierr = VecScatterEnd(user->ltog0, X, user->Xloc, INSERT_VALUES, 300 SCATTER_REVERSE); 301 CHKERRQ(ierr); 302 ierr = VecZeroEntries(user->Yloc); CHKERRQ(ierr); 303 304 // Setup CEED vectors 305 ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 306 ierr = VecGetArray(user->Yloc, &y); CHKERRQ(ierr); 307 CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x); 308 CeedVectorSetArray(user->yceed, CEED_MEM_HOST, CEED_USE_POINTER, y); 309 310 // Apply CEED operator 311 CeedOperatorApply(user->op, user->xceed, user->yceed, 312 CEED_REQUEST_IMMEDIATE); 313 ierr = CeedVectorSyncArray(user->yceed, CEED_MEM_HOST); CHKERRQ(ierr); 314 315 // Restore PETSc vectors 316 ierr = VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 317 ierr = VecRestoreArray(user->Yloc, &y); CHKERRQ(ierr); 318 319 // Local-to-global 320 ierr = VecZeroEntries(Y); CHKERRQ(ierr); 321 ierr = VecScatterBegin(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD); 322 CHKERRQ(ierr); 323 ierr = VecScatterEnd(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD); 324 CHKERRQ(ierr); 325 ierr = VecScatterBegin(user->ltog0, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 326 CHKERRQ(ierr); 327 ierr = VecScatterEnd(user->ltog0, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 328 CHKERRQ(ierr); 329 330 PetscFunctionReturn(0); 331 } 332 333 // This function calculates the error in the final solution 334 static PetscErrorCode ComputeErrorMax(User user, CeedOperator op_error, Vec X, 335 CeedVector target, PetscReal *maxerror) { 336 PetscErrorCode ierr; 337 PetscScalar *x; 338 CeedVector collocated_error; 339 CeedInt length; 340 341 PetscFunctionBeginUser; 342 CeedVectorGetLength(target, &length); 343 CeedVectorCreate(user->ceed, length, &collocated_error); 344 345 // Global-to-local 346 ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES, 347 SCATTER_REVERSE); CHKERRQ(ierr); 348 ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES, SCATTER_REVERSE); 349 CHKERRQ(ierr); 350 351 // Setup CEED vector 352 ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 353 CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x); 354 355 // Apply CEED operator 356 CeedOperatorApply(op_error, user->xceed, collocated_error, 357 CEED_REQUEST_IMMEDIATE); 358 359 // Restore PETSc vector 360 VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 361 362 // Reduce max error 363 *maxerror = 0; 364 const CeedScalar *e; 365 CeedVectorGetArrayRead(collocated_error, CEED_MEM_HOST, &e); 366 for (CeedInt i=0; i<length; i++) { 367 *maxerror = PetscMax(*maxerror, PetscAbsScalar(e[i])); 368 } 369 CeedVectorRestoreArrayRead(collocated_error, &e); 370 ierr = MPI_Allreduce(MPI_IN_PLACE, maxerror, 371 1, MPIU_REAL, MPIU_MAX, user->comm); CHKERRQ(ierr); 372 373 // Cleanup 374 CeedVectorDestroy(&collocated_error); 375 376 PetscFunctionReturn(0); 377 } 378 379 int main(int argc, char **argv) { 380 PetscInt ierr; 381 MPI_Comm comm; 382 char ceedresource[PETSC_MAX_PATH_LEN] = "/cpu/self"; 383 double my_rt_start, my_rt, rt_min, rt_max; 384 PetscInt degree, qextra, localnodes, localelem, melem[3], mnodes[3], p[3], 385 irank[3], lnodes[3], lsize, ncompu = 1; 386 PetscScalar *r; 387 PetscBool test_mode, benchmark_mode, write_solution; 388 PetscMPIInt size, rank; 389 Vec X, Xloc, rhs, rhsloc; 390 Mat mat; 391 KSP ksp; 392 VecScatter ltog, ltog0, gtogD; 393 User user; 394 Ceed ceed; 395 CeedBasis basisx, basisu; 396 CeedElemRestriction Erestrictx, Erestrictu, Erestrictxi, Erestrictui, 397 Erestrictqdi; 398 CeedQFunction qf_setup, qf_apply, qf_error; 399 CeedOperator op_setup, op_apply, op_error; 400 CeedVector xcoord, qdata, rhsceed, target; 401 CeedInt P, Q; 402 const CeedInt dim = 3, ncompx = 3; 403 bpType bpChoice; 404 405 ierr = PetscInitialize(&argc, &argv, NULL, help); 406 if (ierr) return ierr; 407 comm = PETSC_COMM_WORLD; 408 ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); CHKERRQ(ierr); 409 bpChoice = CEED_BP1; 410 ierr = PetscOptionsEnum("-problem", 411 "CEED benchmark problem to solve", NULL, 412 bpTypes, (PetscEnum)bpChoice, (PetscEnum *)&bpChoice, 413 NULL); CHKERRQ(ierr); 414 ncompu = bpOptions[bpChoice].ncompu; 415 test_mode = PETSC_FALSE; 416 ierr = PetscOptionsBool("-test", 417 "Testing mode (do not print unless error is large)", 418 NULL, test_mode, &test_mode, NULL); CHKERRQ(ierr); 419 benchmark_mode = PETSC_FALSE; 420 ierr = PetscOptionsBool("-benchmark", 421 "Benchmarking mode (prints benchmark statistics)", 422 NULL, benchmark_mode, &benchmark_mode, NULL); 423 CHKERRQ(ierr); 424 write_solution = PETSC_FALSE; 425 ierr = PetscOptionsBool("-write_solution", 426 "Write solution for visualization", 427 NULL, write_solution, &write_solution, NULL); 428 CHKERRQ(ierr); 429 degree = test_mode ? 3 : 1; 430 ierr = PetscOptionsInt("-degree", "Polynomial degree of tensor product basis", 431 NULL, degree, °ree, NULL); CHKERRQ(ierr); 432 qextra = bpOptions[bpChoice].qextra; 433 ierr = PetscOptionsInt("-qextra", "Number of extra quadrature points", 434 NULL, qextra, &qextra, NULL); CHKERRQ(ierr); 435 ierr = PetscOptionsString("-ceed", "CEED resource specifier", 436 NULL, ceedresource, ceedresource, 437 sizeof(ceedresource), NULL); CHKERRQ(ierr); 438 localnodes = 1000; 439 ierr = PetscOptionsInt("-local", 440 "Target number of locally owned nodes per process", 441 NULL, localnodes, &localnodes, NULL); CHKERRQ(ierr); 442 ierr = PetscOptionsEnd(); CHKERRQ(ierr); 443 P = degree + 1; 444 Q = P + qextra; 445 446 // Determine size of process grid 447 ierr = MPI_Comm_size(comm, &size); CHKERRQ(ierr); 448 Split3(size, p, false); 449 450 // Find a nicely composite number of elements no less than localnodes 451 for (localelem = PetscMax(1, localnodes / (degree*degree*degree)); ; 452 localelem++) { 453 Split3(localelem, melem, true); 454 if (Max3(melem) / Min3(melem) <= 2) break; 455 } 456 457 // Find my location in the process grid 458 ierr = MPI_Comm_rank(comm, &rank); CHKERRQ(ierr); 459 for (int d=0,rankleft=rank; d<dim; d++) { 460 const int pstride[3] = {p[1] *p[2], p[2], 1}; 461 irank[d] = rankleft / pstride[d]; 462 rankleft -= irank[d] * pstride[d]; 463 } 464 465 GlobalNodes(p, irank, degree, melem, mnodes); 466 467 // Setup global vector 468 ierr = VecCreate(comm, &X); CHKERRQ(ierr); 469 ierr = VecSetSizes(X, mnodes[0]*mnodes[1]*mnodes[2]*ncompu, PETSC_DECIDE); 470 CHKERRQ(ierr); 471 ierr = VecSetUp(X); CHKERRQ(ierr); 472 473 // Print summary 474 if (!test_mode) { 475 CeedInt gsize; 476 ierr = VecGetSize(X, &gsize); CHKERRQ(ierr); 477 ierr = PetscPrintf(comm, 478 "\n-- CEED Benchmark Problem %d -- libCEED + PETSc --\n" 479 " libCEED:\n" 480 " libCEED Backend : %s\n" 481 " Mesh:\n" 482 " Number of 1D Basis Nodes (p) : %d\n" 483 " Number of 1D Quadrature Points (q) : %d\n" 484 " Global nodes : %D\n" 485 " Process Decomposition : %D %D %D\n" 486 " Local Elements : %D = %D %D %D\n" 487 " Owned nodes : %D = %D %D %D\n", 488 bpChoice+1, ceedresource, P, Q, gsize/ncompu, p[0], 489 p[1], p[2], localelem, melem[0], melem[1], melem[2], 490 mnodes[0]*mnodes[1]*mnodes[2], mnodes[0], mnodes[1], mnodes[2]); 491 CHKERRQ(ierr); 492 } 493 494 { 495 lsize = 1; 496 for (int d=0; d<dim; d++) { 497 lnodes[d] = melem[d]*degree + 1; 498 lsize *= lnodes[d]; 499 } 500 ierr = VecCreate(PETSC_COMM_SELF, &Xloc); CHKERRQ(ierr); 501 ierr = VecSetSizes(Xloc, lsize*ncompu, PETSC_DECIDE); CHKERRQ(ierr); 502 ierr = VecSetUp(Xloc); CHKERRQ(ierr); 503 504 // Create local-to-global scatter 505 PetscInt *ltogind, *ltogind0, *locind, l0count; 506 IS ltogis, ltogis0, locis; 507 PetscInt gstart[2][2][2], gmnodes[2][2][2][dim]; 508 509 for (int i=0; i<2; i++) { 510 for (int j=0; j<2; j++) { 511 for (int k=0; k<2; k++) { 512 PetscInt ijkrank[3] = {irank[0]+i, irank[1]+j, irank[2]+k}; 513 gstart[i][j][k] = GlobalStart(p, ijkrank, degree, melem); 514 GlobalNodes(p, ijkrank, degree, melem, gmnodes[i][j][k]); 515 } 516 } 517 } 518 519 ierr = PetscMalloc1(lsize, <ogind); CHKERRQ(ierr); 520 ierr = PetscMalloc1(lsize, <ogind0); CHKERRQ(ierr); 521 ierr = PetscMalloc1(lsize, &locind); CHKERRQ(ierr); 522 l0count = 0; 523 for (PetscInt i=0,ir,ii; ir=i>=mnodes[0], ii=i-ir*mnodes[0], i<lnodes[0]; i++) 524 for (PetscInt j=0,jr,jj; jr=j>=mnodes[1], jj=j-jr*mnodes[1], j<lnodes[1]; j++) 525 for (PetscInt k=0,kr,kk; kr=k>=mnodes[2], kk=k-kr*mnodes[2], k<lnodes[2]; k++) { 526 PetscInt here = (i*lnodes[1]+j)*lnodes[2]+k; 527 ltogind[here] = 528 gstart[ir][jr][kr] + (ii*gmnodes[ir][jr][kr][1]+jj)*gmnodes[ir][jr][kr][2]+kk; 529 if ((irank[0] == 0 && i == 0) 530 || (irank[1] == 0 && j == 0) 531 || (irank[2] == 0 && k == 0) 532 || (irank[0]+1 == p[0] && i+1 == lnodes[0]) 533 || (irank[1]+1 == p[1] && j+1 == lnodes[1]) 534 || (irank[2]+1 == p[2] && k+1 == lnodes[2])) 535 continue; 536 ltogind0[l0count] = ltogind[here]; 537 locind[l0count++] = here; 538 } 539 ierr = ISCreateBlock(comm, ncompu, lsize, ltogind, PETSC_OWN_POINTER, 540 <ogis); CHKERRQ(ierr); 541 ierr = VecScatterCreate(Xloc, NULL, X, ltogis, <og); CHKERRQ(ierr); 542 CHKERRQ(ierr); 543 ierr = ISCreateBlock(comm, ncompu, l0count, ltogind0, PETSC_OWN_POINTER, 544 <ogis0); CHKERRQ(ierr); 545 ierr = ISCreateBlock(comm, ncompu, l0count, locind, PETSC_OWN_POINTER, 546 &locis); CHKERRQ(ierr); 547 ierr = VecScatterCreate(Xloc, locis, X, ltogis0, <og0); CHKERRQ(ierr); 548 { 549 // Create global-to-global scatter for Dirichlet values (everything not in 550 // ltogis0, which is the range of ltog0) 551 PetscInt xstart, xend, *indD, countD = 0; 552 IS isD; 553 const PetscScalar *x; 554 ierr = VecZeroEntries(Xloc); CHKERRQ(ierr); 555 ierr = VecSet(X, 1.0); CHKERRQ(ierr); 556 ierr = VecScatterBegin(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD); 557 CHKERRQ(ierr); 558 ierr = VecScatterEnd(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD); 559 CHKERRQ(ierr); 560 ierr = VecGetOwnershipRange(X, &xstart, &xend); CHKERRQ(ierr); 561 ierr = PetscMalloc1(xend-xstart, &indD); CHKERRQ(ierr); 562 ierr = VecGetArrayRead(X, &x); CHKERRQ(ierr); 563 for (PetscInt i=0; i<xend-xstart; i++) { 564 if (x[i] == 1.) indD[countD++] = xstart + i; 565 } 566 ierr = VecRestoreArrayRead(X, &x); CHKERRQ(ierr); 567 ierr = ISCreateGeneral(comm, countD, indD, PETSC_COPY_VALUES, &isD); 568 CHKERRQ(ierr); 569 ierr = PetscFree(indD); CHKERRQ(ierr); 570 ierr = VecScatterCreate(X, isD, X, isD, >ogD); CHKERRQ(ierr); 571 ierr = ISDestroy(&isD); CHKERRQ(ierr); 572 } 573 ierr = ISDestroy(<ogis); CHKERRQ(ierr); 574 ierr = ISDestroy(<ogis0); CHKERRQ(ierr); 575 ierr = ISDestroy(&locis); CHKERRQ(ierr); 576 } 577 578 // Set up libCEED 579 CeedInit(ceedresource, &ceed); 580 581 // CEED bases 582 CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompu, P, Q, 583 bpOptions[bpChoice].qmode, &basisu); 584 CeedBasisCreateTensorH1Lagrange(ceed, dim, ncompx, 2, Q, 585 bpOptions[bpChoice].qmode, &basisx); 586 587 // CEED restrictions 588 CreateRestriction(ceed, melem, P, ncompu, &Erestrictu); 589 CreateRestriction(ceed, melem, 2, dim, &Erestrictx); 590 CeedInt nelem = melem[0]*melem[1]*melem[2]; 591 CeedElemRestrictionCreateIdentity(ceed, nelem, Q*Q*Q, nelem*Q*Q*Q, ncompu, 592 &Erestrictui); 593 CeedElemRestrictionCreateIdentity(ceed, nelem, 594 Q*Q*Q, 595 nelem*Q*Q*Q, 596 bpOptions[bpChoice].qdatasize, &Erestrictqdi); 597 CeedElemRestrictionCreateIdentity(ceed, nelem, Q*Q*Q, nelem*Q*Q*Q, 1, 598 &Erestrictxi); 599 { 600 CeedScalar *xloc; 601 CeedInt shape[3] = {melem[0]+1, melem[1]+1, melem[2]+1}, len = 602 shape[0]*shape[1]*shape[2]; 603 xloc = malloc(len*ncompx*sizeof xloc[0]); 604 for (CeedInt i=0; i<shape[0]; i++) { 605 for (CeedInt j=0; j<shape[1]; j++) { 606 for (CeedInt k=0; k<shape[2]; k++) { 607 xloc[((i*shape[1]+j)*shape[2]+k) + 0*len] = 1.*(irank[0]*melem[0]+i) / 608 (p[0]*melem[0]); 609 xloc[((i*shape[1]+j)*shape[2]+k) + 1*len] = 1.*(irank[1]*melem[1]+j) / 610 (p[1]*melem[1]); 611 xloc[((i*shape[1]+j)*shape[2]+k) + 2*len] = 1.*(irank[2]*melem[2]+k) / 612 (p[2]*melem[2]); 613 } 614 } 615 } 616 CeedVectorCreate(ceed, len*ncompx, &xcoord); 617 CeedVectorSetArray(xcoord, CEED_MEM_HOST, CEED_OWN_POINTER, xloc); 618 } 619 620 // Create the Q-function that builds the operator (i.e. computes its 621 // quadrature data) and set its context data 622 CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpChoice].setup, 623 bpOptions[bpChoice].setupfname, &qf_setup); 624 CeedQFunctionAddInput(qf_setup, "x", ncompx, CEED_EVAL_INTERP); 625 CeedQFunctionAddInput(qf_setup, "dx", ncompx*dim, CEED_EVAL_GRAD); 626 CeedQFunctionAddInput(qf_setup, "weight", 1, CEED_EVAL_WEIGHT); 627 CeedQFunctionAddOutput(qf_setup, "qdata", bpOptions[bpChoice].qdatasize, 628 CEED_EVAL_NONE); 629 CeedQFunctionAddOutput(qf_setup, "true_soln", ncompu, CEED_EVAL_NONE); 630 CeedQFunctionAddOutput(qf_setup, "rhs", ncompu, CEED_EVAL_INTERP); 631 632 // Set up PDE operator 633 CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpChoice].apply, 634 bpOptions[bpChoice].applyfname, &qf_apply); 635 // Add inputs and outputs 636 CeedInt gradInScale = bpOptions[bpChoice].inmode==CEED_EVAL_GRAD ? 3 : 1; 637 CeedInt gradOutScale = bpOptions[bpChoice].outmode==CEED_EVAL_GRAD ? 3 : 1; 638 CeedQFunctionAddInput(qf_apply, "u", ncompu*gradInScale, 639 bpOptions[bpChoice].inmode); 640 CeedQFunctionAddInput(qf_apply, "qdata", bpOptions[bpChoice].qdatasize, 641 CEED_EVAL_NONE); 642 CeedQFunctionAddOutput(qf_apply, "v", ncompu*gradOutScale, 643 bpOptions[bpChoice].outmode); 644 645 // Create the error qfunction 646 CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpChoice].error, 647 bpOptions[bpChoice].errorfname, &qf_error); 648 CeedQFunctionAddInput(qf_error, "u", ncompu, CEED_EVAL_INTERP); 649 CeedQFunctionAddInput(qf_error, "true_soln", ncompu, CEED_EVAL_NONE); 650 CeedQFunctionAddOutput(qf_error, "error", ncompu, CEED_EVAL_NONE); 651 652 // Create the persistent vectors that will be needed in setup 653 CeedInt nqpts; 654 CeedBasisGetNumQuadraturePoints(basisu, &nqpts); 655 CeedVectorCreate(ceed, bpOptions[bpChoice].qdatasize*nelem*nqpts, &qdata); 656 CeedVectorCreate(ceed, nelem*nqpts*ncompu, &target); 657 CeedVectorCreate(ceed, lsize*ncompu, &rhsceed); 658 659 // Create the operator that builds the quadrature data for the ceed operator 660 CeedOperatorCreate(ceed, qf_setup, NULL, NULL, &op_setup); 661 CeedOperatorSetField(op_setup, "x", Erestrictx, CEED_NOTRANSPOSE, 662 basisx, CEED_VECTOR_ACTIVE); 663 CeedOperatorSetField(op_setup, "dx", Erestrictx, CEED_NOTRANSPOSE, 664 basisx, CEED_VECTOR_ACTIVE); 665 CeedOperatorSetField(op_setup, "weight", Erestrictxi, CEED_NOTRANSPOSE, 666 basisx, CEED_VECTOR_NONE); 667 CeedOperatorSetField(op_setup, "qdata", Erestrictqdi, CEED_NOTRANSPOSE, 668 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 669 CeedOperatorSetField(op_setup, "true_soln", Erestrictui, CEED_NOTRANSPOSE, 670 CEED_BASIS_COLLOCATED, target); 671 CeedOperatorSetField(op_setup, "rhs", Erestrictu, CEED_TRANSPOSE, 672 basisu, rhsceed); 673 674 // Create the mass or diff operator 675 CeedOperatorCreate(ceed, qf_apply, NULL, NULL, &op_apply); 676 CeedOperatorSetField(op_apply, "u", Erestrictu, CEED_TRANSPOSE, 677 basisu, CEED_VECTOR_ACTIVE); 678 CeedOperatorSetField(op_apply, "qdata", Erestrictqdi, CEED_NOTRANSPOSE, 679 CEED_BASIS_COLLOCATED, qdata); 680 CeedOperatorSetField(op_apply, "v", Erestrictu, CEED_TRANSPOSE, 681 basisu, CEED_VECTOR_ACTIVE); 682 683 // Create the error operator 684 CeedOperatorCreate(ceed, qf_error, NULL, NULL, &op_error); 685 CeedOperatorSetField(op_error, "u", Erestrictu, CEED_TRANSPOSE, 686 basisu, CEED_VECTOR_ACTIVE); 687 CeedOperatorSetField(op_error, "true_soln", Erestrictui, CEED_NOTRANSPOSE, 688 CEED_BASIS_COLLOCATED, target); 689 CeedOperatorSetField(op_error, "error", Erestrictui, CEED_NOTRANSPOSE, 690 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 691 692 693 // Set up Mat 694 ierr = PetscMalloc1(1, &user); CHKERRQ(ierr); 695 user->comm = comm; 696 user->ltog = ltog; 697 if (bpChoice != CEED_BP1 && bpChoice != CEED_BP2) { 698 user->ltog0 = ltog0; 699 user->gtogD = gtogD; 700 } 701 user->Xloc = Xloc; 702 ierr = VecDuplicate(Xloc, &user->Yloc); CHKERRQ(ierr); 703 CeedVectorCreate(ceed, lsize*ncompu, &user->xceed); 704 CeedVectorCreate(ceed, lsize*ncompu, &user->yceed); 705 user->op = op_apply; 706 user->qdata = qdata; 707 user->ceed = ceed; 708 709 ierr = MatCreateShell(comm, mnodes[0]*mnodes[1]*mnodes[2]*ncompu, 710 mnodes[0]*mnodes[1]*mnodes[2]*ncompu, 711 PETSC_DECIDE, PETSC_DECIDE, user, &mat); CHKERRQ(ierr); 712 if (bpChoice == CEED_BP1 || bpChoice == CEED_BP2) { 713 ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Mass); 714 CHKERRQ(ierr); 715 } else { 716 ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Diff); 717 CHKERRQ(ierr); 718 } 719 ierr = MatCreateVecs(mat, &rhs, NULL); CHKERRQ(ierr); 720 721 // Get RHS vector 722 ierr = VecDuplicate(Xloc, &rhsloc); CHKERRQ(ierr); 723 ierr = VecZeroEntries(rhsloc); CHKERRQ(ierr); 724 ierr = VecGetArray(rhsloc, &r); CHKERRQ(ierr); 725 CeedVectorSetArray(rhsceed, CEED_MEM_HOST, CEED_USE_POINTER, r); 726 727 // Setup qdata, rhs, and target 728 CeedOperatorApply(op_setup, xcoord, qdata, CEED_REQUEST_IMMEDIATE); 729 ierr = CeedVectorSyncArray(rhsceed, CEED_MEM_HOST); CHKERRQ(ierr); 730 CeedVectorDestroy(&xcoord); 731 732 // Gather RHS 733 ierr = VecRestoreArray(rhsloc, &r); CHKERRQ(ierr); 734 ierr = VecZeroEntries(rhs); CHKERRQ(ierr); 735 ierr = VecScatterBegin(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD); 736 CHKERRQ(ierr); 737 ierr = VecScatterEnd(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD); 738 CHKERRQ(ierr); 739 CeedVectorDestroy(&rhsceed); 740 741 ierr = KSPCreate(comm, &ksp); CHKERRQ(ierr); 742 { 743 PC pc; 744 ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); 745 if (bpChoice == CEED_BP1 || bpChoice == CEED_BP2) { 746 ierr = PCSetType(pc, PCJACOBI); CHKERRQ(ierr); 747 ierr = PCJacobiSetType(pc, PC_JACOBI_ROWSUM); CHKERRQ(ierr); 748 } else { 749 ierr = PCSetType(pc, PCNONE); CHKERRQ(ierr); 750 } 751 ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr); 752 ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL); CHKERRQ(ierr); 753 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 754 PETSC_DEFAULT); CHKERRQ(ierr); 755 } 756 ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); 757 ierr = KSPSetOperators(ksp, mat, mat); CHKERRQ(ierr); 758 // First run, if benchmarking 759 if (benchmark_mode) { 760 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1); 761 CHKERRQ(ierr); 762 my_rt_start = MPI_Wtime(); 763 ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 764 my_rt = MPI_Wtime() - my_rt_start; 765 ierr = MPI_Allreduce(MPI_IN_PLACE, &my_rt, 1, MPI_DOUBLE, MPI_MIN, comm); 766 CHKERRQ(ierr); 767 // Set maxits based on first iteration timing 768 if (my_rt > 0.02) { 769 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 5); 770 CHKERRQ(ierr); 771 } else { 772 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 20); 773 CHKERRQ(ierr); 774 } 775 } 776 // Timed solve 777 ierr = PetscBarrier((PetscObject)ksp); CHKERRQ(ierr); 778 my_rt_start = MPI_Wtime(); 779 ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 780 my_rt = MPI_Wtime() - my_rt_start; 781 { 782 KSPType ksptype; 783 KSPConvergedReason reason; 784 PetscReal rnorm; 785 PetscInt its; 786 ierr = KSPGetType(ksp, &ksptype); CHKERRQ(ierr); 787 ierr = KSPGetConvergedReason(ksp, &reason); CHKERRQ(ierr); 788 ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); 789 ierr = KSPGetResidualNorm(ksp, &rnorm); CHKERRQ(ierr); 790 if (!test_mode || reason < 0 || rnorm > 1e-8) { 791 ierr = PetscPrintf(comm, 792 " KSP:\n" 793 " KSP Type : %s\n" 794 " KSP Convergence : %s\n" 795 " Total KSP Iterations : %D\n" 796 " Final rnorm : %e\n", 797 ksptype, KSPConvergedReasons[reason], its, 798 (double)rnorm); CHKERRQ(ierr); 799 } 800 if (benchmark_mode && (!test_mode)) { 801 CeedInt gsize; 802 ierr = VecGetSize(X, &gsize); CHKERRQ(ierr); 803 ierr = MPI_Allreduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, comm); 804 CHKERRQ(ierr); 805 ierr = MPI_Allreduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, comm); 806 CHKERRQ(ierr); 807 ierr = PetscPrintf(comm, 808 " Performance:\n" 809 " CG Solve Time : %g (%g) sec\n" 810 " DoFs/Sec in CG : %g (%g) million\n", 811 rt_max, rt_min, 1e-6*gsize*its/rt_max, 812 1e-6*gsize*its/rt_min); CHKERRQ(ierr); 813 } 814 } 815 816 { 817 PetscReal maxerror; 818 ierr = ComputeErrorMax(user, op_error, X, target, &maxerror); CHKERRQ(ierr); 819 PetscReal tol = (bpChoice == CEED_BP1 || bpChoice == CEED_BP2) ? 5e-3 : 5e-2; 820 if (!test_mode || maxerror > tol) { 821 ierr = PetscPrintf(comm, 822 " Pointwise Error (max) : %e\n", 823 (double)maxerror); CHKERRQ(ierr); 824 } 825 } 826 827 if (write_solution) { 828 PetscViewer vtkviewersoln; 829 830 ierr = PetscViewerCreate(comm, &vtkviewersoln); CHKERRQ(ierr); 831 ierr = PetscViewerSetType(vtkviewersoln, PETSCVIEWERVTK); CHKERRQ(ierr); 832 ierr = PetscViewerFileSetName(vtkviewersoln, "solution.vtk"); CHKERRQ(ierr); 833 ierr = VecView(X, vtkviewersoln); CHKERRQ(ierr); 834 ierr = PetscViewerDestroy(&vtkviewersoln); CHKERRQ(ierr); 835 } 836 837 ierr = VecDestroy(&rhs); CHKERRQ(ierr); 838 ierr = VecDestroy(&rhsloc); CHKERRQ(ierr); 839 ierr = VecDestroy(&X); CHKERRQ(ierr); 840 ierr = VecDestroy(&user->Xloc); CHKERRQ(ierr); 841 ierr = VecDestroy(&user->Yloc); CHKERRQ(ierr); 842 ierr = VecScatterDestroy(<og); CHKERRQ(ierr); 843 ierr = VecScatterDestroy(<og0); CHKERRQ(ierr); 844 ierr = VecScatterDestroy(>ogD); CHKERRQ(ierr); 845 ierr = MatDestroy(&mat); CHKERRQ(ierr); 846 ierr = KSPDestroy(&ksp); CHKERRQ(ierr); 847 848 CeedVectorDestroy(&user->xceed); 849 CeedVectorDestroy(&user->yceed); 850 CeedVectorDestroy(&user->qdata); 851 CeedVectorDestroy(&target); 852 CeedOperatorDestroy(&op_setup); 853 CeedOperatorDestroy(&op_apply); 854 CeedOperatorDestroy(&op_error); 855 CeedElemRestrictionDestroy(&Erestrictu); 856 CeedElemRestrictionDestroy(&Erestrictx); 857 CeedElemRestrictionDestroy(&Erestrictui); 858 CeedElemRestrictionDestroy(&Erestrictxi); 859 CeedElemRestrictionDestroy(&Erestrictqdi); 860 CeedQFunctionDestroy(&qf_setup); 861 CeedQFunctionDestroy(&qf_apply); 862 CeedQFunctionDestroy(&qf_error); 863 CeedBasisDestroy(&basisu); 864 CeedBasisDestroy(&basisx); 865 CeedDestroy(&ceed); 866 ierr = PetscFree(user); CHKERRQ(ierr); 867 return PetscFinalize(); 868 } 869