1 // libCEED + PETSc Example: CEED BPs 2 // 3 // This example demonstrates a simple usage of libCEED with PETSc to solve the 4 // CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps. 5 // 6 // The code is intentionally "raw", using only low-level communication 7 // primitives. 8 // 9 // Build with: 10 // 11 // make bps [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>] 12 // 13 // Sample runs: 14 // 15 // bps -problem bp1 16 // bps -problem bp2 -ceed /cpu/self 17 // bps -problem bp3 -ceed /gpu/occa 18 // bps -problem bp4 -ceed /cpu/occa 19 // bps -problem bp5 -ceed /omp/occa 20 // bps -problem bp6 -ceed /ocl/occa 21 // 22 //TESTARGS -ceed {ceed_resource} -test -problem bp3 23 24 /// @file 25 /// CEED BPs example using PETSc 26 const char help[] = "Solve CEED BPs using PETSc\n"; 27 28 #include <stdbool.h> 29 #include <string.h> 30 #include "common.h" 31 #include "bp1.h" 32 #include "bp2.h" 33 #include "bp3.h" 34 #include "bp4.h" 35 36 #define PATH(BASE) __DIR__ #BASE 37 38 static void Split3(PetscInt size, PetscInt m[3], bool reverse) { 39 for (PetscInt d=0,sizeleft=size; d<3; d++) { 40 PetscInt try = (PetscInt)PetscCeilReal(PetscPowReal(sizeleft, 1./(3 - d))); 41 while (try * (sizeleft / try) != sizeleft) try++; 42 m[reverse ? 2-d : d] = try; 43 sizeleft /= try; 44 } 45 } 46 47 static PetscInt Max3(const PetscInt a[3]) { 48 return PetscMax(a[0], PetscMax(a[1], a[2])); 49 } 50 static PetscInt Min3(const PetscInt a[3]) { 51 return PetscMin(a[0], PetscMin(a[1], a[2])); 52 } 53 static void GlobalDof(const PetscInt p[3], const PetscInt irank[3], 54 PetscInt degree, const PetscInt melem[3], 55 PetscInt mdof[3]) { 56 for (int d=0; d<3; d++) 57 mdof[d] = degree*melem[d] + (irank[d] == p[d]-1); 58 } 59 static PetscInt GlobalStart(const PetscInt p[3], const PetscInt irank[3], 60 PetscInt degree, const PetscInt melem[3]) { 61 PetscInt start = 0; 62 // Dumb brute-force is easier to read 63 for (PetscInt i=0; i<p[0]; i++) { 64 for (PetscInt j=0; j<p[1]; j++) { 65 for (PetscInt k=0; k<p[2]; k++) { 66 PetscInt mdof[3], ijkrank[] = {i,j,k}; 67 if (i == irank[0] && j == irank[1] && k == irank[2]) return start; 68 GlobalDof(p, ijkrank, degree, melem, mdof); 69 start += mdof[0] * mdof[1] * mdof[2]; 70 } 71 } 72 } 73 return -1; 74 } 75 static int CreateRestriction(Ceed ceed, const CeedInt melem[3], 76 CeedInt P, CeedInt ncomp, 77 CeedElemRestriction *Erestrict) { 78 const PetscInt Nelem = melem[0]*melem[1]*melem[2]; 79 PetscInt mdof[3], *idx, *idxp; 80 81 for (int d=0; d<3; d++) mdof[d] = melem[d]*(P-1) + 1; 82 idxp = idx = malloc(Nelem*P*P*P*sizeof idx[0]); 83 for (CeedInt i=0; i<melem[0]; i++) { 84 for (CeedInt j=0; j<melem[1]; j++) { 85 for (CeedInt k=0; k<melem[2]; k++,idxp += P*P*P) { 86 for (CeedInt ii=0; ii<P; ii++) { 87 for (CeedInt jj=0; jj<P; jj++) { 88 for (CeedInt kk=0; kk<P; kk++) { 89 if (0) { // This is the C-style (i,j,k) ordering that I prefer 90 idxp[(ii*P+jj)*P+kk] = (((i*(P-1)+ii)*mdof[1] 91 + (j*(P-1)+jj))*mdof[2] 92 + (k*(P-1)+kk)); 93 } else { // (k,j,i) ordering for consistency with MFEM example 94 idxp[ii+P*(jj+P*kk)] = (((i*(P-1)+ii)*mdof[1] 95 + (j*(P-1)+jj))*mdof[2] 96 + (k*(P-1)+kk)); 97 } 98 } 99 } 100 } 101 } 102 } 103 } 104 CeedElemRestrictionCreate(ceed, Nelem, P*P*P, mdof[0]*mdof[1]*mdof[2], ncomp, 105 CEED_MEM_HOST, CEED_OWN_POINTER, idx, Erestrict); 106 PetscFunctionReturn(0); 107 } 108 109 // Data for PETSc 110 typedef struct User_ *User; 111 struct User_ { 112 MPI_Comm comm; 113 VecScatter ltog; // Scatter for all entries 114 VecScatter ltog0; // Skip Dirichlet values 115 VecScatter gtogD; // global-to-global; only Dirichlet values 116 Vec Xloc, Yloc; 117 CeedVector xceed, yceed; 118 CeedOperator op; 119 CeedVector rho; 120 Ceed ceed; 121 }; 122 123 // BP Options 124 typedef enum { 125 CEED_BP1 = 0, CEED_BP2 = 1, CEED_BP3 = 2, 126 CEED_BP4 = 3, CEED_BP5 = 4, CEED_BP6 = 5 127 } bpType; 128 static const char *const bpTypes[] = {"bp1","bp2","bp3","bp4","bp5","bp6", 129 "bpType","CEED_BP",0}; 130 131 // BP specific data 132 typedef struct { 133 CeedInt vscale, qdatasize, qextra; 134 CeedQFunctionUser setup, apply, error; 135 const char setupfname[PETSC_MAX_PATH_LEN], applyfname[PETSC_MAX_PATH_LEN], 136 errorfname[PETSC_MAX_PATH_LEN]; 137 CeedEvalMode inmode, outmode; 138 CeedQuadMode qmode; 139 } bpData; 140 141 bpData bpOptions[6] = { 142 [CEED_BP1] = { 143 .vscale = 1, 144 .qdatasize = 1, 145 .qextra = 1, 146 .setup = SetupMass, 147 .apply = Mass, 148 .error = Error, 149 .setupfname = PATH(bp1.h:SetupMass), 150 .applyfname = PATH(bp1.h:Mass), 151 .errorfname = PATH(common.h:Error), 152 .inmode = CEED_EVAL_INTERP, 153 .outmode = CEED_EVAL_INTERP, 154 .qmode = CEED_GAUSS}, 155 [CEED_BP2] = { 156 .vscale = 3, 157 .qdatasize = 1, 158 .qextra = 1, 159 .setup = SetupMass3, 160 .apply = Mass3, 161 .error = Error3, 162 .setupfname = PATH(bp2.h:SetupMass3), 163 .applyfname = PATH(bp2.h:Mass3), 164 .errorfname = PATH(common.h:Error3), 165 .inmode = CEED_EVAL_INTERP, 166 .outmode = CEED_EVAL_INTERP, 167 .qmode = CEED_GAUSS}, 168 [CEED_BP3] = { 169 .vscale = 1, 170 .qdatasize = 6, 171 .qextra = 1, 172 .setup = SetupDiff, 173 .apply = Diff, 174 .error = Error, 175 .setupfname = PATH(bp3.h:SetupDiff), 176 .applyfname = PATH(bp3.h:Diff), 177 .errorfname = PATH(common.h:Error), 178 .inmode = CEED_EVAL_GRAD, 179 .outmode = CEED_EVAL_GRAD, 180 .qmode = CEED_GAUSS}, 181 [CEED_BP4] = { 182 .vscale = 3, 183 .qdatasize = 6, 184 .qextra = 1, 185 .setup = SetupDiff3, 186 .apply = Diff3, 187 .error = Error3, 188 .setupfname = PATH(bp4.h:SetupDiff3), 189 .applyfname = PATH(bp4.h:Diff), 190 .errorfname = PATH(common.h:Error3), 191 .inmode = CEED_EVAL_GRAD, 192 .outmode = CEED_EVAL_GRAD, 193 .qmode = CEED_GAUSS}, 194 [CEED_BP5] = { 195 .vscale = 1, 196 .qdatasize = 6, 197 .qextra = 0, 198 .setup = SetupDiff, 199 .apply = Diff, 200 .error = Error, 201 .setupfname = PATH(bp3.h:SetupDiff), 202 .applyfname = PATH(bp3.h:Diff), 203 .errorfname = PATH(common.h:Error), 204 .inmode = CEED_EVAL_GRAD, 205 .outmode = CEED_EVAL_GRAD, 206 .qmode = CEED_GAUSS_LOBATTO}, 207 [CEED_BP6] = { 208 .vscale = 3, 209 .qdatasize = 6, 210 .qextra = 0, 211 .setup = SetupDiff3, 212 .apply = Diff3, 213 .error = Error3, 214 .setupfname = PATH(bp4.h:SetupDiff3), 215 .applyfname = PATH(bp4.h:Diff), 216 .errorfname = PATH(common.h:Error3), 217 .inmode = CEED_EVAL_GRAD, 218 .outmode = CEED_EVAL_GRAD, 219 .qmode = CEED_GAUSS_LOBATTO} 220 }; 221 222 // This function uses libCEED to compute the action of the mass matrix 223 static PetscErrorCode MatMult_Mass(Mat A, Vec X, Vec Y) { 224 PetscErrorCode ierr; 225 User user; 226 PetscScalar *x, *y; 227 228 PetscFunctionBeginUser; 229 ierr = MatShellGetContext(A, &user); CHKERRQ(ierr); 230 ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES, 231 SCATTER_REVERSE); CHKERRQ(ierr); 232 ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES, SCATTER_REVERSE); 233 CHKERRQ(ierr); 234 ierr = VecZeroEntries(user->Yloc); CHKERRQ(ierr); 235 236 ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 237 ierr = VecGetArray(user->Yloc, &y); CHKERRQ(ierr); 238 CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x); 239 CeedVectorSetArray(user->yceed, CEED_MEM_HOST, CEED_USE_POINTER, y); 240 241 CeedOperatorApply(user->op, user->xceed, user->yceed, 242 CEED_REQUEST_IMMEDIATE); 243 ierr = CeedVectorSyncArray(user->yceed, CEED_MEM_HOST); CHKERRQ(ierr); 244 245 ierr = VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 246 ierr = VecRestoreArray(user->Yloc, &y); CHKERRQ(ierr); 247 248 if (Y) { 249 ierr = VecZeroEntries(Y); CHKERRQ(ierr); 250 ierr = VecScatterBegin(user->ltog, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 251 CHKERRQ(ierr); 252 ierr = VecScatterEnd(user->ltog, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 253 CHKERRQ(ierr); 254 } 255 PetscFunctionReturn(0); 256 } 257 258 // This function uses libCEED to compute the action of the Laplacian with 259 // Dirichlet boundary conditions 260 static PetscErrorCode MatMult_Diff(Mat A, Vec X, Vec Y) { 261 PetscErrorCode ierr; 262 User user; 263 PetscScalar *x, *y; 264 265 PetscFunctionBeginUser; 266 ierr = MatShellGetContext(A, &user); CHKERRQ(ierr); 267 ierr = VecScatterBegin(user->ltog0, X, user->Xloc, INSERT_VALUES, 268 SCATTER_REVERSE); CHKERRQ(ierr); 269 ierr = VecScatterEnd(user->ltog0, X, user->Xloc, INSERT_VALUES, 270 SCATTER_REVERSE); 271 CHKERRQ(ierr); 272 ierr = VecZeroEntries(user->Yloc); CHKERRQ(ierr); 273 274 ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 275 ierr = VecGetArray(user->Yloc, &y); CHKERRQ(ierr); 276 CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x); 277 CeedVectorSetArray(user->yceed, CEED_MEM_HOST, CEED_USE_POINTER, y); 278 279 CeedOperatorApply(user->op, user->xceed, user->yceed, 280 CEED_REQUEST_IMMEDIATE); 281 ierr = CeedVectorSyncArray(user->yceed, CEED_MEM_HOST); CHKERRQ(ierr); 282 283 ierr = VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 284 ierr = VecRestoreArray(user->Yloc, &y); CHKERRQ(ierr); 285 286 ierr = VecZeroEntries(Y); CHKERRQ(ierr); 287 ierr = VecScatterBegin(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD); 288 CHKERRQ(ierr); 289 ierr = VecScatterEnd(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD); 290 CHKERRQ(ierr); 291 ierr = VecScatterBegin(user->ltog0, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 292 CHKERRQ(ierr); 293 ierr = VecScatterEnd(user->ltog0, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 294 CHKERRQ(ierr); 295 PetscFunctionReturn(0); 296 } 297 298 // This function calculates the error in the final solution 299 static PetscErrorCode ComputeErrorMax(User user, CeedOperator op_error, Vec X, 300 CeedVector target, PetscReal *maxerror) { 301 PetscErrorCode ierr; 302 PetscScalar *x; 303 CeedVector collocated_error; 304 CeedInt length; 305 306 PetscFunctionBeginUser; 307 CeedVectorGetLength(target, &length); 308 CeedVectorCreate(user->ceed, length, &collocated_error); 309 ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES, 310 SCATTER_REVERSE); CHKERRQ(ierr); 311 ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES, SCATTER_REVERSE); 312 CHKERRQ(ierr); 313 ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 314 CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x); 315 CeedOperatorApply(op_error, user->xceed, collocated_error, 316 CEED_REQUEST_IMMEDIATE); 317 VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 318 319 *maxerror = 0; 320 const CeedScalar *e; 321 CeedVectorGetArrayRead(collocated_error, CEED_MEM_HOST, &e); 322 for (CeedInt i=0; i<length; i++) { 323 *maxerror = PetscMax(*maxerror, PetscAbsScalar(e[i])); 324 } 325 CeedVectorRestoreArrayRead(collocated_error, &e); 326 ierr = MPI_Allreduce(MPI_IN_PLACE, &maxerror, 327 1, MPIU_SCALAR, MPIU_MAX, user->comm); CHKERRQ(ierr); 328 CeedVectorDestroy(&collocated_error); 329 PetscFunctionReturn(0); 330 } 331 332 int main(int argc, char **argv) { 333 PetscInt ierr; 334 MPI_Comm comm; 335 char ceedresource[PETSC_MAX_PATH_LEN] = "/cpu/self"; 336 PetscInt degree, qextra, localdof, localelem, melem[3], mdof[3], p[3], 337 irank[3], ldof[3], lsize, vscale = 1; 338 PetscScalar *r; 339 PetscBool test_mode, benchmark_mode; 340 PetscMPIInt size, rank; 341 VecScatter ltog, ltog0, gtogD; 342 Ceed ceed; 343 CeedBasis basisx, basisu; 344 CeedElemRestriction Erestrictx, Erestrictu, Erestrictxi, Erestrictui, 345 Erestrictqdi; 346 CeedQFunction qf_setup, qf_apply, qf_error; 347 CeedOperator op_setup, op_apply, op_error; 348 CeedVector xcoord, rho, rhsceed, target; 349 CeedInt P, Q; 350 Vec X, Xloc, rhs, rhsloc; 351 Mat mat; 352 KSP ksp; 353 User user; 354 double my_rt_start, my_rt, rt_min, rt_max; 355 bpType bpChoice; 356 357 ierr = PetscInitialize(&argc, &argv, NULL, help); 358 if (ierr) return ierr; 359 comm = PETSC_COMM_WORLD; 360 ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); CHKERRQ(ierr); 361 bpChoice = CEED_BP1; 362 ierr = PetscOptionsEnum("-problem", 363 "CEED benchmark problem to solve", NULL, 364 bpTypes, (PetscEnum)bpChoice, (PetscEnum*)&bpChoice, 365 NULL); CHKERRQ(ierr); 366 vscale = bpOptions[bpChoice].vscale; 367 test_mode = PETSC_FALSE; 368 ierr = PetscOptionsBool("-test", 369 "Testing mode (do not print unless error is large)", 370 NULL, test_mode, &test_mode, NULL); CHKERRQ(ierr); 371 benchmark_mode = PETSC_FALSE; 372 ierr = PetscOptionsBool("-benchmark", 373 "Benchmarking mode (prints benchmark statistics)", 374 NULL, benchmark_mode, &benchmark_mode, NULL); 375 CHKERRQ(ierr); 376 degree = test_mode ? 3 : 1; 377 ierr = PetscOptionsInt("-degree", "Polynomial degree of tensor product basis", 378 NULL, degree, °ree, NULL); CHKERRQ(ierr); 379 qextra = bpOptions[bpChoice].qextra; 380 ierr = PetscOptionsInt("-qextra", "Number of extra quadrature points", 381 NULL, qextra, &qextra, NULL); CHKERRQ(ierr); 382 ierr = PetscOptionsString("-ceed", "CEED resource specifier", 383 NULL, ceedresource, ceedresource, 384 sizeof(ceedresource), NULL); CHKERRQ(ierr); 385 localdof = 1000; 386 ierr = PetscOptionsInt("-local", 387 "Target number of locally owned degrees of freedom per process", 388 NULL, localdof, &localdof, NULL); CHKERRQ(ierr); 389 ierr = PetscOptionsEnd(); CHKERRQ(ierr); 390 P = degree + 1; 391 Q = P + qextra; 392 393 // Determine size of process grid 394 ierr = MPI_Comm_size(comm, &size); CHKERRQ(ierr); 395 Split3(size, p, false); 396 397 // Find a nicely composite number of elements no less than localdof 398 for (localelem = PetscMax(1, localdof / (degree*degree*degree)); ; 399 localelem++) { 400 Split3(localelem, melem, true); 401 if (Max3(melem) / Min3(melem) <= 2) break; 402 } 403 404 // Find my location in the process grid 405 ierr = MPI_Comm_rank(comm, &rank); CHKERRQ(ierr); 406 for (int d=0,rankleft=rank; d<3; d++) { 407 const int pstride[3] = {p[1] *p[2], p[2], 1}; 408 irank[d] = rankleft / pstride[d]; 409 rankleft -= irank[d] * pstride[d]; 410 } 411 412 GlobalDof(p, irank, degree, melem, mdof); 413 414 ierr = VecCreate(comm, &X); CHKERRQ(ierr); 415 ierr = VecSetSizes(X, mdof[0]*mdof[1]*mdof[2]*vscale, PETSC_DECIDE); 416 CHKERRQ(ierr); 417 ierr = VecSetUp(X); CHKERRQ(ierr); 418 419 if (!test_mode) { 420 CeedInt gsize; 421 ierr = VecGetSize(X, &gsize); CHKERRQ(ierr); 422 ierr = PetscPrintf(comm, 423 "\n-- CEED Benchmark Problem %d -- libCEED + PETSc --\n" 424 " libCEED:\n" 425 " libCEED Backend : %s\n" 426 " Mesh:\n" 427 " Number of 1D Basis Nodes (p) : %d\n" 428 " Number of 1D Quadrature Points (q) : %d\n" 429 " Global DOFs : %D\n" 430 " Process Decomposition : %D %D %D\n" 431 " Local Elements : %D = %D %D %D\n" 432 " Owned DOFs : %D = %D %D %D\n", 433 bpChoice+1, ceedresource, P, Q, gsize/vscale, p[0], 434 p[1], p[2], localelem, melem[0], melem[1], melem[2], 435 mdof[0]*mdof[1]*mdof[2], mdof[0], mdof[1], mdof[2]); 436 CHKERRQ(ierr); 437 } 438 439 { 440 lsize = 1; 441 for (int d=0; d<3; d++) { 442 ldof[d] = melem[d]*degree + 1; 443 lsize *= ldof[d]; 444 } 445 ierr = VecCreate(PETSC_COMM_SELF, &Xloc); CHKERRQ(ierr); 446 ierr = VecSetSizes(Xloc, lsize*vscale, PETSC_DECIDE); CHKERRQ(ierr); 447 ierr = VecSetUp(Xloc); CHKERRQ(ierr); 448 449 // Create local-to-global scatter 450 PetscInt *ltogind, *ltogind0, *locind, l0count; 451 IS ltogis, ltogis0, locis; 452 PetscInt gstart[2][2][2], gmdof[2][2][2][3]; 453 454 for (int i=0; i<2; i++) { 455 for (int j=0; j<2; j++) { 456 for (int k=0; k<2; k++) { 457 PetscInt ijkrank[3] = {irank[0]+i, irank[1]+j, irank[2]+k}; 458 gstart[i][j][k] = GlobalStart(p, ijkrank, degree, melem); 459 GlobalDof(p, ijkrank, degree, melem, gmdof[i][j][k]); 460 } 461 } 462 } 463 464 ierr = PetscMalloc1(lsize, <ogind); CHKERRQ(ierr); 465 ierr = PetscMalloc1(lsize, <ogind0); CHKERRQ(ierr); 466 ierr = PetscMalloc1(lsize, &locind); CHKERRQ(ierr); 467 l0count = 0; 468 for (PetscInt i=0,ir,ii; ir=i>=mdof[0], ii=i-ir*mdof[0], i<ldof[0]; i++) { 469 for (PetscInt j=0,jr,jj; jr=j>=mdof[1], jj=j-jr*mdof[1], j<ldof[1]; j++) { 470 for (PetscInt k=0,kr,kk; kr=k>=mdof[2], kk=k-kr*mdof[2], k<ldof[2]; k++) { 471 PetscInt here = (i*ldof[1]+j)*ldof[2]+k; 472 ltogind[here] = 473 gstart[ir][jr][kr] + (ii*gmdof[ir][jr][kr][1]+jj)*gmdof[ir][jr][kr][2]+kk; 474 if ((irank[0] == 0 && i == 0) 475 || (irank[1] == 0 && j == 0) 476 || (irank[2] == 0 && k == 0) 477 || (irank[0]+1 == p[0] && i+1 == ldof[0]) 478 || (irank[1]+1 == p[1] && j+1 == ldof[1]) 479 || (irank[2]+1 == p[2] && k+1 == ldof[2])) 480 continue; 481 ltogind0[l0count] = ltogind[here]; 482 locind[l0count++] = here; 483 } 484 } 485 } 486 ierr = ISCreateBlock(comm, vscale, lsize, ltogind, PETSC_OWN_POINTER, 487 <ogis); CHKERRQ(ierr); 488 ierr = VecScatterCreate(Xloc, NULL, X, ltogis, <og); CHKERRQ(ierr); 489 CHKERRQ(ierr); 490 ierr = ISCreateBlock(comm, vscale, l0count, ltogind0, PETSC_OWN_POINTER, 491 <ogis0); CHKERRQ(ierr); 492 ierr = ISCreateBlock(comm, vscale, l0count, locind, PETSC_OWN_POINTER, 493 &locis); CHKERRQ(ierr); 494 ierr = VecScatterCreate(Xloc, locis, X, ltogis0, <og0); CHKERRQ(ierr); 495 { 496 // Create global-to-global scatter for Dirichlet values (everything not in 497 // ltogis0, which is the range of ltog0) 498 PetscInt xstart, xend, *indD, countD = 0; 499 IS isD; 500 const PetscScalar *x; 501 ierr = VecZeroEntries(Xloc); CHKERRQ(ierr); 502 ierr = VecSet(X, 1.0); CHKERRQ(ierr); 503 ierr = VecScatterBegin(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD); 504 CHKERRQ(ierr); 505 ierr = VecScatterEnd(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD); 506 CHKERRQ(ierr); 507 ierr = VecGetOwnershipRange(X, &xstart, &xend); CHKERRQ(ierr); 508 ierr = PetscMalloc1(xend-xstart, &indD); CHKERRQ(ierr); 509 ierr = VecGetArrayRead(X, &x); CHKERRQ(ierr); 510 for (PetscInt i=0; i<xend-xstart; i++) { 511 if (x[i] == 1.) indD[countD++] = xstart + i; 512 } 513 ierr = VecRestoreArrayRead(X, &x); CHKERRQ(ierr); 514 ierr = ISCreateGeneral(comm, countD, indD, PETSC_COPY_VALUES, &isD); 515 CHKERRQ(ierr); 516 ierr = PetscFree(indD); CHKERRQ(ierr); 517 ierr = VecScatterCreate(X, isD, X, isD, >ogD); CHKERRQ(ierr); 518 ierr = ISDestroy(&isD); CHKERRQ(ierr); 519 } 520 ierr = ISDestroy(<ogis); CHKERRQ(ierr); 521 ierr = ISDestroy(<ogis0); CHKERRQ(ierr); 522 ierr = ISDestroy(&locis); CHKERRQ(ierr); 523 } 524 525 // Set up libCEED 526 CeedInit(ceedresource, &ceed); 527 CeedBasisCreateTensorH1Lagrange(ceed, 3, vscale, P, Q, 528 bpOptions[bpChoice].qmode, &basisu); 529 CeedBasisCreateTensorH1Lagrange(ceed, 3, 3, 2, Q, 530 bpOptions[bpChoice].qmode, &basisx); 531 532 CreateRestriction(ceed, melem, P, vscale, &Erestrictu); 533 CreateRestriction(ceed, melem, 2, 3, &Erestrictx); 534 CeedInt nelem = melem[0]*melem[1]*melem[2]; 535 CeedElemRestrictionCreateIdentity(ceed, nelem, Q*Q*Q, nelem*Q*Q*Q, vscale, 536 &Erestrictui); 537 CeedElemRestrictionCreateIdentity(ceed, nelem, 538 bpOptions[bpChoice].qdatasize*Q*Q*Q, 539 bpOptions[bpChoice].qdatasize*nelem*Q*Q*Q, 540 1, &Erestrictqdi); 541 CeedElemRestrictionCreateIdentity(ceed, nelem, Q*Q*Q, nelem*Q*Q*Q, 1, 542 &Erestrictxi); 543 { 544 CeedScalar *xloc; 545 CeedInt shape[3] = {melem[0]+1, melem[1]+1, melem[2]+1}, len = 546 shape[0]*shape[1]*shape[2]; 547 xloc = malloc(len*3*sizeof xloc[0]); 548 for (CeedInt i=0; i<shape[0]; i++) { 549 for (CeedInt j=0; j<shape[1]; j++) { 550 for (CeedInt k=0; k<shape[2]; k++) { 551 xloc[((i*shape[1]+j)*shape[2]+k) + 0*len] = 1.*(irank[0]*melem[0]+i) / 552 (p[0]*melem[0]); 553 xloc[((i*shape[1]+j)*shape[2]+k) + 1*len] = 1.*(irank[1]*melem[1]+j) / 554 (p[1]*melem[1]); 555 xloc[((i*shape[1]+j)*shape[2]+k) + 2*len] = 1.*(irank[2]*melem[2]+k) / 556 (p[2]*melem[2]); 557 } 558 } 559 } 560 CeedVectorCreate(ceed, len*3, &xcoord); 561 CeedVectorSetArray(xcoord, CEED_MEM_HOST, CEED_OWN_POINTER, xloc); 562 } 563 564 // Create the Q-function that builds the operator (i.e. computes its 565 // quadrature data) and set its context data 566 CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpChoice].setup, 567 bpOptions[bpChoice].setupfname, &qf_setup); 568 CeedQFunctionAddInput(qf_setup, "x", 3, CEED_EVAL_INTERP); 569 CeedQFunctionAddInput(qf_setup, "dx", 3, CEED_EVAL_GRAD); 570 CeedQFunctionAddInput(qf_setup, "weight", 1, CEED_EVAL_WEIGHT); 571 CeedQFunctionAddOutput(qf_setup, "rho", bpOptions[bpChoice].qdatasize, 572 CEED_EVAL_NONE); 573 CeedQFunctionAddOutput(qf_setup, "true_soln", vscale, CEED_EVAL_NONE); 574 CeedQFunctionAddOutput(qf_setup, "rhs", vscale, CEED_EVAL_INTERP); 575 576 // Set up PDE operator 577 CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpChoice].apply, 578 bpOptions[bpChoice].applyfname, &qf_apply); 579 // Add inputs and outputs 580 CeedQFunctionAddInput(qf_apply, "u", vscale, bpOptions[bpChoice].inmode); 581 CeedQFunctionAddInput(qf_apply, "rho", bpOptions[bpChoice].qdatasize, 582 CEED_EVAL_NONE); 583 CeedQFunctionAddOutput(qf_apply, "v", vscale, bpOptions[bpChoice].outmode); 584 585 // Create the error qfunction 586 CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpChoice].error, 587 bpOptions[bpChoice].errorfname, &qf_error); 588 CeedQFunctionAddInput(qf_error, "u", vscale, CEED_EVAL_INTERP); 589 CeedQFunctionAddInput(qf_error, "true_soln", vscale, CEED_EVAL_NONE); 590 CeedQFunctionAddOutput(qf_error, "error", vscale, CEED_EVAL_NONE); 591 592 // Create the persistent vectors that will be needed in setup 593 CeedInt Nqpts, Nelem = melem[0]*melem[1]*melem[2]; 594 CeedBasisGetNumQuadraturePoints(basisu, &Nqpts); 595 CeedVectorCreate(ceed, bpOptions[bpChoice].qdatasize*Nelem*Nqpts, &rho); 596 CeedVectorCreate(ceed, Nelem*Nqpts*vscale, &target); 597 CeedVectorCreate(ceed, lsize*vscale, &rhsceed); 598 599 // Create the operator that builds the quadrature data for the ceed operator 600 CeedOperatorCreate(ceed, qf_setup, NULL, NULL, &op_setup); 601 CeedOperatorSetField(op_setup, "x", Erestrictx, CEED_NOTRANSPOSE, 602 basisx, CEED_VECTOR_ACTIVE); 603 CeedOperatorSetField(op_setup, "dx", Erestrictx, CEED_NOTRANSPOSE, 604 basisx, CEED_VECTOR_ACTIVE); 605 CeedOperatorSetField(op_setup, "weight", Erestrictxi, CEED_NOTRANSPOSE, 606 basisx, CEED_VECTOR_NONE); 607 CeedOperatorSetField(op_setup, "rho", Erestrictqdi, CEED_NOTRANSPOSE, 608 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 609 CeedOperatorSetField(op_setup, "true_soln", Erestrictui, CEED_NOTRANSPOSE, 610 CEED_BASIS_COLLOCATED, target); 611 CeedOperatorSetField(op_setup, "rhs", Erestrictu, CEED_TRANSPOSE, 612 basisu, rhsceed); 613 614 // Create the mass or diff operator 615 CeedOperatorCreate(ceed, qf_apply, NULL, NULL, &op_apply); 616 CeedOperatorSetField(op_apply, "u", Erestrictu, CEED_TRANSPOSE, 617 basisu, CEED_VECTOR_ACTIVE); 618 CeedOperatorSetField(op_apply, "rho", Erestrictqdi, CEED_NOTRANSPOSE, 619 CEED_BASIS_COLLOCATED, rho); 620 CeedOperatorSetField(op_apply, "v", Erestrictu, CEED_TRANSPOSE, 621 basisu, CEED_VECTOR_ACTIVE); 622 623 // Create the error operator 624 CeedOperatorCreate(ceed, qf_error, NULL, NULL, &op_error); 625 CeedOperatorSetField(op_error, "u", Erestrictu, CEED_TRANSPOSE, 626 basisu, CEED_VECTOR_ACTIVE); 627 CeedOperatorSetField(op_error, "true_soln", Erestrictui, CEED_NOTRANSPOSE, 628 CEED_BASIS_COLLOCATED, target); 629 CeedOperatorSetField(op_error, "error", Erestrictui, CEED_NOTRANSPOSE, 630 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 631 632 633 // Set up Mat 634 ierr = PetscMalloc1(1, &user); CHKERRQ(ierr); 635 user->comm = comm; 636 user->ltog = ltog; 637 if (bpChoice != CEED_BP1 && bpChoice != CEED_BP2) { 638 user->ltog0 = ltog0; 639 user->gtogD = gtogD; 640 } 641 user->Xloc = Xloc; 642 ierr = VecDuplicate(Xloc, &user->Yloc); CHKERRQ(ierr); 643 CeedVectorCreate(ceed, lsize*vscale, &user->xceed); 644 CeedVectorCreate(ceed, lsize*vscale, &user->yceed); 645 user->op = op_apply; 646 user->rho = rho; 647 user->ceed = ceed; 648 649 ierr = MatCreateShell(comm, mdof[0]*mdof[1]*mdof[2]*vscale, 650 mdof[0]*mdof[1]*mdof[2]*vscale, 651 PETSC_DECIDE, PETSC_DECIDE, user, &mat); CHKERRQ(ierr); 652 if (bpChoice == CEED_BP1 || bpChoice == CEED_BP2) { 653 ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Mass); 654 CHKERRQ(ierr); 655 } else { 656 ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Diff); 657 CHKERRQ(ierr); 658 } 659 ierr = MatCreateVecs(mat, &rhs, NULL); CHKERRQ(ierr); 660 661 // Get RHS vector 662 ierr = VecDuplicate(Xloc, &rhsloc); CHKERRQ(ierr); 663 ierr = VecZeroEntries(rhsloc); CHKERRQ(ierr); 664 ierr = VecGetArray(rhsloc, &r); CHKERRQ(ierr); 665 CeedVectorSetArray(rhsceed, CEED_MEM_HOST, CEED_USE_POINTER, r); 666 667 // Setup rho, rhs, and target 668 CeedOperatorApply(op_setup, xcoord, rho, CEED_REQUEST_IMMEDIATE); 669 ierr = CeedVectorSyncArray(rhsceed, CEED_MEM_HOST); CHKERRQ(ierr); 670 CeedVectorDestroy(&xcoord); 671 672 // Gather RHS 673 ierr = VecRestoreArray(rhsloc, &r); CHKERRQ(ierr); 674 ierr = VecZeroEntries(rhs); CHKERRQ(ierr); 675 ierr = VecScatterBegin(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD); 676 CHKERRQ(ierr); 677 ierr = VecScatterEnd(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD); 678 CHKERRQ(ierr); 679 CeedVectorDestroy(&rhsceed); 680 681 ierr = KSPCreate(comm, &ksp); CHKERRQ(ierr); 682 { 683 PC pc; 684 ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); 685 if (bpChoice == CEED_BP1 || bpChoice == CEED_BP2) { 686 ierr = PCSetType(pc, PCJACOBI); CHKERRQ(ierr); 687 ierr = PCJacobiSetType(pc, PC_JACOBI_ROWSUM); CHKERRQ(ierr); 688 } else { 689 ierr = PCSetType(pc, PCNONE); CHKERRQ(ierr); 690 } 691 ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr); 692 ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL); CHKERRQ(ierr); 693 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 694 PETSC_DEFAULT); CHKERRQ(ierr); 695 } 696 ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); 697 ierr = KSPSetOperators(ksp, mat, mat); CHKERRQ(ierr); 698 // First run, if benchmarking 699 if (benchmark_mode) { 700 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1); 701 CHKERRQ(ierr); 702 my_rt_start = MPI_Wtime(); 703 ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 704 my_rt = MPI_Wtime() - my_rt_start; 705 // Set maxits based on first iteration timing 706 if (my_rt > 0.02) { 707 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 5); 708 CHKERRQ(ierr); 709 } else { 710 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 20); 711 CHKERRQ(ierr); 712 } 713 } 714 // Timed solve 715 my_rt_start = MPI_Wtime(); 716 ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 717 my_rt = MPI_Wtime() - my_rt_start; 718 { 719 KSPType ksptype; 720 KSPConvergedReason reason; 721 PetscReal rnorm; 722 PetscInt its; 723 ierr = KSPGetType(ksp, &ksptype); CHKERRQ(ierr); 724 ierr = KSPGetConvergedReason(ksp, &reason); CHKERRQ(ierr); 725 ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); 726 ierr = KSPGetResidualNorm(ksp, &rnorm); CHKERRQ(ierr); 727 if (!test_mode || reason < 0 || rnorm > 1e-8) { 728 ierr = PetscPrintf(comm, 729 " KSP:\n" 730 " KSP Type : %s\n" 731 " KSP Convergence : %s\n" 732 " Total KSP Iterations : %D\n" 733 " Final rnorm : %e\n", 734 ksptype, KSPConvergedReasons[reason], its, 735 (double)rnorm); CHKERRQ(ierr); 736 } 737 if (benchmark_mode && (!test_mode)) { 738 CeedInt gsize; 739 ierr = VecGetSize(X, &gsize); CHKERRQ(ierr); 740 MPI_Reduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, 0, comm); 741 MPI_Reduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, 0, comm); 742 ierr = PetscPrintf(comm, 743 " Performance:\n" 744 " CG Solve Time : %g (%g) sec\n" 745 " DOFs/Sec in CG : %g (%g) million\n", 746 rt_max, rt_min, 1e-6*gsize*its/rt_max, 747 1e-6*gsize*its/rt_min); CHKERRQ(ierr); 748 } 749 } 750 751 { 752 PetscReal maxerror; 753 ierr = ComputeErrorMax(user, op_error, X, target, &maxerror); CHKERRQ(ierr); 754 PetscReal tol = (bpChoice == CEED_BP1 || bpChoice == CEED_BP2) ? 5e-3 : 5e-2; 755 if (!test_mode || maxerror > tol) { 756 ierr = PetscPrintf(comm, 757 " Pointwise Error (max) : %e\n", 758 (double)maxerror); CHKERRQ(ierr); 759 } 760 } 761 762 ierr = VecDestroy(&rhs); CHKERRQ(ierr); 763 ierr = VecDestroy(&rhsloc); CHKERRQ(ierr); 764 ierr = VecDestroy(&X); CHKERRQ(ierr); 765 ierr = VecDestroy(&user->Xloc); CHKERRQ(ierr); 766 ierr = VecDestroy(&user->Yloc); CHKERRQ(ierr); 767 ierr = VecScatterDestroy(<og); CHKERRQ(ierr); 768 ierr = VecScatterDestroy(<og0); CHKERRQ(ierr); 769 ierr = VecScatterDestroy(>ogD); CHKERRQ(ierr); 770 ierr = MatDestroy(&mat); CHKERRQ(ierr); 771 ierr = KSPDestroy(&ksp); CHKERRQ(ierr); 772 773 CeedVectorDestroy(&user->xceed); 774 CeedVectorDestroy(&user->yceed); 775 CeedVectorDestroy(&user->rho); 776 CeedVectorDestroy(&target); 777 CeedOperatorDestroy(&op_setup); 778 CeedOperatorDestroy(&op_apply); 779 CeedOperatorDestroy(&op_error); 780 CeedElemRestrictionDestroy(&Erestrictu); 781 CeedElemRestrictionDestroy(&Erestrictx); 782 CeedElemRestrictionDestroy(&Erestrictui); 783 CeedElemRestrictionDestroy(&Erestrictxi); 784 CeedElemRestrictionDestroy(&Erestrictqdi); 785 CeedQFunctionDestroy(&qf_setup); 786 CeedQFunctionDestroy(&qf_apply); 787 CeedQFunctionDestroy(&qf_error); 788 CeedBasisDestroy(&basisu); 789 CeedBasisDestroy(&basisx); 790 CeedDestroy(&ceed); 791 ierr = PetscFree(user); CHKERRQ(ierr); 792 return PetscFinalize(); 793 } 794