1 // libCEED + PETSc Example: CEED BPs 2 // 3 // This example demonstrates a simple usage of libCEED with PETSc to solve the 4 // CEED BP benchmark problems, see http://ceed.exascaleproject.org/bps. 5 // 6 // The code is intentionally "raw", using only low-level communication 7 // primitives. 8 // 9 // Build with: 10 // 11 // make bps [PETSC_DIR=</path/to/petsc>] [CEED_DIR=</path/to/libceed>] 12 // 13 // Sample runs: 14 // 15 // bps -problem bp1 16 // bps -problem bp2 -ceed /cpu/self 17 // bps -problem bp3 -ceed /gpu/occa 18 // bps -problem bp4 -ceed /cpu/occa 19 // bps -problem bp5 -ceed /omp/occa 20 // bps -problem bp6 -ceed /ocl/occa 21 // 22 //TESTARGS -ceed {ceed_resource} -test -problem bp3 23 24 /// @file 25 /// CEED BPs example using PETSc 26 const char help[] = "Solve CEED BPs using PETSc\n"; 27 28 #include <stdbool.h> 29 #include <string.h> 30 #include "common.h" 31 #include "bp1.h" 32 #include "bp2.h" 33 #include "bp3.h" 34 #include "bp4.h" 35 36 #define PATH(BASE) __DIR__ #BASE 37 38 static void Split3(PetscInt size, PetscInt m[3], bool reverse) { 39 for (PetscInt d=0,sizeleft=size; d<3; d++) { 40 PetscInt try = (PetscInt)PetscCeilReal(PetscPowReal(sizeleft, 1./(3 - d))); 41 while (try * (sizeleft / try) != sizeleft) try++; 42 m[reverse ? 2-d : d] = try; 43 sizeleft /= try; 44 } 45 } 46 47 static PetscInt Max3(const PetscInt a[3]) { 48 return PetscMax(a[0], PetscMax(a[1], a[2])); 49 } 50 static PetscInt Min3(const PetscInt a[3]) { 51 return PetscMin(a[0], PetscMin(a[1], a[2])); 52 } 53 static void GlobalDof(const PetscInt p[3], const PetscInt irank[3], 54 PetscInt degree, const PetscInt melem[3], 55 PetscInt mdof[3]) { 56 for (int d=0; d<3; d++) 57 mdof[d] = degree*melem[d] + (irank[d] == p[d]-1); 58 } 59 static PetscInt GlobalStart(const PetscInt p[3], const PetscInt irank[3], 60 PetscInt degree, const PetscInt melem[3]) { 61 PetscInt start = 0; 62 // Dumb brute-force is easier to read 63 for (PetscInt i=0; i<p[0]; i++) { 64 for (PetscInt j=0; j<p[1]; j++) { 65 for (PetscInt k=0; k<p[2]; k++) { 66 PetscInt mdof[3], ijkrank[] = {i,j,k}; 67 if (i == irank[0] && j == irank[1] && k == irank[2]) return start; 68 GlobalDof(p, ijkrank, degree, melem, mdof); 69 start += mdof[0] * mdof[1] * mdof[2]; 70 } 71 } 72 } 73 return -1; 74 } 75 static int CreateRestriction(Ceed ceed, const CeedInt melem[3], 76 CeedInt P, CeedInt ncomp, 77 CeedElemRestriction *Erestrict) { 78 const PetscInt Nelem = melem[0]*melem[1]*melem[2]; 79 PetscInt mdof[3], *idx, *idxp; 80 81 for (int d=0; d<3; d++) mdof[d] = melem[d]*(P-1) + 1; 82 idxp = idx = malloc(Nelem*P*P*P*sizeof idx[0]); 83 for (CeedInt i=0; i<melem[0]; i++) { 84 for (CeedInt j=0; j<melem[1]; j++) { 85 for (CeedInt k=0; k<melem[2]; k++,idxp += P*P*P) { 86 for (CeedInt ii=0; ii<P; ii++) { 87 for (CeedInt jj=0; jj<P; jj++) { 88 for (CeedInt kk=0; kk<P; kk++) { 89 if (0) { // This is the C-style (i,j,k) ordering that I prefer 90 idxp[(ii*P+jj)*P+kk] = (((i*(P-1)+ii)*mdof[1] 91 + (j*(P-1)+jj))*mdof[2] 92 + (k*(P-1)+kk)); 93 } else { // (k,j,i) ordering for consistency with MFEM example 94 idxp[ii+P*(jj+P*kk)] = (((i*(P-1)+ii)*mdof[1] 95 + (j*(P-1)+jj))*mdof[2] 96 + (k*(P-1)+kk)); 97 } 98 } 99 } 100 } 101 } 102 } 103 } 104 CeedElemRestrictionCreate(ceed, Nelem, P*P*P, mdof[0]*mdof[1]*mdof[2], ncomp, 105 CEED_MEM_HOST, CEED_OWN_POINTER, idx, Erestrict); 106 PetscFunctionReturn(0); 107 } 108 109 // Data for PETSc 110 typedef struct User_ *User; 111 struct User_ { 112 MPI_Comm comm; 113 VecScatter ltog; // Scatter for all entries 114 VecScatter ltog0; // Skip Dirichlet values 115 VecScatter gtogD; // global-to-global; only Dirichlet values 116 Vec Xloc, Yloc; 117 CeedVector xceed, yceed; 118 CeedOperator op; 119 CeedVector rho; 120 Ceed ceed; 121 }; 122 123 // BP Options 124 typedef enum { 125 CEED_BP1 = 0, CEED_BP2 = 1, CEED_BP3 = 2, 126 CEED_BP4 = 3, CEED_BP5 = 4, CEED_BP6 = 5 127 } bpType; 128 static const char *const bpTypes[] = {"bp1","bp2","bp3","bp4","bp5","bp6", 129 "bpType","CEED_BP",0}; 130 131 // BP specific data 132 typedef struct { 133 CeedInt vscale, qdatasize, qextra; 134 CeedQFunctionUser setup, apply, error; 135 const char setupfname[PETSC_MAX_PATH_LEN], applyfname[PETSC_MAX_PATH_LEN], 136 errorfname[PETSC_MAX_PATH_LEN]; 137 CeedEvalMode inmode, outmode; 138 } bpData; 139 140 bpData bpOptions[6] = { 141 [CEED_BP1] = { 142 .vscale = 1, 143 .qdatasize = 1, 144 .qextra = 2, 145 .setup = SetupMass, 146 .apply = Mass, 147 .error = Error, 148 .setupfname = PATH(bp1.h:SetupMass), 149 .applyfname = PATH(bp1.h:Mass), 150 .errorfname = PATH(common.h:Error), 151 .inmode = CEED_EVAL_INTERP, 152 .outmode = CEED_EVAL_INTERP}, 153 [CEED_BP2] = { 154 .vscale = 3, 155 .qdatasize = 1, 156 .qextra = 2, 157 .setup = SetupMass3, 158 .apply = Mass3, 159 .error = Error3, 160 .setupfname = PATH(bp2.h:SetupMass3), 161 .applyfname = PATH(bp2.h:Mass3), 162 .errorfname = PATH(common.h:Error3), 163 .inmode = CEED_EVAL_INTERP, 164 .outmode = CEED_EVAL_INTERP}, 165 [CEED_BP3] = { 166 .vscale = 1, 167 .qdatasize = 6, 168 .qextra = 2, 169 .setup = SetupDiff, 170 .apply = Diff, 171 .error = Error, 172 .setupfname = PATH(bp3.h:SetupDiff), 173 .applyfname = PATH(bp3.h:Diff), 174 .errorfname = PATH(common.h:Error), 175 .inmode = CEED_EVAL_GRAD, 176 .outmode = CEED_EVAL_GRAD}, 177 [CEED_BP4] = { 178 .vscale = 3, 179 .qdatasize = 6, 180 .qextra = 2, 181 .setup = SetupDiff3, 182 .apply = Diff3, 183 .error = Error, 184 .setupfname = PATH(bp4.h:SetupDiff3), 185 .applyfname = PATH(bp4.h:Diff), 186 .errorfname = PATH(common.h:Error3), 187 .inmode = CEED_EVAL_GRAD, 188 .outmode = CEED_EVAL_GRAD}, 189 [CEED_BP5] = { 190 .vscale = 1, 191 .qdatasize = 6, 192 .qextra = 1, 193 .setup = SetupDiff, 194 .apply = Diff, 195 .error = Error, 196 .setupfname = PATH(bp3.h:SetupDiff), 197 .applyfname = PATH(bp3.h:Diff), 198 .errorfname = PATH(common.h:Error), 199 .inmode = CEED_EVAL_GRAD, 200 .outmode = CEED_EVAL_GRAD}, 201 [CEED_BP6] = { 202 .vscale = 3, 203 .qdatasize = 6, 204 .qextra = 1, 205 .setup = SetupDiff3, 206 .apply = Diff3, 207 .error = Error, 208 .setupfname = PATH(bp4.h:SetupDiff3), 209 .applyfname = PATH(bp4.h:Diff), 210 .errorfname = PATH(common.h:Error3), 211 .inmode = CEED_EVAL_GRAD, 212 .outmode = CEED_EVAL_GRAD} 213 }; 214 215 // This function uses libCEED to compute the action of the mass matrix 216 static PetscErrorCode MatMult_Mass(Mat A, Vec X, Vec Y) { 217 PetscErrorCode ierr; 218 User user; 219 PetscScalar *x, *y; 220 221 PetscFunctionBeginUser; 222 ierr = MatShellGetContext(A, &user); CHKERRQ(ierr); 223 ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES, 224 SCATTER_REVERSE); CHKERRQ(ierr); 225 ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES, SCATTER_REVERSE); 226 CHKERRQ(ierr); 227 ierr = VecZeroEntries(user->Yloc); CHKERRQ(ierr); 228 229 ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 230 ierr = VecGetArray(user->Yloc, &y); CHKERRQ(ierr); 231 CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x); 232 CeedVectorSetArray(user->yceed, CEED_MEM_HOST, CEED_USE_POINTER, y); 233 234 CeedOperatorApply(user->op, user->xceed, user->yceed, 235 CEED_REQUEST_IMMEDIATE); 236 ierr = CeedVectorSyncArray(user->yceed, CEED_MEM_HOST); CHKERRQ(ierr); 237 238 ierr = VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 239 ierr = VecRestoreArray(user->Yloc, &y); CHKERRQ(ierr); 240 241 if (Y) { 242 ierr = VecZeroEntries(Y); CHKERRQ(ierr); 243 ierr = VecScatterBegin(user->ltog, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 244 CHKERRQ(ierr); 245 ierr = VecScatterEnd(user->ltog, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 246 CHKERRQ(ierr); 247 } 248 PetscFunctionReturn(0); 249 } 250 251 // This function uses libCEED to compute the action of the Laplacian with 252 // Dirichlet boundary conditions 253 static PetscErrorCode MatMult_Diff(Mat A, Vec X, Vec Y) { 254 PetscErrorCode ierr; 255 User user; 256 PetscScalar *x, *y; 257 258 PetscFunctionBeginUser; 259 ierr = MatShellGetContext(A, &user); CHKERRQ(ierr); 260 ierr = VecScatterBegin(user->ltog0, X, user->Xloc, INSERT_VALUES, 261 SCATTER_REVERSE); CHKERRQ(ierr); 262 ierr = VecScatterEnd(user->ltog0, X, user->Xloc, INSERT_VALUES, 263 SCATTER_REVERSE); 264 CHKERRQ(ierr); 265 ierr = VecZeroEntries(user->Yloc); CHKERRQ(ierr); 266 267 ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 268 ierr = VecGetArray(user->Yloc, &y); CHKERRQ(ierr); 269 CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x); 270 CeedVectorSetArray(user->yceed, CEED_MEM_HOST, CEED_USE_POINTER, y); 271 272 CeedOperatorApply(user->op, user->xceed, user->yceed, 273 CEED_REQUEST_IMMEDIATE); 274 ierr = CeedVectorSyncArray(user->yceed, CEED_MEM_HOST); CHKERRQ(ierr); 275 276 ierr = VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 277 ierr = VecRestoreArray(user->Yloc, &y); CHKERRQ(ierr); 278 279 ierr = VecZeroEntries(Y); CHKERRQ(ierr); 280 ierr = VecScatterBegin(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD); 281 CHKERRQ(ierr); 282 ierr = VecScatterEnd(user->gtogD, X, Y, INSERT_VALUES, SCATTER_FORWARD); 283 CHKERRQ(ierr); 284 ierr = VecScatterBegin(user->ltog0, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 285 CHKERRQ(ierr); 286 ierr = VecScatterEnd(user->ltog0, user->Yloc, Y, ADD_VALUES, SCATTER_FORWARD); 287 CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 // This function calculates the error in the final solution 292 static PetscErrorCode ComputeErrorMax(User user, CeedOperator op_error, Vec X, 293 CeedVector target, PetscReal *maxerror) { 294 PetscErrorCode ierr; 295 PetscScalar *x; 296 CeedVector collocated_error; 297 CeedInt length; 298 299 PetscFunctionBeginUser; 300 CeedVectorGetLength(target, &length); 301 CeedVectorCreate(user->ceed, length, &collocated_error); 302 ierr = VecScatterBegin(user->ltog, X, user->Xloc, INSERT_VALUES, 303 SCATTER_REVERSE); CHKERRQ(ierr); 304 ierr = VecScatterEnd(user->ltog, X, user->Xloc, INSERT_VALUES, SCATTER_REVERSE); 305 CHKERRQ(ierr); 306 ierr = VecGetArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 307 CeedVectorSetArray(user->xceed, CEED_MEM_HOST, CEED_USE_POINTER, x); 308 CeedOperatorApply(op_error, user->xceed, collocated_error, 309 CEED_REQUEST_IMMEDIATE); 310 VecRestoreArrayRead(user->Xloc, (const PetscScalar **)&x); CHKERRQ(ierr); 311 312 *maxerror = 0; 313 const CeedScalar *e; 314 CeedVectorGetArrayRead(collocated_error, CEED_MEM_HOST, &e); 315 for (CeedInt i=0; i<length; i++) { 316 *maxerror = PetscMax(*maxerror, PetscAbsScalar(e[i])); 317 } 318 CeedVectorRestoreArrayRead(collocated_error, &e); 319 ierr = MPI_Allreduce(MPI_IN_PLACE, &maxerror, 320 1, MPIU_SCALAR, MPIU_MAX, user->comm); CHKERRQ(ierr); 321 CeedVectorDestroy(&collocated_error); 322 PetscFunctionReturn(0); 323 } 324 325 int main(int argc, char **argv) { 326 PetscInt ierr; 327 MPI_Comm comm; 328 char ceedresource[PETSC_MAX_PATH_LEN] = "/cpu/self"; 329 PetscInt degree, qextra, localdof, localelem, melem[3], mdof[3], p[3], 330 irank[3], ldof[3], lsize, vscale = 1; 331 PetscScalar *r; 332 PetscBool test_mode, benchmark_mode; 333 PetscMPIInt size, rank; 334 VecScatter ltog, ltog0, gtogD;; 335 Ceed ceed; 336 CeedBasis basisx, basisu; 337 CeedElemRestriction Erestrictx, Erestrictu, Erestrictxi, Erestrictui, 338 Erestrictqdi; 339 CeedQFunction qf_setup, qf_apply, qf_error; 340 CeedOperator op_setup, op_apply, op_error; 341 CeedVector xcoord, rho, rhsceed, target; 342 CeedInt P, Q; 343 Vec X, Xloc, rhs, rhsloc; 344 Mat mat; 345 KSP ksp; 346 User user; 347 double my_rt_start, my_rt, rt_min, rt_max; 348 bpType bpChoice; 349 350 ierr = PetscInitialize(&argc, &argv, NULL, help); 351 if (ierr) return ierr; 352 comm = PETSC_COMM_WORLD; 353 ierr = PetscOptionsBegin(comm, NULL, "CEED BPs in PETSc", NULL); CHKERRQ(ierr); 354 bpChoice = CEED_BP1; 355 ierr = PetscOptionsEnum("-problem", 356 "CEED benchmark problem to solve", NULL, 357 bpTypes, (PetscEnum)bpChoice, (PetscEnum*)&bpChoice, 358 NULL); CHKERRQ(ierr); 359 vscale = bpOptions[bpChoice].vscale; 360 test_mode = PETSC_FALSE; 361 ierr = PetscOptionsBool("-test", 362 "Testing mode (do not print unless error is large)", 363 NULL, test_mode, &test_mode, NULL); CHKERRQ(ierr); 364 benchmark_mode = PETSC_FALSE; 365 ierr = PetscOptionsBool("-benchmark", 366 "Benchmarking mode (prints benchmark statistics)", 367 NULL, benchmark_mode, &benchmark_mode, NULL); 368 CHKERRQ(ierr); 369 degree = test_mode ? 3 : 1; 370 ierr = PetscOptionsInt("-degree", "Polynomial degree of tensor product basis", 371 NULL, degree, °ree, NULL); CHKERRQ(ierr); 372 qextra = bpOptions[bpChoice].qextra; 373 ierr = PetscOptionsInt("-qextra", "Number of extra quadrature points", 374 NULL, qextra, &qextra, NULL); CHKERRQ(ierr); 375 ierr = PetscOptionsString("-ceed", "CEED resource specifier", 376 NULL, ceedresource, ceedresource, 377 sizeof(ceedresource), NULL); CHKERRQ(ierr); 378 localdof = 1000; 379 ierr = PetscOptionsInt("-local", 380 "Target number of locally owned degrees of freedom per process", 381 NULL, localdof, &localdof, NULL); CHKERRQ(ierr); 382 ierr = PetscOptionsEnd(); CHKERRQ(ierr); 383 384 // Determine size of process grid 385 ierr = MPI_Comm_size(comm, &size); CHKERRQ(ierr); 386 Split3(size, p, false); 387 388 // Find a nicely composite number of elements no less than localdof 389 for (localelem = PetscMax(1, localdof / (degree*degree*degree)); ; 390 localelem++) { 391 Split3(localelem, melem, true); 392 if (Max3(melem) / Min3(melem) <= 2) break; 393 } 394 395 // Find my location in the process grid 396 ierr = MPI_Comm_rank(comm, &rank); CHKERRQ(ierr); 397 for (int d=0,rankleft=rank; d<3; d++) { 398 const int pstride[3] = {p[1] *p[2], p[2], 1}; 399 irank[d] = rankleft / pstride[d]; 400 rankleft -= irank[d] * pstride[d]; 401 } 402 403 GlobalDof(p, irank, degree, melem, mdof); 404 405 ierr = VecCreate(comm, &X); CHKERRQ(ierr); 406 ierr = VecSetSizes(X, mdof[0]*mdof[1]*mdof[2]*vscale, PETSC_DECIDE); 407 CHKERRQ(ierr); 408 ierr = VecSetUp(X); CHKERRQ(ierr); 409 410 if (!test_mode) { 411 CeedInt gsize; 412 ierr = VecGetSize(X, &gsize); CHKERRQ(ierr); 413 ierr = PetscPrintf(comm, "Global dofs: %D\n", gsize/vscale); CHKERRQ(ierr); 414 ierr = PetscPrintf(comm, "Process decomposition: %D %D %D\n", 415 p[0], p[1], p[2]); CHKERRQ(ierr); 416 ierr = PetscPrintf(comm, "Local elements: %D = %D %D %D\n", localelem, 417 melem[0], melem[1], melem[2]); CHKERRQ(ierr); 418 ierr = PetscPrintf(comm, "Owned dofs: %D = %D %D %D\n", 419 mdof[0]*mdof[1]*mdof[2], mdof[0], mdof[1], mdof[2]); CHKERRQ(ierr); 420 } 421 422 { 423 lsize = 1; 424 for (int d=0; d<3; d++) { 425 ldof[d] = melem[d]*degree + 1; 426 lsize *= ldof[d]; 427 } 428 ierr = VecCreate(PETSC_COMM_SELF, &Xloc); CHKERRQ(ierr); 429 ierr = VecSetSizes(Xloc, lsize*vscale, PETSC_DECIDE); CHKERRQ(ierr); 430 ierr = VecSetUp(Xloc); CHKERRQ(ierr); 431 432 // Create local-to-global scatter 433 PetscInt *ltogind, *ltogind0, *locind, l0count; 434 IS ltogis, ltogis0, locis; 435 PetscInt gstart[2][2][2], gmdof[2][2][2][3]; 436 437 for (int i=0; i<2; i++) { 438 for (int j=0; j<2; j++) { 439 for (int k=0; k<2; k++) { 440 PetscInt ijkrank[3] = {irank[0]+i, irank[1]+j, irank[2]+k}; 441 gstart[i][j][k] = GlobalStart(p, ijkrank, degree, melem); 442 GlobalDof(p, ijkrank, degree, melem, gmdof[i][j][k]); 443 } 444 } 445 } 446 447 ierr = PetscMalloc1(lsize, <ogind); CHKERRQ(ierr); 448 ierr = PetscMalloc1(lsize, <ogind0); CHKERRQ(ierr); 449 ierr = PetscMalloc1(lsize, &locind); CHKERRQ(ierr); 450 l0count = 0; 451 for (PetscInt i=0,ir,ii; ir=i>=mdof[0], ii=i-ir*mdof[0], i<ldof[0]; i++) { 452 for (PetscInt j=0,jr,jj; jr=j>=mdof[1], jj=j-jr*mdof[1], j<ldof[1]; j++) { 453 for (PetscInt k=0,kr,kk; kr=k>=mdof[2], kk=k-kr*mdof[2], k<ldof[2]; k++) { 454 PetscInt here = (i*ldof[1]+j)*ldof[2]+k; 455 ltogind[here] = 456 gstart[ir][jr][kr] + (ii*gmdof[ir][jr][kr][1]+jj)*gmdof[ir][jr][kr][2]+kk; 457 if ((irank[0] == 0 && i == 0) 458 || (irank[1] == 0 && j == 0) 459 || (irank[2] == 0 && k == 0) 460 || (irank[0]+1 == p[0] && i+1 == ldof[0]) 461 || (irank[1]+1 == p[1] && j+1 == ldof[1]) 462 || (irank[2]+1 == p[2] && k+1 == ldof[2])) 463 continue; 464 ltogind0[l0count] = ltogind[here]; 465 locind[l0count++] = here; 466 } 467 } 468 } 469 ierr = ISCreateBlock(comm, vscale, lsize, ltogind, PETSC_OWN_POINTER, 470 <ogis); CHKERRQ(ierr); 471 ierr = VecScatterCreate(Xloc, NULL, X, ltogis, <og); CHKERRQ(ierr); 472 CHKERRQ(ierr); 473 ierr = ISCreateBlock(comm, vscale, l0count, ltogind0, PETSC_OWN_POINTER, 474 <ogis0); CHKERRQ(ierr); 475 ierr = ISCreateBlock(comm, vscale, l0count, locind, PETSC_OWN_POINTER, 476 &locis); CHKERRQ(ierr); 477 ierr = VecScatterCreate(Xloc, locis, X, ltogis0, <og0); CHKERRQ(ierr); 478 { 479 // Create global-to-global scatter for Dirichlet values (everything not in 480 // ltogis0, which is the range of ltog0) 481 PetscInt xstart, xend, *indD, countD = 0; 482 IS isD; 483 const PetscScalar *x; 484 ierr = VecZeroEntries(Xloc); CHKERRQ(ierr); 485 ierr = VecSet(X, 1.0); CHKERRQ(ierr); 486 ierr = VecScatterBegin(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD); 487 CHKERRQ(ierr); 488 ierr = VecScatterEnd(ltog0, Xloc, X, INSERT_VALUES, SCATTER_FORWARD); 489 CHKERRQ(ierr); 490 ierr = VecGetOwnershipRange(X, &xstart, &xend); CHKERRQ(ierr); 491 ierr = PetscMalloc1(xend-xstart, &indD); CHKERRQ(ierr); 492 ierr = VecGetArrayRead(X, &x); CHKERRQ(ierr); 493 for (PetscInt i=0; i<xend-xstart; i++) { 494 if (x[i] == 1.) indD[countD++] = xstart + i; 495 } 496 ierr = VecRestoreArrayRead(X, &x); CHKERRQ(ierr); 497 ierr = ISCreateGeneral(comm, countD, indD, PETSC_COPY_VALUES, &isD); 498 CHKERRQ(ierr); 499 ierr = PetscFree(indD); CHKERRQ(ierr); 500 ierr = VecScatterCreate(X, isD, X, isD, >ogD); CHKERRQ(ierr); 501 ierr = ISDestroy(&isD); CHKERRQ(ierr); 502 } 503 ierr = ISDestroy(<ogis); CHKERRQ(ierr); 504 ierr = ISDestroy(<ogis0); CHKERRQ(ierr); 505 ierr = ISDestroy(&locis); CHKERRQ(ierr); 506 } 507 508 // Set up libCEED 509 CeedInit(ceedresource, &ceed); 510 P = degree + 1; 511 Q = P + qextra; 512 CeedBasisCreateTensorH1Lagrange(ceed, 3, vscale, P, Q, CEED_GAUSS, &basisu); 513 CeedBasisCreateTensorH1Lagrange(ceed, 3, 3, 2, Q, CEED_GAUSS, &basisx); 514 515 CreateRestriction(ceed, melem, P, vscale, &Erestrictu); 516 CreateRestriction(ceed, melem, 2, 3, &Erestrictx); 517 CeedInt nelem = melem[0]*melem[1]*melem[2]; 518 CeedElemRestrictionCreateIdentity(ceed, nelem, Q*Q*Q, nelem*Q*Q*Q, vscale, 519 &Erestrictui); 520 CeedElemRestrictionCreateIdentity(ceed, nelem, 521 bpOptions[bpChoice].qdatasize*Q*Q*Q, 522 bpOptions[bpChoice].qdatasize*nelem*Q*Q*Q, 523 1, &Erestrictqdi); 524 CeedElemRestrictionCreateIdentity(ceed, nelem, Q*Q*Q, nelem*Q*Q*Q, 1, 525 &Erestrictxi); 526 { 527 CeedScalar *xloc; 528 CeedInt shape[3] = {melem[0]+1, melem[1]+1, melem[2]+1}, len = 529 shape[0]*shape[1]*shape[2]; 530 xloc = malloc(len*3*sizeof xloc[0]); 531 for (CeedInt i=0; i<shape[0]; i++) { 532 for (CeedInt j=0; j<shape[1]; j++) { 533 for (CeedInt k=0; k<shape[2]; k++) { 534 xloc[((i*shape[1]+j)*shape[2]+k) + 0*len] = 1.*(irank[0]*melem[0]+i) / 535 (p[0]*melem[0]); 536 xloc[((i*shape[1]+j)*shape[2]+k) + 1*len] = 1.*(irank[1]*melem[1]+j) / 537 (p[1]*melem[1]); 538 xloc[((i*shape[1]+j)*shape[2]+k) + 2*len] = 1.*(irank[2]*melem[2]+k) / 539 (p[2]*melem[2]); 540 } 541 } 542 } 543 CeedVectorCreate(ceed, len*3, &xcoord); 544 CeedVectorSetArray(xcoord, CEED_MEM_HOST, CEED_OWN_POINTER, xloc); 545 } 546 547 // Create the Q-function that builds the operator (i.e. computes its 548 // quadrature data) and set its context data. 549 CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpChoice].setup, 550 bpOptions[bpChoice].setupfname, &qf_setup); 551 CeedQFunctionAddInput(qf_setup, "x", 3, CEED_EVAL_INTERP); 552 CeedQFunctionAddInput(qf_setup, "dx", 3, CEED_EVAL_GRAD); 553 CeedQFunctionAddInput(qf_setup, "weight", 1, CEED_EVAL_WEIGHT); 554 CeedQFunctionAddOutput(qf_setup, "rho", bpOptions[bpChoice].qdatasize, 555 CEED_EVAL_NONE); 556 CeedQFunctionAddOutput(qf_setup, "true_soln", vscale, CEED_EVAL_NONE); 557 CeedQFunctionAddOutput(qf_setup, "rhs", vscale, CEED_EVAL_INTERP); 558 559 // Set up PDE operator 560 CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpChoice].apply, 561 bpOptions[bpChoice].applyfname, &qf_apply); 562 // Add inputs and outputs 563 CeedQFunctionAddInput(qf_apply, "u", vscale, bpOptions[bpChoice].inmode); 564 CeedQFunctionAddInput(qf_apply, "rho", bpOptions[bpChoice].qdatasize, 565 CEED_EVAL_NONE); 566 CeedQFunctionAddOutput(qf_apply, "v", vscale, bpOptions[bpChoice].outmode); 567 568 // Create the error qfunction 569 CeedQFunctionCreateInterior(ceed, 1, bpOptions[bpChoice].error, 570 bpOptions[bpChoice].errorfname, &qf_error); 571 CeedQFunctionAddInput(qf_error, "u", vscale, CEED_EVAL_INTERP); 572 CeedQFunctionAddInput(qf_error, "true_soln", vscale, CEED_EVAL_NONE); 573 CeedQFunctionAddOutput(qf_error, "error", vscale, CEED_EVAL_NONE); 574 575 // Create the persistent vectors that will be needed in setup 576 CeedInt Nqpts, Nelem = melem[0]*melem[1]*melem[2]; 577 CeedBasisGetNumQuadraturePoints(basisu, &Nqpts); 578 CeedVectorCreate(ceed, bpOptions[bpChoice].qdatasize*Nelem*Nqpts, &rho); 579 CeedVectorCreate(ceed, Nelem*Nqpts*vscale, &target); 580 CeedVectorCreate(ceed, lsize*vscale, &rhsceed); 581 582 // Create the operator that builds the quadrature data for the ceed operator. 583 CeedOperatorCreate(ceed, qf_setup, NULL, NULL, &op_setup); 584 CeedOperatorSetField(op_setup, "x", Erestrictx, CEED_NOTRANSPOSE, 585 basisx, CEED_VECTOR_ACTIVE); 586 CeedOperatorSetField(op_setup, "dx", Erestrictx, CEED_NOTRANSPOSE, 587 basisx, CEED_VECTOR_ACTIVE); 588 CeedOperatorSetField(op_setup, "weight", Erestrictxi, CEED_NOTRANSPOSE, 589 basisx, CEED_VECTOR_NONE); 590 CeedOperatorSetField(op_setup, "rho", Erestrictqdi, CEED_NOTRANSPOSE, 591 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 592 CeedOperatorSetField(op_setup, "true_soln", Erestrictui, CEED_NOTRANSPOSE, 593 CEED_BASIS_COLLOCATED, target); 594 CeedOperatorSetField(op_setup, "rhs", Erestrictu, CEED_TRANSPOSE, 595 basisu, rhsceed); 596 597 // Create the mass or diff operator. 598 CeedOperatorCreate(ceed, qf_apply, NULL, NULL, &op_apply); 599 CeedOperatorSetField(op_apply, "u", Erestrictu, CEED_TRANSPOSE, 600 basisu, CEED_VECTOR_ACTIVE); 601 CeedOperatorSetField(op_apply, "rho", Erestrictqdi, CEED_NOTRANSPOSE, 602 CEED_BASIS_COLLOCATED, rho); 603 CeedOperatorSetField(op_apply, "v", Erestrictu, CEED_TRANSPOSE, 604 basisu, CEED_VECTOR_ACTIVE); 605 606 // Create the error operator 607 CeedOperatorCreate(ceed, qf_error, NULL, NULL, &op_error); 608 CeedOperatorSetField(op_error, "u", Erestrictu, CEED_TRANSPOSE, 609 basisu, CEED_VECTOR_ACTIVE); 610 CeedOperatorSetField(op_error, "true_soln", Erestrictui, CEED_NOTRANSPOSE, 611 CEED_BASIS_COLLOCATED, target); 612 CeedOperatorSetField(op_error, "error", Erestrictui, CEED_NOTRANSPOSE, 613 CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); 614 615 616 // Set up Mat 617 ierr = PetscMalloc1(1, &user); CHKERRQ(ierr); 618 user->comm = comm; 619 user->ltog = ltog; 620 if (bpChoice != CEED_BP1 && bpChoice != CEED_BP2) { 621 user->ltog0 = ltog0; 622 user->gtogD = gtogD; 623 } 624 user->Xloc = Xloc; 625 ierr = VecDuplicate(Xloc, &user->Yloc); CHKERRQ(ierr); 626 CeedVectorCreate(ceed, lsize*vscale, &user->xceed); 627 CeedVectorCreate(ceed, lsize*vscale, &user->yceed); 628 user->op = op_apply; 629 user->rho = rho; 630 user->ceed = ceed; 631 632 ierr = MatCreateShell(comm, mdof[0]*mdof[1]*mdof[2]*vscale, 633 mdof[0]*mdof[1]*mdof[2]*vscale, 634 PETSC_DECIDE, PETSC_DECIDE, user, &mat); CHKERRQ(ierr); 635 if (bpChoice == CEED_BP1 || bpChoice == CEED_BP2) { 636 ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Mass); 637 CHKERRQ(ierr); 638 } else { 639 ierr = MatShellSetOperation(mat, MATOP_MULT, (void(*)(void))MatMult_Diff); 640 CHKERRQ(ierr); 641 } 642 ierr = MatCreateVecs(mat, &rhs, NULL); CHKERRQ(ierr); 643 644 // Get RHS vector 645 ierr = VecDuplicate(Xloc, &rhsloc); CHKERRQ(ierr); 646 ierr = VecZeroEntries(rhsloc); CHKERRQ(ierr); 647 ierr = VecGetArray(rhsloc, &r); CHKERRQ(ierr); 648 CeedVectorSetArray(rhsceed, CEED_MEM_HOST, CEED_USE_POINTER, r); 649 650 // Setup rho, rhs, and target 651 CeedOperatorApply(op_setup, xcoord, rho, CEED_REQUEST_IMMEDIATE); 652 ierr = CeedVectorSyncArray(rhsceed, CEED_MEM_HOST); CHKERRQ(ierr); 653 CeedVectorDestroy(&xcoord); 654 655 // Gather RHS 656 ierr = VecRestoreArray(rhsloc, &r); CHKERRQ(ierr); 657 ierr = VecZeroEntries(rhs); CHKERRQ(ierr); 658 ierr = VecScatterBegin(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD); 659 CHKERRQ(ierr); 660 ierr = VecScatterEnd(ltog, rhsloc, rhs, ADD_VALUES, SCATTER_FORWARD); 661 CHKERRQ(ierr); 662 CeedVectorDestroy(&rhsceed); 663 664 ierr = KSPCreate(comm, &ksp); CHKERRQ(ierr); 665 { 666 PC pc; 667 ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); 668 if (bpChoice == CEED_BP1 || bpChoice == CEED_BP2) { 669 ierr = PCSetType(pc, PCJACOBI); CHKERRQ(ierr); 670 ierr = PCJacobiSetType(pc, PC_JACOBI_ROWSUM); CHKERRQ(ierr); 671 } else { 672 ierr = PCSetType(pc, PCNONE); CHKERRQ(ierr); 673 } 674 ierr = KSPSetType(ksp, KSPCG); CHKERRQ(ierr); 675 ierr = KSPSetNormType(ksp, KSP_NORM_NATURAL); CHKERRQ(ierr); 676 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 677 PETSC_DEFAULT); CHKERRQ(ierr); 678 } 679 ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); 680 ierr = KSPSetOperators(ksp, mat, mat); CHKERRQ(ierr); 681 // First run, if benchmarking 682 if (benchmark_mode) { 683 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 1); 684 CHKERRQ(ierr); 685 my_rt_start = MPI_Wtime(); 686 ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 687 my_rt = MPI_Wtime() - my_rt_start; 688 // Set maxits based on first iteration timing 689 if (my_rt > 0.02) { 690 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 5); 691 CHKERRQ(ierr); 692 } else { 693 ierr = KSPSetTolerances(ksp, 1e-10, PETSC_DEFAULT, PETSC_DEFAULT, 20); 694 CHKERRQ(ierr); 695 } 696 } 697 // Timed solve 698 my_rt_start = MPI_Wtime(); 699 ierr = KSPSolve(ksp, rhs, X); CHKERRQ(ierr); 700 my_rt = MPI_Wtime() - my_rt_start; 701 { 702 KSPType ksptype; 703 KSPConvergedReason reason; 704 PetscReal rnorm; 705 PetscInt its; 706 ierr = KSPGetType(ksp, &ksptype); CHKERRQ(ierr); 707 ierr = KSPGetConvergedReason(ksp, &reason); CHKERRQ(ierr); 708 ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); 709 ierr = KSPGetResidualNorm(ksp, &rnorm); CHKERRQ(ierr); 710 if (!test_mode || reason < 0 || rnorm > 1e-8) { 711 ierr = PetscPrintf(comm, "KSP %s %s iterations %D rnorm %e\n", ksptype, 712 KSPConvergedReasons[reason], its, (double)rnorm); CHKERRQ(ierr); 713 } 714 if (benchmark_mode && (!test_mode)) { 715 CeedInt gsize; 716 ierr = VecGetSize(X, &gsize); CHKERRQ(ierr); 717 MPI_Reduce(&my_rt, &rt_min, 1, MPI_DOUBLE, MPI_MIN, 0, comm); 718 MPI_Reduce(&my_rt, &rt_max, 1, MPI_DOUBLE, MPI_MAX, 0, comm); 719 ierr = PetscPrintf(comm, 720 "CG solve time : %g (%g) sec.\n" 721 "DOFs/sec in CG : %g (%g) million.\n", 722 rt_max, rt_min, 723 1e-6*gsize*its/rt_max, 1e-6*gsize*its/rt_min); 724 CHKERRQ(ierr); 725 } 726 } 727 728 { 729 PetscReal maxerror; 730 ierr = ComputeErrorMax(user, op_error, X, target, &maxerror); CHKERRQ(ierr); 731 PetscReal tol = (bpChoice == CEED_BP1 || bpChoice == CEED_BP2) ? 5e-3 : 5e-2; 732 if (!test_mode || maxerror > tol) { 733 ierr = PetscPrintf(comm, "Pointwise error (max) %e\n", (double)maxerror); 734 CHKERRQ(ierr); 735 } 736 } 737 738 ierr = VecDestroy(&rhs); CHKERRQ(ierr); 739 ierr = VecDestroy(&rhsloc); CHKERRQ(ierr); 740 ierr = VecDestroy(&X); CHKERRQ(ierr); 741 ierr = VecDestroy(&user->Xloc); CHKERRQ(ierr); 742 ierr = VecDestroy(&user->Yloc); CHKERRQ(ierr); 743 ierr = VecScatterDestroy(<og); CHKERRQ(ierr); 744 ierr = VecScatterDestroy(<og0); CHKERRQ(ierr); 745 ierr = VecScatterDestroy(>ogD); CHKERRQ(ierr); 746 ierr = MatDestroy(&mat); CHKERRQ(ierr); 747 ierr = KSPDestroy(&ksp); CHKERRQ(ierr); 748 749 CeedVectorDestroy(&user->xceed); 750 CeedVectorDestroy(&user->yceed); 751 CeedVectorDestroy(&user->rho); 752 CeedVectorDestroy(&target); 753 CeedOperatorDestroy(&op_setup); 754 CeedOperatorDestroy(&op_apply); 755 CeedOperatorDestroy(&op_error); 756 CeedElemRestrictionDestroy(&Erestrictu); 757 CeedElemRestrictionDestroy(&Erestrictx); 758 CeedElemRestrictionDestroy(&Erestrictui); 759 CeedElemRestrictionDestroy(&Erestrictxi); 760 CeedElemRestrictionDestroy(&Erestrictqdi); 761 CeedQFunctionDestroy(&qf_setup); 762 CeedQFunctionDestroy(&qf_apply); 763 CeedQFunctionDestroy(&qf_error); 764 CeedBasisDestroy(&basisu); 765 CeedBasisDestroy(&basisx); 766 CeedDestroy(&ceed); 767 ierr = PetscFree(user); CHKERRQ(ierr); 768 return PetscFinalize(); 769 } 770