1 2 static char help[] = "This example solves a linear system in parallel with KSP. The matrix\n\ 3 uses arbitrary order polynomials for finite elements on the unit square. To test the parallel\n\ 4 matrix assembly, the matrix is intentionally laid out across processors\n\ 5 differently from the way it is assembled. Input arguments are:\n\ 6 -m <size> -p <order> : mesh size and polynomial order\n\n"; 7 8 /* Contributed by Travis Austin <austin@txcorp.com>, 2010. 9 based on src/ksp/ksp/tutorials/ex3.c 10 */ 11 12 #include <petscksp.h> 13 14 /* Declare user-defined routines */ 15 static PetscReal src(PetscReal, PetscReal); 16 static PetscReal ubdy(PetscReal, PetscReal); 17 static PetscReal polyBasisFunc(PetscInt, PetscInt, PetscReal *, PetscReal); 18 static PetscReal derivPolyBasisFunc(PetscInt, PetscInt, PetscReal *, PetscReal); 19 static PetscErrorCode Form1DElementMass(PetscReal, PetscInt, PetscReal *, PetscReal *, PetscScalar *); 20 static PetscErrorCode Form1DElementStiffness(PetscReal, PetscInt, PetscReal *, PetscReal *, PetscScalar *); 21 static PetscErrorCode Form2DElementMass(PetscInt, PetscScalar *, PetscScalar *); 22 static PetscErrorCode Form2DElementStiffness(PetscInt, PetscScalar *, PetscScalar *, PetscScalar *); 23 static PetscErrorCode FormNodalRhs(PetscInt, PetscReal, PetscReal, PetscReal, PetscReal *, PetscScalar *); 24 static PetscErrorCode FormNodalSoln(PetscInt, PetscReal, PetscReal, PetscReal, PetscReal *, PetscScalar *); 25 static void leggaulob(PetscReal, PetscReal, PetscReal[], PetscReal[], int); 26 static void qAndLEvaluation(int, PetscReal, PetscReal *, PetscReal *, PetscReal *); 27 28 int main(int argc, char **args) 29 { 30 PetscInt p = 2, m = 5; 31 PetscInt num1Dnodes, num2Dnodes; 32 PetscScalar *Ke1D, *Ke2D, *Me1D, *Me2D; 33 PetscScalar *r, *ue, val; 34 Vec u, ustar, b, q; 35 Mat A, Mass; 36 KSP ksp; 37 PetscInt M, N; 38 PetscMPIInt rank, size; 39 PetscReal x, y, h, norm; 40 PetscInt *idx, indx, count, *rows, i, j, k, start, end, its; 41 PetscReal *rowsx, *rowsy; 42 PetscReal *gllNode, *gllWgts; 43 44 PetscFunctionBeginUser; 45 PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 46 PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Options for p-FEM", ""); 47 PetscCall(PetscOptionsInt("-m", "Number of elements in each direction", "None", m, &m, NULL)); 48 PetscCall(PetscOptionsInt("-p", "Order of each element (tensor product basis)", "None", p, &p, NULL)); 49 PetscOptionsEnd(); 50 PetscCheck(p > 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Option -p value should be greater than zero"); 51 N = (p * m + 1) * (p * m + 1); /* dimension of matrix */ 52 M = m * m; /* number of elements */ 53 h = 1.0 / m; /* mesh width */ 54 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 55 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 56 57 /* Create stiffness matrix */ 58 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 59 PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N)); 60 PetscCall(MatSetFromOptions(A)); 61 PetscCall(MatSetUp(A)); 62 63 /* Create matrix */ 64 PetscCall(MatCreate(PETSC_COMM_WORLD, &Mass)); 65 PetscCall(MatSetSizes(Mass, PETSC_DECIDE, PETSC_DECIDE, N, N)); 66 PetscCall(MatSetFromOptions(Mass)); 67 PetscCall(MatSetUp(Mass)); 68 start = rank * (M / size) + ((M % size) < rank ? (M % size) : rank); 69 end = start + M / size + ((M % size) > rank); 70 71 /* Allocate element stiffness matrices */ 72 num1Dnodes = (p + 1); 73 num2Dnodes = num1Dnodes * num1Dnodes; 74 75 PetscCall(PetscMalloc1(num1Dnodes * num1Dnodes, &Me1D)); 76 PetscCall(PetscMalloc1(num1Dnodes * num1Dnodes, &Ke1D)); 77 PetscCall(PetscMalloc1(num2Dnodes * num2Dnodes, &Me2D)); 78 PetscCall(PetscMalloc1(num2Dnodes * num2Dnodes, &Ke2D)); 79 PetscCall(PetscMalloc1(num2Dnodes, &idx)); 80 PetscCall(PetscMalloc1(num2Dnodes, &r)); 81 PetscCall(PetscMalloc1(num2Dnodes, &ue)); 82 83 /* Allocate quadrature and create stiffness matrices */ 84 PetscCall(PetscMalloc1(p + 1, &gllNode)); 85 PetscCall(PetscMalloc1(p + 1, &gllWgts)); 86 leggaulob(0.0, 1.0, gllNode, gllWgts, p); /* Get GLL nodes and weights */ 87 PetscCall(Form1DElementMass(h, p, gllNode, gllWgts, Me1D)); 88 PetscCall(Form1DElementStiffness(h, p, gllNode, gllWgts, Ke1D)); 89 PetscCall(Form2DElementMass(p, Me1D, Me2D)); 90 PetscCall(Form2DElementStiffness(p, Ke1D, Me1D, Ke2D)); 91 92 /* Assemble matrix */ 93 for (i = start; i < end; i++) { 94 indx = 0; 95 for (k = 0; k < (p + 1); ++k) { 96 for (j = 0; j < (p + 1); ++j) idx[indx++] = p * (p * m + 1) * (i / m) + p * (i % m) + k * (p * m + 1) + j; 97 } 98 PetscCall(MatSetValues(A, num2Dnodes, idx, num2Dnodes, idx, Ke2D, ADD_VALUES)); 99 PetscCall(MatSetValues(Mass, num2Dnodes, idx, num2Dnodes, idx, Me2D, ADD_VALUES)); 100 } 101 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 102 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 103 PetscCall(MatAssemblyBegin(Mass, MAT_FINAL_ASSEMBLY)); 104 PetscCall(MatAssemblyEnd(Mass, MAT_FINAL_ASSEMBLY)); 105 106 PetscCall(PetscFree(Me1D)); 107 PetscCall(PetscFree(Ke1D)); 108 PetscCall(PetscFree(Me2D)); 109 PetscCall(PetscFree(Ke2D)); 110 111 /* Create right-hand-side and solution vectors */ 112 PetscCall(VecCreate(PETSC_COMM_WORLD, &u)); 113 PetscCall(VecSetSizes(u, PETSC_DECIDE, N)); 114 PetscCall(VecSetFromOptions(u)); 115 PetscCall(PetscObjectSetName((PetscObject)u, "Approx. Solution")); 116 PetscCall(VecDuplicate(u, &b)); 117 PetscCall(PetscObjectSetName((PetscObject)b, "Right hand side")); 118 PetscCall(VecDuplicate(u, &q)); 119 PetscCall(PetscObjectSetName((PetscObject)q, "Right hand side 2")); 120 PetscCall(VecDuplicate(b, &ustar)); 121 PetscCall(VecSet(u, 0.0)); 122 PetscCall(VecSet(b, 0.0)); 123 PetscCall(VecSet(q, 0.0)); 124 125 /* Assemble nodal right-hand-side and soln vector */ 126 for (i = start; i < end; i++) { 127 x = h * (i % m); 128 y = h * (i / m); 129 indx = 0; 130 for (k = 0; k < (p + 1); ++k) { 131 for (j = 0; j < (p + 1); ++j) idx[indx++] = p * (p * m + 1) * (i / m) + p * (i % m) + k * (p * m + 1) + j; 132 } 133 PetscCall(FormNodalRhs(p, x, y, h, gllNode, r)); 134 PetscCall(FormNodalSoln(p, x, y, h, gllNode, ue)); 135 PetscCall(VecSetValues(q, num2Dnodes, idx, r, INSERT_VALUES)); 136 PetscCall(VecSetValues(ustar, num2Dnodes, idx, ue, INSERT_VALUES)); 137 } 138 PetscCall(VecAssemblyBegin(q)); 139 PetscCall(VecAssemblyEnd(q)); 140 PetscCall(VecAssemblyBegin(ustar)); 141 PetscCall(VecAssemblyEnd(ustar)); 142 143 PetscCall(PetscFree(idx)); 144 PetscCall(PetscFree(r)); 145 PetscCall(PetscFree(ue)); 146 147 /* Get FE right-hand side vector */ 148 PetscCall(MatMult(Mass, q, b)); 149 150 /* Modify matrix and right-hand-side for Dirichlet boundary conditions */ 151 PetscCall(PetscMalloc1(4 * p * m, &rows)); 152 PetscCall(PetscMalloc1(4 * p * m, &rowsx)); 153 PetscCall(PetscMalloc1(4 * p * m, &rowsy)); 154 for (i = 0; i < p * m + 1; i++) { 155 rows[i] = i; /* bottom */ 156 rowsx[i] = (i / p) * h + gllNode[i % p] * h; 157 rowsy[i] = 0.0; 158 rows[3 * p * m - 1 + i] = (p * m) * (p * m + 1) + i; /* top */ 159 rowsx[3 * p * m - 1 + i] = (i / p) * h + gllNode[i % p] * h; 160 rowsy[3 * p * m - 1 + i] = 1.0; 161 } 162 count = p * m + 1; /* left side */ 163 indx = 1; 164 for (i = p * m + 1; i < (p * m) * (p * m + 1); i += (p * m + 1)) { 165 rows[count] = i; 166 rowsx[count] = 0.0; 167 rowsy[count++] = (indx / p) * h + gllNode[indx % p] * h; 168 indx++; 169 } 170 count = 2 * p * m; /* right side */ 171 indx = 1; 172 for (i = 2 * p * m + 1; i < (p * m) * (p * m + 1); i += (p * m + 1)) { 173 rows[count] = i; 174 rowsx[count] = 1.0; 175 rowsy[count++] = (indx / p) * h + gllNode[indx % p] * h; 176 indx++; 177 } 178 for (i = 0; i < 4 * p * m; i++) { 179 x = rowsx[i]; 180 y = rowsy[i]; 181 val = ubdy(x, y); 182 PetscCall(VecSetValues(b, 1, &rows[i], &val, INSERT_VALUES)); 183 PetscCall(VecSetValues(u, 1, &rows[i], &val, INSERT_VALUES)); 184 } 185 PetscCall(MatZeroRows(A, 4 * p * m, rows, 1.0, 0, 0)); 186 PetscCall(PetscFree(rows)); 187 PetscCall(PetscFree(rowsx)); 188 PetscCall(PetscFree(rowsy)); 189 190 PetscCall(VecAssemblyBegin(u)); 191 PetscCall(VecAssemblyEnd(u)); 192 PetscCall(VecAssemblyBegin(b)); 193 PetscCall(VecAssemblyEnd(b)); 194 195 /* Solve linear system */ 196 PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp)); 197 PetscCall(KSPSetOperators(ksp, A, A)); 198 PetscCall(KSPSetInitialGuessNonzero(ksp, PETSC_TRUE)); 199 PetscCall(KSPSetFromOptions(ksp)); 200 PetscCall(KSPSolve(ksp, b, u)); 201 202 /* Check error */ 203 PetscCall(VecAXPY(u, -1.0, ustar)); 204 PetscCall(VecNorm(u, NORM_2, &norm)); 205 PetscCall(KSPGetIterationNumber(ksp, &its)); 206 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)(norm * h), its)); 207 208 PetscCall(PetscFree(gllNode)); 209 PetscCall(PetscFree(gllWgts)); 210 211 PetscCall(KSPDestroy(&ksp)); 212 PetscCall(VecDestroy(&u)); 213 PetscCall(VecDestroy(&b)); 214 PetscCall(VecDestroy(&q)); 215 PetscCall(VecDestroy(&ustar)); 216 PetscCall(MatDestroy(&A)); 217 PetscCall(MatDestroy(&Mass)); 218 219 PetscCall(PetscFinalize()); 220 return 0; 221 } 222 223 /* --------------------------------------------------------------------- */ 224 225 /* 1d element stiffness mass matrix */ 226 static PetscErrorCode Form1DElementMass(PetscReal H, PetscInt P, PetscReal *gqn, PetscReal *gqw, PetscScalar *Me1D) 227 { 228 PetscInt i, j, k; 229 PetscInt indx; 230 231 PetscFunctionBeginUser; 232 for (j = 0; j < (P + 1); ++j) { 233 for (i = 0; i < (P + 1); ++i) { 234 indx = j * (P + 1) + i; 235 Me1D[indx] = 0.0; 236 for (k = 0; k < (P + 1); ++k) Me1D[indx] += H * gqw[k] * polyBasisFunc(P, i, gqn, gqn[k]) * polyBasisFunc(P, j, gqn, gqn[k]); 237 } 238 } 239 PetscFunctionReturn(PETSC_SUCCESS); 240 } 241 242 /* --------------------------------------------------------------------- */ 243 244 /* 1d element stiffness matrix for derivative */ 245 static PetscErrorCode Form1DElementStiffness(PetscReal H, PetscInt P, PetscReal *gqn, PetscReal *gqw, PetscScalar *Ke1D) 246 { 247 PetscInt i, j, k; 248 PetscInt indx; 249 250 PetscFunctionBeginUser; 251 for (j = 0; j < (P + 1); ++j) { 252 for (i = 0; i < (P + 1); ++i) { 253 indx = j * (P + 1) + i; 254 Ke1D[indx] = 0.0; 255 for (k = 0; k < (P + 1); ++k) Ke1D[indx] += (1. / H) * gqw[k] * derivPolyBasisFunc(P, i, gqn, gqn[k]) * derivPolyBasisFunc(P, j, gqn, gqn[k]); 256 } 257 } 258 PetscFunctionReturn(PETSC_SUCCESS); 259 } 260 261 /* --------------------------------------------------------------------- */ 262 263 /* element mass matrix */ 264 static PetscErrorCode Form2DElementMass(PetscInt P, PetscScalar *Me1D, PetscScalar *Me2D) 265 { 266 PetscInt i1, j1, i2, j2; 267 PetscInt indx1, indx2, indx3; 268 269 PetscFunctionBeginUser; 270 for (j2 = 0; j2 < (P + 1); ++j2) { 271 for (i2 = 0; i2 < (P + 1); ++i2) { 272 for (j1 = 0; j1 < (P + 1); ++j1) { 273 for (i1 = 0; i1 < (P + 1); ++i1) { 274 indx1 = j1 * (P + 1) + i1; 275 indx2 = j2 * (P + 1) + i2; 276 indx3 = (j2 * (P + 1) + j1) * (P + 1) * (P + 1) + (i2 * (P + 1) + i1); 277 Me2D[indx3] = Me1D[indx1] * Me1D[indx2]; 278 } 279 } 280 } 281 } 282 PetscFunctionReturn(PETSC_SUCCESS); 283 } 284 285 /* --------------------------------------------------------------------- */ 286 287 /* element stiffness for Laplacian */ 288 static PetscErrorCode Form2DElementStiffness(PetscInt P, PetscScalar *Ke1D, PetscScalar *Me1D, PetscScalar *Ke2D) 289 { 290 PetscInt i1, j1, i2, j2; 291 PetscInt indx1, indx2, indx3; 292 293 PetscFunctionBeginUser; 294 for (j2 = 0; j2 < (P + 1); ++j2) { 295 for (i2 = 0; i2 < (P + 1); ++i2) { 296 for (j1 = 0; j1 < (P + 1); ++j1) { 297 for (i1 = 0; i1 < (P + 1); ++i1) { 298 indx1 = j1 * (P + 1) + i1; 299 indx2 = j2 * (P + 1) + i2; 300 indx3 = (j2 * (P + 1) + j1) * (P + 1) * (P + 1) + (i2 * (P + 1) + i1); 301 Ke2D[indx3] = Ke1D[indx1] * Me1D[indx2] + Me1D[indx1] * Ke1D[indx2]; 302 } 303 } 304 } 305 } 306 PetscFunctionReturn(PETSC_SUCCESS); 307 } 308 309 /* --------------------------------------------------------------------- */ 310 311 static PetscErrorCode FormNodalRhs(PetscInt P, PetscReal x, PetscReal y, PetscReal H, PetscReal *nds, PetscScalar *r) 312 { 313 PetscInt i, j, indx; 314 315 PetscFunctionBeginUser; 316 indx = 0; 317 for (j = 0; j < (P + 1); ++j) { 318 for (i = 0; i < (P + 1); ++i) { 319 r[indx] = src(x + H * nds[i], y + H * nds[j]); 320 indx++; 321 } 322 } 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 /* --------------------------------------------------------------------- */ 327 328 static PetscErrorCode FormNodalSoln(PetscInt P, PetscReal x, PetscReal y, PetscReal H, PetscReal *nds, PetscScalar *u) 329 { 330 PetscInt i, j, indx; 331 332 PetscFunctionBeginUser; 333 indx = 0; 334 for (j = 0; j < (P + 1); ++j) { 335 for (i = 0; i < (P + 1); ++i) { 336 u[indx] = ubdy(x + H * nds[i], y + H * nds[j]); 337 indx++; 338 } 339 } 340 PetscFunctionReturn(PETSC_SUCCESS); 341 } 342 343 /* --------------------------------------------------------------------- */ 344 345 static PetscReal polyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval) 346 { 347 PetscReal denominator = 1.; 348 PetscReal numerator = 1.; 349 PetscInt i = 0; 350 351 for (i = 0; i < (order + 1); i++) { 352 if (i != basis) { 353 numerator *= (xval - xLocVal[i]); 354 denominator *= (xLocVal[basis] - xLocVal[i]); 355 } 356 } 357 return (numerator / denominator); 358 } 359 360 /* --------------------------------------------------------------------- */ 361 362 static PetscReal derivPolyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval) 363 { 364 PetscReal denominator; 365 PetscReal numerator; 366 PetscReal numtmp; 367 PetscInt i = 0, j = 0; 368 369 denominator = 1.; 370 for (i = 0; i < (order + 1); i++) { 371 if (i != basis) denominator *= (xLocVal[basis] - xLocVal[i]); 372 } 373 numerator = 0.; 374 for (j = 0; j < (order + 1); ++j) { 375 if (j != basis) { 376 numtmp = 1.; 377 for (i = 0; i < (order + 1); ++i) { 378 if (i != basis && j != i) numtmp *= (xval - xLocVal[i]); 379 } 380 numerator += numtmp; 381 } 382 } 383 384 return (numerator / denominator); 385 } 386 387 /* --------------------------------------------------------------------- */ 388 389 static PetscReal ubdy(PetscReal x, PetscReal y) 390 { 391 return x * x * y * y; 392 } 393 394 static PetscReal src(PetscReal x, PetscReal y) 395 { 396 return -2. * y * y - 2. * x * x; 397 } 398 /* --------------------------------------------------------------------- */ 399 400 static void leggaulob(PetscReal x1, PetscReal x2, PetscReal x[], PetscReal w[], int n) 401 /******************************************************************************* 402 Given the lower and upper limits of integration x1 and x2, and given n, this 403 routine returns arrays x[0..n-1] and w[0..n-1] of length n, containing the abscissas 404 and weights of the Gauss-Lobatto-Legendre n-point quadrature formula. 405 *******************************************************************************/ 406 { 407 PetscInt j, m; 408 PetscReal z1, z, xm, xl, q, qp, Ln, scale; 409 if (n == 1) { 410 x[0] = x1; /* Scale the root to the desired interval, */ 411 x[1] = x2; /* and put in its symmetric counterpart. */ 412 w[0] = 1.; /* Compute the weight */ 413 w[1] = 1.; /* and its symmetric counterpart. */ 414 } else { 415 x[0] = x1; /* Scale the root to the desired interval, */ 416 x[n] = x2; /* and put in its symmetric counterpart. */ 417 w[0] = 2. / (n * (n + 1)); /* Compute the weight */ 418 w[n] = 2. / (n * (n + 1)); /* and its symmetric counterpart. */ 419 m = (n + 1) / 2; /* The roots are symmetric, so we only find half of them. */ 420 xm = 0.5 * (x2 + x1); 421 xl = 0.5 * (x2 - x1); 422 for (j = 1; j <= (m - 1); j++) { /* Loop over the desired roots. */ 423 z = -1.0 * PetscCosReal((PETSC_PI * (j + 0.25) / (n)) - (3.0 / (8.0 * n * PETSC_PI)) * (1.0 / (j + 0.25))); 424 /* Starting with the above approximation to the ith root, we enter */ 425 /* the main loop of refinement by Newton's method. */ 426 do { 427 qAndLEvaluation(n, z, &q, &qp, &Ln); 428 z1 = z; 429 z = z1 - q / qp; /* Newton's method. */ 430 } while (PetscAbsReal(z - z1) > 3.0e-11); 431 qAndLEvaluation(n, z, &q, &qp, &Ln); 432 x[j] = xm + xl * z; /* Scale the root to the desired interval, */ 433 x[n - j] = xm - xl * z; /* and put in its symmetric counterpart. */ 434 w[j] = 2.0 / (n * (n + 1) * Ln * Ln); /* Compute the weight */ 435 w[n - j] = w[j]; /* and its symmetric counterpart. */ 436 } 437 } 438 if (n % 2 == 0) { 439 qAndLEvaluation(n, 0.0, &q, &qp, &Ln); 440 x[n / 2] = (x2 - x1) / 2.0; 441 w[n / 2] = 2.0 / (n * (n + 1) * Ln * Ln); 442 } 443 /* scale the weights according to mapping from [-1,1] to [0,1] */ 444 scale = (x2 - x1) / 2.0; 445 for (j = 0; j <= n; ++j) w[j] = w[j] * scale; 446 } 447 448 /******************************************************************************/ 449 static void qAndLEvaluation(int n, PetscReal x, PetscReal *q, PetscReal *qp, PetscReal *Ln) 450 /******************************************************************************* 451 Compute the polynomial qn(x) = L_{N+1}(x) - L_{n-1}(x) and its derivative in 452 addition to L_N(x) as these are needed for the GLL points. See the book titled 453 "Implementing Spectral Methods for Partial Differential Equations: Algorithms 454 for Scientists and Engineers" by David A. Kopriva. 455 *******************************************************************************/ 456 { 457 PetscInt k; 458 459 PetscReal Lnp; 460 PetscReal Lnp1, Lnp1p; 461 PetscReal Lnm1, Lnm1p; 462 PetscReal Lnm2, Lnm2p; 463 464 Lnm1 = 1.0; 465 *Ln = x; 466 Lnm1p = 0.0; 467 Lnp = 1.0; 468 469 for (k = 2; k <= n; ++k) { 470 Lnm2 = Lnm1; 471 Lnm1 = *Ln; 472 Lnm2p = Lnm1p; 473 Lnm1p = Lnp; 474 *Ln = (2. * k - 1.) / (1.0 * k) * x * Lnm1 - (k - 1.) / (1.0 * k) * Lnm2; 475 Lnp = Lnm2p + (2.0 * k - 1) * Lnm1; 476 } 477 k = n + 1; 478 Lnp1 = (2. * k - 1.) / (1.0 * k) * x * (*Ln) - (k - 1.) / (1.0 * k) * Lnm1; 479 Lnp1p = Lnm1p + (2.0 * k - 1) * (*Ln); 480 *q = Lnp1 - Lnm1; 481 *qp = Lnp1p - Lnm1p; 482 } 483 484 /*TEST 485 486 test: 487 nsize: 2 488 args: -ksp_monitor_short 489 490 TEST*/ 491