1 static char help[] = "This example solves a linear system in parallel with KSP. The matrix\n\ 2 uses arbitrary order polynomials for finite elements on the unit square. To test the parallel\n\ 3 matrix assembly, the matrix is intentionally laid out across processors\n\ 4 differently from the way it is assembled. Input arguments are:\n\ 5 -m <size> -p <order> : mesh size and polynomial order\n\n"; 6 7 /* Contributed by Travis Austin <austin@txcorp.com>, 2010. 8 based on src/ksp/ksp/tutorials/ex3.c 9 */ 10 11 #include <petscksp.h> 12 13 /* Declare user-defined routines */ 14 static PetscReal src(PetscReal, PetscReal); 15 static PetscReal ubdy(PetscReal, PetscReal); 16 static PetscReal polyBasisFunc(PetscInt, PetscInt, PetscReal *, PetscReal); 17 static PetscReal derivPolyBasisFunc(PetscInt, PetscInt, PetscReal *, PetscReal); 18 static PetscErrorCode Form1DElementMass(PetscReal, PetscInt, PetscReal *, PetscReal *, PetscScalar *); 19 static PetscErrorCode Form1DElementStiffness(PetscReal, PetscInt, PetscReal *, PetscReal *, PetscScalar *); 20 static PetscErrorCode Form2DElementMass(PetscInt, PetscScalar *, PetscScalar *); 21 static PetscErrorCode Form2DElementStiffness(PetscInt, PetscScalar *, PetscScalar *, PetscScalar *); 22 static PetscErrorCode FormNodalRhs(PetscInt, PetscReal, PetscReal, PetscReal, PetscReal *, PetscScalar *); 23 static PetscErrorCode FormNodalSoln(PetscInt, PetscReal, PetscReal, PetscReal, PetscReal *, PetscScalar *); 24 static void leggaulob(PetscReal, PetscReal, PetscReal[], PetscReal[], int); 25 static void qAndLEvaluation(int, PetscReal, PetscReal *, PetscReal *, PetscReal *); 26 27 int main(int argc, char **args) 28 { 29 PetscInt p = 2, m = 5; 30 PetscInt num1Dnodes, num2Dnodes; 31 PetscScalar *Ke1D, *Ke2D, *Me1D, *Me2D; 32 PetscScalar *r, *ue, val; 33 Vec u, ustar, b, q; 34 Mat A, Mass; 35 KSP ksp; 36 PetscInt M, N; 37 PetscMPIInt rank, size; 38 PetscReal x, y, h, norm; 39 PetscInt *idx, indx, count, *rows, i, j, k, start, end, its; 40 PetscReal *rowsx, *rowsy; 41 PetscReal *gllNode, *gllWgts; 42 43 PetscFunctionBeginUser; 44 PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 45 PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Options for p-FEM", ""); 46 PetscCall(PetscOptionsInt("-m", "Number of elements in each direction", "None", m, &m, NULL)); 47 PetscCall(PetscOptionsInt("-p", "Order of each element (tensor product basis)", "None", p, &p, NULL)); 48 PetscOptionsEnd(); 49 PetscCheck(p > 0, PETSC_COMM_SELF, PETSC_ERR_USER, "Option -p value should be greater than zero"); 50 N = (p * m + 1) * (p * m + 1); /* dimension of matrix */ 51 M = m * m; /* number of elements */ 52 h = 1.0 / m; /* mesh width */ 53 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 54 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 55 56 /* Create stiffness matrix */ 57 PetscCall(MatCreate(PETSC_COMM_WORLD, &A)); 58 PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N)); 59 PetscCall(MatSetFromOptions(A)); 60 PetscCall(MatSetUp(A)); 61 62 /* Create matrix */ 63 PetscCall(MatCreate(PETSC_COMM_WORLD, &Mass)); 64 PetscCall(MatSetSizes(Mass, PETSC_DECIDE, PETSC_DECIDE, N, N)); 65 PetscCall(MatSetFromOptions(Mass)); 66 PetscCall(MatSetUp(Mass)); 67 start = rank * (M / size) + ((M % size) < rank ? (M % size) : rank); 68 end = start + M / size + ((M % size) > rank); 69 70 /* Allocate element stiffness matrices */ 71 num1Dnodes = (p + 1); 72 num2Dnodes = num1Dnodes * num1Dnodes; 73 74 PetscCall(PetscMalloc1(num1Dnodes * num1Dnodes, &Me1D)); 75 PetscCall(PetscMalloc1(num1Dnodes * num1Dnodes, &Ke1D)); 76 PetscCall(PetscMalloc1(num2Dnodes * num2Dnodes, &Me2D)); 77 PetscCall(PetscMalloc1(num2Dnodes * num2Dnodes, &Ke2D)); 78 PetscCall(PetscMalloc1(num2Dnodes, &idx)); 79 PetscCall(PetscMalloc1(num2Dnodes, &r)); 80 PetscCall(PetscMalloc1(num2Dnodes, &ue)); 81 82 /* Allocate quadrature and create stiffness matrices */ 83 PetscCall(PetscMalloc1(p + 1, &gllNode)); 84 PetscCall(PetscMalloc1(p + 1, &gllWgts)); 85 leggaulob(0.0, 1.0, gllNode, gllWgts, p); /* Get GLL nodes and weights */ 86 PetscCall(Form1DElementMass(h, p, gllNode, gllWgts, Me1D)); 87 PetscCall(Form1DElementStiffness(h, p, gllNode, gllWgts, Ke1D)); 88 PetscCall(Form2DElementMass(p, Me1D, Me2D)); 89 PetscCall(Form2DElementStiffness(p, Ke1D, Me1D, Ke2D)); 90 91 /* Assemble matrix */ 92 for (i = start; i < end; i++) { 93 indx = 0; 94 for (k = 0; k < (p + 1); ++k) { 95 for (j = 0; j < (p + 1); ++j) idx[indx++] = p * (p * m + 1) * (i / m) + p * (i % m) + k * (p * m + 1) + j; 96 } 97 PetscCall(MatSetValues(A, num2Dnodes, idx, num2Dnodes, idx, Ke2D, ADD_VALUES)); 98 PetscCall(MatSetValues(Mass, num2Dnodes, idx, num2Dnodes, idx, Me2D, ADD_VALUES)); 99 } 100 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 101 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 102 PetscCall(MatAssemblyBegin(Mass, MAT_FINAL_ASSEMBLY)); 103 PetscCall(MatAssemblyEnd(Mass, MAT_FINAL_ASSEMBLY)); 104 105 PetscCall(PetscFree(Me1D)); 106 PetscCall(PetscFree(Ke1D)); 107 PetscCall(PetscFree(Me2D)); 108 PetscCall(PetscFree(Ke2D)); 109 110 /* Create right-hand-side and solution vectors */ 111 PetscCall(VecCreate(PETSC_COMM_WORLD, &u)); 112 PetscCall(VecSetSizes(u, PETSC_DECIDE, N)); 113 PetscCall(VecSetFromOptions(u)); 114 PetscCall(PetscObjectSetName((PetscObject)u, "Approx. Solution")); 115 PetscCall(VecDuplicate(u, &b)); 116 PetscCall(PetscObjectSetName((PetscObject)b, "Right hand side")); 117 PetscCall(VecDuplicate(u, &q)); 118 PetscCall(PetscObjectSetName((PetscObject)q, "Right hand side 2")); 119 PetscCall(VecDuplicate(b, &ustar)); 120 PetscCall(VecSet(u, 0.0)); 121 PetscCall(VecSet(b, 0.0)); 122 PetscCall(VecSet(q, 0.0)); 123 124 /* Assemble nodal right-hand-side and soln vector */ 125 for (i = start; i < end; i++) { 126 x = h * (i % m); 127 y = h * (i / m); 128 indx = 0; 129 for (k = 0; k < (p + 1); ++k) { 130 for (j = 0; j < (p + 1); ++j) idx[indx++] = p * (p * m + 1) * (i / m) + p * (i % m) + k * (p * m + 1) + j; 131 } 132 PetscCall(FormNodalRhs(p, x, y, h, gllNode, r)); 133 PetscCall(FormNodalSoln(p, x, y, h, gllNode, ue)); 134 PetscCall(VecSetValues(q, num2Dnodes, idx, r, INSERT_VALUES)); 135 PetscCall(VecSetValues(ustar, num2Dnodes, idx, ue, INSERT_VALUES)); 136 } 137 PetscCall(VecAssemblyBegin(q)); 138 PetscCall(VecAssemblyEnd(q)); 139 PetscCall(VecAssemblyBegin(ustar)); 140 PetscCall(VecAssemblyEnd(ustar)); 141 142 PetscCall(PetscFree(idx)); 143 PetscCall(PetscFree(r)); 144 PetscCall(PetscFree(ue)); 145 146 /* Get FE right-hand side vector */ 147 PetscCall(MatMult(Mass, q, b)); 148 149 /* Modify matrix and right-hand-side for Dirichlet boundary conditions */ 150 PetscCall(PetscMalloc1(4 * p * m, &rows)); 151 PetscCall(PetscMalloc1(4 * p * m, &rowsx)); 152 PetscCall(PetscMalloc1(4 * p * m, &rowsy)); 153 for (i = 0; i < p * m + 1; i++) { 154 rows[i] = i; /* bottom */ 155 rowsx[i] = (i / p) * h + gllNode[i % p] * h; 156 rowsy[i] = 0.0; 157 rows[3 * p * m - 1 + i] = (p * m) * (p * m + 1) + i; /* top */ 158 rowsx[3 * p * m - 1 + i] = (i / p) * h + gllNode[i % p] * h; 159 rowsy[3 * p * m - 1 + i] = 1.0; 160 } 161 count = p * m + 1; /* left side */ 162 indx = 1; 163 for (i = p * m + 1; i < (p * m) * (p * m + 1); i += (p * m + 1)) { 164 rows[count] = i; 165 rowsx[count] = 0.0; 166 rowsy[count++] = (indx / p) * h + gllNode[indx % p] * h; 167 indx++; 168 } 169 count = 2 * p * m; /* right side */ 170 indx = 1; 171 for (i = 2 * p * m + 1; i < (p * m) * (p * m + 1); i += (p * m + 1)) { 172 rows[count] = i; 173 rowsx[count] = 1.0; 174 rowsy[count++] = (indx / p) * h + gllNode[indx % p] * h; 175 indx++; 176 } 177 for (i = 0; i < 4 * p * m; i++) { 178 x = rowsx[i]; 179 y = rowsy[i]; 180 val = ubdy(x, y); 181 PetscCall(VecSetValues(b, 1, &rows[i], &val, INSERT_VALUES)); 182 PetscCall(VecSetValues(u, 1, &rows[i], &val, INSERT_VALUES)); 183 } 184 PetscCall(MatZeroRows(A, 4 * p * m, rows, 1.0, 0, 0)); 185 PetscCall(PetscFree(rows)); 186 PetscCall(PetscFree(rowsx)); 187 PetscCall(PetscFree(rowsy)); 188 189 PetscCall(VecAssemblyBegin(u)); 190 PetscCall(VecAssemblyEnd(u)); 191 PetscCall(VecAssemblyBegin(b)); 192 PetscCall(VecAssemblyEnd(b)); 193 194 /* Solve linear system */ 195 PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp)); 196 PetscCall(KSPSetOperators(ksp, A, A)); 197 PetscCall(KSPSetInitialGuessNonzero(ksp, PETSC_TRUE)); 198 PetscCall(KSPSetFromOptions(ksp)); 199 PetscCall(KSPSolve(ksp, b, u)); 200 201 /* Check error */ 202 PetscCall(VecAXPY(u, -1.0, ustar)); 203 PetscCall(VecNorm(u, NORM_2, &norm)); 204 PetscCall(KSPGetIterationNumber(ksp, &its)); 205 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)(norm * h), its)); 206 207 PetscCall(PetscFree(gllNode)); 208 PetscCall(PetscFree(gllWgts)); 209 210 PetscCall(KSPDestroy(&ksp)); 211 PetscCall(VecDestroy(&u)); 212 PetscCall(VecDestroy(&b)); 213 PetscCall(VecDestroy(&q)); 214 PetscCall(VecDestroy(&ustar)); 215 PetscCall(MatDestroy(&A)); 216 PetscCall(MatDestroy(&Mass)); 217 218 PetscCall(PetscFinalize()); 219 return 0; 220 } 221 222 /* --------------------------------------------------------------------- */ 223 224 /* 1d element stiffness mass matrix */ 225 static PetscErrorCode Form1DElementMass(PetscReal H, PetscInt P, PetscReal *gqn, PetscReal *gqw, PetscScalar *Me1D) 226 { 227 PetscInt i, j, k; 228 PetscInt indx; 229 230 PetscFunctionBeginUser; 231 for (j = 0; j < (P + 1); ++j) { 232 for (i = 0; i < (P + 1); ++i) { 233 indx = j * (P + 1) + i; 234 Me1D[indx] = 0.0; 235 for (k = 0; k < (P + 1); ++k) Me1D[indx] += H * gqw[k] * polyBasisFunc(P, i, gqn, gqn[k]) * polyBasisFunc(P, j, gqn, gqn[k]); 236 } 237 } 238 PetscFunctionReturn(PETSC_SUCCESS); 239 } 240 241 /* --------------------------------------------------------------------- */ 242 243 /* 1d element stiffness matrix for derivative */ 244 static PetscErrorCode Form1DElementStiffness(PetscReal H, PetscInt P, PetscReal *gqn, PetscReal *gqw, PetscScalar *Ke1D) 245 { 246 PetscInt i, j, k; 247 PetscInt indx; 248 249 PetscFunctionBeginUser; 250 for (j = 0; j < (P + 1); ++j) { 251 for (i = 0; i < (P + 1); ++i) { 252 indx = j * (P + 1) + i; 253 Ke1D[indx] = 0.0; 254 for (k = 0; k < (P + 1); ++k) Ke1D[indx] += (1. / H) * gqw[k] * derivPolyBasisFunc(P, i, gqn, gqn[k]) * derivPolyBasisFunc(P, j, gqn, gqn[k]); 255 } 256 } 257 PetscFunctionReturn(PETSC_SUCCESS); 258 } 259 260 /* --------------------------------------------------------------------- */ 261 262 /* element mass matrix */ 263 static PetscErrorCode Form2DElementMass(PetscInt P, PetscScalar *Me1D, PetscScalar *Me2D) 264 { 265 PetscInt i1, j1, i2, j2; 266 PetscInt indx1, indx2, indx3; 267 268 PetscFunctionBeginUser; 269 for (j2 = 0; j2 < (P + 1); ++j2) { 270 for (i2 = 0; i2 < (P + 1); ++i2) { 271 for (j1 = 0; j1 < (P + 1); ++j1) { 272 for (i1 = 0; i1 < (P + 1); ++i1) { 273 indx1 = j1 * (P + 1) + i1; 274 indx2 = j2 * (P + 1) + i2; 275 indx3 = (j2 * (P + 1) + j1) * (P + 1) * (P + 1) + (i2 * (P + 1) + i1); 276 Me2D[indx3] = Me1D[indx1] * Me1D[indx2]; 277 } 278 } 279 } 280 } 281 PetscFunctionReturn(PETSC_SUCCESS); 282 } 283 284 /* --------------------------------------------------------------------- */ 285 286 /* element stiffness for Laplacian */ 287 static PetscErrorCode Form2DElementStiffness(PetscInt P, PetscScalar *Ke1D, PetscScalar *Me1D, PetscScalar *Ke2D) 288 { 289 PetscInt i1, j1, i2, j2; 290 PetscInt indx1, indx2, indx3; 291 292 PetscFunctionBeginUser; 293 for (j2 = 0; j2 < (P + 1); ++j2) { 294 for (i2 = 0; i2 < (P + 1); ++i2) { 295 for (j1 = 0; j1 < (P + 1); ++j1) { 296 for (i1 = 0; i1 < (P + 1); ++i1) { 297 indx1 = j1 * (P + 1) + i1; 298 indx2 = j2 * (P + 1) + i2; 299 indx3 = (j2 * (P + 1) + j1) * (P + 1) * (P + 1) + (i2 * (P + 1) + i1); 300 Ke2D[indx3] = Ke1D[indx1] * Me1D[indx2] + Me1D[indx1] * Ke1D[indx2]; 301 } 302 } 303 } 304 } 305 PetscFunctionReturn(PETSC_SUCCESS); 306 } 307 308 /* --------------------------------------------------------------------- */ 309 310 static PetscErrorCode FormNodalRhs(PetscInt P, PetscReal x, PetscReal y, PetscReal H, PetscReal *nds, PetscScalar *r) 311 { 312 PetscInt i, j, indx; 313 314 PetscFunctionBeginUser; 315 indx = 0; 316 for (j = 0; j < (P + 1); ++j) { 317 for (i = 0; i < (P + 1); ++i) { 318 r[indx] = src(x + H * nds[i], y + H * nds[j]); 319 indx++; 320 } 321 } 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 325 /* --------------------------------------------------------------------- */ 326 327 static PetscErrorCode FormNodalSoln(PetscInt P, PetscReal x, PetscReal y, PetscReal H, PetscReal *nds, PetscScalar *u) 328 { 329 PetscInt i, j, indx; 330 331 PetscFunctionBeginUser; 332 indx = 0; 333 for (j = 0; j < (P + 1); ++j) { 334 for (i = 0; i < (P + 1); ++i) { 335 u[indx] = ubdy(x + H * nds[i], y + H * nds[j]); 336 indx++; 337 } 338 } 339 PetscFunctionReturn(PETSC_SUCCESS); 340 } 341 342 /* --------------------------------------------------------------------- */ 343 344 static PetscReal polyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval) 345 { 346 PetscReal denominator = 1.; 347 PetscReal numerator = 1.; 348 PetscInt i = 0; 349 350 for (i = 0; i < (order + 1); i++) { 351 if (i != basis) { 352 numerator *= (xval - xLocVal[i]); 353 denominator *= (xLocVal[basis] - xLocVal[i]); 354 } 355 } 356 return (numerator / denominator); 357 } 358 359 /* --------------------------------------------------------------------- */ 360 361 static PetscReal derivPolyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval) 362 { 363 PetscReal denominator; 364 PetscReal numerator; 365 PetscReal numtmp; 366 PetscInt i = 0, j = 0; 367 368 denominator = 1.; 369 for (i = 0; i < (order + 1); i++) { 370 if (i != basis) denominator *= (xLocVal[basis] - xLocVal[i]); 371 } 372 numerator = 0.; 373 for (j = 0; j < (order + 1); ++j) { 374 if (j != basis) { 375 numtmp = 1.; 376 for (i = 0; i < (order + 1); ++i) { 377 if (i != basis && j != i) numtmp *= (xval - xLocVal[i]); 378 } 379 numerator += numtmp; 380 } 381 } 382 383 return (numerator / denominator); 384 } 385 386 /* --------------------------------------------------------------------- */ 387 388 static PetscReal ubdy(PetscReal x, PetscReal y) 389 { 390 return x * x * y * y; 391 } 392 393 static PetscReal src(PetscReal x, PetscReal y) 394 { 395 return -2. * y * y - 2. * x * x; 396 } 397 /* --------------------------------------------------------------------- */ 398 399 static void leggaulob(PetscReal x1, PetscReal x2, PetscReal x[], PetscReal w[], int n) 400 /******************************************************************************* 401 Given the lower and upper limits of integration x1 and x2, and given n, this 402 routine returns arrays x[0..n-1] and w[0..n-1] of length n, containing the abscissas 403 and weights of the Gauss-Lobatto-Legendre n-point quadrature formula. 404 *******************************************************************************/ 405 { 406 PetscInt j, m; 407 PetscReal z1, z, xm, xl, q, qp, Ln, scale; 408 if (n == 1) { 409 x[0] = x1; /* Scale the root to the desired interval, */ 410 x[1] = x2; /* and put in its symmetric counterpart. */ 411 w[0] = 1.; /* Compute the weight */ 412 w[1] = 1.; /* and its symmetric counterpart. */ 413 } else { 414 x[0] = x1; /* Scale the root to the desired interval, */ 415 x[n] = x2; /* and put in its symmetric counterpart. */ 416 w[0] = 2. / (n * (n + 1)); /* Compute the weight */ 417 w[n] = 2. / (n * (n + 1)); /* and its symmetric counterpart. */ 418 m = (n + 1) / 2; /* The roots are symmetric, so we only find half of them. */ 419 xm = 0.5 * (x2 + x1); 420 xl = 0.5 * (x2 - x1); 421 for (j = 1; j <= (m - 1); j++) { /* Loop over the desired roots. */ 422 z = -1.0 * PetscCosReal((PETSC_PI * (j + 0.25) / (n)) - (3.0 / (8.0 * n * PETSC_PI)) * (1.0 / (j + 0.25))); 423 /* Starting with the above approximation to the ith root, we enter */ 424 /* the main loop of refinement by Newton's method. */ 425 do { 426 qAndLEvaluation(n, z, &q, &qp, &Ln); 427 z1 = z; 428 z = z1 - q / qp; /* Newton's method. */ 429 } while (PetscAbsReal(z - z1) > 3.0e-11); 430 qAndLEvaluation(n, z, &q, &qp, &Ln); 431 x[j] = xm + xl * z; /* Scale the root to the desired interval, */ 432 x[n - j] = xm - xl * z; /* and put in its symmetric counterpart. */ 433 w[j] = 2.0 / (n * (n + 1) * Ln * Ln); /* Compute the weight */ 434 w[n - j] = w[j]; /* and its symmetric counterpart. */ 435 } 436 } 437 if (n % 2 == 0) { 438 qAndLEvaluation(n, 0.0, &q, &qp, &Ln); 439 x[n / 2] = (x2 - x1) / 2.0; 440 w[n / 2] = 2.0 / (n * (n + 1) * Ln * Ln); 441 } 442 /* scale the weights according to mapping from [-1,1] to [0,1] */ 443 scale = (x2 - x1) / 2.0; 444 for (j = 0; j <= n; ++j) w[j] = w[j] * scale; 445 } 446 447 /******************************************************************************/ 448 static void qAndLEvaluation(int n, PetscReal x, PetscReal *q, PetscReal *qp, PetscReal *Ln) 449 /******************************************************************************* 450 Compute the polynomial qn(x) = L_{N+1}(x) - L_{n-1}(x) and its derivative in 451 addition to L_N(x) as these are needed for the GLL points. See the book titled 452 "Implementing Spectral Methods for Partial Differential Equations: Algorithms 453 for Scientists and Engineers" by David A. Kopriva. 454 *******************************************************************************/ 455 { 456 PetscInt k; 457 458 PetscReal Lnp; 459 PetscReal Lnp1, Lnp1p; 460 PetscReal Lnm1, Lnm1p; 461 PetscReal Lnm2, Lnm2p; 462 463 Lnm1 = 1.0; 464 *Ln = x; 465 Lnm1p = 0.0; 466 Lnp = 1.0; 467 468 for (k = 2; k <= n; ++k) { 469 Lnm2 = Lnm1; 470 Lnm1 = *Ln; 471 Lnm2p = Lnm1p; 472 Lnm1p = Lnp; 473 *Ln = (2. * k - 1.) / (1.0 * k) * x * Lnm1 - (k - 1.) / (1.0 * k) * Lnm2; 474 Lnp = Lnm2p + (2.0 * k - 1) * Lnm1; 475 } 476 k = n + 1; 477 Lnp1 = (2. * k - 1.) / (1.0 * k) * x * (*Ln) - (k - 1.) / (1.0 * k) * Lnm1; 478 Lnp1p = Lnm1p + (2.0 * k - 1) * (*Ln); 479 *q = Lnp1 - Lnm1; 480 *qp = Lnp1p - Lnm1p; 481 } 482 483 /*TEST 484 485 test: 486 nsize: 2 487 args: -ksp_monitor_short 488 489 TEST*/ 490