1 2 static char help[] = "This example solves a linear system in parallel with KSP. The matrix\n\ 3 uses arbitrary order polynomials for finite elements on the unit square. To test the parallel\n\ 4 matrix assembly, the matrix is intentionally laid out across processors\n\ 5 differently from the way it is assembled. Input arguments are:\n\ 6 -m <size> -p <order> : mesh size and polynomial order\n\n"; 7 8 /* Contributed by Travis Austin <austin@txcorp.com>, 2010. 9 based on src/ksp/ksp/tutorials/ex3.c 10 */ 11 12 #include <petscksp.h> 13 14 /* Declare user-defined routines */ 15 static PetscReal src(PetscReal,PetscReal); 16 static PetscReal ubdy(PetscReal,PetscReal); 17 static PetscReal polyBasisFunc(PetscInt,PetscInt,PetscReal*,PetscReal); 18 static PetscReal derivPolyBasisFunc(PetscInt,PetscInt,PetscReal*,PetscReal); 19 static PetscErrorCode Form1DElementMass(PetscReal,PetscInt,PetscReal*,PetscReal*,PetscScalar*); 20 static PetscErrorCode Form1DElementStiffness(PetscReal,PetscInt,PetscReal*,PetscReal*,PetscScalar*); 21 static PetscErrorCode Form2DElementMass(PetscInt,PetscScalar*,PetscScalar*); 22 static PetscErrorCode Form2DElementStiffness(PetscInt,PetscScalar*,PetscScalar*,PetscScalar*); 23 static PetscErrorCode FormNodalRhs(PetscInt,PetscReal,PetscReal,PetscReal,PetscReal*,PetscScalar*); 24 static PetscErrorCode FormNodalSoln(PetscInt,PetscReal,PetscReal,PetscReal,PetscReal*,PetscScalar*); 25 static void leggaulob(PetscReal, PetscReal, PetscReal [], PetscReal [], int); 26 static void qAndLEvaluation(int, PetscReal, PetscReal*, PetscReal*, PetscReal*); 27 28 int main(int argc,char **args) 29 { 30 PetscInt p = 2, m = 5; 31 PetscInt num1Dnodes, num2Dnodes; 32 PetscScalar *Ke1D,*Ke2D,*Me1D,*Me2D; 33 PetscScalar *r,*ue,val; 34 Vec u,ustar,b,q; 35 Mat A,Mass; 36 KSP ksp; 37 PetscInt M,N; 38 PetscMPIInt rank,size; 39 PetscReal x,y,h,norm; 40 PetscInt *idx,indx,count,*rows,i,j,k,start,end,its; 41 PetscReal *rowsx,*rowsy; 42 PetscReal *gllNode, *gllWgts; 43 44 PetscCall(PetscInitialize(&argc,&args,(char*)0,help)); 45 PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Options for p-FEM",""); 46 PetscCall(PetscOptionsInt("-m","Number of elements in each direction","None",m,&m,NULL)); 47 PetscCall(PetscOptionsInt("-p","Order of each element (tensor product basis)","None",p,&p,NULL)); 48 PetscOptionsEnd(); 49 PetscCheck(p > 0,PETSC_COMM_SELF,PETSC_ERR_USER,"Option -p value should be greater than zero"); 50 N = (p*m+1)*(p*m+1); /* dimension of matrix */ 51 M = m*m; /* number of elements */ 52 h = 1.0/m; /* mesh width */ 53 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank)); 54 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); 55 56 /* Create stiffness matrix */ 57 PetscCall(MatCreate(PETSC_COMM_WORLD,&A)); 58 PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N)); 59 PetscCall(MatSetFromOptions(A)); 60 PetscCall(MatSetUp(A)); 61 62 /* Create matrix */ 63 PetscCall(MatCreate(PETSC_COMM_WORLD,&Mass)); 64 PetscCall(MatSetSizes(Mass,PETSC_DECIDE,PETSC_DECIDE,N,N)); 65 PetscCall(MatSetFromOptions(Mass)); 66 PetscCall(MatSetUp(Mass)); 67 start = rank*(M/size) + ((M%size) < rank ? (M%size) : rank); 68 end = start + M/size + ((M%size) > rank); 69 70 /* Allocate element stiffness matrices */ 71 num1Dnodes = (p+1); 72 num2Dnodes = num1Dnodes*num1Dnodes; 73 74 PetscCall(PetscMalloc1(num1Dnodes*num1Dnodes,&Me1D)); 75 PetscCall(PetscMalloc1(num1Dnodes*num1Dnodes,&Ke1D)); 76 PetscCall(PetscMalloc1(num2Dnodes*num2Dnodes,&Me2D)); 77 PetscCall(PetscMalloc1(num2Dnodes*num2Dnodes,&Ke2D)); 78 PetscCall(PetscMalloc1(num2Dnodes,&idx)); 79 PetscCall(PetscMalloc1(num2Dnodes,&r)); 80 PetscCall(PetscMalloc1(num2Dnodes,&ue)); 81 82 /* Allocate quadrature and create stiffness matrices */ 83 PetscCall(PetscMalloc1(p+1,&gllNode)); 84 PetscCall(PetscMalloc1(p+1,&gllWgts)); 85 leggaulob(0.0,1.0,gllNode,gllWgts,p); /* Get GLL nodes and weights */ 86 PetscCall(Form1DElementMass(h,p,gllNode,gllWgts,Me1D)); 87 PetscCall(Form1DElementStiffness(h,p,gllNode,gllWgts,Ke1D)); 88 PetscCall(Form2DElementMass(p,Me1D,Me2D)); 89 PetscCall(Form2DElementStiffness(p,Ke1D,Me1D,Ke2D)); 90 91 /* Assemble matrix */ 92 for (i=start; i<end; i++) { 93 indx = 0; 94 for (k=0;k<(p+1);++k) { 95 for (j=0;j<(p+1);++j) { 96 idx[indx++] = p*(p*m+1)*(i/m) + p*(i % m) + k*(p*m+1) + j; 97 } 98 } 99 PetscCall(MatSetValues(A,num2Dnodes,idx,num2Dnodes,idx,Ke2D,ADD_VALUES)); 100 PetscCall(MatSetValues(Mass,num2Dnodes,idx,num2Dnodes,idx,Me2D,ADD_VALUES)); 101 } 102 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 103 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 104 PetscCall(MatAssemblyBegin(Mass,MAT_FINAL_ASSEMBLY)); 105 PetscCall(MatAssemblyEnd(Mass,MAT_FINAL_ASSEMBLY)); 106 107 PetscCall(PetscFree(Me1D)); 108 PetscCall(PetscFree(Ke1D)); 109 PetscCall(PetscFree(Me2D)); 110 PetscCall(PetscFree(Ke2D)); 111 112 /* Create right-hand-side and solution vectors */ 113 PetscCall(VecCreate(PETSC_COMM_WORLD,&u)); 114 PetscCall(VecSetSizes(u,PETSC_DECIDE,N)); 115 PetscCall(VecSetFromOptions(u)); 116 PetscCall(PetscObjectSetName((PetscObject)u,"Approx. Solution")); 117 PetscCall(VecDuplicate(u,&b)); 118 PetscCall(PetscObjectSetName((PetscObject)b,"Right hand side")); 119 PetscCall(VecDuplicate(u,&q)); 120 PetscCall(PetscObjectSetName((PetscObject)q,"Right hand side 2")); 121 PetscCall(VecDuplicate(b,&ustar)); 122 PetscCall(VecSet(u,0.0)); 123 PetscCall(VecSet(b,0.0)); 124 PetscCall(VecSet(q,0.0)); 125 126 /* Assemble nodal right-hand-side and soln vector */ 127 for (i=start; i<end; i++) { 128 x = h*(i % m); 129 y = h*(i/m); 130 indx = 0; 131 for (k=0;k<(p+1);++k) { 132 for (j=0;j<(p+1);++j) { 133 idx[indx++] = p*(p*m+1)*(i/m) + p*(i % m) + k*(p*m+1) + j; 134 } 135 } 136 PetscCall(FormNodalRhs(p,x,y,h,gllNode,r)); 137 PetscCall(FormNodalSoln(p,x,y,h,gllNode,ue)); 138 PetscCall(VecSetValues(q,num2Dnodes,idx,r,INSERT_VALUES)); 139 PetscCall(VecSetValues(ustar,num2Dnodes,idx,ue,INSERT_VALUES)); 140 } 141 PetscCall(VecAssemblyBegin(q)); 142 PetscCall(VecAssemblyEnd(q)); 143 PetscCall(VecAssemblyBegin(ustar)); 144 PetscCall(VecAssemblyEnd(ustar)); 145 146 PetscCall(PetscFree(idx)); 147 PetscCall(PetscFree(r)); 148 PetscCall(PetscFree(ue)); 149 150 /* Get FE right-hand side vector */ 151 PetscCall(MatMult(Mass,q,b)); 152 153 /* Modify matrix and right-hand-side for Dirichlet boundary conditions */ 154 PetscCall(PetscMalloc1(4*p*m,&rows)); 155 PetscCall(PetscMalloc1(4*p*m,&rowsx)); 156 PetscCall(PetscMalloc1(4*p*m,&rowsy)); 157 for (i=0; i<p*m+1; i++) { 158 rows[i] = i; /* bottom */ 159 rowsx[i] = (i/p)*h+gllNode[i%p]*h; 160 rowsy[i] = 0.0; 161 rows[3*p*m-1+i] = (p*m)*(p*m+1) + i; /* top */ 162 rowsx[3*p*m-1+i] = (i/p)*h+gllNode[i%p]*h; 163 rowsy[3*p*m-1+i] = 1.0; 164 } 165 count = p*m+1; /* left side */ 166 indx = 1; 167 for (i=p*m+1; i<(p*m)*(p*m+1); i+= (p*m+1)) { 168 rows[count] = i; 169 rowsx[count] = 0.0; 170 rowsy[count++] = (indx/p)*h+gllNode[indx%p]*h; 171 indx++; 172 } 173 count = 2*p*m; /* right side */ 174 indx = 1; 175 for (i=2*p*m+1; i<(p*m)*(p*m+1); i+= (p*m+1)) { 176 rows[count] = i; 177 rowsx[count] = 1.0; 178 rowsy[count++] = (indx/p)*h+gllNode[indx%p]*h; 179 indx++; 180 } 181 for (i=0; i<4*p*m; i++) { 182 x = rowsx[i]; 183 y = rowsy[i]; 184 val = ubdy(x,y); 185 PetscCall(VecSetValues(b,1,&rows[i],&val,INSERT_VALUES)); 186 PetscCall(VecSetValues(u,1,&rows[i],&val,INSERT_VALUES)); 187 } 188 PetscCall(MatZeroRows(A,4*p*m,rows,1.0,0,0)); 189 PetscCall(PetscFree(rows)); 190 PetscCall(PetscFree(rowsx)); 191 PetscCall(PetscFree(rowsy)); 192 193 PetscCall(VecAssemblyBegin(u)); 194 PetscCall(VecAssemblyEnd(u)); 195 PetscCall(VecAssemblyBegin(b)); 196 PetscCall(VecAssemblyEnd(b)); 197 198 /* Solve linear system */ 199 PetscCall(KSPCreate(PETSC_COMM_WORLD,&ksp)); 200 PetscCall(KSPSetOperators(ksp,A,A)); 201 PetscCall(KSPSetInitialGuessNonzero(ksp,PETSC_TRUE)); 202 PetscCall(KSPSetFromOptions(ksp)); 203 PetscCall(KSPSolve(ksp,b,u)); 204 205 /* Check error */ 206 PetscCall(VecAXPY(u,-1.0,ustar)); 207 PetscCall(VecNorm(u,NORM_2,&norm)); 208 PetscCall(KSPGetIterationNumber(ksp,&its)); 209 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %" PetscInt_FMT "\n",(double)(norm*h),its)); 210 211 PetscCall(PetscFree(gllNode)); 212 PetscCall(PetscFree(gllWgts)); 213 214 PetscCall(KSPDestroy(&ksp)); 215 PetscCall(VecDestroy(&u)); 216 PetscCall(VecDestroy(&b)); 217 PetscCall(VecDestroy(&q)); 218 PetscCall(VecDestroy(&ustar)); 219 PetscCall(MatDestroy(&A)); 220 PetscCall(MatDestroy(&Mass)); 221 222 PetscCall(PetscFinalize()); 223 return 0; 224 } 225 226 /* --------------------------------------------------------------------- */ 227 228 /* 1d element stiffness mass matrix */ 229 static PetscErrorCode Form1DElementMass(PetscReal H,PetscInt P,PetscReal *gqn,PetscReal *gqw,PetscScalar *Me1D) 230 { 231 PetscInt i,j,k; 232 PetscInt indx; 233 234 PetscFunctionBeginUser; 235 for (j=0; j<(P+1); ++j) { 236 for (i=0; i<(P+1); ++i) { 237 indx = j*(P+1)+i; 238 Me1D[indx] = 0.0; 239 for (k=0; k<(P+1);++k) { 240 Me1D[indx] += H*gqw[k]*polyBasisFunc(P,i,gqn,gqn[k])*polyBasisFunc(P,j,gqn,gqn[k]); 241 } 242 } 243 } 244 PetscFunctionReturn(0); 245 } 246 247 /* --------------------------------------------------------------------- */ 248 249 /* 1d element stiffness matrix for derivative */ 250 static PetscErrorCode Form1DElementStiffness(PetscReal H,PetscInt P,PetscReal *gqn,PetscReal *gqw,PetscScalar *Ke1D) 251 { 252 PetscInt i,j,k; 253 PetscInt indx; 254 255 PetscFunctionBeginUser; 256 for (j=0;j<(P+1);++j) { 257 for (i=0;i<(P+1);++i) { 258 indx = j*(P+1)+i; 259 Ke1D[indx] = 0.0; 260 for (k=0; k<(P+1);++k) { 261 Ke1D[indx] += (1./H)*gqw[k]*derivPolyBasisFunc(P,i,gqn,gqn[k])*derivPolyBasisFunc(P,j,gqn,gqn[k]); 262 } 263 } 264 } 265 PetscFunctionReturn(0); 266 } 267 268 /* --------------------------------------------------------------------- */ 269 270 /* element mass matrix */ 271 static PetscErrorCode Form2DElementMass(PetscInt P,PetscScalar *Me1D,PetscScalar *Me2D) 272 { 273 PetscInt i1,j1,i2,j2; 274 PetscInt indx1,indx2,indx3; 275 276 PetscFunctionBeginUser; 277 for (j2=0;j2<(P+1);++j2) { 278 for (i2=0; i2<(P+1);++i2) { 279 for (j1=0;j1<(P+1);++j1) { 280 for (i1=0;i1<(P+1);++i1) { 281 indx1 = j1*(P+1)+i1; 282 indx2 = j2*(P+1)+i2; 283 indx3 = (j2*(P+1)+j1)*(P+1)*(P+1)+(i2*(P+1)+i1); 284 Me2D[indx3] = Me1D[indx1]*Me1D[indx2]; 285 } 286 } 287 } 288 } 289 PetscFunctionReturn(0); 290 } 291 292 /* --------------------------------------------------------------------- */ 293 294 /* element stiffness for Laplacian */ 295 static PetscErrorCode Form2DElementStiffness(PetscInt P,PetscScalar *Ke1D,PetscScalar *Me1D,PetscScalar *Ke2D) 296 { 297 PetscInt i1,j1,i2,j2; 298 PetscInt indx1,indx2,indx3; 299 300 PetscFunctionBeginUser; 301 for (j2=0;j2<(P+1);++j2) { 302 for (i2=0; i2<(P+1);++i2) { 303 for (j1=0;j1<(P+1);++j1) { 304 for (i1=0;i1<(P+1);++i1) { 305 indx1 = j1*(P+1)+i1; 306 indx2 = j2*(P+1)+i2; 307 indx3 = (j2*(P+1)+j1)*(P+1)*(P+1)+(i2*(P+1)+i1); 308 Ke2D[indx3] = Ke1D[indx1]*Me1D[indx2] + Me1D[indx1]*Ke1D[indx2]; 309 } 310 } 311 } 312 } 313 PetscFunctionReturn(0); 314 } 315 316 /* --------------------------------------------------------------------- */ 317 318 static PetscErrorCode FormNodalRhs(PetscInt P,PetscReal x,PetscReal y,PetscReal H,PetscReal* nds,PetscScalar *r) 319 { 320 PetscInt i,j,indx; 321 322 PetscFunctionBeginUser; 323 indx=0; 324 for (j=0;j<(P+1);++j) { 325 for (i=0;i<(P+1);++i) { 326 r[indx] = src(x+H*nds[i],y+H*nds[j]); 327 indx++; 328 } 329 } 330 PetscFunctionReturn(0); 331 } 332 333 /* --------------------------------------------------------------------- */ 334 335 static PetscErrorCode FormNodalSoln(PetscInt P,PetscReal x,PetscReal y,PetscReal H,PetscReal* nds,PetscScalar *u) 336 { 337 PetscInt i,j,indx; 338 339 PetscFunctionBeginUser; 340 indx=0; 341 for (j=0;j<(P+1);++j) { 342 for (i=0;i<(P+1);++i) { 343 u[indx] = ubdy(x+H*nds[i],y+H*nds[j]); 344 indx++; 345 } 346 } 347 PetscFunctionReturn(0); 348 } 349 350 /* --------------------------------------------------------------------- */ 351 352 static PetscReal polyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval) 353 { 354 PetscReal denominator = 1.; 355 PetscReal numerator = 1.; 356 PetscInt i =0; 357 358 for (i=0; i<(order+1); i++) { 359 if (i!=basis) { 360 numerator *= (xval-xLocVal[i]); 361 denominator *= (xLocVal[basis]-xLocVal[i]); 362 } 363 } 364 return (numerator/denominator); 365 } 366 367 /* --------------------------------------------------------------------- */ 368 369 static PetscReal derivPolyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval) 370 { 371 PetscReal denominator; 372 PetscReal numerator; 373 PetscReal numtmp; 374 PetscInt i=0,j=0; 375 376 denominator=1.; 377 for (i=0; i<(order+1); i++) { 378 if (i!=basis) denominator *= (xLocVal[basis]-xLocVal[i]); 379 } 380 numerator = 0.; 381 for (j=0;j<(order+1);++j) { 382 if (j != basis) { 383 numtmp = 1.; 384 for (i=0;i<(order+1);++i) { 385 if (i!=basis && j!=i) numtmp *= (xval-xLocVal[i]); 386 } 387 numerator += numtmp; 388 } 389 } 390 391 return (numerator/denominator); 392 } 393 394 /* --------------------------------------------------------------------- */ 395 396 static PetscReal ubdy(PetscReal x,PetscReal y) 397 { 398 return x*x*y*y; 399 } 400 401 static PetscReal src(PetscReal x,PetscReal y) 402 { 403 return -2.*y*y - 2.*x*x; 404 } 405 /* --------------------------------------------------------------------- */ 406 407 static void leggaulob(PetscReal x1, PetscReal x2, PetscReal x[], PetscReal w[], int n) 408 /******************************************************************************* 409 Given the lower and upper limits of integration x1 and x2, and given n, this 410 routine returns arrays x[0..n-1] and w[0..n-1] of length n, containing the abscissas 411 and weights of the Gauss-Lobatto-Legendre n-point quadrature formula. 412 *******************************************************************************/ 413 { 414 PetscInt j,m; 415 PetscReal z1,z,xm,xl,q,qp,Ln,scale; 416 if (n==1) { 417 x[0] = x1; /* Scale the root to the desired interval, */ 418 x[1] = x2; /* and put in its symmetric counterpart. */ 419 w[0] = 1.; /* Compute the weight */ 420 w[1] = 1.; /* and its symmetric counterpart. */ 421 } else { 422 x[0] = x1; /* Scale the root to the desired interval, */ 423 x[n] = x2; /* and put in its symmetric counterpart. */ 424 w[0] = 2./(n*(n+1)); /* Compute the weight */ 425 w[n] = 2./(n*(n+1)); /* and its symmetric counterpart. */ 426 m = (n+1)/2; /* The roots are symmetric, so we only find half of them. */ 427 xm = 0.5*(x2+x1); 428 xl = 0.5*(x2-x1); 429 for (j=1; j<=(m-1); j++) { /* Loop over the desired roots. */ 430 z=-1.0*PetscCosReal((PETSC_PI*(j+0.25)/(n))-(3.0/(8.0*n*PETSC_PI))*(1.0/(j+0.25))); 431 /* Starting with the above approximation to the ith root, we enter */ 432 /* the main loop of refinement by Newton's method. */ 433 do { 434 qAndLEvaluation(n,z,&q,&qp,&Ln); 435 z1 = z; 436 z = z1-q/qp; /* Newton's method. */ 437 } while (PetscAbsReal(z-z1) > 3.0e-11); 438 qAndLEvaluation(n,z,&q,&qp,&Ln); 439 x[j] = xm+xl*z; /* Scale the root to the desired interval, */ 440 x[n-j] = xm-xl*z; /* and put in its symmetric counterpart. */ 441 w[j] = 2.0/(n*(n+1)*Ln*Ln); /* Compute the weight */ 442 w[n-j] = w[j]; /* and its symmetric counterpart. */ 443 } 444 } 445 if (n%2==0) { 446 qAndLEvaluation(n,0.0,&q,&qp,&Ln); 447 x[n/2]=(x2-x1)/2.0; 448 w[n/2]=2.0/(n*(n+1)*Ln*Ln); 449 } 450 /* scale the weights according to mapping from [-1,1] to [0,1] */ 451 scale = (x2-x1)/2.0; 452 for (j=0; j<=n; ++j) w[j] = w[j]*scale; 453 } 454 455 /******************************************************************************/ 456 static void qAndLEvaluation(int n, PetscReal x, PetscReal *q, PetscReal *qp, PetscReal *Ln) 457 /******************************************************************************* 458 Compute the polynomial qn(x) = L_{N+1}(x) - L_{n-1}(x) and its derivative in 459 addition to L_N(x) as these are needed for the GLL points. See the book titled 460 "Implementing Spectral Methods for Partial Differential Equations: Algorithms 461 for Scientists and Engineers" by David A. Kopriva. 462 *******************************************************************************/ 463 { 464 PetscInt k; 465 466 PetscReal Lnp; 467 PetscReal Lnp1, Lnp1p; 468 PetscReal Lnm1, Lnm1p; 469 PetscReal Lnm2, Lnm2p; 470 471 Lnm1 = 1.0; 472 *Ln = x; 473 Lnm1p = 0.0; 474 Lnp = 1.0; 475 476 for (k=2; k<=n; ++k) { 477 Lnm2 = Lnm1; 478 Lnm1 = *Ln; 479 Lnm2p = Lnm1p; 480 Lnm1p = Lnp; 481 *Ln = (2.*k-1.)/(1.0*k)*x*Lnm1 - (k-1.)/(1.0*k)*Lnm2; 482 Lnp = Lnm2p + (2.0*k-1)*Lnm1; 483 } 484 k = n+1; 485 Lnp1 = (2.*k-1.)/(1.0*k)*x*(*Ln) - (k-1.)/(1.0*k)*Lnm1; 486 Lnp1p = Lnm1p + (2.0*k-1)*(*Ln); 487 *q = Lnp1 - Lnm1; 488 *qp = Lnp1p - Lnm1p; 489 } 490 491 /*TEST 492 493 test: 494 nsize: 2 495 args: -ksp_monitor_short 496 497 TEST*/ 498