1 2 static char help[] = "This example solves a linear system in parallel with KSP. The matrix\n\ 3 uses arbitrary order polynomials for finite elements on the unit square. To test the parallel\n\ 4 matrix assembly, the matrix is intentionally laid out across processors\n\ 5 differently from the way it is assembled. Input arguments are:\n\ 6 -m <size> -p <order> : mesh size and polynomial order\n\n"; 7 8 /* Contributed by Travis Austin <austin@txcorp.com>, 2010. 9 based on src/ksp/ksp/tutorials/ex3.c 10 */ 11 12 #include <petscksp.h> 13 14 /* Declare user-defined routines */ 15 static PetscReal src(PetscReal,PetscReal); 16 static PetscReal ubdy(PetscReal,PetscReal); 17 static PetscReal polyBasisFunc(PetscInt,PetscInt,PetscReal*,PetscReal); 18 static PetscReal derivPolyBasisFunc(PetscInt,PetscInt,PetscReal*,PetscReal); 19 static PetscErrorCode Form1DElementMass(PetscReal,PetscInt,PetscReal*,PetscReal*,PetscScalar*); 20 static PetscErrorCode Form1DElementStiffness(PetscReal,PetscInt,PetscReal*,PetscReal*,PetscScalar*); 21 static PetscErrorCode Form2DElementMass(PetscInt,PetscScalar*,PetscScalar*); 22 static PetscErrorCode Form2DElementStiffness(PetscInt,PetscScalar*,PetscScalar*,PetscScalar*); 23 static PetscErrorCode FormNodalRhs(PetscInt,PetscReal,PetscReal,PetscReal,PetscReal*,PetscScalar*); 24 static PetscErrorCode FormNodalSoln(PetscInt,PetscReal,PetscReal,PetscReal,PetscReal*,PetscScalar*); 25 static void leggaulob(PetscReal, PetscReal, PetscReal [], PetscReal [], int); 26 static void qAndLEvaluation(int, PetscReal, PetscReal*, PetscReal*, PetscReal*); 27 28 int main(int argc,char **args) 29 { 30 PetscInt p = 2, m = 5; 31 PetscInt num1Dnodes, num2Dnodes; 32 PetscScalar *Ke1D,*Ke2D,*Me1D,*Me2D; 33 PetscScalar *r,*ue,val; 34 Vec u,ustar,b,q; 35 Mat A,Mass; 36 KSP ksp; 37 PetscInt M,N; 38 PetscMPIInt rank,size; 39 PetscReal x,y,h,norm; 40 PetscInt *idx,indx,count,*rows,i,j,k,start,end,its; 41 PetscReal *rowsx,*rowsy; 42 PetscReal *gllNode, *gllWgts; 43 PetscErrorCode ierr; 44 45 PetscCall(PetscInitialize(&argc,&args,(char*)0,help)); 46 ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Options for p-FEM","");PetscCall(ierr); 47 PetscCall(PetscOptionsInt("-m","Number of elements in each direction","None",m,&m,NULL)); 48 PetscCall(PetscOptionsInt("-p","Order of each element (tensor product basis)","None",p,&p,NULL)); 49 ierr = PetscOptionsEnd();PetscCall(ierr); 50 PetscCheck(p > 0,PETSC_COMM_SELF,PETSC_ERR_USER,"Option -p value should be greater than zero"); 51 N = (p*m+1)*(p*m+1); /* dimension of matrix */ 52 M = m*m; /* number of elements */ 53 h = 1.0/m; /* mesh width */ 54 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank)); 55 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size)); 56 57 /* Create stiffness matrix */ 58 PetscCall(MatCreate(PETSC_COMM_WORLD,&A)); 59 PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N)); 60 PetscCall(MatSetFromOptions(A)); 61 PetscCall(MatSetUp(A)); 62 63 /* Create matrix */ 64 PetscCall(MatCreate(PETSC_COMM_WORLD,&Mass)); 65 PetscCall(MatSetSizes(Mass,PETSC_DECIDE,PETSC_DECIDE,N,N)); 66 PetscCall(MatSetFromOptions(Mass)); 67 PetscCall(MatSetUp(Mass)); 68 start = rank*(M/size) + ((M%size) < rank ? (M%size) : rank); 69 end = start + M/size + ((M%size) > rank); 70 71 /* Allocate element stiffness matrices */ 72 num1Dnodes = (p+1); 73 num2Dnodes = num1Dnodes*num1Dnodes; 74 75 PetscCall(PetscMalloc1(num1Dnodes*num1Dnodes,&Me1D)); 76 PetscCall(PetscMalloc1(num1Dnodes*num1Dnodes,&Ke1D)); 77 PetscCall(PetscMalloc1(num2Dnodes*num2Dnodes,&Me2D)); 78 PetscCall(PetscMalloc1(num2Dnodes*num2Dnodes,&Ke2D)); 79 PetscCall(PetscMalloc1(num2Dnodes,&idx)); 80 PetscCall(PetscMalloc1(num2Dnodes,&r)); 81 PetscCall(PetscMalloc1(num2Dnodes,&ue)); 82 83 /* Allocate quadrature and create stiffness matrices */ 84 PetscCall(PetscMalloc1(p+1,&gllNode)); 85 PetscCall(PetscMalloc1(p+1,&gllWgts)); 86 leggaulob(0.0,1.0,gllNode,gllWgts,p); /* Get GLL nodes and weights */ 87 PetscCall(Form1DElementMass(h,p,gllNode,gllWgts,Me1D)); 88 PetscCall(Form1DElementStiffness(h,p,gllNode,gllWgts,Ke1D)); 89 PetscCall(Form2DElementMass(p,Me1D,Me2D)); 90 PetscCall(Form2DElementStiffness(p,Ke1D,Me1D,Ke2D)); 91 92 /* Assemble matrix */ 93 for (i=start; i<end; i++) { 94 indx = 0; 95 for (k=0;k<(p+1);++k) { 96 for (j=0;j<(p+1);++j) { 97 idx[indx++] = p*(p*m+1)*(i/m) + p*(i % m) + k*(p*m+1) + j; 98 } 99 } 100 PetscCall(MatSetValues(A,num2Dnodes,idx,num2Dnodes,idx,Ke2D,ADD_VALUES)); 101 PetscCall(MatSetValues(Mass,num2Dnodes,idx,num2Dnodes,idx,Me2D,ADD_VALUES)); 102 } 103 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 104 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 105 PetscCall(MatAssemblyBegin(Mass,MAT_FINAL_ASSEMBLY)); 106 PetscCall(MatAssemblyEnd(Mass,MAT_FINAL_ASSEMBLY)); 107 108 PetscCall(PetscFree(Me1D)); 109 PetscCall(PetscFree(Ke1D)); 110 PetscCall(PetscFree(Me2D)); 111 PetscCall(PetscFree(Ke2D)); 112 113 /* Create right-hand-side and solution vectors */ 114 PetscCall(VecCreate(PETSC_COMM_WORLD,&u)); 115 PetscCall(VecSetSizes(u,PETSC_DECIDE,N)); 116 PetscCall(VecSetFromOptions(u)); 117 PetscCall(PetscObjectSetName((PetscObject)u,"Approx. Solution")); 118 PetscCall(VecDuplicate(u,&b)); 119 PetscCall(PetscObjectSetName((PetscObject)b,"Right hand side")); 120 PetscCall(VecDuplicate(u,&q)); 121 PetscCall(PetscObjectSetName((PetscObject)q,"Right hand side 2")); 122 PetscCall(VecDuplicate(b,&ustar)); 123 PetscCall(VecSet(u,0.0)); 124 PetscCall(VecSet(b,0.0)); 125 PetscCall(VecSet(q,0.0)); 126 127 /* Assemble nodal right-hand-side and soln vector */ 128 for (i=start; i<end; i++) { 129 x = h*(i % m); 130 y = h*(i/m); 131 indx = 0; 132 for (k=0;k<(p+1);++k) { 133 for (j=0;j<(p+1);++j) { 134 idx[indx++] = p*(p*m+1)*(i/m) + p*(i % m) + k*(p*m+1) + j; 135 } 136 } 137 PetscCall(FormNodalRhs(p,x,y,h,gllNode,r)); 138 PetscCall(FormNodalSoln(p,x,y,h,gllNode,ue)); 139 PetscCall(VecSetValues(q,num2Dnodes,idx,r,INSERT_VALUES)); 140 PetscCall(VecSetValues(ustar,num2Dnodes,idx,ue,INSERT_VALUES)); 141 } 142 PetscCall(VecAssemblyBegin(q)); 143 PetscCall(VecAssemblyEnd(q)); 144 PetscCall(VecAssemblyBegin(ustar)); 145 PetscCall(VecAssemblyEnd(ustar)); 146 147 PetscCall(PetscFree(idx)); 148 PetscCall(PetscFree(r)); 149 PetscCall(PetscFree(ue)); 150 151 /* Get FE right-hand side vector */ 152 PetscCall(MatMult(Mass,q,b)); 153 154 /* Modify matrix and right-hand-side for Dirichlet boundary conditions */ 155 PetscCall(PetscMalloc1(4*p*m,&rows)); 156 PetscCall(PetscMalloc1(4*p*m,&rowsx)); 157 PetscCall(PetscMalloc1(4*p*m,&rowsy)); 158 for (i=0; i<p*m+1; i++) { 159 rows[i] = i; /* bottom */ 160 rowsx[i] = (i/p)*h+gllNode[i%p]*h; 161 rowsy[i] = 0.0; 162 rows[3*p*m-1+i] = (p*m)*(p*m+1) + i; /* top */ 163 rowsx[3*p*m-1+i] = (i/p)*h+gllNode[i%p]*h; 164 rowsy[3*p*m-1+i] = 1.0; 165 } 166 count = p*m+1; /* left side */ 167 indx = 1; 168 for (i=p*m+1; i<(p*m)*(p*m+1); i+= (p*m+1)) { 169 rows[count] = i; 170 rowsx[count] = 0.0; 171 rowsy[count++] = (indx/p)*h+gllNode[indx%p]*h; 172 indx++; 173 } 174 count = 2*p*m; /* right side */ 175 indx = 1; 176 for (i=2*p*m+1; i<(p*m)*(p*m+1); i+= (p*m+1)) { 177 rows[count] = i; 178 rowsx[count] = 1.0; 179 rowsy[count++] = (indx/p)*h+gllNode[indx%p]*h; 180 indx++; 181 } 182 for (i=0; i<4*p*m; i++) { 183 x = rowsx[i]; 184 y = rowsy[i]; 185 val = ubdy(x,y); 186 PetscCall(VecSetValues(b,1,&rows[i],&val,INSERT_VALUES)); 187 PetscCall(VecSetValues(u,1,&rows[i],&val,INSERT_VALUES)); 188 } 189 PetscCall(MatZeroRows(A,4*p*m,rows,1.0,0,0)); 190 PetscCall(PetscFree(rows)); 191 PetscCall(PetscFree(rowsx)); 192 PetscCall(PetscFree(rowsy)); 193 194 PetscCall(VecAssemblyBegin(u)); 195 PetscCall(VecAssemblyEnd(u)); 196 PetscCall(VecAssemblyBegin(b)); 197 PetscCall(VecAssemblyEnd(b)); 198 199 /* Solve linear system */ 200 PetscCall(KSPCreate(PETSC_COMM_WORLD,&ksp)); 201 PetscCall(KSPSetOperators(ksp,A,A)); 202 PetscCall(KSPSetInitialGuessNonzero(ksp,PETSC_TRUE)); 203 PetscCall(KSPSetFromOptions(ksp)); 204 PetscCall(KSPSolve(ksp,b,u)); 205 206 /* Check error */ 207 PetscCall(VecAXPY(u,-1.0,ustar)); 208 PetscCall(VecNorm(u,NORM_2,&norm)); 209 PetscCall(KSPGetIterationNumber(ksp,&its)); 210 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)(norm*h),its)); 211 212 PetscCall(PetscFree(gllNode)); 213 PetscCall(PetscFree(gllWgts)); 214 215 PetscCall(KSPDestroy(&ksp)); 216 PetscCall(VecDestroy(&u)); 217 PetscCall(VecDestroy(&b)); 218 PetscCall(VecDestroy(&q)); 219 PetscCall(VecDestroy(&ustar)); 220 PetscCall(MatDestroy(&A)); 221 PetscCall(MatDestroy(&Mass)); 222 223 PetscCall(PetscFinalize()); 224 return 0; 225 } 226 227 /* --------------------------------------------------------------------- */ 228 229 /* 1d element stiffness mass matrix */ 230 static PetscErrorCode Form1DElementMass(PetscReal H,PetscInt P,PetscReal *gqn,PetscReal *gqw,PetscScalar *Me1D) 231 { 232 PetscInt i,j,k; 233 PetscInt indx; 234 235 PetscFunctionBeginUser; 236 for (j=0; j<(P+1); ++j) { 237 for (i=0; i<(P+1); ++i) { 238 indx = j*(P+1)+i; 239 Me1D[indx] = 0.0; 240 for (k=0; k<(P+1);++k) { 241 Me1D[indx] += H*gqw[k]*polyBasisFunc(P,i,gqn,gqn[k])*polyBasisFunc(P,j,gqn,gqn[k]); 242 } 243 } 244 } 245 PetscFunctionReturn(0); 246 } 247 248 /* --------------------------------------------------------------------- */ 249 250 /* 1d element stiffness matrix for derivative */ 251 static PetscErrorCode Form1DElementStiffness(PetscReal H,PetscInt P,PetscReal *gqn,PetscReal *gqw,PetscScalar *Ke1D) 252 { 253 PetscInt i,j,k; 254 PetscInt indx; 255 256 PetscFunctionBeginUser; 257 for (j=0;j<(P+1);++j) { 258 for (i=0;i<(P+1);++i) { 259 indx = j*(P+1)+i; 260 Ke1D[indx] = 0.0; 261 for (k=0; k<(P+1);++k) { 262 Ke1D[indx] += (1./H)*gqw[k]*derivPolyBasisFunc(P,i,gqn,gqn[k])*derivPolyBasisFunc(P,j,gqn,gqn[k]); 263 } 264 } 265 } 266 PetscFunctionReturn(0); 267 } 268 269 /* --------------------------------------------------------------------- */ 270 271 /* element mass matrix */ 272 static PetscErrorCode Form2DElementMass(PetscInt P,PetscScalar *Me1D,PetscScalar *Me2D) 273 { 274 PetscInt i1,j1,i2,j2; 275 PetscInt indx1,indx2,indx3; 276 277 PetscFunctionBeginUser; 278 for (j2=0;j2<(P+1);++j2) { 279 for (i2=0; i2<(P+1);++i2) { 280 for (j1=0;j1<(P+1);++j1) { 281 for (i1=0;i1<(P+1);++i1) { 282 indx1 = j1*(P+1)+i1; 283 indx2 = j2*(P+1)+i2; 284 indx3 = (j2*(P+1)+j1)*(P+1)*(P+1)+(i2*(P+1)+i1); 285 Me2D[indx3] = Me1D[indx1]*Me1D[indx2]; 286 } 287 } 288 } 289 } 290 PetscFunctionReturn(0); 291 } 292 293 /* --------------------------------------------------------------------- */ 294 295 /* element stiffness for Laplacian */ 296 static PetscErrorCode Form2DElementStiffness(PetscInt P,PetscScalar *Ke1D,PetscScalar *Me1D,PetscScalar *Ke2D) 297 { 298 PetscInt i1,j1,i2,j2; 299 PetscInt indx1,indx2,indx3; 300 301 PetscFunctionBeginUser; 302 for (j2=0;j2<(P+1);++j2) { 303 for (i2=0; i2<(P+1);++i2) { 304 for (j1=0;j1<(P+1);++j1) { 305 for (i1=0;i1<(P+1);++i1) { 306 indx1 = j1*(P+1)+i1; 307 indx2 = j2*(P+1)+i2; 308 indx3 = (j2*(P+1)+j1)*(P+1)*(P+1)+(i2*(P+1)+i1); 309 Ke2D[indx3] = Ke1D[indx1]*Me1D[indx2] + Me1D[indx1]*Ke1D[indx2]; 310 } 311 } 312 } 313 } 314 PetscFunctionReturn(0); 315 } 316 317 /* --------------------------------------------------------------------- */ 318 319 static PetscErrorCode FormNodalRhs(PetscInt P,PetscReal x,PetscReal y,PetscReal H,PetscReal* nds,PetscScalar *r) 320 { 321 PetscInt i,j,indx; 322 323 PetscFunctionBeginUser; 324 indx=0; 325 for (j=0;j<(P+1);++j) { 326 for (i=0;i<(P+1);++i) { 327 r[indx] = src(x+H*nds[i],y+H*nds[j]); 328 indx++; 329 } 330 } 331 PetscFunctionReturn(0); 332 } 333 334 /* --------------------------------------------------------------------- */ 335 336 static PetscErrorCode FormNodalSoln(PetscInt P,PetscReal x,PetscReal y,PetscReal H,PetscReal* nds,PetscScalar *u) 337 { 338 PetscInt i,j,indx; 339 340 PetscFunctionBeginUser; 341 indx=0; 342 for (j=0;j<(P+1);++j) { 343 for (i=0;i<(P+1);++i) { 344 u[indx] = ubdy(x+H*nds[i],y+H*nds[j]); 345 indx++; 346 } 347 } 348 PetscFunctionReturn(0); 349 } 350 351 /* --------------------------------------------------------------------- */ 352 353 static PetscReal polyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval) 354 { 355 PetscReal denominator = 1.; 356 PetscReal numerator = 1.; 357 PetscInt i =0; 358 359 for (i=0; i<(order+1); i++) { 360 if (i!=basis) { 361 numerator *= (xval-xLocVal[i]); 362 denominator *= (xLocVal[basis]-xLocVal[i]); 363 } 364 } 365 return (numerator/denominator); 366 } 367 368 /* --------------------------------------------------------------------- */ 369 370 static PetscReal derivPolyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval) 371 { 372 PetscReal denominator; 373 PetscReal numerator; 374 PetscReal numtmp; 375 PetscInt i=0,j=0; 376 377 denominator=1.; 378 for (i=0; i<(order+1); i++) { 379 if (i!=basis) denominator *= (xLocVal[basis]-xLocVal[i]); 380 } 381 numerator = 0.; 382 for (j=0;j<(order+1);++j) { 383 if (j != basis) { 384 numtmp = 1.; 385 for (i=0;i<(order+1);++i) { 386 if (i!=basis && j!=i) numtmp *= (xval-xLocVal[i]); 387 } 388 numerator += numtmp; 389 } 390 } 391 392 return (numerator/denominator); 393 } 394 395 /* --------------------------------------------------------------------- */ 396 397 static PetscReal ubdy(PetscReal x,PetscReal y) 398 { 399 return x*x*y*y; 400 } 401 402 static PetscReal src(PetscReal x,PetscReal y) 403 { 404 return -2.*y*y - 2.*x*x; 405 } 406 /* --------------------------------------------------------------------- */ 407 408 static void leggaulob(PetscReal x1, PetscReal x2, PetscReal x[], PetscReal w[], int n) 409 /******************************************************************************* 410 Given the lower and upper limits of integration x1 and x2, and given n, this 411 routine returns arrays x[0..n-1] and w[0..n-1] of length n, containing the abscissas 412 and weights of the Gauss-Lobatto-Legendre n-point quadrature formula. 413 *******************************************************************************/ 414 { 415 PetscInt j,m; 416 PetscReal z1,z,xm,xl,q,qp,Ln,scale; 417 if (n==1) { 418 x[0] = x1; /* Scale the root to the desired interval, */ 419 x[1] = x2; /* and put in its symmetric counterpart. */ 420 w[0] = 1.; /* Compute the weight */ 421 w[1] = 1.; /* and its symmetric counterpart. */ 422 } else { 423 x[0] = x1; /* Scale the root to the desired interval, */ 424 x[n] = x2; /* and put in its symmetric counterpart. */ 425 w[0] = 2./(n*(n+1)); /* Compute the weight */ 426 w[n] = 2./(n*(n+1)); /* and its symmetric counterpart. */ 427 m = (n+1)/2; /* The roots are symmetric, so we only find half of them. */ 428 xm = 0.5*(x2+x1); 429 xl = 0.5*(x2-x1); 430 for (j=1; j<=(m-1); j++) { /* Loop over the desired roots. */ 431 z=-1.0*PetscCosReal((PETSC_PI*(j+0.25)/(n))-(3.0/(8.0*n*PETSC_PI))*(1.0/(j+0.25))); 432 /* Starting with the above approximation to the ith root, we enter */ 433 /* the main loop of refinement by Newton's method. */ 434 do { 435 qAndLEvaluation(n,z,&q,&qp,&Ln); 436 z1 = z; 437 z = z1-q/qp; /* Newton's method. */ 438 } while (PetscAbsReal(z-z1) > 3.0e-11); 439 qAndLEvaluation(n,z,&q,&qp,&Ln); 440 x[j] = xm+xl*z; /* Scale the root to the desired interval, */ 441 x[n-j] = xm-xl*z; /* and put in its symmetric counterpart. */ 442 w[j] = 2.0/(n*(n+1)*Ln*Ln); /* Compute the weight */ 443 w[n-j] = w[j]; /* and its symmetric counterpart. */ 444 } 445 } 446 if (n%2==0) { 447 qAndLEvaluation(n,0.0,&q,&qp,&Ln); 448 x[n/2]=(x2-x1)/2.0; 449 w[n/2]=2.0/(n*(n+1)*Ln*Ln); 450 } 451 /* scale the weights according to mapping from [-1,1] to [0,1] */ 452 scale = (x2-x1)/2.0; 453 for (j=0; j<=n; ++j) w[j] = w[j]*scale; 454 } 455 456 /******************************************************************************/ 457 static void qAndLEvaluation(int n, PetscReal x, PetscReal *q, PetscReal *qp, PetscReal *Ln) 458 /******************************************************************************* 459 Compute the polynomial qn(x) = L_{N+1}(x) - L_{n-1}(x) and its derivative in 460 addition to L_N(x) as these are needed for the GLL points. See the book titled 461 "Implementing Spectral Methods for Partial Differential Equations: Algorithms 462 for Scientists and Engineers" by David A. Kopriva. 463 *******************************************************************************/ 464 { 465 PetscInt k; 466 467 PetscReal Lnp; 468 PetscReal Lnp1, Lnp1p; 469 PetscReal Lnm1, Lnm1p; 470 PetscReal Lnm2, Lnm2p; 471 472 Lnm1 = 1.0; 473 *Ln = x; 474 Lnm1p = 0.0; 475 Lnp = 1.0; 476 477 for (k=2; k<=n; ++k) { 478 Lnm2 = Lnm1; 479 Lnm1 = *Ln; 480 Lnm2p = Lnm1p; 481 Lnm1p = Lnp; 482 *Ln = (2.*k-1.)/(1.0*k)*x*Lnm1 - (k-1.)/(1.0*k)*Lnm2; 483 Lnp = Lnm2p + (2.0*k-1)*Lnm1; 484 } 485 k = n+1; 486 Lnp1 = (2.*k-1.)/(1.0*k)*x*(*Ln) - (k-1.)/(1.0*k)*Lnm1; 487 Lnp1p = Lnm1p + (2.0*k-1)*(*Ln); 488 *q = Lnp1 - Lnm1; 489 *qp = Lnp1p - Lnm1p; 490 } 491 492 /*TEST 493 494 test: 495 nsize: 2 496 args: -ksp_monitor_short 497 498 TEST*/ 499