xref: /petsc/src/ksp/ksp/tutorials/ex51.c (revision 85b8072b2b83ac119844ec3b1e8ff708fe6bee9d)
1 
2 static char help[] = "This example solves a linear system in parallel with KSP.  The matrix\n\
3 uses arbitrary order polynomials for finite elements on the unit square.  To test the parallel\n\
4 matrix assembly, the matrix is intentionally laid out across processors\n\
5 differently from the way it is assembled.  Input arguments are:\n\
6   -m <size> -p <order> : mesh size and polynomial order\n\n";
7 
8 /* Contributed by Travis Austin <austin@txcorp.com>, 2010.
9    based on src/ksp/ksp/tutorials/ex3.c
10  */
11 
12 #include <petscksp.h>
13 
14 /* Declare user-defined routines */
15 static PetscReal      src(PetscReal,PetscReal);
16 static PetscReal      ubdy(PetscReal,PetscReal);
17 static PetscReal      polyBasisFunc(PetscInt,PetscInt,PetscReal*,PetscReal);
18 static PetscReal      derivPolyBasisFunc(PetscInt,PetscInt,PetscReal*,PetscReal);
19 static PetscErrorCode Form1DElementMass(PetscReal,PetscInt,PetscReal*,PetscReal*,PetscScalar*);
20 static PetscErrorCode Form1DElementStiffness(PetscReal,PetscInt,PetscReal*,PetscReal*,PetscScalar*);
21 static PetscErrorCode Form2DElementMass(PetscInt,PetscScalar*,PetscScalar*);
22 static PetscErrorCode Form2DElementStiffness(PetscInt,PetscScalar*,PetscScalar*,PetscScalar*);
23 static PetscErrorCode FormNodalRhs(PetscInt,PetscReal,PetscReal,PetscReal,PetscReal*,PetscScalar*);
24 static PetscErrorCode FormNodalSoln(PetscInt,PetscReal,PetscReal,PetscReal,PetscReal*,PetscScalar*);
25 static void leggaulob(PetscReal, PetscReal, PetscReal [], PetscReal [], int);
26 static void qAndLEvaluation(int, PetscReal, PetscReal*, PetscReal*, PetscReal*);
27 
28 int main(int argc,char **args)
29 {
30   PetscInt       p = 2, m = 5;
31   PetscInt       num1Dnodes, num2Dnodes;
32   PetscScalar    *Ke1D,*Ke2D,*Me1D,*Me2D;
33   PetscScalar    *r,*ue,val;
34   Vec            u,ustar,b,q;
35   Mat            A,Mass;
36   KSP            ksp;
37   PetscInt       M,N;
38   PetscMPIInt    rank,size;
39   PetscReal      x,y,h,norm;
40   PetscInt       *idx,indx,count,*rows,i,j,k,start,end,its;
41   PetscReal      *rowsx,*rowsy;
42   PetscReal      *gllNode, *gllWgts;
43 
44   PetscCall(PetscInitialize(&argc,&args,(char*)0,help));
45   PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Options for p-FEM","");
46   PetscCall(PetscOptionsInt("-m","Number of elements in each direction","None",m,&m,NULL));
47   PetscCall(PetscOptionsInt("-p","Order of each element (tensor product basis)","None",p,&p,NULL));
48   PetscOptionsEnd();
49   PetscCheck(p > 0,PETSC_COMM_SELF,PETSC_ERR_USER,"Option -p value should be greater than zero");
50   N    = (p*m+1)*(p*m+1); /* dimension of matrix */
51   M    = m*m; /* number of elements */
52   h    = 1.0/m; /* mesh width */
53   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD,&rank));
54   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD,&size));
55 
56   /* Create stiffness matrix */
57   PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
58   PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N));
59   PetscCall(MatSetFromOptions(A));
60   PetscCall(MatSetUp(A));
61 
62   /* Create matrix  */
63   PetscCall(MatCreate(PETSC_COMM_WORLD,&Mass));
64   PetscCall(MatSetSizes(Mass,PETSC_DECIDE,PETSC_DECIDE,N,N));
65   PetscCall(MatSetFromOptions(Mass));
66   PetscCall(MatSetUp(Mass));
67   start = rank*(M/size) + ((M%size) < rank ? (M%size) : rank);
68   end   = start + M/size + ((M%size) > rank);
69 
70   /* Allocate element stiffness matrices */
71   num1Dnodes = (p+1);
72   num2Dnodes = num1Dnodes*num1Dnodes;
73 
74   PetscCall(PetscMalloc1(num1Dnodes*num1Dnodes,&Me1D));
75   PetscCall(PetscMalloc1(num1Dnodes*num1Dnodes,&Ke1D));
76   PetscCall(PetscMalloc1(num2Dnodes*num2Dnodes,&Me2D));
77   PetscCall(PetscMalloc1(num2Dnodes*num2Dnodes,&Ke2D));
78   PetscCall(PetscMalloc1(num2Dnodes,&idx));
79   PetscCall(PetscMalloc1(num2Dnodes,&r));
80   PetscCall(PetscMalloc1(num2Dnodes,&ue));
81 
82   /* Allocate quadrature and create stiffness matrices */
83   PetscCall(PetscMalloc1(p+1,&gllNode));
84   PetscCall(PetscMalloc1(p+1,&gllWgts));
85   leggaulob(0.0,1.0,gllNode,gllWgts,p); /* Get GLL nodes and weights */
86   PetscCall(Form1DElementMass(h,p,gllNode,gllWgts,Me1D));
87   PetscCall(Form1DElementStiffness(h,p,gllNode,gllWgts,Ke1D));
88   PetscCall(Form2DElementMass(p,Me1D,Me2D));
89   PetscCall(Form2DElementStiffness(p,Ke1D,Me1D,Ke2D));
90 
91   /* Assemble matrix */
92   for (i=start; i<end; i++) {
93      indx = 0;
94      for (k=0;k<(p+1);++k) {
95        for (j=0;j<(p+1);++j) {
96          idx[indx++] = p*(p*m+1)*(i/m) + p*(i % m) + k*(p*m+1) + j;
97        }
98      }
99      PetscCall(MatSetValues(A,num2Dnodes,idx,num2Dnodes,idx,Ke2D,ADD_VALUES));
100      PetscCall(MatSetValues(Mass,num2Dnodes,idx,num2Dnodes,idx,Me2D,ADD_VALUES));
101   }
102   PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
103   PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
104   PetscCall(MatAssemblyBegin(Mass,MAT_FINAL_ASSEMBLY));
105   PetscCall(MatAssemblyEnd(Mass,MAT_FINAL_ASSEMBLY));
106 
107   PetscCall(PetscFree(Me1D));
108   PetscCall(PetscFree(Ke1D));
109   PetscCall(PetscFree(Me2D));
110   PetscCall(PetscFree(Ke2D));
111 
112   /* Create right-hand-side and solution vectors */
113   PetscCall(VecCreate(PETSC_COMM_WORLD,&u));
114   PetscCall(VecSetSizes(u,PETSC_DECIDE,N));
115   PetscCall(VecSetFromOptions(u));
116   PetscCall(PetscObjectSetName((PetscObject)u,"Approx. Solution"));
117   PetscCall(VecDuplicate(u,&b));
118   PetscCall(PetscObjectSetName((PetscObject)b,"Right hand side"));
119   PetscCall(VecDuplicate(u,&q));
120   PetscCall(PetscObjectSetName((PetscObject)q,"Right hand side 2"));
121   PetscCall(VecDuplicate(b,&ustar));
122   PetscCall(VecSet(u,0.0));
123   PetscCall(VecSet(b,0.0));
124   PetscCall(VecSet(q,0.0));
125 
126   /* Assemble nodal right-hand-side and soln vector  */
127   for (i=start; i<end; i++) {
128     x    = h*(i % m);
129     y    = h*(i/m);
130     indx = 0;
131     for (k=0;k<(p+1);++k) {
132       for (j=0;j<(p+1);++j) {
133         idx[indx++] = p*(p*m+1)*(i/m) + p*(i % m) + k*(p*m+1) + j;
134       }
135     }
136     PetscCall(FormNodalRhs(p,x,y,h,gllNode,r));
137     PetscCall(FormNodalSoln(p,x,y,h,gllNode,ue));
138     PetscCall(VecSetValues(q,num2Dnodes,idx,r,INSERT_VALUES));
139     PetscCall(VecSetValues(ustar,num2Dnodes,idx,ue,INSERT_VALUES));
140   }
141   PetscCall(VecAssemblyBegin(q));
142   PetscCall(VecAssemblyEnd(q));
143   PetscCall(VecAssemblyBegin(ustar));
144   PetscCall(VecAssemblyEnd(ustar));
145 
146   PetscCall(PetscFree(idx));
147   PetscCall(PetscFree(r));
148   PetscCall(PetscFree(ue));
149 
150   /* Get FE right-hand side vector */
151   PetscCall(MatMult(Mass,q,b));
152 
153   /* Modify matrix and right-hand-side for Dirichlet boundary conditions */
154   PetscCall(PetscMalloc1(4*p*m,&rows));
155   PetscCall(PetscMalloc1(4*p*m,&rowsx));
156   PetscCall(PetscMalloc1(4*p*m,&rowsy));
157   for (i=0; i<p*m+1; i++) {
158     rows[i]          = i; /* bottom */
159     rowsx[i]         = (i/p)*h+gllNode[i%p]*h;
160     rowsy[i]         = 0.0;
161     rows[3*p*m-1+i]  = (p*m)*(p*m+1) + i; /* top */
162     rowsx[3*p*m-1+i] = (i/p)*h+gllNode[i%p]*h;
163     rowsy[3*p*m-1+i] = 1.0;
164   }
165   count = p*m+1; /* left side */
166   indx  = 1;
167   for (i=p*m+1; i<(p*m)*(p*m+1); i+= (p*m+1)) {
168     rows[count]    = i;
169     rowsx[count]   = 0.0;
170     rowsy[count++] = (indx/p)*h+gllNode[indx%p]*h;
171     indx++;
172   }
173   count = 2*p*m; /* right side */
174   indx  = 1;
175   for (i=2*p*m+1; i<(p*m)*(p*m+1); i+= (p*m+1)) {
176     rows[count]    = i;
177     rowsx[count]   = 1.0;
178     rowsy[count++] = (indx/p)*h+gllNode[indx%p]*h;
179     indx++;
180   }
181   for (i=0; i<4*p*m; i++) {
182     x    = rowsx[i];
183     y    = rowsy[i];
184     val  = ubdy(x,y);
185     PetscCall(VecSetValues(b,1,&rows[i],&val,INSERT_VALUES));
186     PetscCall(VecSetValues(u,1,&rows[i],&val,INSERT_VALUES));
187   }
188   PetscCall(MatZeroRows(A,4*p*m,rows,1.0,0,0));
189   PetscCall(PetscFree(rows));
190   PetscCall(PetscFree(rowsx));
191   PetscCall(PetscFree(rowsy));
192 
193   PetscCall(VecAssemblyBegin(u));
194   PetscCall(VecAssemblyEnd(u));
195   PetscCall(VecAssemblyBegin(b));
196   PetscCall(VecAssemblyEnd(b));
197 
198   /* Solve linear system */
199   PetscCall(KSPCreate(PETSC_COMM_WORLD,&ksp));
200   PetscCall(KSPSetOperators(ksp,A,A));
201   PetscCall(KSPSetInitialGuessNonzero(ksp,PETSC_TRUE));
202   PetscCall(KSPSetFromOptions(ksp));
203   PetscCall(KSPSolve(ksp,b,u));
204 
205   /* Check error */
206   PetscCall(VecAXPY(u,-1.0,ustar));
207   PetscCall(VecNorm(u,NORM_2,&norm));
208   PetscCall(KSPGetIterationNumber(ksp,&its));
209   PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %" PetscInt_FMT "\n",(double)(norm*h),its));
210 
211   PetscCall(PetscFree(gllNode));
212   PetscCall(PetscFree(gllWgts));
213 
214   PetscCall(KSPDestroy(&ksp));
215   PetscCall(VecDestroy(&u));
216   PetscCall(VecDestroy(&b));
217   PetscCall(VecDestroy(&q));
218   PetscCall(VecDestroy(&ustar));
219   PetscCall(MatDestroy(&A));
220   PetscCall(MatDestroy(&Mass));
221 
222   PetscCall(PetscFinalize());
223   return 0;
224 }
225 
226 /* --------------------------------------------------------------------- */
227 
228 /* 1d element stiffness mass matrix  */
229 static PetscErrorCode Form1DElementMass(PetscReal H,PetscInt P,PetscReal *gqn,PetscReal *gqw,PetscScalar *Me1D)
230 {
231   PetscInt i,j,k;
232   PetscInt indx;
233 
234   PetscFunctionBeginUser;
235   for (j=0; j<(P+1); ++j) {
236     for (i=0; i<(P+1); ++i) {
237       indx       = j*(P+1)+i;
238       Me1D[indx] = 0.0;
239       for (k=0; k<(P+1);++k) {
240         Me1D[indx] += H*gqw[k]*polyBasisFunc(P,i,gqn,gqn[k])*polyBasisFunc(P,j,gqn,gqn[k]);
241       }
242     }
243   }
244   PetscFunctionReturn(0);
245 }
246 
247 /* --------------------------------------------------------------------- */
248 
249 /* 1d element stiffness matrix for derivative */
250 static PetscErrorCode Form1DElementStiffness(PetscReal H,PetscInt P,PetscReal *gqn,PetscReal *gqw,PetscScalar *Ke1D)
251 {
252   PetscInt i,j,k;
253   PetscInt indx;
254 
255   PetscFunctionBeginUser;
256   for (j=0;j<(P+1);++j) {
257     for (i=0;i<(P+1);++i) {
258       indx = j*(P+1)+i;
259       Ke1D[indx] = 0.0;
260       for (k=0; k<(P+1);++k) {
261         Ke1D[indx] += (1./H)*gqw[k]*derivPolyBasisFunc(P,i,gqn,gqn[k])*derivPolyBasisFunc(P,j,gqn,gqn[k]);
262       }
263     }
264   }
265   PetscFunctionReturn(0);
266 }
267 
268 /* --------------------------------------------------------------------- */
269 
270    /* element mass matrix */
271 static PetscErrorCode Form2DElementMass(PetscInt P,PetscScalar *Me1D,PetscScalar *Me2D)
272 {
273   PetscInt i1,j1,i2,j2;
274   PetscInt indx1,indx2,indx3;
275 
276   PetscFunctionBeginUser;
277   for (j2=0;j2<(P+1);++j2) {
278     for (i2=0; i2<(P+1);++i2) {
279       for (j1=0;j1<(P+1);++j1) {
280         for (i1=0;i1<(P+1);++i1) {
281           indx1 = j1*(P+1)+i1;
282           indx2 = j2*(P+1)+i2;
283           indx3 = (j2*(P+1)+j1)*(P+1)*(P+1)+(i2*(P+1)+i1);
284           Me2D[indx3] = Me1D[indx1]*Me1D[indx2];
285         }
286       }
287     }
288   }
289   PetscFunctionReturn(0);
290 }
291 
292 /* --------------------------------------------------------------------- */
293 
294 /* element stiffness for Laplacian */
295 static PetscErrorCode Form2DElementStiffness(PetscInt P,PetscScalar *Ke1D,PetscScalar *Me1D,PetscScalar *Ke2D)
296 {
297   PetscInt i1,j1,i2,j2;
298   PetscInt indx1,indx2,indx3;
299 
300   PetscFunctionBeginUser;
301   for (j2=0;j2<(P+1);++j2) {
302     for (i2=0; i2<(P+1);++i2) {
303       for (j1=0;j1<(P+1);++j1) {
304         for (i1=0;i1<(P+1);++i1) {
305           indx1 = j1*(P+1)+i1;
306           indx2 = j2*(P+1)+i2;
307           indx3 = (j2*(P+1)+j1)*(P+1)*(P+1)+(i2*(P+1)+i1);
308           Ke2D[indx3] = Ke1D[indx1]*Me1D[indx2] + Me1D[indx1]*Ke1D[indx2];
309         }
310       }
311     }
312   }
313   PetscFunctionReturn(0);
314 }
315 
316 /* --------------------------------------------------------------------- */
317 
318 static PetscErrorCode FormNodalRhs(PetscInt P,PetscReal x,PetscReal y,PetscReal H,PetscReal* nds,PetscScalar *r)
319 {
320   PetscInt i,j,indx;
321 
322   PetscFunctionBeginUser;
323   indx=0;
324   for (j=0;j<(P+1);++j) {
325     for (i=0;i<(P+1);++i) {
326       r[indx] = src(x+H*nds[i],y+H*nds[j]);
327       indx++;
328     }
329   }
330   PetscFunctionReturn(0);
331 }
332 
333 /* --------------------------------------------------------------------- */
334 
335 static PetscErrorCode FormNodalSoln(PetscInt P,PetscReal x,PetscReal y,PetscReal H,PetscReal* nds,PetscScalar *u)
336 {
337   PetscInt i,j,indx;
338 
339   PetscFunctionBeginUser;
340   indx=0;
341   for (j=0;j<(P+1);++j) {
342     for (i=0;i<(P+1);++i) {
343       u[indx] = ubdy(x+H*nds[i],y+H*nds[j]);
344       indx++;
345     }
346   }
347   PetscFunctionReturn(0);
348 }
349 
350 /* --------------------------------------------------------------------- */
351 
352 static PetscReal polyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval)
353 {
354   PetscReal denominator = 1.;
355   PetscReal numerator   = 1.;
356   PetscInt  i           =0;
357 
358   for (i=0; i<(order+1); i++) {
359     if (i!=basis) {
360       numerator   *= (xval-xLocVal[i]);
361       denominator *= (xLocVal[basis]-xLocVal[i]);
362     }
363   }
364   return (numerator/denominator);
365 }
366 
367 /* --------------------------------------------------------------------- */
368 
369 static PetscReal derivPolyBasisFunc(PetscInt order, PetscInt basis, PetscReal *xLocVal, PetscReal xval)
370 {
371   PetscReal denominator;
372   PetscReal numerator;
373   PetscReal numtmp;
374   PetscInt  i=0,j=0;
375 
376   denominator=1.;
377   for (i=0; i<(order+1); i++) {
378     if (i!=basis) denominator *= (xLocVal[basis]-xLocVal[i]);
379   }
380   numerator = 0.;
381   for (j=0;j<(order+1);++j) {
382     if (j != basis) {
383       numtmp = 1.;
384       for (i=0;i<(order+1);++i) {
385         if (i!=basis && j!=i) numtmp *= (xval-xLocVal[i]);
386       }
387       numerator += numtmp;
388     }
389   }
390 
391   return (numerator/denominator);
392 }
393 
394 /* --------------------------------------------------------------------- */
395 
396 static PetscReal ubdy(PetscReal x,PetscReal y)
397 {
398   return x*x*y*y;
399 }
400 
401 static PetscReal src(PetscReal x,PetscReal y)
402 {
403   return -2.*y*y - 2.*x*x;
404 }
405 /* --------------------------------------------------------------------- */
406 
407 static void leggaulob(PetscReal x1, PetscReal x2, PetscReal x[], PetscReal w[], int n)
408 /*******************************************************************************
409 Given the lower and upper limits of integration x1 and x2, and given n, this
410 routine returns arrays x[0..n-1] and w[0..n-1] of length n, containing the abscissas
411 and weights of the Gauss-Lobatto-Legendre n-point quadrature formula.
412 *******************************************************************************/
413 {
414   PetscInt    j,m;
415   PetscReal z1,z,xm,xl,q,qp,Ln,scale;
416   if (n==1) {
417     x[0] = x1;   /* Scale the root to the desired interval, */
418     x[1] = x2;   /* and put in its symmetric counterpart.   */
419     w[0] = 1.;   /* Compute the weight */
420     w[1] = 1.;   /* and its symmetric counterpart. */
421   } else {
422     x[0] = x1;   /* Scale the root to the desired interval, */
423     x[n] = x2;   /* and put in its symmetric counterpart.   */
424     w[0] = 2./(n*(n+1));   /* Compute the weight */
425     w[n] = 2./(n*(n+1));   /* and its symmetric counterpart. */
426     m    = (n+1)/2; /* The roots are symmetric, so we only find half of them. */
427     xm   = 0.5*(x2+x1);
428     xl   = 0.5*(x2-x1);
429     for (j=1; j<=(m-1); j++) { /* Loop over the desired roots. */
430       z=-1.0*PetscCosReal((PETSC_PI*(j+0.25)/(n))-(3.0/(8.0*n*PETSC_PI))*(1.0/(j+0.25)));
431       /* Starting with the above approximation to the ith root, we enter */
432       /* the main loop of refinement by Newton's method.                 */
433       do {
434         qAndLEvaluation(n,z,&q,&qp,&Ln);
435         z1 = z;
436         z  = z1-q/qp; /* Newton's method. */
437       } while (PetscAbsReal(z-z1) > 3.0e-11);
438       qAndLEvaluation(n,z,&q,&qp,&Ln);
439       x[j]   = xm+xl*z;      /* Scale the root to the desired interval, */
440       x[n-j] = xm-xl*z;      /* and put in its symmetric counterpart.   */
441       w[j]   = 2.0/(n*(n+1)*Ln*Ln);  /* Compute the weight */
442       w[n-j] = w[j];                 /* and its symmetric counterpart. */
443     }
444   }
445   if (n%2==0) {
446     qAndLEvaluation(n,0.0,&q,&qp,&Ln);
447     x[n/2]=(x2-x1)/2.0;
448     w[n/2]=2.0/(n*(n+1)*Ln*Ln);
449   }
450   /* scale the weights according to mapping from [-1,1] to [0,1] */
451   scale = (x2-x1)/2.0;
452   for (j=0; j<=n; ++j) w[j] = w[j]*scale;
453 }
454 
455 /******************************************************************************/
456 static void qAndLEvaluation(int n, PetscReal x, PetscReal *q, PetscReal *qp, PetscReal *Ln)
457 /*******************************************************************************
458 Compute the polynomial qn(x) = L_{N+1}(x) - L_{n-1}(x) and its derivative in
459 addition to L_N(x) as these are needed for the GLL points.  See the book titled
460 "Implementing Spectral Methods for Partial Differential Equations: Algorithms
461 for Scientists and Engineers" by David A. Kopriva.
462 *******************************************************************************/
463 {
464   PetscInt k;
465 
466   PetscReal Lnp;
467   PetscReal Lnp1, Lnp1p;
468   PetscReal Lnm1, Lnm1p;
469   PetscReal Lnm2, Lnm2p;
470 
471   Lnm1  = 1.0;
472   *Ln   = x;
473   Lnm1p = 0.0;
474   Lnp   = 1.0;
475 
476   for (k=2; k<=n; ++k) {
477     Lnm2  = Lnm1;
478     Lnm1  = *Ln;
479     Lnm2p = Lnm1p;
480     Lnm1p = Lnp;
481     *Ln   = (2.*k-1.)/(1.0*k)*x*Lnm1 - (k-1.)/(1.0*k)*Lnm2;
482     Lnp   = Lnm2p + (2.0*k-1)*Lnm1;
483   }
484   k     = n+1;
485   Lnp1  = (2.*k-1.)/(1.0*k)*x*(*Ln) - (k-1.)/(1.0*k)*Lnm1;
486   Lnp1p = Lnm1p + (2.0*k-1)*(*Ln);
487   *q    = Lnp1 - Lnm1;
488   *qp   = Lnp1p - Lnm1p;
489 }
490 
491 /*TEST
492 
493    test:
494       nsize: 2
495       args: -ksp_monitor_short
496 
497 TEST*/
498