xref: /petsc/src/ksp/ksp/tutorials/ex74.c (revision 5d2a865bca654049dd32bfe43807bdf4c92c46f4)
/* Help text shown by -help; PetscInitialize() below receives this string.
   It documents the model problem, the spatial discretization, and the stage
   system solved with MATKAIJ at every implicit Runge-Kutta time step. */
static char help[] = "Solves the constant-coefficient 1D heat equation \n\
with an Implicit Runge-Kutta method using MatKAIJ.                  \n\
                                                                    \n\
    du      d^2 u                                                   \n\
    --  = a ----- ; 0 <= x <= 1;                                    \n\
    dt      dx^2                                                    \n\
                                                                    \n\
  with periodic boundary conditions                                 \n\
                                                                    \n\
2nd order central discretization in space:                          \n\
                                                                    \n\
   [ d^2 u ]     u_{i+1} - 2u_i + u_{i-1}                           \n\
   [ ----- ]  =  ------------------------                           \n\
   [ dx^2  ]i              h^2                                      \n\
                                                                    \n\
    i = grid index;    h = x_{i+1}-x_i (Uniform)                    \n\
    0 <= i < n         h = 1.0/n                                    \n\
                                                                    \n\
Thus,                                                               \n\
                                                                    \n\
   du                                                               \n\
   --  = Ju;  J = (a/h^2) tridiagonal(1,-2,1)_n                     \n\
   dt                                                               \n\
                                                                    \n\
Implicit Runge-Kutta method:                                        \n\
                                                                    \n\
  U^(k)   = u^n + dt \\sum_i a_{ki} JU^{i}                          \n\
  u^{n+1} = u^n + dt \\sum_i b_i JU^{i}                             \n\
                                                                    \n\
  i = 1,...,s (s -> number of stages)                               \n\
                                                                    \n\
At each time step, we solve                                         \n\
                                                                    \n\
 [  1                                  ]     1                      \n\
 [ -- I \\otimes A^{-1} - J \\otimes I ] U = -- u^n \\otimes A^{-1} \n\
 [ dt                                  ]     dt                     \n\
                                                                    \n\
  where A is the Butcher tableaux of the implicit                   \n\
  Runge-Kutta method,                                               \n\
                                                                    \n\
with MATKAIJ and KSP.                                               \n\
                                                                    \n\
Available IRK Methods:                                              \n\
  gauss       n-stage Gauss method                                  \n\
                                                                    \n";
46 
47 /*
48   Include "petscksp.h" so that we can use KSP solvers.  Note that this file
49   automatically includes:
50   petscsys.h      - base PETSc routines
51   petscvec.h      - vectors
52   petscmat.h      - matrices
53   petscis.h       - index sets
54   petscviewer.h   - viewers
55   petscpc.h       - preconditioners
56 */
57 #include <petscksp.h>
58 #include <petscdt.h>
59 
60 /* define the IRK methods available */
61 #define IRKGAUSS "gauss"
62 
/* Which spatial operator is assembled: diffusion a*u_xx or advection a*u_x */
typedef enum {
  PHYSICS_DIFFUSION,
  PHYSICS_ADVECTION
} PhysicsType;
/* String table for PetscOptionsEnum(): the enum value names, followed by the
   type name and option prefix used in help output, terminated by NULL */
const char *const PhysicsTypes[] = {"DIFFUSION", "ADVECTION", "PhysicsType", "PHYSICS_", NULL};
68 
/* Run-time parameters gathered from the options database */
typedef struct __context__ {
  PetscReal   a;            /* diffusion coefficient (advection speed for PHYSICS_ADVECTION) */
  PetscReal   xmin, xmax;   /* domain bounds                 */
  PetscInt    imax;         /* number of grid points         */
  PetscInt    niter;        /* number of time iterations     */
  PetscReal   dt;           /* time step size                */
  PhysicsType physics_type; /* operator to discretize (diffusion or advection) */
} UserContext;
77 
78 static PetscErrorCode ExactSolution(Vec, void *, PetscReal);
79 static PetscErrorCode RKCreate_Gauss(PetscInt, PetscScalar **, PetscScalar **, PetscReal **);
80 static PetscErrorCode Assemble_AdvDiff(MPI_Comm, UserContext *, Mat *);
81 
main(int argc,char ** argv)82 int main(int argc, char **argv)
83 {
84   Vec               u, uex, rhs, z;
85   UserContext       ctxt;
86   PetscInt          nstages, is, ie, matis, matie, *ix, *ix2;
87   PetscInt          n, i, s, t, total_its;
88   PetscScalar      *A, *B, *At, *b, *zvals, one = 1.0;
89   PetscReal        *c, err, time;
90   Mat               Identity, J, TA, SC, R;
91   KSP               ksp;
92   PetscFunctionList IRKList      = NULL;
93   char              irktype[256] = IRKGAUSS;
94 
95   PetscFunctionBeginUser;
96   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
97   PetscCall(PetscFunctionListAdd(&IRKList, IRKGAUSS, RKCreate_Gauss));
98 
99   /* default value */
100   ctxt.a            = 1.0;
101   ctxt.xmin         = 0.0;
102   ctxt.xmax         = 1.0;
103   ctxt.imax         = 20;
104   ctxt.niter        = 0;
105   ctxt.dt           = 0.0;
106   ctxt.physics_type = PHYSICS_DIFFUSION;
107 
108   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "IRK options", "");
109   PetscCall(PetscOptionsReal("-a", "diffusion coefficient", "<1.0>", ctxt.a, &ctxt.a, NULL));
110   PetscCall(PetscOptionsInt("-imax", "grid size", "<20>", ctxt.imax, &ctxt.imax, NULL));
111   PetscCall(PetscOptionsReal("-xmin", "xmin", "<0.0>", ctxt.xmin, &ctxt.xmin, NULL));
112   PetscCall(PetscOptionsReal("-xmax", "xmax", "<1.0>", ctxt.xmax, &ctxt.xmax, NULL));
113   PetscCall(PetscOptionsInt("-niter", "number of time steps", "<0>", ctxt.niter, &ctxt.niter, NULL));
114   PetscCall(PetscOptionsReal("-dt", "time step size", "<0.0>", ctxt.dt, &ctxt.dt, NULL));
115   PetscCall(PetscOptionsFList("-irk_type", "IRK method family", "", IRKList, irktype, irktype, sizeof(irktype), NULL));
116   nstages = 2;
117   PetscCall(PetscOptionsInt("-irk_nstages", "Number of stages in IRK method", "", nstages, &nstages, NULL));
118   PetscCall(PetscOptionsEnum("-physics_type", "Type of process to discretize", "", PhysicsTypes, (PetscEnum)ctxt.physics_type, (PetscEnum *)&ctxt.physics_type, NULL));
119   PetscOptionsEnd();
120 
121   /* allocate and initialize solution vector and exact solution */
122   PetscCall(VecCreate(PETSC_COMM_WORLD, &u));
123   PetscCall(VecSetSizes(u, PETSC_DECIDE, ctxt.imax));
124   PetscCall(VecSetFromOptions(u));
125   PetscCall(VecDuplicate(u, &uex));
126   /* initial solution */
127   PetscCall(ExactSolution(u, &ctxt, 0.0));
128   /* exact   solution */
129   PetscCall(ExactSolution(uex, &ctxt, ctxt.dt * ctxt.niter));
130 
131   { /* Create A,b,c */
132     PetscErrorCode (*irkcreate)(PetscInt, PetscScalar **, PetscScalar **, PetscReal **);
133     PetscCall(PetscFunctionListFind(IRKList, irktype, &irkcreate));
134     PetscCall((*irkcreate)(nstages, &A, &b, &c));
135   }
136   { /* Invert A */
137     /* PETSc does not provide a routine to calculate the inverse of a general matrix.
138      * To get the inverse of A, we form a sequential BAIJ matrix from it, consisting of a single block with block size
139      * equal to the dimension of A, and then use MatInvertBlockDiagonal(). */
140     Mat                A_baij;
141     PetscInt           idxm[1] = {0}, idxn[1] = {0};
142     const PetscScalar *A_inv;
143     PetscCall(MatCreateSeqBAIJ(PETSC_COMM_SELF, nstages, nstages, nstages, 1, NULL, &A_baij));
144     PetscCall(MatSetOption(A_baij, MAT_ROW_ORIENTED, PETSC_FALSE));
145     PetscCall(MatSetValuesBlocked(A_baij, 1, idxm, 1, idxn, A, INSERT_VALUES));
146     PetscCall(MatAssemblyBegin(A_baij, MAT_FINAL_ASSEMBLY));
147     PetscCall(MatAssemblyEnd(A_baij, MAT_FINAL_ASSEMBLY));
148     PetscCall(MatInvertBlockDiagonal(A_baij, &A_inv));
149     PetscCall(PetscArraycpy(A, A_inv, nstages * nstages));
150     PetscCall(MatDestroy(&A_baij));
151   }
152   /* Scale (1/dt)*A^{-1} and (1/dt)*b */
153   for (s = 0; s < nstages * nstages; s++) A[s] *= 1.0 / ctxt.dt;
154   for (s = 0; s < nstages; s++) b[s] *= (-ctxt.dt);
155 
156   /* Compute row sums At and identity B */
157   PetscCall(PetscMalloc2(nstages, &At, PetscSqr(nstages), &B));
158   for (s = 0; s < nstages; s++) {
159     At[s] = 0;
160     for (t = 0; t < nstages; t++) {
161       At[s] += A[s + nstages * t];        /* Row sums of  */
162       B[s + nstages * t] = 1. * (s == t); /* identity */
163     }
164   }
165 
166   /* allocate and calculate the (-J) matrix */
167   switch (ctxt.physics_type) {
168   case PHYSICS_ADVECTION:
169   case PHYSICS_DIFFUSION:
170     PetscCall(Assemble_AdvDiff(PETSC_COMM_WORLD, &ctxt, &J));
171   }
172   PetscCall(MatCreate(PETSC_COMM_WORLD, &Identity));
173   PetscCall(MatSetType(Identity, MATAIJ));
174   PetscCall(MatGetOwnershipRange(J, &matis, &matie));
175   PetscCall(MatSetSizes(Identity, matie - matis, matie - matis, ctxt.imax, ctxt.imax));
176   PetscCall(MatSetUp(Identity));
177   for (i = matis; i < matie; i++) PetscCall(MatSetValues(Identity, 1, &i, 1, &i, &one, INSERT_VALUES));
178   PetscCall(MatAssemblyBegin(Identity, MAT_FINAL_ASSEMBLY));
179   PetscCall(MatAssemblyEnd(Identity, MAT_FINAL_ASSEMBLY));
180 
181   /* Create the KAIJ matrix for solving the stages */
182   PetscCall(MatCreateKAIJ(J, nstages, nstages, A, B, &TA));
183 
184   /* Create the KAIJ matrix for step completion */
185   PetscCall(MatCreateKAIJ(J, 1, nstages, NULL, b, &SC));
186 
187   /* Create the KAIJ matrix to create the R for solving the stages */
188   PetscCall(MatCreateKAIJ(Identity, nstages, 1, NULL, At, &R));
189 
190   /* Create and set options for KSP */
191   PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
192   PetscCall(KSPSetOperators(ksp, TA, TA));
193   PetscCall(KSPSetFromOptions(ksp));
194 
195   /* Allocate work and right-hand-side vectors */
196   PetscCall(VecCreate(PETSC_COMM_WORLD, &z));
197   PetscCall(VecSetFromOptions(z));
198   PetscCall(VecSetSizes(z, PETSC_DECIDE, ctxt.imax * nstages));
199   PetscCall(VecDuplicate(z, &rhs));
200 
201   PetscCall(VecGetOwnershipRange(u, &is, &ie));
202   PetscCall(PetscMalloc3(nstages, &ix, nstages, &zvals, ie - is, &ix2));
203   /* iterate in time */
204   for (n = 0, time = 0., total_its = 0; n < ctxt.niter; n++) {
205     PetscInt its;
206 
207     /* compute and set the right-hand side */
208     PetscCall(MatMult(R, u, rhs));
209 
210     /* Solve the system */
211     PetscCall(KSPSolve(ksp, rhs, z));
212     PetscCall(KSPGetIterationNumber(ksp, &its));
213     total_its += its;
214 
215     /* Update the solution */
216     PetscCall(MatMultAdd(SC, z, u, u));
217 
218     /* time step complete */
219     time += ctxt.dt;
220   }
221   PetscCall(PetscFree3(ix, ix2, zvals));
222 
223   /* Deallocate work and right-hand-side vectors */
224   PetscCall(VecDestroy(&z));
225   PetscCall(VecDestroy(&rhs));
226 
227   /* Calculate error in final solution */
228   PetscCall(VecAYPX(uex, -1.0, u));
229   PetscCall(VecNorm(uex, NORM_2, &err));
230   err = PetscSqrtReal(err * err / ((PetscReal)ctxt.imax));
231   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "L2 norm of the numerical error = %g (time=%g)\n", (double)err, (double)time));
232   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of time steps: %" PetscInt_FMT " (%" PetscInt_FMT " Krylov iterations)\n", ctxt.niter, total_its));
233 
234   /* Free up memory */
235   PetscCall(KSPDestroy(&ksp));
236   PetscCall(MatDestroy(&TA));
237   PetscCall(MatDestroy(&SC));
238   PetscCall(MatDestroy(&R));
239   PetscCall(MatDestroy(&J));
240   PetscCall(MatDestroy(&Identity));
241   PetscCall(PetscFree3(A, b, c));
242   PetscCall(PetscFree2(At, B));
243   PetscCall(VecDestroy(&uex));
244   PetscCall(VecDestroy(&u));
245   PetscCall(PetscFunctionListDestroy(&IRKList));
246 
247   PetscCall(PetscFinalize());
248   return 0;
249 }
250 
ExactSolution(Vec u,void * c,PetscReal t)251 PetscErrorCode ExactSolution(Vec u, void *c, PetscReal t)
252 {
253   UserContext *ctxt = (UserContext *)c;
254   PetscInt     i, is, ie;
255   PetscScalar *uarr;
256   PetscReal    x, dx, a = ctxt->a, pi = PETSC_PI;
257 
258   PetscFunctionBegin;
259   dx = (ctxt->xmax - ctxt->xmin) / ((PetscReal)ctxt->imax);
260   PetscCall(VecGetOwnershipRange(u, &is, &ie));
261   PetscCall(VecGetArray(u, &uarr));
262   for (i = is; i < ie; i++) {
263     x = i * dx;
264     switch (ctxt->physics_type) {
265     case PHYSICS_DIFFUSION:
266       uarr[i - is] = PetscExpScalar(-4.0 * pi * pi * a * t) * PetscSinScalar(2 * pi * x);
267       break;
268     case PHYSICS_ADVECTION:
269       uarr[i - is] = PetscSinScalar(2 * pi * (x - a * t));
270       break;
271     default:
272       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for physics type %s", PhysicsTypes[ctxt->physics_type]);
273     }
274   }
275   PetscCall(VecRestoreArray(u, &uarr));
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
/* Construct the Butcher tableau of the nstages-stage Gauss (collocation)
   implicit Runge-Kutta method.

   Outputs (allocated here, freed by the caller with PetscFree3(A,b,c)):
     gauss_A - nstages x nstages stage matrix, row-major PetscScalar
     gauss_b - nstages quadrature weights (PetscScalar copy of the real weights)
     gauss_c - nstages quadrature nodes on [0,1] */
static PetscErrorCode RKCreate_Gauss(PetscInt nstages, PetscScalar **gauss_A, PetscScalar **gauss_b, PetscReal **gauss_c)
{
  PetscScalar *A, *G0, *G1; /* A: tableau; G0,G1: collocation (Vandermonde-type) matrices */
  PetscReal   *b, *c;       /* real weights/nodes returned by PetscDTGaussQuadrature() */
  PetscInt     i, j;
  Mat          G0mat, G1mat, Amat;

  PetscFunctionBegin;
  PetscCall(PetscMalloc3(PetscSqr(nstages), &A, nstages, gauss_b, nstages, &c));
  PetscCall(PetscMalloc3(nstages, &b, PetscSqr(nstages), &G0, PetscSqr(nstages), &G1));
  /* Gauss quadrature nodes c and weights b on the interval [0,1] */
  PetscCall(PetscDTGaussQuadrature(nstages, 0., 1., c, b));
  for (i = 0; i < nstages; i++) (*gauss_b)[i] = b[i]; /* copy to possibly-complex array */

  /* Collocation conditions give A^T = G0^{-1} G1, where
     G0_{ij} = c_i^j and G1_{ij} = c_i^{j+1}/(j+1) */
  for (i = 0; i < nstages; i++) {
    for (j = 0; j < nstages; j++) {
      G0[i * nstages + j] = PetscPowRealInt(c[i], j);
      G1[i * nstages + j] = PetscPowRealInt(c[i], j + 1) / (j + 1);
    }
  }
  /* The arrays above are row-aligned, but we create dense matrices as the transpose;
     note Amat shares storage with A, so solving into Amat fills A directly */
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nstages, nstages, G0, &G0mat));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nstages, nstages, G1, &G1mat));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nstages, nstages, A, &Amat));
  PetscCall(MatLUFactor(G0mat, NULL, NULL, NULL));
  PetscCall(MatMatSolve(G0mat, G1mat, Amat));               /* Amat = G0^{-1} G1 = A^T */
  PetscCall(MatTranspose(Amat, MAT_INPLACE_MATRIX, &Amat)); /* Amat (hence A) now holds A */

  /* destroying the wrappers does not free the user-provided array A */
  PetscCall(MatDestroy(&G0mat));
  PetscCall(MatDestroy(&G1mat));
  PetscCall(MatDestroy(&Amat));
  PetscCall(PetscFree3(b, G0, G1));
  *gauss_A = A;
  *gauss_c = c;
  PetscFunctionReturn(PETSC_SUCCESS);
}
316 
Assemble_AdvDiff(MPI_Comm comm,UserContext * user,Mat * J)317 static PetscErrorCode Assemble_AdvDiff(MPI_Comm comm, UserContext *user, Mat *J)
318 {
319   PetscInt  matis, matie, i;
320   PetscReal dx, dx2;
321 
322   PetscFunctionBegin;
323   dx  = (user->xmax - user->xmin) / ((PetscReal)user->imax);
324   dx2 = dx * dx;
325   PetscCall(MatCreate(comm, J));
326   PetscCall(MatSetType(*J, MATAIJ));
327   PetscCall(MatSetSizes(*J, PETSC_DECIDE, PETSC_DECIDE, user->imax, user->imax));
328   PetscCall(MatSetUp(*J));
329   PetscCall(MatGetOwnershipRange(*J, &matis, &matie));
330   for (i = matis; i < matie; i++) {
331     PetscScalar values[3];
332     PetscInt    col[3];
333     switch (user->physics_type) {
334     case PHYSICS_DIFFUSION:
335       values[0] = -user->a * 1.0 / dx2;
336       values[1] = user->a * 2.0 / dx2;
337       values[2] = -user->a * 1.0 / dx2;
338       break;
339     case PHYSICS_ADVECTION:
340       values[0] = -user->a * .5 / dx;
341       values[1] = 0.;
342       values[2] = user->a * .5 / dx;
343       break;
344     default:
345       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for physics type %s", PhysicsTypes[user->physics_type]);
346     }
347     /* periodic boundaries */
348     if (i == 0) {
349       col[0] = user->imax - 1;
350       col[1] = i;
351       col[2] = i + 1;
352     } else if (i == user->imax - 1) {
353       col[0] = i - 1;
354       col[1] = i;
355       col[2] = 0;
356     } else {
357       col[0] = i - 1;
358       col[1] = i;
359       col[2] = i + 1;
360     }
361     PetscCall(MatSetValues(*J, 1, &i, 3, col, values, INSERT_VALUES));
362   }
363   PetscCall(MatAssemblyBegin(*J, MAT_FINAL_ASSEMBLY));
364   PetscCall(MatAssemblyEnd(*J, MAT_FINAL_ASSEMBLY));
365   PetscFunctionReturn(PETSC_SUCCESS);
366 }
367 
368 /*TEST
369  testset:
370    args: -a 0.1 -dt .125 -niter 5 -imax 40 -ksp_monitor_short -pc_type pbjacobi -irk_type gauss -irk_nstages 2
371    test:
372      suffix: 1
373      requires: !single
374      args: -ksp_atol 1e-6 -vec_mdot_use_gemv {{0 1}} -vec_maxpy_use_gemv {{0 1}}
375    test:
376      requires: hpddm !single
377      suffix: hpddm
378      output_file: output/ex74_1.out
379      args: -ksp_atol 1e-6 -ksp_type hpddm
380    test:
381      requires: hpddm
382      suffix: hpddm_gcrodr
383      output_file: output/ex74_1_hpddm.out
384      args: -ksp_atol 1e-4 -ksp_view_final_residual -ksp_type hpddm -ksp_hpddm_type gcrodr -ksp_hpddm_recycle 2
385  test:
386    suffix: 2
387    args: -a 0.1 -dt .125 -niter 5 -imax 40 -ksp_monitor_short -pc_type pbjacobi -ksp_atol 1e-6 -irk_type gauss -irk_nstages 4 -ksp_gmres_restart 100
388  testset:
389    suffix: 3
390    requires: !single
391    args: -a 1 -dt .33 -niter 3 -imax 40 -ksp_monitor_short -pc_type pbjacobi -ksp_atol 1e-6 -irk_type gauss -irk_nstages 4 -ksp_gmres_restart 100 -physics_type advection
392    test:
393      args: -vec_mdot_use_gemv {{0 1}} -vec_maxpy_use_gemv {{0 1}}
394    test:
395      requires: hpddm
396      suffix: hpddm
397      output_file: output/ex74_3.out
398      args: -ksp_type hpddm
399    test:
400      requires: hpddm
401      suffix: hpddm_gcrodr
402      output_file: output/ex74_3_hpddm.out
403      args: -ksp_view_final_residual -ksp_type hpddm -ksp_hpddm_type gcrodr -ksp_hpddm_recycle 5
404 
405 TEST*/
406