1 static char help[] = "Solves the constant-coefficient 1D heat equation \n\
2 with an Implicit Runge-Kutta method using MatKAIJ. \n\
3 \n\
4 du d^2 u \n\
5 -- = a ----- ; 0 <= x <= 1; \n\
6 dt dx^2 \n\
7 \n\
8 with periodic boundary conditions \n\
9 \n\
10 2nd order central discretization in space: \n\
11 \n\
12 [ d^2 u ] u_{i+1} - 2u_i + u_{i-1} \n\
13 [ ----- ] = ------------------------ \n\
14 [ dx^2 ]i h^2 \n\
15 \n\
16 i = grid index; h = x_{i+1}-x_i (Uniform) \n\
17 0 <= i < n h = 1.0/n \n\
18 \n\
19 Thus, \n\
20 \n\
21 du \n\
22 -- = Ju; J = (a/h^2) tridiagonal(1,-2,1)_n \n\
23 dt \n\
24 \n\
25 Implicit Runge-Kutta method: \n\
26 \n\
27 U^(k) = u^n + dt \\sum_i a_{ki} JU^{i} \n\
28 u^{n+1} = u^n + dt \\sum_i b_i JU^{i} \n\
29 \n\
30 i = 1,...,s (s -> number of stages) \n\
31 \n\
32 At each time step, we solve \n\
33 \n\
34 [ 1 ] 1 \n\
35 [ -- I \\otimes A^{-1} - J \\otimes I ] U = -- u^n \\otimes A^{-1} \n\
36 [ dt ] dt \n\
37 \n\
38 where A is the Butcher tableaux of the implicit \n\
39 Runge-Kutta method, \n\
40 \n\
41 with MATKAIJ and KSP. \n\
42 \n\
43 Available IRK Methods: \n\
44 gauss n-stage Gauss method \n\
45 \n";
46
47 /*
48 Include "petscksp.h" so that we can use KSP solvers. Note that this file
49 automatically includes:
50 petscsys.h - base PETSc routines
51 petscvec.h - vectors
52 petscmat.h - matrices
53 petscis.h - index sets
54 petscviewer.h - viewers
55 petscpc.h - preconditioners
56 */
57 #include <petscksp.h>
58 #include <petscdt.h>
59
60 /* define the IRK methods available */
61 #define IRKGAUSS "gauss"
62
63 typedef enum {
64 PHYSICS_DIFFUSION,
65 PHYSICS_ADVECTION
66 } PhysicsType;
67 const char *const PhysicsTypes[] = {"DIFFUSION", "ADVECTION", "PhysicsType", "PHYSICS_", NULL};
68
69 typedef struct __context__ {
70 PetscReal a; /* diffusion coefficient */
71 PetscReal xmin, xmax; /* domain bounds */
72 PetscInt imax; /* number of grid points */
73 PetscInt niter; /* number of time iterations */
74 PetscReal dt; /* time step size */
75 PhysicsType physics_type;
76 } UserContext;
77
78 static PetscErrorCode ExactSolution(Vec, void *, PetscReal);
79 static PetscErrorCode RKCreate_Gauss(PetscInt, PetscScalar **, PetscScalar **, PetscReal **);
80 static PetscErrorCode Assemble_AdvDiff(MPI_Comm, UserContext *, Mat *);
81
main(int argc,char ** argv)82 int main(int argc, char **argv)
83 {
84 Vec u, uex, rhs, z;
85 UserContext ctxt;
86 PetscInt nstages, is, ie, matis, matie, *ix, *ix2;
87 PetscInt n, i, s, t, total_its;
88 PetscScalar *A, *B, *At, *b, *zvals, one = 1.0;
89 PetscReal *c, err, time;
90 Mat Identity, J, TA, SC, R;
91 KSP ksp;
92 PetscFunctionList IRKList = NULL;
93 char irktype[256] = IRKGAUSS;
94
95 PetscFunctionBeginUser;
96 PetscCall(PetscInitialize(&argc, &argv, NULL, help));
97 PetscCall(PetscFunctionListAdd(&IRKList, IRKGAUSS, RKCreate_Gauss));
98
99 /* default value */
100 ctxt.a = 1.0;
101 ctxt.xmin = 0.0;
102 ctxt.xmax = 1.0;
103 ctxt.imax = 20;
104 ctxt.niter = 0;
105 ctxt.dt = 0.0;
106 ctxt.physics_type = PHYSICS_DIFFUSION;
107
108 PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "IRK options", "");
109 PetscCall(PetscOptionsReal("-a", "diffusion coefficient", "<1.0>", ctxt.a, &ctxt.a, NULL));
110 PetscCall(PetscOptionsInt("-imax", "grid size", "<20>", ctxt.imax, &ctxt.imax, NULL));
111 PetscCall(PetscOptionsReal("-xmin", "xmin", "<0.0>", ctxt.xmin, &ctxt.xmin, NULL));
112 PetscCall(PetscOptionsReal("-xmax", "xmax", "<1.0>", ctxt.xmax, &ctxt.xmax, NULL));
113 PetscCall(PetscOptionsInt("-niter", "number of time steps", "<0>", ctxt.niter, &ctxt.niter, NULL));
114 PetscCall(PetscOptionsReal("-dt", "time step size", "<0.0>", ctxt.dt, &ctxt.dt, NULL));
115 PetscCall(PetscOptionsFList("-irk_type", "IRK method family", "", IRKList, irktype, irktype, sizeof(irktype), NULL));
116 nstages = 2;
117 PetscCall(PetscOptionsInt("-irk_nstages", "Number of stages in IRK method", "", nstages, &nstages, NULL));
118 PetscCall(PetscOptionsEnum("-physics_type", "Type of process to discretize", "", PhysicsTypes, (PetscEnum)ctxt.physics_type, (PetscEnum *)&ctxt.physics_type, NULL));
119 PetscOptionsEnd();
120
121 /* allocate and initialize solution vector and exact solution */
122 PetscCall(VecCreate(PETSC_COMM_WORLD, &u));
123 PetscCall(VecSetSizes(u, PETSC_DECIDE, ctxt.imax));
124 PetscCall(VecSetFromOptions(u));
125 PetscCall(VecDuplicate(u, &uex));
126 /* initial solution */
127 PetscCall(ExactSolution(u, &ctxt, 0.0));
128 /* exact solution */
129 PetscCall(ExactSolution(uex, &ctxt, ctxt.dt * ctxt.niter));
130
131 { /* Create A,b,c */
132 PetscErrorCode (*irkcreate)(PetscInt, PetscScalar **, PetscScalar **, PetscReal **);
133 PetscCall(PetscFunctionListFind(IRKList, irktype, &irkcreate));
134 PetscCall((*irkcreate)(nstages, &A, &b, &c));
135 }
136 { /* Invert A */
137 /* PETSc does not provide a routine to calculate the inverse of a general matrix.
138 * To get the inverse of A, we form a sequential BAIJ matrix from it, consisting of a single block with block size
139 * equal to the dimension of A, and then use MatInvertBlockDiagonal(). */
140 Mat A_baij;
141 PetscInt idxm[1] = {0}, idxn[1] = {0};
142 const PetscScalar *A_inv;
143 PetscCall(MatCreateSeqBAIJ(PETSC_COMM_SELF, nstages, nstages, nstages, 1, NULL, &A_baij));
144 PetscCall(MatSetOption(A_baij, MAT_ROW_ORIENTED, PETSC_FALSE));
145 PetscCall(MatSetValuesBlocked(A_baij, 1, idxm, 1, idxn, A, INSERT_VALUES));
146 PetscCall(MatAssemblyBegin(A_baij, MAT_FINAL_ASSEMBLY));
147 PetscCall(MatAssemblyEnd(A_baij, MAT_FINAL_ASSEMBLY));
148 PetscCall(MatInvertBlockDiagonal(A_baij, &A_inv));
149 PetscCall(PetscArraycpy(A, A_inv, nstages * nstages));
150 PetscCall(MatDestroy(&A_baij));
151 }
152 /* Scale (1/dt)*A^{-1} and (1/dt)*b */
153 for (s = 0; s < nstages * nstages; s++) A[s] *= 1.0 / ctxt.dt;
154 for (s = 0; s < nstages; s++) b[s] *= (-ctxt.dt);
155
156 /* Compute row sums At and identity B */
157 PetscCall(PetscMalloc2(nstages, &At, PetscSqr(nstages), &B));
158 for (s = 0; s < nstages; s++) {
159 At[s] = 0;
160 for (t = 0; t < nstages; t++) {
161 At[s] += A[s + nstages * t]; /* Row sums of */
162 B[s + nstages * t] = 1. * (s == t); /* identity */
163 }
164 }
165
166 /* allocate and calculate the (-J) matrix */
167 switch (ctxt.physics_type) {
168 case PHYSICS_ADVECTION:
169 case PHYSICS_DIFFUSION:
170 PetscCall(Assemble_AdvDiff(PETSC_COMM_WORLD, &ctxt, &J));
171 }
172 PetscCall(MatCreate(PETSC_COMM_WORLD, &Identity));
173 PetscCall(MatSetType(Identity, MATAIJ));
174 PetscCall(MatGetOwnershipRange(J, &matis, &matie));
175 PetscCall(MatSetSizes(Identity, matie - matis, matie - matis, ctxt.imax, ctxt.imax));
176 PetscCall(MatSetUp(Identity));
177 for (i = matis; i < matie; i++) PetscCall(MatSetValues(Identity, 1, &i, 1, &i, &one, INSERT_VALUES));
178 PetscCall(MatAssemblyBegin(Identity, MAT_FINAL_ASSEMBLY));
179 PetscCall(MatAssemblyEnd(Identity, MAT_FINAL_ASSEMBLY));
180
181 /* Create the KAIJ matrix for solving the stages */
182 PetscCall(MatCreateKAIJ(J, nstages, nstages, A, B, &TA));
183
184 /* Create the KAIJ matrix for step completion */
185 PetscCall(MatCreateKAIJ(J, 1, nstages, NULL, b, &SC));
186
187 /* Create the KAIJ matrix to create the R for solving the stages */
188 PetscCall(MatCreateKAIJ(Identity, nstages, 1, NULL, At, &R));
189
190 /* Create and set options for KSP */
191 PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
192 PetscCall(KSPSetOperators(ksp, TA, TA));
193 PetscCall(KSPSetFromOptions(ksp));
194
195 /* Allocate work and right-hand-side vectors */
196 PetscCall(VecCreate(PETSC_COMM_WORLD, &z));
197 PetscCall(VecSetFromOptions(z));
198 PetscCall(VecSetSizes(z, PETSC_DECIDE, ctxt.imax * nstages));
199 PetscCall(VecDuplicate(z, &rhs));
200
201 PetscCall(VecGetOwnershipRange(u, &is, &ie));
202 PetscCall(PetscMalloc3(nstages, &ix, nstages, &zvals, ie - is, &ix2));
203 /* iterate in time */
204 for (n = 0, time = 0., total_its = 0; n < ctxt.niter; n++) {
205 PetscInt its;
206
207 /* compute and set the right-hand side */
208 PetscCall(MatMult(R, u, rhs));
209
210 /* Solve the system */
211 PetscCall(KSPSolve(ksp, rhs, z));
212 PetscCall(KSPGetIterationNumber(ksp, &its));
213 total_its += its;
214
215 /* Update the solution */
216 PetscCall(MatMultAdd(SC, z, u, u));
217
218 /* time step complete */
219 time += ctxt.dt;
220 }
221 PetscCall(PetscFree3(ix, ix2, zvals));
222
223 /* Deallocate work and right-hand-side vectors */
224 PetscCall(VecDestroy(&z));
225 PetscCall(VecDestroy(&rhs));
226
227 /* Calculate error in final solution */
228 PetscCall(VecAYPX(uex, -1.0, u));
229 PetscCall(VecNorm(uex, NORM_2, &err));
230 err = PetscSqrtReal(err * err / ((PetscReal)ctxt.imax));
231 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "L2 norm of the numerical error = %g (time=%g)\n", (double)err, (double)time));
232 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of time steps: %" PetscInt_FMT " (%" PetscInt_FMT " Krylov iterations)\n", ctxt.niter, total_its));
233
234 /* Free up memory */
235 PetscCall(KSPDestroy(&ksp));
236 PetscCall(MatDestroy(&TA));
237 PetscCall(MatDestroy(&SC));
238 PetscCall(MatDestroy(&R));
239 PetscCall(MatDestroy(&J));
240 PetscCall(MatDestroy(&Identity));
241 PetscCall(PetscFree3(A, b, c));
242 PetscCall(PetscFree2(At, B));
243 PetscCall(VecDestroy(&uex));
244 PetscCall(VecDestroy(&u));
245 PetscCall(PetscFunctionListDestroy(&IRKList));
246
247 PetscCall(PetscFinalize());
248 return 0;
249 }
250
ExactSolution(Vec u,void * c,PetscReal t)251 PetscErrorCode ExactSolution(Vec u, void *c, PetscReal t)
252 {
253 UserContext *ctxt = (UserContext *)c;
254 PetscInt i, is, ie;
255 PetscScalar *uarr;
256 PetscReal x, dx, a = ctxt->a, pi = PETSC_PI;
257
258 PetscFunctionBegin;
259 dx = (ctxt->xmax - ctxt->xmin) / ((PetscReal)ctxt->imax);
260 PetscCall(VecGetOwnershipRange(u, &is, &ie));
261 PetscCall(VecGetArray(u, &uarr));
262 for (i = is; i < ie; i++) {
263 x = i * dx;
264 switch (ctxt->physics_type) {
265 case PHYSICS_DIFFUSION:
266 uarr[i - is] = PetscExpScalar(-4.0 * pi * pi * a * t) * PetscSinScalar(2 * pi * x);
267 break;
268 case PHYSICS_ADVECTION:
269 uarr[i - is] = PetscSinScalar(2 * pi * (x - a * t));
270 break;
271 default:
272 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for physics type %s", PhysicsTypes[ctxt->physics_type]);
273 }
274 }
275 PetscCall(VecRestoreArray(u, &uarr));
276 PetscFunctionReturn(PETSC_SUCCESS);
277 }
278
279 /* Arrays should be freed with PetscFree3(A,b,c) */
RKCreate_Gauss(PetscInt nstages,PetscScalar ** gauss_A,PetscScalar ** gauss_b,PetscReal ** gauss_c)280 static PetscErrorCode RKCreate_Gauss(PetscInt nstages, PetscScalar **gauss_A, PetscScalar **gauss_b, PetscReal **gauss_c)
281 {
282 PetscScalar *A, *G0, *G1;
283 PetscReal *b, *c;
284 PetscInt i, j;
285 Mat G0mat, G1mat, Amat;
286
287 PetscFunctionBegin;
288 PetscCall(PetscMalloc3(PetscSqr(nstages), &A, nstages, gauss_b, nstages, &c));
289 PetscCall(PetscMalloc3(nstages, &b, PetscSqr(nstages), &G0, PetscSqr(nstages), &G1));
290 PetscCall(PetscDTGaussQuadrature(nstages, 0., 1., c, b));
291 for (i = 0; i < nstages; i++) (*gauss_b)[i] = b[i]; /* copy to possibly-complex array */
292
293 /* A^T = G0^{-1} G1 */
294 for (i = 0; i < nstages; i++) {
295 for (j = 0; j < nstages; j++) {
296 G0[i * nstages + j] = PetscPowRealInt(c[i], j);
297 G1[i * nstages + j] = PetscPowRealInt(c[i], j + 1) / (j + 1);
298 }
299 }
300 /* The arrays above are row-aligned, but we create dense matrices as the transpose */
301 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nstages, nstages, G0, &G0mat));
302 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nstages, nstages, G1, &G1mat));
303 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nstages, nstages, A, &Amat));
304 PetscCall(MatLUFactor(G0mat, NULL, NULL, NULL));
305 PetscCall(MatMatSolve(G0mat, G1mat, Amat));
306 PetscCall(MatTranspose(Amat, MAT_INPLACE_MATRIX, &Amat));
307
308 PetscCall(MatDestroy(&G0mat));
309 PetscCall(MatDestroy(&G1mat));
310 PetscCall(MatDestroy(&Amat));
311 PetscCall(PetscFree3(b, G0, G1));
312 *gauss_A = A;
313 *gauss_c = c;
314 PetscFunctionReturn(PETSC_SUCCESS);
315 }
316
Assemble_AdvDiff(MPI_Comm comm,UserContext * user,Mat * J)317 static PetscErrorCode Assemble_AdvDiff(MPI_Comm comm, UserContext *user, Mat *J)
318 {
319 PetscInt matis, matie, i;
320 PetscReal dx, dx2;
321
322 PetscFunctionBegin;
323 dx = (user->xmax - user->xmin) / ((PetscReal)user->imax);
324 dx2 = dx * dx;
325 PetscCall(MatCreate(comm, J));
326 PetscCall(MatSetType(*J, MATAIJ));
327 PetscCall(MatSetSizes(*J, PETSC_DECIDE, PETSC_DECIDE, user->imax, user->imax));
328 PetscCall(MatSetUp(*J));
329 PetscCall(MatGetOwnershipRange(*J, &matis, &matie));
330 for (i = matis; i < matie; i++) {
331 PetscScalar values[3];
332 PetscInt col[3];
333 switch (user->physics_type) {
334 case PHYSICS_DIFFUSION:
335 values[0] = -user->a * 1.0 / dx2;
336 values[1] = user->a * 2.0 / dx2;
337 values[2] = -user->a * 1.0 / dx2;
338 break;
339 case PHYSICS_ADVECTION:
340 values[0] = -user->a * .5 / dx;
341 values[1] = 0.;
342 values[2] = user->a * .5 / dx;
343 break;
344 default:
345 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for physics type %s", PhysicsTypes[user->physics_type]);
346 }
347 /* periodic boundaries */
348 if (i == 0) {
349 col[0] = user->imax - 1;
350 col[1] = i;
351 col[2] = i + 1;
352 } else if (i == user->imax - 1) {
353 col[0] = i - 1;
354 col[1] = i;
355 col[2] = 0;
356 } else {
357 col[0] = i - 1;
358 col[1] = i;
359 col[2] = i + 1;
360 }
361 PetscCall(MatSetValues(*J, 1, &i, 3, col, values, INSERT_VALUES));
362 }
363 PetscCall(MatAssemblyBegin(*J, MAT_FINAL_ASSEMBLY));
364 PetscCall(MatAssemblyEnd(*J, MAT_FINAL_ASSEMBLY));
365 PetscFunctionReturn(PETSC_SUCCESS);
366 }
367
368 /*TEST
369 testset:
370 args: -a 0.1 -dt .125 -niter 5 -imax 40 -ksp_monitor_short -pc_type pbjacobi -irk_type gauss -irk_nstages 2
371 test:
372 suffix: 1
373 requires: !single
374 args: -ksp_atol 1e-6 -vec_mdot_use_gemv {{0 1}} -vec_maxpy_use_gemv {{0 1}}
375 test:
376 requires: hpddm !single
377 suffix: hpddm
378 output_file: output/ex74_1.out
379 args: -ksp_atol 1e-6 -ksp_type hpddm
380 test:
381 requires: hpddm
382 suffix: hpddm_gcrodr
383 output_file: output/ex74_1_hpddm.out
384 args: -ksp_atol 1e-4 -ksp_view_final_residual -ksp_type hpddm -ksp_hpddm_type gcrodr -ksp_hpddm_recycle 2
385 test:
386 suffix: 2
387 args: -a 0.1 -dt .125 -niter 5 -imax 40 -ksp_monitor_short -pc_type pbjacobi -ksp_atol 1e-6 -irk_type gauss -irk_nstages 4 -ksp_gmres_restart 100
388 testset:
389 suffix: 3
390 requires: !single
391 args: -a 1 -dt .33 -niter 3 -imax 40 -ksp_monitor_short -pc_type pbjacobi -ksp_atol 1e-6 -irk_type gauss -irk_nstages 4 -ksp_gmres_restart 100 -physics_type advection
392 test:
393 args: -vec_mdot_use_gemv {{0 1}} -vec_maxpy_use_gemv {{0 1}}
394 test:
395 requires: hpddm
396 suffix: hpddm
397 output_file: output/ex74_3.out
398 args: -ksp_type hpddm
399 test:
400 requires: hpddm
401 suffix: hpddm_gcrodr
402 output_file: output/ex74_3_hpddm.out
403 args: -ksp_view_final_residual -ksp_type hpddm -ksp_hpddm_type gcrodr -ksp_hpddm_recycle 5
404
405 TEST*/
406