xref: /petsc/src/ksp/ksp/tutorials/ex5.c (revision 609caa7c8c030312b00807b4f015fd827bb80932)
1 static char help[] = "Solves two linear systems in parallel with KSP.  The code\n\
2 illustrates repeated solution of linear systems with the same preconditioner\n\
3 method but different matrices (having the same nonzero structure).  The code\n\
4 also uses multiple profiling stages.  Input arguments are\n\
5   -m <size> : problem size\n\
6   -mat_nonsym : use nonsymmetric matrix (default is symmetric)\n\n";
7 
8 /*
9   Include "petscksp.h" so that we can use KSP solvers.  Note that this file
10   automatically includes:
11      petscsys.h       - base PETSc routines   petscvec.h - vectors
12      petscmat.h - matrices
13      petscis.h     - index sets            petscksp.h - Krylov subspace methods
14      petscviewer.h - viewers               petscpc.h  - preconditioners
15 */
16 #include <petscksp.h>
17 
main(int argc,char ** args)18 int main(int argc, char **args)
19 {
20   KSP           ksp;         /* linear solver context */
21   Mat           C;           /* matrix */
22   Vec           x, u, b;     /* approx solution, RHS, exact solution */
23   PetscReal     norm, bnorm; /* norm of solution error */
24   PetscScalar   v, none = -1.0;
25   PetscInt      Ii, J, ldim, low, high, iglobal, Istart, Iend;
26   PetscInt      i, j, m = 3, n = 2, its;
27   PetscMPIInt   size, rank;
28   PetscBool     mat_nonsymmetric = PETSC_FALSE;
29   PetscBool     testnewC = PETSC_FALSE, testscaledMat = PETSC_FALSE, single = PETSC_FALSE;
30   PetscLogStage stages[2];
31 
32   PetscFunctionBeginUser;
33   PetscCall(PetscInitialize(&argc, &args, NULL, help));
34   PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
35   PetscCall(PetscOptionsHasName(NULL, NULL, "-pc_precision", &single));
36   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
37   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
38   n = 2 * size;
39 
40   /*
41      Set flag if we are doing a nonsymmetric problem; the default is symmetric.
42   */
43   PetscCall(PetscOptionsGetBool(NULL, NULL, "-mat_nonsym", &mat_nonsymmetric, NULL));
44 
45   PetscCall(PetscOptionsGetBool(NULL, NULL, "-test_scaledMat", &testscaledMat, NULL));
46 
47   /*
48      Register two stages for separate profiling of the two linear solves.
49      Use the runtime option -log_view for a printout of performance
50      statistics at the program's conlusion.
51   */
52   PetscCall(PetscLogStageRegister("Original Solve", &stages[0]));
53   PetscCall(PetscLogStageRegister("Second Solve", &stages[1]));
54 
55   /* -------------- Stage 0: Solve Original System ---------------------- */
56   /*
57      Indicate to PETSc profiling that we're beginning the first stage
58   */
59   PetscCall(PetscLogStagePush(stages[0]));
60 
61   /*
62      Create parallel matrix, specifying only its global dimensions.
63      When using MatCreate(), the matrix format can be specified at
64      runtime. Also, the parallel partitioning of the matrix is
65      determined by PETSc at runtime.
66   */
67   PetscCall(MatCreate(PETSC_COMM_WORLD, &C));
68   PetscCall(MatSetSizes(C, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n));
69   PetscCall(MatSetFromOptions(C));
70   PetscCall(MatSetUp(C));
71 
72   /*
73      Currently, all PETSc parallel matrix formats are partitioned by
74      contiguous chunks of rows across the processors.  Determine which
75      rows of the matrix are locally owned.
76   */
77   PetscCall(MatGetOwnershipRange(C, &Istart, &Iend));
78 
79   /*
80      Set matrix entries matrix in parallel.
81       - Each processor needs to insert only elements that it owns
82         locally (but any non-local elements will be sent to the
83         appropriate processor during matrix assembly).
84       - Always specify global row and columns of matrix entries.
85   */
86   for (Ii = Istart; Ii < Iend; Ii++) {
87     v = -1.0;
88     i = Ii / n;
89     j = Ii - i * n;
90     if (i > 0) {
91       J = Ii - n;
92       PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
93     }
94     if (i < m - 1) {
95       J = Ii + n;
96       PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
97     }
98     if (j > 0) {
99       J = Ii - 1;
100       PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
101     }
102     if (j < n - 1) {
103       J = Ii + 1;
104       PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
105     }
106     v = 4.0;
107     PetscCall(MatSetValues(C, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
108   }
109 
110   /*
111      Make the matrix nonsymmetric if desired
112   */
113   if (mat_nonsymmetric) {
114     for (Ii = Istart; Ii < Iend; Ii++) {
115       v = -1.5;
116       i = Ii / n;
117       if (i > 1) {
118         J = Ii - n - 1;
119         PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
120       }
121     }
122   } else {
123     PetscCall(MatSetOption(C, MAT_SYMMETRIC, PETSC_TRUE));
124     PetscCall(MatSetOption(C, MAT_SYMMETRY_ETERNAL, PETSC_TRUE));
125   }
126 
127   /*
128      Assemble matrix, using the 2-step process:
129        MatAssemblyBegin(), MatAssemblyEnd()
130      Computations can be done while messages are in transition
131      by placing code between these two statements.
132   */
133   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
134   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
135 
136   /*
137      Create parallel vectors.
138       - When using VecSetSizes(), we specify only the vector's global
139         dimension; the parallel partitioning is determined at runtime.
140       - Note: We form 1 vector from scratch and then duplicate as needed.
141   */
142   PetscCall(VecCreate(PETSC_COMM_WORLD, &u));
143   PetscCall(VecSetSizes(u, PETSC_DECIDE, m * n));
144   PetscCall(VecSetFromOptions(u));
145   PetscCall(VecDuplicate(u, &b));
146   PetscCall(VecDuplicate(b, &x));
147 
148   /*
149      Currently, all parallel PETSc vectors are partitioned by
150      contiguous chunks across the processors.  Determine which
151      range of entries are locally owned.
152   */
153   PetscCall(VecGetOwnershipRange(x, &low, &high));
154 
155   /*
156     Set elements within the exact solution vector in parallel.
157      - Each processor needs to insert only elements that it owns
158        locally (but any non-local entries will be sent to the
159        appropriate processor during vector assembly).
160      - Always specify global locations of vector entries.
161   */
162   PetscCall(VecGetLocalSize(x, &ldim));
163   for (i = 0; i < ldim; i++) {
164     iglobal = i + low;
165     v       = (PetscScalar)(i + 100 * rank);
166     PetscCall(VecSetValues(u, 1, &iglobal, &v, INSERT_VALUES));
167   }
168 
169   /*
170      Assemble vector, using the 2-step process:
171        VecAssemblyBegin(), VecAssemblyEnd()
172      Computations can be done while messages are in transition,
173      by placing code between these two statements.
174   */
175   PetscCall(VecAssemblyBegin(u));
176   PetscCall(VecAssemblyEnd(u));
177 
178   /*
179      Compute right-hand-side vector
180   */
181   PetscCall(MatMult(C, u, b));
182 
183   /*
184     Create linear solver context
185   */
186   PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
187 
188   /*
189      Set operators. Here the matrix that defines the linear system
190      also serves as the matrix from which the preconditioner is constructed.
191   */
192   PetscCall(KSPSetOperators(ksp, C, C));
193 
194   /*
195      Set runtime options (e.g., -ksp_type <type> -pc_type <type>)
196   */
197   PetscCall(KSPSetFromOptions(ksp));
198 
199   /*
200      Solve linear system.  Here we explicitly call KSPSetUp() for more
201      detailed performance monitoring of certain preconditioners, such
202      as ICC and ILU.  This call is optional, as KSPSetUp() will
203      automatically be called within KSPSolve() if it hasn't been
204      called already.
205   */
206   PetscCall(KSPSetUp(ksp));
207   PetscCall(KSPSolve(ksp, b, x));
208 
209   /*
210      Check the residual
211   */
212   PetscCall(VecAXPY(x, none, u));
213   PetscCall(VecNorm(x, NORM_2, &norm));
214   PetscCall(VecNorm(b, NORM_2, &bnorm));
215   PetscCall(KSPGetIterationNumber(ksp, &its));
216   if (!testscaledMat || (!single && norm / bnorm > 1.e-5)) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Relative norm of the residual %g, Iterations %" PetscInt_FMT "\n", (double)norm / (double)bnorm, its));
217 
218   /* -------------- Stage 1: Solve Second System ---------------------- */
219   /*
220      Solve another linear system with the same method.  We reuse the KSP
221      context, matrix and vector data structures, and hence save the
222      overhead of creating new ones.
223 
224      Indicate to PETSc profiling that we're concluding the first
225      stage with PetscLogStagePop(), and beginning the second stage with
226      PetscLogStagePush().
227   */
228   PetscCall(PetscLogStagePop());
229   PetscCall(PetscLogStagePush(stages[1]));
230 
231   /*
232      Initialize all matrix entries to zero.  MatZeroEntries() retains the
233      nonzero structure of the matrix for sparse formats.
234   */
235   PetscCall(MatZeroEntries(C));
236 
237   /*
238      Assemble matrix again.  Note that we retain the same matrix data
239      structure and the same nonzero pattern; we just change the values
240      of the matrix entries.
241   */
242   for (i = 0; i < m; i++) {
243     for (j = 2 * rank; j < 2 * rank + 2; j++) {
244       v  = -1.0;
245       Ii = j + n * i;
246       if (i > 0) {
247         J = Ii - n;
248         PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
249       }
250       if (i < m - 1) {
251         J = Ii + n;
252         PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
253       }
254       if (j > 0) {
255         J = Ii - 1;
256         PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
257       }
258       if (j < n - 1) {
259         J = Ii + 1;
260         PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
261       }
262       v = 6.0;
263       PetscCall(MatSetValues(C, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
264     }
265   }
266   if (mat_nonsymmetric) {
267     for (Ii = Istart; Ii < Iend; Ii++) {
268       v = -1.5;
269       i = Ii / n;
270       if (i > 1) {
271         J = Ii - n - 1;
272         PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
273       }
274     }
275   }
276   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
277   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
278 
279   if (testscaledMat) {
280     PetscRandom rctx;
281 
282     /* Scale a(0,0) and a(M-1,M-1) */
283     if (rank == 0) {
284       v  = 6.0 * 0.00001;
285       Ii = 0;
286       J  = 0;
287       PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, INSERT_VALUES));
288     } else if (rank == size - 1) {
289       v  = 6.0 * 0.00001;
290       Ii = m * n - 1;
291       J  = m * n - 1;
292       PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, INSERT_VALUES));
293     }
294     PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
295     PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
296 
297     /* Compute a new right-hand-side vector */
298     PetscCall(VecDestroy(&u));
299     PetscCall(VecCreate(PETSC_COMM_WORLD, &u));
300     PetscCall(VecSetSizes(u, PETSC_DECIDE, m * n));
301     PetscCall(VecSetFromOptions(u));
302 
303     PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
304     PetscCall(PetscRandomSetFromOptions(rctx));
305     PetscCall(VecSetRandom(u, rctx));
306     PetscCall(PetscRandomDestroy(&rctx));
307     PetscCall(VecAssemblyBegin(u));
308     PetscCall(VecAssemblyEnd(u));
309   }
310 
311   PetscCall(PetscOptionsGetBool(NULL, NULL, "-test_newMat", &testnewC, NULL));
312   if (testnewC) {
313     /*
314      User may use a new matrix C with same nonzero pattern, e.g.
315       ./ex5 -ksp_monitor -mat_type sbaij -pc_type cholesky -pc_factor_mat_solver_type mumps -test_newMat
316     */
317     Mat Ctmp;
318     PetscCall(MatDuplicate(C, MAT_COPY_VALUES, &Ctmp));
319     PetscCall(MatDestroy(&C));
320     PetscCall(MatDuplicate(Ctmp, MAT_COPY_VALUES, &C));
321     PetscCall(MatDestroy(&Ctmp));
322   }
323 
324   PetscCall(MatMult(C, u, b));
325 
326   /*
327      Set operators. Here the matrix that defines the linear system
328      also serves as the matrix from which the preconditioner is constructed.
329   */
330   PetscCall(KSPSetOperators(ksp, C, C));
331 
332   /*
333      Solve linear system
334   */
335   PetscCall(KSPSetUp(ksp));
336   PetscCall(KSPSolve(ksp, b, x));
337 
338   /*
339      Check the residual
340   */
341   PetscCall(VecAXPY(x, none, u));
342   PetscCall(VecNorm(x, NORM_2, &norm));
343   PetscCall(KSPGetIterationNumber(ksp, &its));
344   if (!testscaledMat || (!single && norm / bnorm > PETSC_SMALL)) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Relative norm of the residual %g, Iterations %" PetscInt_FMT "\n", (double)norm / (double)bnorm, its));
345 
346   /*
347      Free work space.  All PETSc objects should be destroyed when they
348      are no longer needed.
349   */
350   PetscCall(KSPDestroy(&ksp));
351   PetscCall(VecDestroy(&u));
352   PetscCall(VecDestroy(&x));
353   PetscCall(VecDestroy(&b));
354   PetscCall(MatDestroy(&C));
355 
356   /*
357      Indicate to PETSc profiling that we're concluding the second stage
358   */
359   PetscCall(PetscLogStagePop());
360 
361   PetscCall(PetscFinalize());
362   return 0;
363 }
364 
365 /*TEST
366 
367    test:
368       args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always
369 
370    test:
371       suffix: 2
372       nsize: 2
373       args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always -ksp_rtol .000001
374 
375    test:
376       suffix: 5
377       nsize: 2
378       args: -ksp_gmres_cgs_refinement_type refine_always -ksp_monitor draw::draw_lg -ksp_monitor_true_residual draw::draw_lg
379 
380    test:
381       suffix: asm
382       nsize: 4
383       args: -pc_type asm
384 
385    test:
386       suffix: asm_baij
387       nsize: 4
388       args: -pc_type asm -mat_type baij
389       output_file: output/ex5_asm.out
390 
391    test:
392       suffix: redundant_0
393       args: -m 1000 -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi
394 
395    test:
396       suffix: redundant_1
397       nsize: 5
398       args: -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi
399 
400    test:
401       suffix: redundant_2
402       nsize: 5
403       args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi
404 
405    test:
406       suffix: redundant_3
407       nsize: 5
408       args: -pc_type redundant -pc_redundant_number 5 -redundant_ksp_type gmres -redundant_pc_type jacobi
409 
410    test:
411       suffix: redundant_4
412       nsize: 5
413       args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi -psubcomm_type interlaced
414 
415    test:
416       suffix: superlu_dist
417       nsize: 15
418       requires: superlu_dist
419       args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 150 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat
420       output_file: output/empty.out
421 
422    test:
423       suffix: superlu_dist_2
424       nsize: 15
425       requires: superlu_dist
426       args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 150 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact SamePattern_SameRowPerm
427       output_file: output/empty.out
428 
429    test:
430       suffix: superlu_dist_3
431       nsize: 15
432       requires: superlu_dist
433       args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 500 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact DOFACT
434       output_file: output/empty.out
435 
436    test:
437       suffix: superlu_dist_3s
438       nsize: 15
439       requires: superlu_dist defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
440       args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 500 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact DOFACT -pc_precision single
441       output_file: output/empty.out
442 
443    test:
444       suffix: superlu_dist_0
445       nsize: 1
446       requires: superlu_dist
447       args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -test_scaledMat
448       output_file: output/empty.out
449 
450 TEST*/
451