static char help[] = "Solves two linear systems in parallel with KSP. The code\n\
illustrates repeated solution of linear systems with the same preconditioner\n\
method but different matrices (having the same nonzero structure). The code\n\
also uses multiple profiling stages. Input arguments are\n\
  -m <size> : problem size\n\
  -mat_nonsym : use nonsymmetric matrix (default is symmetric)\n\n";

/*
  Include "petscksp.h" so that we can use KSP solvers.  Note that this file
  automatically includes:
     petscsys.h    - base PETSc routines   petscvec.h - vectors
     petscmat.h    - matrices
     petscis.h     - index sets            petscksp.h - Krylov subspace methods
     petscviewer.h - viewers               petscpc.h  - preconditioners
*/
#include <petscksp.h>
int main(int argc, char **args)
{
  KSP           ksp;         /* linear solver context */
  Mat           C;           /* matrix */
  Vec           x, u, b;     /* approx solution, RHS, exact solution */
  PetscReal     norm, bnorm; /* norm of solution error */
  PetscScalar   v, none = -1.0;
  PetscInt      Ii, J, ldim, low, high, iglobal, Istart, Iend;
  PetscInt      i, j, m = 3, n = 2, its;
  PetscMPIInt   size, rank;
  PetscBool     mat_nonsymmetric = PETSC_FALSE;
  PetscBool     testnewC = PETSC_FALSE, testscaledMat = PETSC_FALSE, single = PETSC_FALSE;
  PetscLogStage stages[2];

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, NULL, help));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
  PetscCall(PetscOptionsHasName(NULL, NULL, "-pc_precision", &single));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  n = 2 * size;

  /*
     Set flag if we are doing a nonsymmetric problem; the default is symmetric.
  */
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-mat_nonsym", &mat_nonsymmetric, NULL));

  PetscCall(PetscOptionsGetBool(NULL, NULL, "-test_scaledMat", &testscaledMat, NULL));
  /*
     Register two stages for separate profiling of the two linear solves.
     Use the runtime option -log_view for a printout of performance
     statistics at the program's conclusion.
  */
  PetscCall(PetscLogStageRegister("Original Solve", &stages[0]));
  PetscCall(PetscLogStageRegister("Second Solve", &stages[1]));
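
  /*
     For example, a typical invocation that prints the per-stage summaries
     (a sketch; the process count and problem size are illustrative):
       mpiexec -n 2 ./ex5 -m 10 -log_view
  */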

  /* -------------- Stage 0: Solve Original System ---------------------- */
  /*
     Indicate to PETSc profiling that we're beginning the first stage
  */
  PetscCall(PetscLogStagePush(stages[0]));

  /*
     Create parallel matrix, specifying only its global dimensions.
     When using MatCreate(), the matrix format can be specified at
     runtime. Also, the parallel partitioning of the matrix is
     determined by PETSc at runtime.
  */
  PetscCall(MatCreate(PETSC_COMM_WORLD, &C));
  PetscCall(MatSetSizes(C, PETSC_DECIDE, PETSC_DECIDE, m * n, m * n));
  PetscCall(MatSetFromOptions(C));
  PetscCall(MatSetUp(C));
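
  /*
     For large problems, assembly performance can be improved by
     preallocating the nonzero storage rather than relying on MatSetUp()
     above. A sketch for the 5-point stencil assembled below, using a
     conservative bound of 5 nonzeros per row in each block:
       PetscCall(MatMPIAIJSetPreallocation(C, 5, NULL, 5, NULL));
       PetscCall(MatSeqAIJSetPreallocation(C, 5, NULL));
  */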

  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors. Determine which
     rows of the matrix are locally owned.
  */
  PetscCall(MatGetOwnershipRange(C, &Istart, &Iend));
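
  /*
     With the default PETSC_DECIDE partitioning, rows are split into nearly
     equal contiguous chunks: e.g., for m = 3 and n = 4 (12 rows) on 2
     processes, process 0 typically owns rows [0,6) and process 1 rows [6,12).
  */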

  /*
     Set matrix entries in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.
  */
  for (Ii = Istart; Ii < Iend; Ii++) {
    v = -1.0;
    i = Ii / n;
    j = Ii - i * n;
    if (i > 0) {
      J = Ii - n;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (i < m - 1) {
      J = Ii + n;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j > 0) {
      J = Ii - 1;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    if (j < n - 1) {
      J = Ii + 1;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
    }
    v = 4.0;
    PetscCall(MatSetValues(C, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
  }
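
  /*
     The loop above assembles a standard 5-point stencil on an m x n grid
     in row-major ordering: grid point (i,j) is global row Ii = i*n + j.
     For example, with m = 3 and n = 4, row Ii = 5 is grid point (1,1) and
     couples to rows 1 (south), 9 (north), 4 (west), and 6 (east), plus
     the diagonal entry.
  */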

  /*
     Make the matrix nonsymmetric if desired
  */
  if (mat_nonsymmetric) {
    for (Ii = Istart; Ii < Iend; Ii++) {
      v = -1.5;
      i = Ii / n;
      if (i > 1) {
        J = Ii - n - 1;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
    }
  } else {
    PetscCall(MatSetOption(C, MAT_SYMMETRIC, PETSC_TRUE));
    PetscCall(MatSetOption(C, MAT_SYMMETRY_ETERNAL, PETSC_TRUE));
  }

  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd()
     Computations can be done while messages are in transit
     by placing code between these two statements.
  */
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
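
  /*
     A sketch of overlapping purely local work with the assembly
     communication (hypothetical; this example has no such work):
       PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
       ... local computations that neither read nor write C ...
       PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  */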

  /*
     Create parallel vectors.
      - When using VecSetSizes(), we specify only the vector's global
        dimension; the parallel partitioning is determined at runtime.
      - Note: We form 1 vector from scratch and then duplicate as needed.
  */
  PetscCall(VecCreate(PETSC_COMM_WORLD, &u));
  PetscCall(VecSetSizes(u, PETSC_DECIDE, m * n));
  PetscCall(VecSetFromOptions(u));
  PetscCall(VecDuplicate(u, &b));
  PetscCall(VecDuplicate(b, &x));

  /*
     Currently, all parallel PETSc vectors are partitioned by
     contiguous chunks across the processors. Determine which
     range of entries are locally owned.
  */
  PetscCall(VecGetOwnershipRange(x, &low, &high));

  /*
     Set elements within the exact solution vector in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local entries will be sent to the
        appropriate processor during vector assembly).
      - Always specify global locations of vector entries.
  */
  PetscCall(VecGetLocalSize(x, &ldim));
  for (i = 0; i < ldim; i++) {
    iglobal = i + low;
    v       = (PetscScalar)(i + 100 * rank);
    PetscCall(VecSetValues(u, 1, &iglobal, &v, INSERT_VALUES));
  }

  /*
     Assemble vector, using the 2-step process:
       VecAssemblyBegin(), VecAssemblyEnd()
     Computations can be done while messages are in transit
     by placing code between these two statements.
  */
  PetscCall(VecAssemblyBegin(u));
  PetscCall(VecAssemblyEnd(u));

  /*
     Compute right-hand-side vector
  */
  PetscCall(MatMult(C, u, b));

  /*
     Create linear solver context
  */
  PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the matrix from which the preconditioner is constructed.
  */
  PetscCall(KSPSetOperators(ksp, C, C));

  /*
     Set runtime options (e.g., -ksp_type <type> -pc_type <type>)
  */
  PetscCall(KSPSetFromOptions(ksp));
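
  /*
     The same choices can be hard-coded before KSPSetFromOptions(), which
     then lets runtime options override them. A sketch:
       PC pc;
       PetscCall(KSPSetType(ksp, KSPGMRES));
       PetscCall(KSPSetTolerances(ksp, 1.e-6, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT));
       PetscCall(KSPGetPC(ksp, &pc));
       PetscCall(PCSetType(pc, PCJACOBI));
  */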

  /*
     Solve linear system. Here we explicitly call KSPSetUp() for more
     detailed performance monitoring of certain preconditioners, such
     as ICC and ILU. This call is optional, as KSPSetUp() will
     automatically be called within KSPSolve() if it hasn't been
     called already.
  */
  PetscCall(KSPSetUp(ksp));
  PetscCall(KSPSolve(ksp, b, x));

  /*
     Check the error: x - u is the difference between the computed and
     exact solutions, reported relative to the norm of the right-hand side.
  */
  PetscCall(VecAXPY(x, none, u));
  PetscCall(VecNorm(x, NORM_2, &norm));
  PetscCall(VecNorm(b, NORM_2, &bnorm));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  if (!testscaledMat || (!single && norm / bnorm > 1.e-5)) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Relative norm of the residual %g, Iterations %" PetscInt_FMT "\n", (double)norm / (double)bnorm, its));

  /* -------------- Stage 1: Solve Second System ---------------------- */
  /*
     Solve another linear system with the same method. We reuse the KSP
     context, matrix and vector data structures, and hence save the
     overhead of creating new ones.

     Indicate to PETSc profiling that we're concluding the first
     stage with PetscLogStagePop(), and beginning the second stage with
     PetscLogStagePush().
  */
  PetscCall(PetscLogStagePop());
  PetscCall(PetscLogStagePush(stages[1]));

  /*
     Initialize all matrix entries to zero. MatZeroEntries() retains the
     nonzero structure of the matrix for sparse formats.
  */
  PetscCall(MatZeroEntries(C));
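
  /*
     To guarantee that the assembly below only fills existing nonzero
     locations, one could additionally set (a sketch):
       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
     which turns insertion at a previously unset location into an error.
  */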

  /*
     Assemble matrix again. Note that we retain the same matrix data
     structure and the same nonzero pattern; we just change the values
     of the matrix entries.
  */
  for (i = 0; i < m; i++) {
    for (j = 2 * rank; j < 2 * rank + 2; j++) {
      v  = -1.0;
      Ii = j + n * i;
      if (i > 0) {
        J = Ii - n;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
      if (i < m - 1) {
        J = Ii + n;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
      if (j > 0) {
        J = Ii - 1;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
      if (j < n - 1) {
        J = Ii + 1;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
      v = 6.0;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &Ii, &v, ADD_VALUES));
    }
  }
  if (mat_nonsymmetric) {
    for (Ii = Istart; Ii < Iend; Ii++) {
      v = -1.5;
      i = Ii / n;
      if (i > 1) {
        J = Ii - n - 1;
        PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, ADD_VALUES));
      }
    }
  }
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));

  if (testscaledMat) {
    PetscRandom rctx;

    /* Scale a(0,0) and a(M-1,M-1) */
    if (rank == 0) {
      v  = 6.0 * 0.00001;
      Ii = 0;
      J  = 0;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, INSERT_VALUES));
    } else if (rank == size - 1) {
      v  = 6.0 * 0.00001;
      Ii = m * n - 1;
      J  = m * n - 1;
      PetscCall(MatSetValues(C, 1, &Ii, 1, &J, &v, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));

    /* Compute a new right-hand-side vector */
    PetscCall(VecDestroy(&u));
    PetscCall(VecCreate(PETSC_COMM_WORLD, &u));
    PetscCall(VecSetSizes(u, PETSC_DECIDE, m * n));
    PetscCall(VecSetFromOptions(u));

    PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rctx));
    PetscCall(PetscRandomSetFromOptions(rctx));
    PetscCall(VecSetRandom(u, rctx));
    PetscCall(PetscRandomDestroy(&rctx));
    PetscCall(VecAssemblyBegin(u));
    PetscCall(VecAssemblyEnd(u));
  }

311 PetscCall(PetscOptionsGetBool(NULL, NULL, "-test_newMat", &testnewC, NULL));
312 if (testnewC) {
313 /*
314 User may use a new matrix C with same nonzero pattern, e.g.
315 ./ex5 -ksp_monitor -mat_type sbaij -pc_type cholesky -pc_factor_mat_solver_type mumps -test_newMat
316 */
317 Mat Ctmp;
318 PetscCall(MatDuplicate(C, MAT_COPY_VALUES, &Ctmp));
319 PetscCall(MatDestroy(&C));
320 PetscCall(MatDuplicate(Ctmp, MAT_COPY_VALUES, &C));
321 PetscCall(MatDestroy(&Ctmp));
322 }

  PetscCall(MatMult(C, u, b));

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the matrix from which the preconditioner is constructed.
  */
  PetscCall(KSPSetOperators(ksp, C, C));
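
  /*
     Because the matrix values changed, this call flags the preconditioner
     to be rebuilt during the next KSPSetUp()/KSPSolve(). If the new matrix
     were close enough to the old one, the rebuild could be skipped with
     (a sketch):
       PetscCall(KSPSetReusePreconditioner(ksp, PETSC_TRUE));
  */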

  /*
     Solve linear system
  */
  PetscCall(KSPSetUp(ksp));
  PetscCall(KSPSolve(ksp, b, x));

  /*
     Check the error of the second solve. (Note that bnorm is not
     recomputed; it is the norm of the first right-hand side.)
  */
  PetscCall(VecAXPY(x, none, u));
  PetscCall(VecNorm(x, NORM_2, &norm));
  PetscCall(KSPGetIterationNumber(ksp, &its));
  if (!testscaledMat || (!single && norm / bnorm > PETSC_SMALL)) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Relative norm of the residual %g, Iterations %" PetscInt_FMT "\n", (double)norm / (double)bnorm, its));

  /*
     Free work space. All PETSc objects should be destroyed when they
     are no longer needed.
  */
  PetscCall(KSPDestroy(&ksp));
  PetscCall(VecDestroy(&u));
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&b));
  PetscCall(MatDestroy(&C));

  /*
     Indicate to PETSc profiling that we're concluding the second stage
  */
  PetscCall(PetscLogStagePop());

  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

   test:
      args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 2
      nsize: 2
      args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always -ksp_rtol .000001

   test:
      suffix: 5
      nsize: 2
      args: -ksp_gmres_cgs_refinement_type refine_always -ksp_monitor draw::draw_lg -ksp_monitor_true_residual draw::draw_lg

   test:
      suffix: asm
      nsize: 4
      args: -pc_type asm

   test:
      suffix: asm_baij
      nsize: 4
      args: -pc_type asm -mat_type baij
      output_file: output/ex5_asm.out

   test:
      suffix: redundant_0
      args: -m 1000 -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_1
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_2
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_3
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 5 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_4
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi -psubcomm_type interlaced

   test:
      suffix: superlu_dist
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 150 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat
      output_file: output/empty.out

   test:
      suffix: superlu_dist_2
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 150 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact SamePattern_SameRowPerm
      output_file: output/empty.out

   test:
      suffix: superlu_dist_3
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 500 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact DOFACT
      output_file: output/empty.out

   test:
      suffix: superlu_dist_3s
      nsize: 15
      requires: superlu_dist defined(PETSC_HAVE_SUPERLU_DIST_SINGLE)
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 500 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact DOFACT -pc_precision single
      output_file: output/empty.out

   test:
      suffix: superlu_dist_0
      nsize: 1
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -test_scaledMat
      output_file: output/empty.out

TEST*/