1 static char help[] = "Solve a small system and a large system through preloading\n\
2 Input arguments are:\n\
3 -permute <natural,rcm,nd,...> : solve system in permuted indexing\n\
4 -f0 <small_sys_binary> -f1 <large_sys_binary> \n\n";
5
6 /*
7 Include "petscksp.h" so that we can use KSP solvers. Note that this file
8 automatically includes:
9 petscsys.h - base PETSc routines petscvec.h - vectors
10 petscmat.h - matrices
11 petscis.h - index sets petscksp.h - Krylov subspace methods
12 petscviewer.h - viewers petscpc.h - preconditioners
13 */
14 #include <petscksp.h>
15
16 typedef enum {
17 RHS_FILE,
18 RHS_ONE,
19 RHS_RANDOM
20 } RHSType;
21 const char *const RHSTypes[] = {"FILE", "ONE", "RANDOM", "RHSType", "RHS_", NULL};
22
CheckResult(KSP * ksp,Mat * A,Vec * b,Vec * x,IS * rowperm)23 PetscErrorCode CheckResult(KSP *ksp, Mat *A, Vec *b, Vec *x, IS *rowperm)
24 {
25 PetscReal norm; /* norm of solution error */
26 PetscInt its;
27
28 PetscFunctionBegin;
29 PetscCall(KSPGetTotalIterations(*ksp, &its));
30 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of iterations = %" PetscInt_FMT "\n", its));
31
32 PetscCall(KSPGetResidualNorm(*ksp, &norm));
33 if (norm < 1.e-12) {
34 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Residual norm < 1.e-12\n"));
35 } else {
36 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Residual norm %e\n", (double)norm));
37 }
38
39 PetscCall(KSPDestroy(ksp));
40 PetscCall(MatDestroy(A));
41 PetscCall(VecDestroy(x));
42 PetscCall(VecDestroy(b));
43 PetscCall(ISDestroy(rowperm));
44 PetscFunctionReturn(PETSC_SUCCESS);
45 }
46
CreateSystem(const char filename[PETSC_MAX_PATH_LEN],RHSType rhstype,MatOrderingType ordering,PetscBool permute,IS * colperm_out,Mat * A_out,Vec * b_out,Vec * x_out)47 PetscErrorCode CreateSystem(const char filename[PETSC_MAX_PATH_LEN], RHSType rhstype, MatOrderingType ordering, PetscBool permute, IS *colperm_out, Mat *A_out, Vec *b_out, Vec *x_out)
48 {
49 Vec x, b, b2;
50 Mat A; /* linear system matrix */
51 PetscViewer viewer; /* viewer */
52 PetscBool same;
53 PetscInt j, len, start, idx, n1, n2;
54 const PetscScalar *val;
55 IS rowperm = NULL, colperm = NULL;
56
57 PetscFunctionBegin;
58 /* open binary file. Note that we use FILE_MODE_READ to indicate reading from this file */
59 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &viewer));
60
61 /* load the matrix and vector; then destroy the viewer */
62 PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
63 PetscCall(MatSetFromOptions(A));
64 PetscCall(MatLoad(A, viewer));
65 if (permute) {
66 Mat Aperm;
67 PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm));
68 PetscCall(MatPermute(A, rowperm, colperm, &Aperm));
69 PetscCall(MatDestroy(&A));
70 A = Aperm; /* Replace original operator with permuted version */
71 }
72 switch (rhstype) {
73 case RHS_FILE:
74 /* Vectors in the file might a different size than the matrix so we need a
75 * Vec whose size hasn't been set yet. It'll get fixed below. Otherwise we
76 * can create the correct size Vec. */
77 PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
78 PetscCall(VecLoad(b, viewer));
79 break;
80 case RHS_ONE:
81 PetscCall(MatCreateVecs(A, &b, NULL));
82 PetscCall(VecSet(b, 1.0));
83 break;
84 case RHS_RANDOM:
85 PetscCall(MatCreateVecs(A, &b, NULL));
86 PetscCall(VecSetRandom(b, NULL));
87 break;
88 }
89 PetscCall(PetscViewerDestroy(&viewer));
90
91 /* if the loaded matrix is larger than the vector (due to being padded
92 to match the block size of the system), then create a new padded vector
93 */
94 PetscCall(MatGetLocalSize(A, NULL, &n1));
95 PetscCall(VecGetLocalSize(b, &n2));
96 same = (n1 == n2) ? PETSC_TRUE : PETSC_FALSE;
97 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &same, 1, MPI_C_BOOL, MPI_LAND, PETSC_COMM_WORLD));
98
99 if (!same) { /* create a new vector b by padding the old one */
100 PetscCall(VecCreate(PETSC_COMM_WORLD, &b2));
101 PetscCall(VecSetSizes(b2, n1, PETSC_DECIDE));
102 PetscCall(VecSetFromOptions(b2));
103 PetscCall(VecGetOwnershipRange(b, &start, NULL));
104 PetscCall(VecGetLocalSize(b, &len));
105 PetscCall(VecGetArrayRead(b, &val));
106 for (j = 0; j < len; j++) {
107 idx = start + j;
108 PetscCall(VecSetValues(b2, 1, &idx, val + j, INSERT_VALUES));
109 }
110 PetscCall(VecRestoreArrayRead(b, &val));
111 PetscCall(VecDestroy(&b));
112 PetscCall(VecAssemblyBegin(b2));
113 PetscCall(VecAssemblyEnd(b2));
114 b = b2;
115 }
116 PetscCall(VecDuplicate(b, &x));
117
118 if (permute) {
119 PetscCall(VecPermute(b, rowperm, PETSC_FALSE));
120 PetscCall(ISDestroy(&rowperm));
121 }
122
123 *b_out = b;
124 *x_out = x;
125 *A_out = A;
126 *colperm_out = colperm;
127 PetscFunctionReturn(PETSC_SUCCESS);
128 }
129
130 /* ATTENTION: this is the example used in the Profiling chapter of the PETSc manual,
131 where we referenced its profiling stages, preloading and output etc.
132 When you modify it, please make sure it is still consistent with the manual.
133 */
main(int argc,char ** args)134 int main(int argc, char **args)
135 {
136 Vec x, b;
137 Mat A; /* linear system matrix */
138 KSP ksp; /* Krylov subspace method context */
139 char file[2][PETSC_MAX_PATH_LEN], ordering[256] = MATORDERINGRCM;
140 RHSType rhstype = RHS_FILE;
141 PetscBool flg, preload = PETSC_FALSE, trans = PETSC_FALSE, permute = PETSC_FALSE;
142 IS colperm = NULL;
143
144 PetscFunctionBeginUser;
145 PetscCall(PetscInitialize(&argc, &args, NULL, help));
146
147 PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Preloading example options", "");
148 {
149 /*
150 Determine files from which we read the two linear systems
151 (matrix and right-hand-side vector).
152 */
153 PetscCall(PetscOptionsBool("-trans", "Solve transpose system instead", "", trans, &trans, &flg));
154 PetscCall(PetscOptionsString("-f", "First file to load (small system)", "", file[0], file[0], sizeof(file[0]), &flg));
155 PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solve in new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute));
156
157 if (flg) {
158 PetscCall(PetscStrncpy(file[1], file[0], sizeof(file[1])));
159 preload = PETSC_FALSE;
160 } else {
161 PetscCall(PetscOptionsString("-f0", "First file to load (small system)", "", file[0], file[0], sizeof(file[0]), &flg));
162 PetscCheck(flg, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate binary file with the -f0 or -f option");
163 PetscCall(PetscOptionsString("-f1", "Second file to load (larger system)", "", file[1], file[1], sizeof(file[1]), &flg));
164 if (!flg) preload = PETSC_FALSE; /* don't bother with second system */
165 }
166
167 PetscCall(PetscOptionsEnum("-rhs", "Right hand side", "", RHSTypes, (PetscEnum)rhstype, (PetscEnum *)&rhstype, NULL));
168 }
169 PetscOptionsEnd();
170
171 /*
172 To use preloading, one usually has code like the following:
173
174 PetscPreLoadBegin(preload,"first stage);
175 lines of code
176 PetscPreLoadStage("second stage");
177 lines of code
178 PetscPreLoadEnd();
179
180 The two macro PetscPreLoadBegin() and PetscPreLoadEnd() implicitly form a
181 loop with maximal two iterations, depending whether preloading is turned on or
182 not. If it is, either through the preload arg of PetscPreLoadBegin or through
183 -preload command line, the trip count is 2, otherwise it is 1. One can use the
184 predefined variable PetscPreLoadIt within the loop body to get the current
185 iteration number, which is 0 or 1. If preload is turned on, the runtime doesn't
186 do profiling for the first iteration, but it will do profiling for the second
187 iteration instead.
188
189 One can solve a small system in the first iteration and a large system in
190 the second iteration. This process preloads the instructions with the small
191 system so that more accurate performance monitoring (via -log_view) can be done
192 with the large one (that actually is the system of interest).
193
194 But in this example, we turned off preloading and duplicated the code for
195 the large system. In general, it is a bad practice and one should not duplicate
196 code. We do that because we want to show profiling stages for both the small
197 system and the large system.
198 */
199
200 /*=========================
201 solve a small system
202 =========================*/
203
204 PetscPreLoadBegin(preload, "Load System 0");
205 PetscCall(CreateSystem(file[0], rhstype, ordering, permute, &colperm, &A, &b, &x));
206
207 PetscPreLoadStage("KSPSetUp 0");
208 PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
209 PetscCall(KSPSetOperators(ksp, A, A));
210 PetscCall(KSPSetFromOptions(ksp));
211
212 /*
213 Here we explicitly call KSPSetUp() and KSPSetUpOnBlocks() to
214 enable more precise profiling of setting up the preconditioner.
215 These calls are optional, since both will be called within
216 KSPSolve() if they haven't been called already.
217 */
218 PetscCall(KSPSetUp(ksp));
219 PetscCall(KSPSetUpOnBlocks(ksp));
220
221 PetscPreLoadStage("KSPSolve 0");
222 if (trans) PetscCall(KSPSolveTranspose(ksp, b, x));
223 else PetscCall(KSPSolve(ksp, b, x));
224
225 if (permute) PetscCall(VecPermute(x, colperm, PETSC_TRUE));
226
227 PetscCall(CheckResult(&ksp, &A, &b, &x, &colperm));
228
229 /*=========================
230 solve a large system
231 =========================*/
232
233 PetscPreLoadStage("Load System 1");
234
235 PetscCall(CreateSystem(file[1], rhstype, ordering, permute, &colperm, &A, &b, &x));
236
237 PetscPreLoadStage("KSPSetUp 1");
238 PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
239 PetscCall(KSPSetOperators(ksp, A, A));
240 PetscCall(KSPSetFromOptions(ksp));
241
242 /*
243 Here we explicitly call KSPSetUp() and KSPSetUpOnBlocks() to
244 enable more precise profiling of setting up the preconditioner.
245 These calls are optional, since both will be called within
246 KSPSolve() if they haven't been called already.
247 */
248 PetscCall(KSPSetUp(ksp));
249 PetscCall(KSPSetUpOnBlocks(ksp));
250
251 PetscPreLoadStage("KSPSolve 1");
252 if (trans) PetscCall(KSPSolveTranspose(ksp, b, x));
253 else PetscCall(KSPSolve(ksp, b, x));
254
255 if (permute) PetscCall(VecPermute(x, colperm, PETSC_TRUE));
256
257 PetscCall(CheckResult(&ksp, &A, &b, &x, &colperm));
258
259 PetscPreLoadEnd();
260 /*
261 Always call PetscFinalize() before exiting a program. This routine
262 - finalizes the PETSc libraries as well as MPI
263 - provides summary and diagnostic information if certain runtime
264 options are chosen (e.g., -log_view).
265 */
266 PetscCall(PetscFinalize());
267 return 0;
268 }
269
270 /*TEST
271
272 test:
273 suffix: 1
274 nsize: 4
275 output_file: output/ex10_1.out
276 requires: datafilespath double !complex !defined(PETSC_USE_64BIT_INDICES)
277 args: -f0 ${DATAFILESPATH}/matrices/medium -f1 ${DATAFILESPATH}/matrices/arco6 -ksp_gmres_classicalgramschmidt -mat_type baij -pc_type bjacobi
278
279 test:
280 suffix: 2
281 nsize: 4
282 output_file: output/ex10_2.out
283 requires: datafilespath double !complex !defined(PETSC_USE_64BIT_INDICES)
284 args: -f0 ${DATAFILESPATH}/matrices/medium -f1 ${DATAFILESPATH}/matrices/arco6 -ksp_gmres_classicalgramschmidt -mat_type baij -pc_type bjacobi -trans
285
286 test:
287 suffix: 3
288 requires: double complex !defined(PETSC_USE_64BIT_INDICES)
289 args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/nh-complex-int32-float64 -ksp_type bicg
290
291 test:
292 suffix: 4
293 args: -f ${DATAFILESPATH}/matrices/medium -ksp_type bicg -permute rcm
294 requires: datafilespath double !complex !defined(PETSC_USE_64BIT_INDICES)
295
296 TEST*/
297