1 /*
2 Poisson in 3D. Modeled by the PDE:
3
4 - delta u(x,y,z) = f(x,y,z)
5
6 With exact solution:
7
8 u(x,y,z) = 1.0
9
10 Example usage:
11
12 Run on GPU (requires respective backends installed):
13 ./bench_kspsolve -mat_type aijcusparse
14 ./bench_kspsolve -mat_type aijhipsparse
15 ./bench_kspsolve -mat_type aijkokkos
16
17 Test only MatMult:
18 ./bench_kspsolve -matmult
19
20 Test MatMult over 1000 iterations:
21 ./bench_kspsolve -matmult -its 1000
22
23 Change size of problem (e.g., use a 128x128x128 grid):
24 ./bench_kspsolve -n 128
25 */
26 static char help[] = "Solves 3D Laplacian with 27-point finite difference stencil.\n";
27
28 #include <petscksp.h>
29
30 typedef struct {
31 PetscMPIInt rank, size;
32 PetscInt n; /* global size in each dimension */
33 PetscInt dim; /* global size */
34 PetscInt nnz; /* local nonzeros */
35 PetscBool matmult; /* Do MatMult() only */
36 PetscBool debug; /* Debug flag for PreallocateCOO() */
37 PetscBool splitksp; /* Split KSPSolve and PCSetUp */
38 PetscInt its; /* No of matmult_iterations */
39 PetscInt Istart, Iend;
40 } AppCtx;
41
PreallocateCOO(Mat A,PetscCtx ctx)42 static PetscErrorCode PreallocateCOO(Mat A, PetscCtx ctx)
43 {
44 AppCtx *user = (AppCtx *)ctx;
45 PetscInt n = user->n, n2 = n * n, n1 = n - 1;
46 PetscInt xstart, ystart, zstart, xend, yend, zend, nm2 = n - 2, idx;
47 PetscInt nnz[] = {8, 12, 18, 27}; /* nnz for corner, edge, face, and center. */
48
49 PetscFunctionBeginUser;
50 xstart = user->Istart % n;
51 ystart = (user->Istart / n) % n;
52 zstart = user->Istart / n2;
53 xend = (user->Iend - 1) % n;
54 yend = ((user->Iend - 1) / n) % n;
55 zend = (user->Iend - 1) / n2;
56
57 /* bottom xy-plane */
58 user->nnz = 0;
59 idx = !zstart ? 0 : 1;
60 if (zstart == zend && (!zstart || zstart == n1)) idx = 0;
61 if (zstart == zend && (zstart && zstart < n1)) idx = 1;
62 if (!xstart && !ystart) // bottom left
63 user->nnz += 4 * nnz[idx] + 4 * nm2 * nnz[idx + 1] + nm2 * nm2 * nnz[idx + 2];
64 else if (xstart && xstart < n1 && !ystart) // bottom
65 user->nnz += 3 * nnz[idx] + (3 * nm2 + n1 - xstart) * nnz[idx + 1] + nm2 * nm2 * nnz[idx + 2];
66 else if (xstart == n1 && !ystart) // bottom right
67 user->nnz += 3 * nnz[idx] + (3 * nm2) * nnz[idx + 1] + nm2 * nm2 * nnz[idx + 2];
68 else if (!xstart && ystart && ystart < n1) // left
69 user->nnz += 2 * nnz[idx] + (nm2 + 2 * (n1 - ystart)) * nnz[idx + 1] + nm2 * (n1 - ystart) * nnz[idx + 2];
70 else if (xstart && xstart < n1 && ystart && ystart < n1) // center
71 user->nnz += 2 * nnz[idx] + (nm2 + (n1 - ystart) + (nm2 - ystart)) * nnz[idx + 1] + (nm2 * (nm2 - ystart) + (n1 - xstart)) * nnz[idx + 2];
72 else if (xstart == n1 && ystart && ystart < n1) // right
73 user->nnz += 2 * nnz[idx] + (nm2 + n1 - ystart + nm2 - ystart) * nnz[idx + 1] + nm2 * (nm2 - ystart) * nnz[idx + 2];
74 else if (ystart == n1 && !xstart) // top left
75 user->nnz += 2 * nnz[idx] + nm2 * nnz[idx + 1];
76 else if (ystart == n1 && xstart && xstart < n1) // top
77 user->nnz += nnz[idx] + (n1 - xstart) * nnz[idx + 1];
78 else if (xstart == n1 && ystart == n1) // top right
79 user->nnz += nnz[idx];
80
81 /* top and middle plane the same */
82 if (zend == zstart) user->nnz = user->nnz - 4 * nnz[idx] - 4 * nm2 * nnz[idx + 1] - nm2 * nm2 * nnz[idx + 2];
83
84 /* middle xy-planes */
85 if (zend - zstart > 1) user->nnz = user->nnz + (zend - zstart - 1) * (4 * nnz[1] + 4 * nnz[2] * nm2 + nnz[3] * nm2 * nm2);
86
87 /* top xy-plane */
88 idx = zend == n1 ? 0 : 1;
89 if (zstart == zend && (!zend || zend == n1)) idx = 0;
90 if (zstart == zend && (zend && zend < n1)) idx = 1;
91 if (!xend && !yend) // bottom left
92 user->nnz += nnz[idx];
93 else if (xend && xend < n1 && !yend) // bottom
94 user->nnz += nnz[idx] + xend * nnz[idx + 1];
95 else if (xend == n1 && !yend) // bottom right
96 user->nnz += 2 * nnz[idx] + nm2 * nnz[idx + 1];
97 else if (!xend && yend && yend < n1) // left
98 user->nnz += 2 * nnz[idx] + (nm2 + yend + yend - 1) * nnz[idx + 1] + nm2 * (yend - 1) * nnz[idx + 2];
99 else if (xend && xend < n1 && yend && yend < n1) // center
100 user->nnz += 2 * nnz[idx] + (nm2 + yend + yend - 1) * nnz[idx + 1] + (nm2 * (yend - 1) + xend) * nnz[idx + 2];
101 else if (xend == n1 && yend && yend < n1) // right
102 user->nnz += 2 * nnz[idx] + (nm2 + 2 * yend) * nnz[idx + 1] + (nm2 * yend) * nnz[idx + 2];
103 else if (!xend && yend == n1) // top left
104 user->nnz += 3 * nnz[idx] + (3 * nm2) * nnz[idx + 1] + (nm2 * nm2) * nnz[idx + 2];
105 else if (xend && xend < n1 && yend == n1) // top
106 user->nnz += 3 * nnz[idx] + (3 * nm2 + xend) * nnz[idx + 1] + (nm2 * nm2) * nnz[idx + 2];
107 else if (xend == n1 && yend == n1) // top right
108 user->nnz += 4 * nnz[idx] + (4 * nm2) * nnz[idx + 1] + (nm2 * nm2) * nnz[idx + 2];
109 if (user->debug)
110 PetscCall(PetscPrintf(PETSC_COMM_SELF, "rank %d: xyzstart = %" PetscInt_FMT ",%" PetscInt_FMT ",%" PetscInt_FMT ", xvzend = %" PetscInt_FMT ",%" PetscInt_FMT ",%" PetscInt_FMT ", nnz = %" PetscInt_FMT "\n", user->rank, xstart, ystart, zstart, xend, yend, zend,
111 user->nnz));
112 PetscFunctionReturn(PETSC_SUCCESS);
113 }
114
FillCOO(Mat A,PetscCtx ctx)115 PetscErrorCode FillCOO(Mat A, PetscCtx ctx)
116 {
117 AppCtx *user = (AppCtx *)ctx;
118 PetscInt Ii, x, y, z, n = user->n, n2 = n * n, n1 = n - 1;
119 PetscInt count = 0;
120 PetscInt *coo_i, *coo_j;
121 PetscScalar *coo_v;
122 PetscScalar h = 1.0 / (n - 1);
123 PetscScalar vcorn = -1.0 / 13 * h; //-1.0/12.0*h;
124 PetscScalar vedge = -3.0 / 26 * h; //-1.0/6.0*h;
125 PetscScalar vface = -3.0 / 13 * h; //-1.0e-9*h;
126 PetscScalar vcent = 44.0 / 13 * h; //8.0/3.0*h;
127
128 PetscFunctionBeginUser;
129 PetscCall(PetscCalloc3(user->nnz, &coo_i, user->nnz, &coo_j, user->nnz, &coo_v));
130 /* Fill COO arrays */
131 for (Ii = user->Istart; Ii < user->Iend; Ii++) {
132 x = Ii % n;
133 y = (Ii / n) % n;
134 z = Ii / n2;
135 if (x > 0 && y > 0 && z > 0) {
136 coo_i[count] = Ii;
137 coo_j[count] = Ii - 1 - n - n2;
138 coo_v[count] = vcorn;
139 count++;
140 }
141 if (x > 0 && y < n1 && z > 0) {
142 coo_i[count] = Ii;
143 coo_j[count] = Ii - 1 + n - n2;
144 coo_v[count] = vcorn;
145 count++;
146 }
147 if (x < n1 && y > 0 && z > 0) {
148 coo_i[count] = Ii;
149 coo_j[count] = Ii + 1 - n - n2;
150 coo_v[count] = vcorn;
151 count++;
152 }
153 if (x < n1 && y < n1 && z > 0) {
154 coo_i[count] = Ii;
155 coo_j[count] = Ii + 1 + n - n2;
156 coo_v[count] = vcorn;
157 count++;
158 }
159 if (x > 0 && y > 0 && z < n1) {
160 coo_i[count] = Ii;
161 coo_j[count] = Ii - 1 - n + n2;
162 coo_v[count] = vcorn;
163 count++;
164 }
165 if (x > 0 && y < n1 && z < n1) {
166 coo_i[count] = Ii;
167 coo_j[count] = Ii - 1 + n + n2;
168 coo_v[count] = vcorn;
169 count++;
170 }
171 if (x < n1 && y > 0 && z < n1) {
172 coo_i[count] = Ii;
173 coo_j[count] = Ii + 1 - n + n2;
174 coo_v[count] = vcorn;
175 count++;
176 }
177 if (x < n1 && y < n1 && z < n1) {
178 coo_i[count] = Ii;
179 coo_j[count] = Ii + 1 + n + n2;
180 coo_v[count] = vcorn;
181 count++;
182 }
183 /* 12 edges */
184 if (x > 0 && y > 0) {
185 coo_i[count] = Ii;
186 coo_j[count] = Ii - 1 - n;
187 coo_v[count] = vedge;
188 count++;
189 }
190 if (x > 0 && y < n1) {
191 coo_i[count] = Ii;
192 coo_j[count] = Ii - 1 + n;
193 coo_v[count] = vedge;
194 count++;
195 }
196 if (x < n1 && y > 0) {
197 coo_i[count] = Ii;
198 coo_j[count] = Ii + 1 - n;
199 coo_v[count] = vedge;
200 count++;
201 }
202 if (x < n1 && y < n1) {
203 coo_i[count] = Ii;
204 coo_j[count] = Ii + 1 + n;
205 coo_v[count] = vedge;
206 count++;
207 }
208 if (x > 0 && z > 0) {
209 coo_i[count] = Ii;
210 coo_j[count] = Ii - 1 - n2;
211 coo_v[count] = vedge;
212 count++;
213 }
214 if (x > 0 && z < n1) {
215 coo_i[count] = Ii;
216 coo_j[count] = Ii - 1 + n2;
217 coo_v[count] = vedge;
218 count++;
219 }
220 if (x < n1 && z > 0) {
221 coo_i[count] = Ii;
222 coo_j[count] = Ii + 1 - n2;
223 coo_v[count] = vedge;
224 count++;
225 }
226 if (x < n1 && z < n1) {
227 coo_i[count] = Ii;
228 coo_j[count] = Ii + 1 + n2;
229 coo_v[count] = vedge;
230 count++;
231 }
232 if (y > 0 && z > 0) {
233 coo_i[count] = Ii;
234 coo_j[count] = Ii - n - n2;
235 coo_v[count] = vedge;
236 count++;
237 }
238 if (y > 0 && z < n1) {
239 coo_i[count] = Ii;
240 coo_j[count] = Ii - n + n2;
241 coo_v[count] = vedge;
242 count++;
243 }
244 if (y < n1 && z > 0) {
245 coo_i[count] = Ii;
246 coo_j[count] = Ii + n - n2;
247 coo_v[count] = vedge;
248 count++;
249 }
250 if (y < n1 && z < n1) {
251 coo_i[count] = Ii;
252 coo_j[count] = Ii + n + n2;
253 coo_v[count] = vedge;
254 count++;
255 }
256 /* 6 faces */
257 if (x > 0) {
258 coo_i[count] = Ii;
259 coo_j[count] = Ii - 1;
260 coo_v[count] = vface;
261 count++;
262 }
263 if (x < n1) {
264 coo_i[count] = Ii;
265 coo_j[count] = Ii + 1;
266 coo_v[count] = vface;
267 count++;
268 }
269 if (y > 0) {
270 coo_i[count] = Ii;
271 coo_j[count] = Ii - n;
272 coo_v[count] = vface;
273 count++;
274 }
275 if (y < n1) {
276 coo_i[count] = Ii;
277 coo_j[count] = Ii + n;
278 coo_v[count] = vface;
279 count++;
280 }
281 if (z > 0) {
282 coo_i[count] = Ii;
283 coo_j[count] = Ii - n2;
284 coo_v[count] = vface;
285 count++;
286 }
287 if (z < n1) {
288 coo_i[count] = Ii;
289 coo_j[count] = Ii + n2;
290 coo_v[count] = vface;
291 count++;
292 }
293 /* Center */
294 coo_i[count] = Ii;
295 coo_j[count] = Ii;
296 coo_v[count] = vcent;
297 count++;
298 }
299 PetscCheck(count == user->nnz, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "Expected %" PetscInt_FMT " nonzeros but got %" PetscInt_FMT " nonzeros in COO format", user->nnz, count);
300 PetscCall(MatSetPreallocationCOO(A, user->nnz, coo_i, coo_j));
301 PetscCall(MatSetValuesCOO(A, coo_v, INSERT_VALUES));
302 PetscCall(PetscFree3(coo_i, coo_j, coo_v));
303 PetscFunctionReturn(PETSC_SUCCESS);
304 }
305
main(int argc,char ** argv)306 int main(int argc, char **argv)
307 {
308 AppCtx user; /* Application context */
309 Vec x, b, u; /* approx solution, RHS, and exact solution */
310 Mat A; /* linear system matrix */
311 KSP ksp; /* linear solver context */
312 PC pc; /* Preconditioner */
313 PetscReal norm; /* Error norm */
314 PetscInt nlocal = PETSC_DECIDE, ksp_its; /* Number of KSP iterations */
315 PetscInt global_nnz = 0; /* Total number of nonzeros */
316 PetscLogDouble time_start, time_mid1 = 0.0, time_mid2 = 0.0, time_end, time_avg, floprate;
317 PetscBool printTiming = PETSC_TRUE; /* If run in CI, do not print timing result */
318 PETSC_UNUSED PetscLogStage stage;
319
320 PetscCall(PetscInitialize(&argc, &argv, NULL, help));
321 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &user.size));
322 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &user.rank));
323
324 user.n = 100; /* Default grid points per dimension */
325 user.matmult = PETSC_FALSE; /* Test MatMult only */
326 user.its = 100; /* Default no. of iterations for MatMult test */
327 user.debug = PETSC_FALSE; /* Debug PreallocateCOO() */
328 user.splitksp = PETSC_FALSE; /* Split KSPSolve and PCSetUp */
329 PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &user.n, NULL));
330 PetscCall(PetscOptionsGetInt(NULL, NULL, "-its", &user.its, NULL));
331 PetscCall(PetscOptionsGetBool(NULL, NULL, "-matmult", &user.matmult, NULL));
332 PetscCall(PetscOptionsGetBool(NULL, NULL, "-debug", &user.debug, NULL));
333 PetscCall(PetscOptionsGetBool(NULL, NULL, "-print_timing", &printTiming, NULL));
334 PetscCall(PetscOptionsGetBool(NULL, NULL, "-split_ksp", &user.splitksp, NULL));
335
336 user.dim = user.n * user.n * user.n;
337 global_nnz = 64 + 27 * (user.n - 2) * (user.n - 2) * (user.n - 2) + 108 * (user.n - 2) * (user.n - 2) + 144 * (user.n - 2);
338 PetscCheck(user.n >= 2, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Requires at least 2 grid points (-n 2), you specified -n %" PetscInt_FMT, user.n);
339 PetscCheck(user.dim >= user.size, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "MPI size (%d) exceeds the grid size %" PetscInt_FMT " (-n %" PetscInt_FMT ")", user.size, user.dim, user.n);
340 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "===========================================\n"));
341 if (user.matmult) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Test: MatMult performance - Poisson\n"));
342 else PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Test: KSP performance - Poisson\n"));
343 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\tInput matrix: 27-pt finite difference stencil\n"));
344 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\t-n %" PetscInt_FMT "\n", user.n));
345 if (user.matmult) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\t-its %" PetscInt_FMT "\n", user.its));
346 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\tDoFs = %" PetscInt_FMT "\n", user.dim));
347 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\tNumber of nonzeros = %" PetscInt_FMT "\n", global_nnz));
348
349 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
350 /* Create the Vecs and Mat */
351 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
352 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\nStep1 - creating Vecs and Mat...\n"));
353 PetscCall(PetscLogStageRegister("Step1 - Vecs and Mat", &stage));
354 PetscCall(PetscLogStagePush(stage));
355 PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
356 PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, user.dim, user.dim));
357 PetscCall(MatSetFromOptions(A));
358
359 /* cannot use MatGetOwnershipRange() because the matrix has yet to be preallocated; that happens in MatSetPreallocateCOO() */
360 PetscCall(PetscSplitOwnership(PetscObjectComm((PetscObject)A), &nlocal, &user.dim));
361 PetscCallMPI(MPI_Scan(&nlocal, &user.Istart, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)A)));
362 user.Istart -= nlocal;
363 user.Iend = user.Istart + nlocal;
364
365 PetscCall(PreallocateCOO(A, &user)); /* Determine local number of nonzeros */
366 PetscCall(FillCOO(A, &user)); /* Fill COO Matrix */
367 #if !defined(PETSC_HAVE_HIP) /* Due to errors in MatSolve_SeqAIJHIPSPARSE_ICC0() */
368 PetscCall(MatSetOption(A, MAT_SPD, PETSC_TRUE));
369 PetscCall(MatSetOption(A, MAT_SPD_ETERNAL, PETSC_TRUE));
370 #endif
371 PetscCall(MatCreateVecs(A, &u, &b));
372 if (!user.matmult) PetscCall(VecDuplicate(b, &x));
373 PetscCall(VecSet(u, 1.0)); /* Exact solution */
374 PetscCall(MatMult(A, u, b)); /* Compute RHS based on exact solution */
375 PetscCall(PetscLogStagePop());
376
377 if (user.matmult) {
378 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
379 /* MatMult */
380 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
381 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step2 - running MatMult() %" PetscInt_FMT " times...\n", user.its));
382 PetscCall(PetscLogStageRegister("Step2 - MatMult", &stage));
383 PetscCall(PetscLogStagePush(stage));
384 PetscCall(PetscTime(&time_start));
385 for (int i = 0; i < user.its; i++) PetscCall(MatMult(A, u, b));
386 PetscCall(PetscTime(&time_end));
387 PetscCall(PetscLogStagePop());
388 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
389 /* Calculate Performance metrics */
390 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
391 time_avg = (time_end - time_start) / ((PetscLogDouble)user.its);
392 floprate = 2 * global_nnz / time_avg * 1e-9;
393 if (printTiming) {
394 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n%-15s%-7.5f seconds\n", "Average time:", time_avg));
395 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-9.3e Gflops/sec\n", "FOM:", floprate)); /* figure of merit */
396 }
397 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "===========================================\n"));
398 } else {
399 if (!user.splitksp) {
400 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
401 /* Solve the linear system of equations */
402 /* Measure only time of PCSetUp() and KSPSolve() */
403 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
404 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step2 - running KSPSolve()...\n"));
405 PetscCall(PetscLogStageRegister("Step2 - KSPSolve", &stage));
406 PetscCall(PetscLogStagePush(stage));
407 PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
408 PetscCall(KSPSetErrorIfNotConverged(ksp, PETSC_TRUE));
409 PetscCall(KSPSetOperators(ksp, A, A));
410 PetscCall(KSPSetFromOptions(ksp));
411 PetscCall(PetscTime(&time_start));
412 PetscCall(KSPSolve(ksp, b, x));
413 PetscCall(PetscTime(&time_end));
414 PetscCall(PetscLogStagePop());
415 } else {
416 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
417 /* Solve the linear system of equations */
418 /* Measure only time of PCSetUp() and KSPSolve() */
419 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
420 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step2a - running PCSetUp()...\n"));
421 PetscCall(PetscLogStageRegister("Step2a - PCSetUp", &stage));
422 PetscCall(PetscLogStagePush(stage));
423 PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
424 PetscCall(KSPSetErrorIfNotConverged(ksp, PETSC_TRUE));
425 PetscCall(KSPSetOperators(ksp, A, A));
426 PetscCall(KSPSetFromOptions(ksp));
427 PetscCall(KSPGetPC(ksp, &pc));
428 PetscCall(PetscTime(&time_start));
429 PetscCall(PCSetUp(pc));
430 PetscCall(PCSetUpOnBlocks(pc));
431 PetscCall(PetscTime(&time_mid1));
432 PetscCall(PetscLogStagePop());
433 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step2b - running KSPSolve()...\n"));
434 PetscCall(PetscLogStageRegister("Step2b - KSPSolve", &stage));
435 PetscCall(PetscLogStagePush(stage));
436 PetscCall(PetscTime(&time_mid2));
437 PetscCall(KSPSolve(ksp, b, x));
438 PetscCall(PetscTime(&time_end));
439 PetscCall(PetscLogStagePop());
440 }
441
442 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
443 /* Calculate error norm */
444 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
445 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Step3 - calculating error norm...\n"));
446 PetscCall(PetscLogStageRegister("Step3 - Error norm", &stage));
447 PetscCall(PetscLogStagePush(stage));
448 PetscCall(VecAXPY(x, -1.0, u));
449 PetscCall(VecNorm(x, NORM_2, &norm));
450 PetscCall(PetscLogStagePop());
451
452 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
453 /* Summary */
454 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
455 PetscCall(KSPGetIterationNumber(ksp, &ksp_its));
456 if (printTiming) {
457 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n%-15s%-1.3e\n", "Error norm:", (double)norm));
458 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-3" PetscInt_FMT "\n", "KSP iters:", ksp_its));
459 if (user.splitksp) {
460 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-7.5f seconds\n", "PCSetUp:", time_mid1 - time_start));
461 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-7.5f seconds\n", "KSPSolve:", time_end - time_mid2));
462 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-7.5f seconds\n", "Total Solve:", time_end - time_start));
463 } else {
464 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-7.5f seconds\n", "KSPSolve:", time_end - time_start));
465 }
466 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%-15s%-1.3e DoFs/sec\n", "FOM:", user.dim / (time_end - time_start))); /* figure of merit */
467 }
468 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "===========================================\n"));
469 }
470 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
471 /* Free up memory */
472 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
473 if (!user.matmult) {
474 PetscCall(KSPDestroy(&ksp));
475 PetscCall(VecDestroy(&x));
476 }
477 PetscCall(VecDestroy(&u));
478 PetscCall(VecDestroy(&b));
479 PetscCall(MatDestroy(&A));
480 PetscCall(PetscFinalize());
481 return 0;
482 }
483
484 /*TEST
485
486 testset:
487 args: -print_timing false -matmult -its 10 -n 8
488 nsize: {{1 3}}
489 output_file: output/bench_kspsolve_matmult.out
490
491 test:
492 suffix: matmult
493
494 test:
495 suffix: hip_matmult
496 requires: hip
497 args: -mat_type aijhipsparse
498
499 test:
500 suffix: cuda_matmult
501 requires: cuda
502 args: -mat_type aijcusparse
503
504 test:
505 suffix: kok_matmult
506 requires: kokkos_kernels
507 args: -mat_type aijkokkos
508
509 testset:
510 args: -print_timing false -its 10 -n 8
511 nsize: {{3 5}}
512 output_file: output/bench_kspsolve_ksp.out
513
514 test:
515 suffix: ksp
516
517 test:
518 suffix: nbr
519 requires: defined(PETSC_HAVE_MPI_PERSISTENT_NEIGHBORHOOD_COLLECTIVES)
520 args: -sf_type neighbor -sf_neighbor_persistent {{0 1}}
521
522 test:
523 suffix: hip_ksp
524 requires: hip
525 args: -mat_type aijhipsparse -sub_pc_factor_mat_factor_on_host {{0 1}}
526
527 test:
528 suffix: cuda_ksp
529 requires: cuda
530 args: -mat_type aijcusparse -sub_pc_factor_mat_factor_on_host {{0 1}}
531
532 test:
533 suffix: kok_ksp_1
534 requires: kokkos_kernels
535 args: -mat_type aijkokkos -pc_type bjacobi -sub_pc_type {{ilu icc}}
536
537 test:
538 suffix: kok_ksp_2
539 requires: kokkos_kernels
540 args: -mat_type aijkokkos -pc_type bjacobi -sub_pc_type {{ilu icc}} -sub_pc_factor_mat_factor_on_host -sub_pc_factor_mat_solve_on_host {{0 1}}
541
542 test:
543 suffix: kok_hypre
544 requires: kokkos_kernels defined(PETSC_HAVE_HYPRE_DEVICE)
545 args: -mat_type aijkokkos -pc_type hypre
546
547 test:
548 suffix: kok_nbr
549 requires: kokkos_kernels defined(PETSC_HAVE_MPI_PERSISTENT_NEIGHBORHOOD_COLLECTIVES)
550 args: -mat_type aijkokkos -sf_type neighbor -sf_neighbor_persistent {{0 1}}
551
552 TEST*/
553