1 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n";
2
3 #include <petscdmda_kokkos.hpp>
4 #include <petscsnes.h>
5
6 /*
7 User-defined application context
8 */
9 typedef struct {
10 DM da; /* distributed array */
11 Vec F; /* right-hand side of PDE */
12 PetscReal h; /* mesh spacing */
13 } ApplicationCtx;
14
15 /* ------------------------------------------------------------------- */
16 /*
17 FormInitialGuess - Computes initial guess.
18
19 Input/Output Parameter:
20 . x - the solution vector
21 */
FormInitialGuess(Vec x)22 PetscErrorCode FormInitialGuess(Vec x)
23 {
24 PetscScalar pfive = .50;
25
26 PetscFunctionBeginUser;
27 PetscCall(VecSet(x, pfive));
28 PetscFunctionReturn(PETSC_SUCCESS);
29 }
30
31 /* ------------------------------------------------------------------- */
32 /*
33 CpuFunction - Evaluates nonlinear function, F(x) on CPU
34
35 Input Parameters:
36 . snes - the SNES context
37 . x - input vector
38 . ctx - optional user-defined context, as set by SNESSetFunction()
39
40 Output Parameter:
41 . r - function vector
42
43 Note:
44 The user-defined context can contain any application-specific
45 data needed for the function evaluation.
46 */
CpuFunction(SNES snes,Vec x,Vec r,PetscCtx ctx)47 PetscErrorCode CpuFunction(SNES snes, Vec x, Vec r, PetscCtx ctx)
48 {
49 ApplicationCtx *user = (ApplicationCtx *)ctx;
50 DM da = user->da;
51 PetscScalar *X, *R, *F, d;
52 PetscInt i, M, xs, xm;
53 Vec xl;
54
55 PetscFunctionBeginUser;
56 PetscCall(DMGetLocalVector(da, &xl));
57 PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl));
58 PetscCall(DMDAVecGetArray(da, xl, &X));
59 PetscCall(DMDAVecGetArray(da, r, &R));
60 PetscCall(DMDAVecGetArray(da, user->F, &F));
61
62 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
63 PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
64
65 if (xs == 0) { /* left boundary */
66 R[0] = X[0];
67 xs++;
68 xm--;
69 }
70 if (xs + xm == M) { /* right boundary */
71 R[xs + xm - 1] = X[xs + xm - 1] - 1.0;
72 xm--;
73 }
74 d = 1.0 / (user->h * user->h);
75 for (i = xs; i < xs + xm; i++) R[i] = d * (X[i - 1] - 2.0 * X[i] + X[i + 1]) + X[i] * X[i] - F[i];
76
77 PetscCall(DMDAVecRestoreArray(da, xl, &X));
78 PetscCall(DMDAVecRestoreArray(da, r, &R));
79 PetscCall(DMDAVecRestoreArray(da, user->F, &F));
80 PetscCall(DMRestoreLocalVector(da, &xl));
81 PetscFunctionReturn(PETSC_SUCCESS);
82 }
83
84 using DefaultExecutionSpace = Kokkos::DefaultExecutionSpace;
85 using DefaultMemorySpace = Kokkos::DefaultExecutionSpace::memory_space;
86 using PetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<PetscScalar *, DefaultMemorySpace>;
87 using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar *, DefaultMemorySpace>;
88
KokkosFunction(SNES snes,Vec x,Vec r,PetscCtx ctx)89 PetscErrorCode KokkosFunction(SNES snes, Vec x, Vec r, PetscCtx ctx)
90 {
91 ApplicationCtx *user = (ApplicationCtx *)ctx;
92 DM da = user->da;
93 PetscScalar d;
94 PetscInt M;
95 Vec xl;
96 PetscScalarKokkosOffsetView R;
97 ConstPetscScalarKokkosOffsetView X, F;
98
99 PetscFunctionBeginUser;
100 PetscCall(DMGetLocalVector(da, &xl));
101 PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl));
102 d = 1.0 / (user->h * user->h);
103 PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
104 PetscCall(DMDAVecGetKokkosOffsetView(da, xl, &X)); /* read only */
105 PetscCall(DMDAVecGetKokkosOffsetViewWrite(da, r, &R)); /* write only */
106 PetscCall(DMDAVecGetKokkosOffsetView(da, user->F, &F)); /* read only */
107 Kokkos::parallel_for(
108 Kokkos::RangePolicy<>(R.begin(0), R.end(0)), KOKKOS_LAMBDA(int i) {
109 if (i == 0) R(0) = X(0); /* left boundary */
110 else if (i == M - 1) R(i) = X(i) - 1.0; /* right boundary */
111 else R(i) = d * (X(i - 1) - 2.0 * X(i) + X(i + 1)) + X(i) * X(i) - F(i); /* interior */
112 });
113 PetscCall(DMDAVecRestoreKokkosOffsetView(da, xl, &X));
114 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(da, r, &R));
115 PetscCall(DMDAVecRestoreKokkosOffsetView(da, user->F, &F));
116 PetscCall(DMRestoreLocalVector(da, &xl));
117 PetscFunctionReturn(PETSC_SUCCESS);
118 }
119
StubFunction(SNES snes,Vec x,Vec r,PetscCtx ctx)120 PetscErrorCode StubFunction(SNES snes, Vec x, Vec r, PetscCtx ctx)
121 {
122 ApplicationCtx *user = (ApplicationCtx *)ctx;
123 DM da = user->da;
124 Vec rk;
125 PetscReal norm = 0;
126
127 PetscFunctionBeginUser;
128 PetscCall(DMGetGlobalVector(da, &rk));
129 PetscCall(CpuFunction(snes, x, r, ctx));
130 PetscCall(KokkosFunction(snes, x, rk, ctx));
131 PetscCall(VecAXPY(rk, -1.0, r));
132 PetscCall(VecNorm(rk, NORM_2, &norm));
133 PetscCall(DMRestoreGlobalVector(da, &rk));
134 PetscCheck(norm <= 1e-6, PETSC_COMM_SELF, PETSC_ERR_PLIB, "KokkosFunction() different from CpuFunction() with a diff norm = %g", (double)norm);
135 PetscFunctionReturn(PETSC_SUCCESS);
136 }
137 /* ------------------------------------------------------------------- */
138 /*
139 FormJacobian - Evaluates Jacobian matrix.
140
141 Input Parameters:
142 . snes - the SNES context
143 . x - input vector
144 . dummy - optional user-defined context (not used here)
145
146 Output Parameters:
147 . jac - Jacobian matrix
148 . B - optionally different matrix used to construct the preconditioner
149
150 */
FormJacobian(SNES snes,Vec x,Mat jac,Mat B,PetscCtx ctx)151 PetscErrorCode FormJacobian(SNES snes, Vec x, Mat jac, Mat B, PetscCtx ctx)
152 {
153 ApplicationCtx *user = (ApplicationCtx *)ctx;
154 PetscScalar *xx, d, A[3];
155 PetscInt i, j[3], M, xs, xm;
156 DM da = user->da;
157
158 PetscFunctionBeginUser;
159 /*
160 Get pointer to vector data
161 */
162 PetscCall(DMDAVecGetArrayRead(da, x, &xx));
163 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
164
165 /*
166 Get range of locally owned matrix
167 */
168 PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
169
170 /*
171 Determine starting and ending local indices for interior grid points.
172 Set Jacobian entries for boundary points.
173 */
174
175 if (xs == 0) { /* left boundary */
176 i = 0;
177 A[0] = 1.0;
178
179 PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES));
180 xs++;
181 xm--;
182 }
183 if (xs + xm == M) { /* right boundary */
184 i = M - 1;
185 A[0] = 1.0;
186 PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES));
187 xm--;
188 }
189
190 /*
191 Interior grid points
192 - Note that in this case we set all elements for a particular
193 row at once.
194 */
195 d = 1.0 / (user->h * user->h);
196 for (i = xs; i < xs + xm; i++) {
197 j[0] = i - 1;
198 j[1] = i;
199 j[2] = i + 1;
200 A[0] = A[2] = d;
201 A[1] = -2.0 * d + 2.0 * xx[i];
202 PetscCall(MatSetValues(jac, 1, &i, 3, j, A, INSERT_VALUES));
203 }
204
205 /*
206 Assemble matrix, using the 2-step process:
207 MatAssemblyBegin(), MatAssemblyEnd().
208 By placing code between these two statements, computations can be
209 done while messages are in transition.
210
211 Also, restore vector.
212 */
213
214 PetscCall(MatAssemblyBegin(jac, MAT_FINAL_ASSEMBLY));
215 PetscCall(DMDAVecRestoreArrayRead(da, x, &xx));
216 PetscCall(MatAssemblyEnd(jac, MAT_FINAL_ASSEMBLY));
217 PetscFunctionReturn(PETSC_SUCCESS);
218 }
219
main(int argc,char ** argv)220 int main(int argc, char **argv)
221 {
222 SNES snes; /* SNES context */
223 Mat J; /* Jacobian matrix */
224 ApplicationCtx ctx; /* user-defined context */
225 Vec x, r, U, F; /* vectors */
226 PetscScalar none = -1.0;
227 PetscInt its, N = 5, maxit, maxf;
228 PetscReal abstol, rtol, stol, norm;
229 PetscBool viewinitial = PETSC_FALSE;
230
231 PetscFunctionBeginUser;
232 PetscCall(PetscInitialize(&argc, &argv, nullptr, help));
233 PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL));
234 ctx.h = 1.0 / (N - 1);
235 PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_initial", &viewinitial, NULL));
236
237 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
238 Create nonlinear solver context
239 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
240 PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
241
242 /*
243 Create distributed array (DMDA) to manage parallel grid and vectors
244 */
245 PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, N, 1, 1, NULL, &ctx.da));
246 PetscCall(DMSetFromOptions(ctx.da));
247 PetscCall(DMSetUp(ctx.da));
248
249 /*
250 Extract global and local vectors from DMDA; then duplicate for remaining
251 vectors that are the same types
252 */
253 PetscCall(DMCreateGlobalVector(ctx.da, &x));
254 PetscCall(PetscObjectSetName((PetscObject)x, "Approximate Solution"));
255 PetscCall(VecDuplicate(x, &r));
256 PetscCall(VecDuplicate(x, &F));
257 ctx.F = F;
258 PetscCall(PetscObjectSetName((PetscObject)F, "Forcing function"));
259 PetscCall(VecDuplicate(x, &U));
260 PetscCall(PetscObjectSetName((PetscObject)U, "Exact Solution"));
261
262 /*
263 Set function evaluation routine and vector. Whenever the nonlinear
264 solver needs to compute the nonlinear function, it will call this
265 routine.
266 - Note that the final routine argument is the user-defined
267 context that provides application-specific data for the
268 function evaluation routine.
269
270 At the beginning, one can use a stub function that checks the Kokkos version
271 against the CPU version to quickly expose errors.
272 PetscCall(SNESSetFunction(snes,r,StubFunction,&ctx));
273 */
274 PetscCall(SNESSetFunction(snes, r, KokkosFunction, &ctx));
275
276 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
277 Create matrix data structure; set Jacobian evaluation routine
278 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
279 PetscCall(DMCreateMatrix(ctx.da, &J));
280
281 /*
282 Set Jacobian matrix data structure and default Jacobian evaluation
283 routine. Whenever the nonlinear solver needs to compute the
284 Jacobian matrix, it will call this routine.
285 - Note that the final routine argument is the user-defined
286 context that provides application-specific data for the
287 Jacobian evaluation routine.
288 */
289 PetscCall(SNESSetJacobian(snes, J, J, FormJacobian, &ctx));
290 PetscCall(SNESSetFromOptions(snes));
291 PetscCall(SNESGetTolerances(snes, &abstol, &rtol, &stol, &maxit, &maxf));
292 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "atol=%g, rtol=%g, stol=%g, maxit=%" PetscInt_FMT ", maxf=%" PetscInt_FMT "\n", (double)abstol, (double)rtol, (double)stol, maxit, maxf));
293
294 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
295 Initialize application:
296 Store forcing function of PDE and exact solution
297 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
298 {
299 PetscScalarKokkosOffsetView FF, UU;
300 PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, F, &FF));
301 PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, U, &UU));
302 Kokkos::parallel_for(
303 Kokkos::RangePolicy<>(FF.begin(0), FF.end(0)), KOKKOS_LAMBDA(int i) {
304 PetscReal xp = i * ctx.h;
305 FF(i) = 6.0 * xp + pow(xp + 1.e-12, 6.0); /* +1.e-12 is to prevent 0^6 */
306 UU(i) = xp * xp * xp;
307 });
308 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, F, &FF));
309 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, U, &UU));
310 }
311
312 if (viewinitial) {
313 PetscCall(VecView(U, NULL));
314 PetscCall(VecView(F, NULL));
315 }
316
317 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
318 Evaluate initial guess; then solve nonlinear system
319 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
320
321 /*
322 Note: The user should initialize the vector, x, with the initial guess
323 for the nonlinear solver prior to calling SNESSolve(). In particular,
324 to employ an initial guess of zero, the user should explicitly set
325 this vector to zero by calling VecSet().
326 */
327 PetscCall(FormInitialGuess(x));
328 PetscCall(SNESSolve(snes, NULL, x));
329 PetscCall(SNESGetIterationNumber(snes, &its));
330 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of SNES iterations = %" PetscInt_FMT "\n", its));
331
332 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
333 Check solution and clean up
334 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
335 /*
336 Check the error
337 */
338 PetscCall(VecAXPY(x, none, U));
339 PetscCall(VecNorm(x, NORM_2, &norm));
340 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)norm, its));
341
342 /*
343 Free work space. All PETSc objects should be destroyed when they
344 are no longer needed.
345 */
346 PetscCall(VecDestroy(&x));
347 PetscCall(VecDestroy(&r));
348 PetscCall(VecDestroy(&U));
349 PetscCall(VecDestroy(&F));
350 PetscCall(MatDestroy(&J));
351 PetscCall(SNESDestroy(&snes));
352 PetscCall(DMDestroy(&ctx.da));
353 PetscCall(PetscFinalize());
354 return 0;
355 }
356
357 /*TEST
358
359 build:
360 requires: kokkos_kernels
361
362 test:
363 requires: kokkos_kernels !complex !single
364 nsize: 2
365 args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor
366 output_file: output/ex3k_1.out
367
368 TEST*/
369