xref: /petsc/src/snes/tutorials/ex3k.kokkos.cxx (revision a69119a591a03a9d906b29c0a4e9802e4d7c9795)
1 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n";
2 
3 #include <petscdmda_kokkos.hpp>
4 #include <petscsnes.h>
5 
6 /*
7    User-defined application context
8 */
9 typedef struct {
10   DM        da; /* distributed array */
11   Vec       F;  /* right-hand-side of PDE */
12   PetscReal h;  /* mesh spacing */
13 } ApplicationCtx;
14 
15 /* ------------------------------------------------------------------- */
16 /*
17    FormInitialGuess - Computes initial guess.
18 
19    Input/Output Parameter:
20 .  x - the solution vector
21 */
22 PetscErrorCode FormInitialGuess(Vec x) {
23   PetscScalar pfive = .50;
24 
25   PetscFunctionBeginUser;
26   PetscCall(VecSet(x, pfive));
27   PetscFunctionReturn(0);
28 }
29 
30 /* ------------------------------------------------------------------- */
31 /*
32    CpuFunction - Evaluates nonlinear function, F(x) on CPU
33 
34    Input Parameters:
35 .  snes - the SNES context
36 .  x - input vector
37 .  ctx - optional user-defined context, as set by SNESSetFunction()
38 
39    Output Parameter:
40 .  r - function vector
41 
42    Note:
43    The user-defined context can contain any application-specific
44    data needed for the function evaluation.
45 */
46 PetscErrorCode CpuFunction(SNES snes, Vec x, Vec r, void *ctx) {
47   ApplicationCtx *user = (ApplicationCtx *)ctx;
48   DM              da   = user->da;
49   PetscScalar    *X, *R, *F, d;
50   PetscInt        i, M, xs, xm;
51   Vec             xl;
52 
53   PetscFunctionBeginUser;
54   PetscCall(DMGetLocalVector(da, &xl));
55   PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl));
56   PetscCall(DMDAVecGetArray(da, xl, &X));
57   PetscCall(DMDAVecGetArray(da, r, &R));
58   PetscCall(DMDAVecGetArray(da, user->F, &F));
59 
60   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
61   PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
62 
63   if (xs == 0) { /* left boundary */
64     R[0] = X[0];
65     xs++;
66     xm--;
67   }
68   if (xs + xm == M) { /* right boundary */
69     R[xs + xm - 1] = X[xs + xm - 1] - 1.0;
70     xm--;
71   }
72   d = 1.0 / (user->h * user->h);
73   for (i = xs; i < xs + xm; i++) R[i] = d * (X[i - 1] - 2.0 * X[i] + X[i + 1]) + X[i] * X[i] - F[i];
74 
75   PetscCall(DMDAVecRestoreArray(da, xl, &X));
76   PetscCall(DMDAVecRestoreArray(da, r, &R));
77   PetscCall(DMDAVecRestoreArray(da, user->F, &F));
78   PetscCall(DMRestoreLocalVector(da, &xl));
79   PetscFunctionReturn(0);
80 }
81 
82 using DefaultExecutionSpace            = Kokkos::DefaultExecutionSpace;
83 using DefaultMemorySpace               = Kokkos::DefaultExecutionSpace::memory_space;
84 using PetscScalarKokkosOffsetView      = Kokkos::Experimental::OffsetView<PetscScalar *, DefaultMemorySpace>;
85 using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar *, DefaultMemorySpace>;
86 
87 PetscErrorCode KokkosFunction(SNES snes, Vec x, Vec r, void *ctx) {
88   ApplicationCtx                  *user = (ApplicationCtx *)ctx;
89   DM                               da   = user->da;
90   PetscScalar                      d;
91   PetscInt                         M;
92   Vec                              xl;
93   PetscScalarKokkosOffsetView      R;
94   ConstPetscScalarKokkosOffsetView X, F;
95 
96   PetscFunctionBeginUser;
97   PetscCall(DMGetLocalVector(da, &xl));
98   PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl));
99   d = 1.0 / (user->h * user->h);
100   PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
101   PetscCall(DMDAVecGetKokkosOffsetView(da, xl, &X));      /* read only */
102   PetscCall(DMDAVecGetKokkosOffsetViewWrite(da, r, &R));  /* write only */
103   PetscCall(DMDAVecGetKokkosOffsetView(da, user->F, &F)); /* read only */
104   Kokkos::parallel_for(
105     Kokkos::RangePolicy<>(R.begin(0), R.end(0)), KOKKOS_LAMBDA(int i) {
106       if (i == 0) R(0) = X(0);                                                 /* left boundary */
107       else if (i == M - 1) R(i) = X(i) - 1.0;                                  /* right boundary */
108       else R(i) = d * (X(i - 1) - 2.0 * X(i) + X(i + 1)) + X(i) * X(i) - F(i); /* interior */
109     });
110   PetscCall(DMDAVecRestoreKokkosOffsetView(da, xl, &X));
111   PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(da, r, &R));
112   PetscCall(DMDAVecRestoreKokkosOffsetView(da, user->F, &F));
113   PetscCall(DMRestoreLocalVector(da, &xl));
114   PetscFunctionReturn(0);
115 }
116 
117 PetscErrorCode StubFunction(SNES snes, Vec x, Vec r, void *ctx) {
118   ApplicationCtx *user = (ApplicationCtx *)ctx;
119   DM              da   = user->da;
120   Vec             rk;
121   PetscReal       norm = 0;
122 
123   PetscFunctionBeginUser;
124   PetscCall(DMGetGlobalVector(da, &rk));
125   PetscCall(CpuFunction(snes, x, r, ctx));
126   PetscCall(KokkosFunction(snes, x, rk, ctx));
127   PetscCall(VecAXPY(rk, -1.0, r));
128   PetscCall(VecNorm(rk, NORM_2, &norm));
129   PetscCall(DMRestoreGlobalVector(da, &rk));
130   PetscCheck(norm <= 1e-6, PETSC_COMM_SELF, PETSC_ERR_PLIB, "KokkosFunction() different from CpuFunction() with a diff norm = %g", (double)norm);
131   PetscFunctionReturn(0);
132 }
133 /* ------------------------------------------------------------------- */
134 /*
135    FormJacobian - Evaluates Jacobian matrix.
136 
137    Input Parameters:
138 .  snes - the SNES context
139 .  x - input vector
140 .  dummy - optional user-defined context (not used here)
141 
142    Output Parameters:
143 .  jac - Jacobian matrix
144 .  B - optionally different preconditioning matrix
145 .  flag - flag indicating matrix structure
146 */
147 PetscErrorCode FormJacobian(SNES snes, Vec x, Mat jac, Mat B, void *ctx) {
148   ApplicationCtx *user = (ApplicationCtx *)ctx;
149   PetscScalar    *xx, d, A[3];
150   PetscInt        i, j[3], M, xs, xm;
151   DM              da = user->da;
152 
153   PetscFunctionBeginUser;
154   /*
155      Get pointer to vector data
156   */
157   PetscCall(DMDAVecGetArrayRead(da, x, &xx));
158   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
159 
160   /*
161     Get range of locally owned matrix
162   */
163   PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
164 
165   /*
166      Determine starting and ending local indices for interior grid points.
167      Set Jacobian entries for boundary points.
168   */
169 
170   if (xs == 0) { /* left boundary */
171     i    = 0;
172     A[0] = 1.0;
173 
174     PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES));
175     xs++;
176     xm--;
177   }
178   if (xs + xm == M) { /* right boundary */
179     i    = M - 1;
180     A[0] = 1.0;
181     PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES));
182     xm--;
183   }
184 
185   /*
186      Interior grid points
187       - Note that in this case we set all elements for a particular
188         row at once.
189   */
190   d = 1.0 / (user->h * user->h);
191   for (i = xs; i < xs + xm; i++) {
192     j[0] = i - 1;
193     j[1] = i;
194     j[2] = i + 1;
195     A[0] = A[2] = d;
196     A[1]        = -2.0 * d + 2.0 * xx[i];
197     PetscCall(MatSetValues(jac, 1, &i, 3, j, A, INSERT_VALUES));
198   }
199 
200   /*
201      Assemble matrix, using the 2-step process:
202        MatAssemblyBegin(), MatAssemblyEnd().
203      By placing code between these two statements, computations can be
204      done while messages are in transition.
205 
206      Also, restore vector.
207   */
208 
209   PetscCall(MatAssemblyBegin(jac, MAT_FINAL_ASSEMBLY));
210   PetscCall(DMDAVecRestoreArrayRead(da, x, &xx));
211   PetscCall(MatAssemblyEnd(jac, MAT_FINAL_ASSEMBLY));
212 
213   PetscFunctionReturn(0);
214 }
215 
216 int main(int argc, char **argv) {
217   SNES           snes;       /* SNES context */
218   Mat            J;          /* Jacobian matrix */
219   ApplicationCtx ctx;        /* user-defined context */
220   Vec            x, r, U, F; /* vectors */
221   PetscScalar    none = -1.0;
222   PetscInt       its, N = 5, maxit, maxf;
223   PetscReal      abstol, rtol, stol, norm;
224   PetscBool      viewinitial = PETSC_FALSE;
225 
226   PetscFunctionBeginUser;
227   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
228   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL));
229   ctx.h = 1.0 / (N - 1);
230   PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_initial", &viewinitial, NULL));
231 
232   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
233      Create nonlinear solver context
234      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
235   PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
236 
237   /*
238      Create distributed array (DMDA) to manage parallel grid and vectors
239   */
240   PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, N, 1, 1, NULL, &ctx.da));
241   PetscCall(DMSetFromOptions(ctx.da));
242   PetscCall(DMSetUp(ctx.da));
243 
244   /*
245      Extract global and local vectors from DMDA; then duplicate for remaining
246      vectors that are the same types
247   */
248   PetscCall(DMCreateGlobalVector(ctx.da, &x));
249   PetscCall(PetscObjectSetName((PetscObject)x, "Approximate Solution"));
250   PetscCall(VecDuplicate(x, &r));
251   PetscCall(VecDuplicate(x, &F));
252   ctx.F = F;
253   PetscCall(PetscObjectSetName((PetscObject)F, "Forcing function"));
254   PetscCall(VecDuplicate(x, &U));
255   PetscCall(PetscObjectSetName((PetscObject)U, "Exact Solution"));
256 
257   /*
258      Set function evaluation routine and vector.  Whenever the nonlinear
259      solver needs to compute the nonlinear function, it will call this
260      routine.
261       - Note that the final routine argument is the user-defined
262         context that provides application-specific data for the
263         function evaluation routine.
264 
265      At the beginning, one can use a stub function that checks the Kokkos version
266      against the CPU version to quickly expose errors.
267      PetscCall(SNESSetFunction(snes,r,StubFunction,&ctx));
268   */
269   PetscCall(SNESSetFunction(snes, r, KokkosFunction, &ctx));
270 
271   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
272      Create matrix data structure; set Jacobian evaluation routine
273      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
274   PetscCall(DMCreateMatrix(ctx.da, &J));
275 
276   /*
277      Set Jacobian matrix data structure and default Jacobian evaluation
278      routine.  Whenever the nonlinear solver needs to compute the
279      Jacobian matrix, it will call this routine.
280       - Note that the final routine argument is the user-defined
281         context that provides application-specific data for the
282         Jacobian evaluation routine.
283   */
284   PetscCall(SNESSetJacobian(snes, J, J, FormJacobian, &ctx));
285   PetscCall(SNESSetFromOptions(snes));
286   PetscCall(SNESGetTolerances(snes, &abstol, &rtol, &stol, &maxit, &maxf));
287   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "atol=%g, rtol=%g, stol=%g, maxit=%" PetscInt_FMT ", maxf=%" PetscInt_FMT "\n", (double)abstol, (double)rtol, (double)stol, maxit, maxf));
288 
289   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
290      Initialize application:
291      Store forcing function of PDE and exact solution
292    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
293   {
294     PetscScalarKokkosOffsetView FF, UU;
295     PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, F, &FF));
296     PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, U, &UU));
297     Kokkos::parallel_for(
298       Kokkos::RangePolicy<>(FF.begin(0), FF.end(0)), KOKKOS_LAMBDA(int i) {
299         PetscReal xp = i * ctx.h;
300         FF(i)        = 6.0 * xp + pow(xp + 1.e-12, 6.0); /* +1.e-12 is to prevent 0^6 */
301         UU(i)        = xp * xp * xp;
302       });
303     PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, F, &FF));
304     PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, U, &UU));
305   }
306 
307   if (viewinitial) {
308     PetscCall(VecView(U, NULL));
309     PetscCall(VecView(F, NULL));
310   }
311 
312   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
313      Evaluate initial guess; then solve nonlinear system
314    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
315 
316   /*
317      Note: The user should initialize the vector, x, with the initial guess
318      for the nonlinear solver prior to calling SNESSolve().  In particular,
319      to employ an initial guess of zero, the user should explicitly set
320      this vector to zero by calling VecSet().
321   */
322   PetscCall(FormInitialGuess(x));
323   PetscCall(SNESSolve(snes, NULL, x));
324   PetscCall(SNESGetIterationNumber(snes, &its));
325   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of SNES iterations = %" PetscInt_FMT "\n", its));
326 
327   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
328      Check solution and clean up
329    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
330   /*
331      Check the error
332   */
333   PetscCall(VecAXPY(x, none, U));
334   PetscCall(VecNorm(x, NORM_2, &norm));
335   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)norm, its));
336 
337   /*
338      Free work space.  All PETSc objects should be destroyed when they
339      are no longer needed.
340   */
341   PetscCall(VecDestroy(&x));
342   PetscCall(VecDestroy(&r));
343   PetscCall(VecDestroy(&U));
344   PetscCall(VecDestroy(&F));
345   PetscCall(MatDestroy(&J));
346   PetscCall(SNESDestroy(&snes));
347   PetscCall(DMDestroy(&ctx.da));
348   PetscCall(PetscFinalize());
349   return 0;
350 }
351 
352 /*TEST
353 
354    build:
355      requires: kokkos_kernels
356 
357    test:
358      requires: kokkos_kernels !complex !single
359      nsize: 2
360      args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor
361      output_file: output/ex3k_1.out
362 
363 TEST*/
364