xref: /petsc/src/snes/tutorials/ex3k.kokkos.cxx (revision 5cb80ecd61fc93edbe681011dc160a66bbda2b5d)
1 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n";
2 
3 #include <petscdmda_kokkos.hpp>
4 #include <petscsnes.h>
5 
6 /*
7    User-defined application context
8 */
9 typedef struct
10 {
11   DM        da; /* distributed array */
12   Vec       F;  /* right-hand-side of PDE */
13   PetscReal h;  /* mesh spacing */
14 } ApplicationCtx;
15 
16 /* ------------------------------------------------------------------- */
17 /*
18    FormInitialGuess - Computes initial guess.
19 
20    Input/Output Parameter:
21 .  x - the solution vector
22 */
23 PetscErrorCode FormInitialGuess(Vec x)
24 {
25   PetscScalar pfive = .50;
26 
27   PetscFunctionBeginUser;
28   PetscCall(VecSet(x, pfive));
29   PetscFunctionReturn(0);
30 }
31 
32 /* ------------------------------------------------------------------- */
33 /*
34    CpuFunction - Evaluates nonlinear function, F(x) on CPU
35 
36    Input Parameters:
37 .  snes - the SNES context
38 .  x - input vector
39 .  ctx - optional user-defined context, as set by SNESSetFunction()
40 
41    Output Parameter:
42 .  r - function vector
43 
44    Note:
45    The user-defined context can contain any application-specific
46    data needed for the function evaluation.
47 */
48 PetscErrorCode CpuFunction(SNES snes, Vec x, Vec r, void *ctx)
49 {
50   ApplicationCtx *user = (ApplicationCtx *)ctx;
51   DM              da   = user->da;
52   PetscScalar    *X, *R, *F, d;
53   PetscInt        i, M, xs, xm;
54   Vec             xl;
55 
56   PetscFunctionBeginUser;
57   PetscCall(DMGetLocalVector(da, &xl));
58   PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl));
59   PetscCall(DMDAVecGetArray(da, xl, &X));
60   PetscCall(DMDAVecGetArray(da, r, &R));
61   PetscCall(DMDAVecGetArray(da, user->F, &F));
62 
63   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
64   PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
65 
66   if (xs == 0) { /* left boundary */
67     R[0] = X[0];
68     xs++;
69     xm--;
70   }
71   if (xs + xm == M) { /* right boundary */
72     R[xs + xm - 1] = X[xs + xm - 1] - 1.0;
73     xm--;
74   }
75   d = 1.0 / (user->h * user->h);
76   for (i = xs; i < xs + xm; i++) R[i] = d * (X[i - 1] - 2.0 * X[i] + X[i + 1]) + X[i] * X[i] - F[i];
77 
78   PetscCall(DMDAVecRestoreArray(da, xl, &X));
79   PetscCall(DMDAVecRestoreArray(da, r, &R));
80   PetscCall(DMDAVecRestoreArray(da, user->F, &F));
81   PetscCall(DMRestoreLocalVector(da, &xl));
82   PetscFunctionReturn(0);
83 }
84 
85 using DefaultExecutionSpace            = Kokkos::DefaultExecutionSpace;
86 using DefaultMemorySpace               = Kokkos::DefaultExecutionSpace::memory_space;
87 using PetscScalarKokkosOffsetView      = Kokkos::Experimental::OffsetView<PetscScalar *, DefaultMemorySpace>;
88 using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar *, DefaultMemorySpace>;
89 
90 PetscErrorCode KokkosFunction(SNES snes, Vec x, Vec r, void *ctx)
91 {
92   ApplicationCtx                  *user = (ApplicationCtx *)ctx;
93   DM                               da   = user->da;
94   PetscScalar                      d;
95   PetscInt                         M;
96   Vec                              xl;
97   PetscScalarKokkosOffsetView      R;
98   ConstPetscScalarKokkosOffsetView X, F;
99 
100   PetscFunctionBeginUser;
101   PetscCall(DMGetLocalVector(da, &xl));
102   PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl));
103   d = 1.0 / (user->h * user->h);
104   PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
105   PetscCall(DMDAVecGetKokkosOffsetView(da, xl, &X));      /* read only */
106   PetscCall(DMDAVecGetKokkosOffsetViewWrite(da, r, &R));  /* write only */
107   PetscCall(DMDAVecGetKokkosOffsetView(da, user->F, &F)); /* read only */
108   Kokkos::parallel_for(
109     Kokkos::RangePolicy<>(R.begin(0), R.end(0)), KOKKOS_LAMBDA(int i) {
110       if (i == 0) R(0) = X(0);                                                 /* left boundary */
111       else if (i == M - 1) R(i) = X(i) - 1.0;                                  /* right boundary */
112       else R(i) = d * (X(i - 1) - 2.0 * X(i) + X(i + 1)) + X(i) * X(i) - F(i); /* interior */
113     });
114   PetscCall(DMDAVecRestoreKokkosOffsetView(da, xl, &X));
115   PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(da, r, &R));
116   PetscCall(DMDAVecRestoreKokkosOffsetView(da, user->F, &F));
117   PetscCall(DMRestoreLocalVector(da, &xl));
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode StubFunction(SNES snes, Vec x, Vec r, void *ctx)
122 {
123   ApplicationCtx *user = (ApplicationCtx *)ctx;
124   DM              da   = user->da;
125   Vec             rk;
126   PetscReal       norm = 0;
127 
128   PetscFunctionBeginUser;
129   PetscCall(DMGetGlobalVector(da, &rk));
130   PetscCall(CpuFunction(snes, x, r, ctx));
131   PetscCall(KokkosFunction(snes, x, rk, ctx));
132   PetscCall(VecAXPY(rk, -1.0, r));
133   PetscCall(VecNorm(rk, NORM_2, &norm));
134   PetscCall(DMRestoreGlobalVector(da, &rk));
135   PetscCheck(norm <= 1e-6, PETSC_COMM_SELF, PETSC_ERR_PLIB, "KokkosFunction() different from CpuFunction() with a diff norm = %g", (double)norm);
136   PetscFunctionReturn(0);
137 }
138 /* ------------------------------------------------------------------- */
139 /*
140    FormJacobian - Evaluates Jacobian matrix.
141 
142    Input Parameters:
143 .  snes - the SNES context
144 .  x - input vector
145 .  dummy - optional user-defined context (not used here)
146 
147    Output Parameters:
148 .  jac - Jacobian matrix
149 .  B - optionally different preconditioning matrix
150 .  flag - flag indicating matrix structure
151 */
152 PetscErrorCode FormJacobian(SNES snes, Vec x, Mat jac, Mat B, void *ctx)
153 {
154   ApplicationCtx *user = (ApplicationCtx *)ctx;
155   PetscScalar    *xx, d, A[3];
156   PetscInt        i, j[3], M, xs, xm;
157   DM              da = user->da;
158 
159   PetscFunctionBeginUser;
160   /*
161      Get pointer to vector data
162   */
163   PetscCall(DMDAVecGetArrayRead(da, x, &xx));
164   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
165 
166   /*
167     Get range of locally owned matrix
168   */
169   PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
170 
171   /*
172      Determine starting and ending local indices for interior grid points.
173      Set Jacobian entries for boundary points.
174   */
175 
176   if (xs == 0) { /* left boundary */
177     i    = 0;
178     A[0] = 1.0;
179 
180     PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES));
181     xs++;
182     xm--;
183   }
184   if (xs + xm == M) { /* right boundary */
185     i    = M - 1;
186     A[0] = 1.0;
187     PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES));
188     xm--;
189   }
190 
191   /*
192      Interior grid points
193       - Note that in this case we set all elements for a particular
194         row at once.
195   */
196   d = 1.0 / (user->h * user->h);
197   for (i = xs; i < xs + xm; i++) {
198     j[0] = i - 1;
199     j[1] = i;
200     j[2] = i + 1;
201     A[0] = A[2] = d;
202     A[1]        = -2.0 * d + 2.0 * xx[i];
203     PetscCall(MatSetValues(jac, 1, &i, 3, j, A, INSERT_VALUES));
204   }
205 
206   /*
207      Assemble matrix, using the 2-step process:
208        MatAssemblyBegin(), MatAssemblyEnd().
209      By placing code between these two statements, computations can be
210      done while messages are in transition.
211 
212      Also, restore vector.
213   */
214 
215   PetscCall(MatAssemblyBegin(jac, MAT_FINAL_ASSEMBLY));
216   PetscCall(DMDAVecRestoreArrayRead(da, x, &xx));
217   PetscCall(MatAssemblyEnd(jac, MAT_FINAL_ASSEMBLY));
218 
219   PetscFunctionReturn(0);
220 }
221 
222 int main(int argc, char **argv)
223 {
224   SNES           snes;       /* SNES context */
225   Mat            J;          /* Jacobian matrix */
226   ApplicationCtx ctx;        /* user-defined context */
227   Vec            x, r, U, F; /* vectors */
228   PetscScalar    none = -1.0;
229   PetscInt       its, N = 5, maxit, maxf;
230   PetscReal      abstol, rtol, stol, norm;
231   PetscBool      viewinitial = PETSC_FALSE;
232 
233   PetscFunctionBeginUser;
234   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
235   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL));
236   ctx.h = 1.0 / (N - 1);
237   PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_initial", &viewinitial, NULL));
238 
239   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
240      Create nonlinear solver context
241      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
242   PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
243 
244   /*
245      Create distributed array (DMDA) to manage parallel grid and vectors
246   */
247   PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, N, 1, 1, NULL, &ctx.da));
248   PetscCall(DMSetFromOptions(ctx.da));
249   PetscCall(DMSetUp(ctx.da));
250 
251   /*
252      Extract global and local vectors from DMDA; then duplicate for remaining
253      vectors that are the same types
254   */
255   PetscCall(DMCreateGlobalVector(ctx.da, &x));
256   PetscCall(PetscObjectSetName((PetscObject)x, "Approximate Solution"));
257   PetscCall(VecDuplicate(x, &r));
258   PetscCall(VecDuplicate(x, &F));
259   ctx.F = F;
260   PetscCall(PetscObjectSetName((PetscObject)F, "Forcing function"));
261   PetscCall(VecDuplicate(x, &U));
262   PetscCall(PetscObjectSetName((PetscObject)U, "Exact Solution"));
263 
264   /*
265      Set function evaluation routine and vector.  Whenever the nonlinear
266      solver needs to compute the nonlinear function, it will call this
267      routine.
268       - Note that the final routine argument is the user-defined
269         context that provides application-specific data for the
270         function evaluation routine.
271 
272      At the beginning, one can use a stub function that checks the Kokkos version
273      against the CPU version to quickly expose errors.
274      PetscCall(SNESSetFunction(snes,r,StubFunction,&ctx));
275   */
276   PetscCall(SNESSetFunction(snes, r, KokkosFunction, &ctx));
277 
278   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
279      Create matrix data structure; set Jacobian evaluation routine
280      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
281   PetscCall(DMCreateMatrix(ctx.da, &J));
282 
283   /*
284      Set Jacobian matrix data structure and default Jacobian evaluation
285      routine.  Whenever the nonlinear solver needs to compute the
286      Jacobian matrix, it will call this routine.
287       - Note that the final routine argument is the user-defined
288         context that provides application-specific data for the
289         Jacobian evaluation routine.
290   */
291   PetscCall(SNESSetJacobian(snes, J, J, FormJacobian, &ctx));
292   PetscCall(SNESSetFromOptions(snes));
293   PetscCall(SNESGetTolerances(snes, &abstol, &rtol, &stol, &maxit, &maxf));
294   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "atol=%g, rtol=%g, stol=%g, maxit=%" PetscInt_FMT ", maxf=%" PetscInt_FMT "\n", (double)abstol, (double)rtol, (double)stol, maxit, maxf));
295 
296   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
297      Initialize application:
298      Store forcing function of PDE and exact solution
299    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
300   {
301     PetscScalarKokkosOffsetView FF, UU;
302     PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, F, &FF));
303     PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, U, &UU));
304     Kokkos::parallel_for(
305       Kokkos::RangePolicy<>(FF.begin(0), FF.end(0)), KOKKOS_LAMBDA(int i) {
306         PetscReal xp = i * ctx.h;
307         FF(i)        = 6.0 * xp + pow(xp + 1.e-12, 6.0); /* +1.e-12 is to prevent 0^6 */
308         UU(i)        = xp * xp * xp;
309       });
310     PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, F, &FF));
311     PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, U, &UU));
312   }
313 
314   if (viewinitial) {
315     PetscCall(VecView(U, NULL));
316     PetscCall(VecView(F, NULL));
317   }
318 
319   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
320      Evaluate initial guess; then solve nonlinear system
321    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
322 
323   /*
324      Note: The user should initialize the vector, x, with the initial guess
325      for the nonlinear solver prior to calling SNESSolve().  In particular,
326      to employ an initial guess of zero, the user should explicitly set
327      this vector to zero by calling VecSet().
328   */
329   PetscCall(FormInitialGuess(x));
330   PetscCall(SNESSolve(snes, NULL, x));
331   PetscCall(SNESGetIterationNumber(snes, &its));
332   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of SNES iterations = %" PetscInt_FMT "\n", its));
333 
334   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
335      Check solution and clean up
336    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
337   /*
338      Check the error
339   */
340   PetscCall(VecAXPY(x, none, U));
341   PetscCall(VecNorm(x, NORM_2, &norm));
342   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)norm, its));
343 
344   /*
345      Free work space.  All PETSc objects should be destroyed when they
346      are no longer needed.
347   */
348   PetscCall(VecDestroy(&x));
349   PetscCall(VecDestroy(&r));
350   PetscCall(VecDestroy(&U));
351   PetscCall(VecDestroy(&F));
352   PetscCall(MatDestroy(&J));
353   PetscCall(SNESDestroy(&snes));
354   PetscCall(DMDestroy(&ctx.da));
355   PetscCall(PetscFinalize());
356   return 0;
357 }
358 
359 /*TEST
360 
361    build:
362      requires: kokkos_kernels
363 
364    test:
365      requires: kokkos_kernels !complex !single
366      nsize: 2
367      args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor
368      output_file: output/ex3k_1.out
369 
370 TEST*/
371