xref: /petsc/src/snes/tutorials/ex3k.kokkos.cxx (revision 21e3ffae2f3b73c0bd738cf6d0a809700fc04bb0)
1 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n";
2 
3 #include <petscdmda_kokkos.hpp>
4 #include <petscsnes.h>
5 
6 /*
7    User-defined application context
8 */
9 typedef struct {
10   DM        da; /* distributed array */
11   Vec       F;  /* right-hand-side of PDE */
12   PetscReal h;  /* mesh spacing */
13 } ApplicationCtx;
14 
15 /* ------------------------------------------------------------------- */
16 /*
17    FormInitialGuess - Computes initial guess.
18 
19    Input/Output Parameter:
20 .  x - the solution vector
21 */
22 PetscErrorCode FormInitialGuess(Vec x)
23 {
24   PetscScalar pfive = .50;
25 
26   PetscFunctionBeginUser;
27   PetscCall(VecSet(x, pfive));
28   PetscFunctionReturn(PETSC_SUCCESS);
29 }
30 
31 /* ------------------------------------------------------------------- */
32 /*
33    CpuFunction - Evaluates nonlinear function, F(x) on CPU
34 
35    Input Parameters:
36 .  snes - the SNES context
37 .  x - input vector
38 .  ctx - optional user-defined context, as set by SNESSetFunction()
39 
40    Output Parameter:
41 .  r - function vector
42 
43    Note:
44    The user-defined context can contain any application-specific
45    data needed for the function evaluation.
46 */
47 PetscErrorCode CpuFunction(SNES snes, Vec x, Vec r, void *ctx)
48 {
49   ApplicationCtx *user = (ApplicationCtx *)ctx;
50   DM              da   = user->da;
51   PetscScalar    *X, *R, *F, d;
52   PetscInt        i, M, xs, xm;
53   Vec             xl;
54 
55   PetscFunctionBeginUser;
56   PetscCall(DMGetLocalVector(da, &xl));
57   PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl));
58   PetscCall(DMDAVecGetArray(da, xl, &X));
59   PetscCall(DMDAVecGetArray(da, r, &R));
60   PetscCall(DMDAVecGetArray(da, user->F, &F));
61 
62   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
63   PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
64 
65   if (xs == 0) { /* left boundary */
66     R[0] = X[0];
67     xs++;
68     xm--;
69   }
70   if (xs + xm == M) { /* right boundary */
71     R[xs + xm - 1] = X[xs + xm - 1] - 1.0;
72     xm--;
73   }
74   d = 1.0 / (user->h * user->h);
75   for (i = xs; i < xs + xm; i++) R[i] = d * (X[i - 1] - 2.0 * X[i] + X[i + 1]) + X[i] * X[i] - F[i];
76 
77   PetscCall(DMDAVecRestoreArray(da, xl, &X));
78   PetscCall(DMDAVecRestoreArray(da, r, &R));
79   PetscCall(DMDAVecRestoreArray(da, user->F, &F));
80   PetscCall(DMRestoreLocalVector(da, &xl));
81   PetscFunctionReturn(PETSC_SUCCESS);
82 }
83 
84 using DefaultExecutionSpace            = Kokkos::DefaultExecutionSpace;
85 using DefaultMemorySpace               = Kokkos::DefaultExecutionSpace::memory_space;
86 using PetscScalarKokkosOffsetView      = Kokkos::Experimental::OffsetView<PetscScalar *, DefaultMemorySpace>;
87 using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar *, DefaultMemorySpace>;
88 
89 PetscErrorCode KokkosFunction(SNES snes, Vec x, Vec r, void *ctx)
90 {
91   ApplicationCtx                  *user = (ApplicationCtx *)ctx;
92   DM                               da   = user->da;
93   PetscScalar                      d;
94   PetscInt                         M;
95   Vec                              xl;
96   PetscScalarKokkosOffsetView      R;
97   ConstPetscScalarKokkosOffsetView X, F;
98 
99   PetscFunctionBeginUser;
100   PetscCall(DMGetLocalVector(da, &xl));
101   PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl));
102   d = 1.0 / (user->h * user->h);
103   PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
104   PetscCall(DMDAVecGetKokkosOffsetView(da, xl, &X));      /* read only */
105   PetscCall(DMDAVecGetKokkosOffsetViewWrite(da, r, &R));  /* write only */
106   PetscCall(DMDAVecGetKokkosOffsetView(da, user->F, &F)); /* read only */
107   Kokkos::parallel_for(
108     Kokkos::RangePolicy<>(R.begin(0), R.end(0)), KOKKOS_LAMBDA(int i) {
109       if (i == 0) R(0) = X(0);                                                 /* left boundary */
110       else if (i == M - 1) R(i) = X(i) - 1.0;                                  /* right boundary */
111       else R(i) = d * (X(i - 1) - 2.0 * X(i) + X(i + 1)) + X(i) * X(i) - F(i); /* interior */
112     });
113   PetscCall(DMDAVecRestoreKokkosOffsetView(da, xl, &X));
114   PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(da, r, &R));
115   PetscCall(DMDAVecRestoreKokkosOffsetView(da, user->F, &F));
116   PetscCall(DMRestoreLocalVector(da, &xl));
117   PetscFunctionReturn(PETSC_SUCCESS);
118 }
119 
120 PetscErrorCode StubFunction(SNES snes, Vec x, Vec r, void *ctx)
121 {
122   ApplicationCtx *user = (ApplicationCtx *)ctx;
123   DM              da   = user->da;
124   Vec             rk;
125   PetscReal       norm = 0;
126 
127   PetscFunctionBeginUser;
128   PetscCall(DMGetGlobalVector(da, &rk));
129   PetscCall(CpuFunction(snes, x, r, ctx));
130   PetscCall(KokkosFunction(snes, x, rk, ctx));
131   PetscCall(VecAXPY(rk, -1.0, r));
132   PetscCall(VecNorm(rk, NORM_2, &norm));
133   PetscCall(DMRestoreGlobalVector(da, &rk));
134   PetscCheck(norm <= 1e-6, PETSC_COMM_SELF, PETSC_ERR_PLIB, "KokkosFunction() different from CpuFunction() with a diff norm = %g", (double)norm);
135   PetscFunctionReturn(PETSC_SUCCESS);
136 }
137 /* ------------------------------------------------------------------- */
138 /*
139    FormJacobian - Evaluates Jacobian matrix.
140 
141    Input Parameters:
142 .  snes - the SNES context
143 .  x - input vector
144 .  dummy - optional user-defined context (not used here)
145 
146    Output Parameters:
147 .  jac - Jacobian matrix
148 .  B - optionally different preconditioning matrix
149 .  flag - flag indicating matrix structure
150 */
151 PetscErrorCode FormJacobian(SNES snes, Vec x, Mat jac, Mat B, void *ctx)
152 {
153   ApplicationCtx *user = (ApplicationCtx *)ctx;
154   PetscScalar    *xx, d, A[3];
155   PetscInt        i, j[3], M, xs, xm;
156   DM              da = user->da;
157 
158   PetscFunctionBeginUser;
159   /*
160      Get pointer to vector data
161   */
162   PetscCall(DMDAVecGetArrayRead(da, x, &xx));
163   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
164 
165   /*
166     Get range of locally owned matrix
167   */
168   PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL));
169 
170   /*
171      Determine starting and ending local indices for interior grid points.
172      Set Jacobian entries for boundary points.
173   */
174 
175   if (xs == 0) { /* left boundary */
176     i    = 0;
177     A[0] = 1.0;
178 
179     PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES));
180     xs++;
181     xm--;
182   }
183   if (xs + xm == M) { /* right boundary */
184     i    = M - 1;
185     A[0] = 1.0;
186     PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES));
187     xm--;
188   }
189 
190   /*
191      Interior grid points
192       - Note that in this case we set all elements for a particular
193         row at once.
194   */
195   d = 1.0 / (user->h * user->h);
196   for (i = xs; i < xs + xm; i++) {
197     j[0] = i - 1;
198     j[1] = i;
199     j[2] = i + 1;
200     A[0] = A[2] = d;
201     A[1]        = -2.0 * d + 2.0 * xx[i];
202     PetscCall(MatSetValues(jac, 1, &i, 3, j, A, INSERT_VALUES));
203   }
204 
205   /*
206      Assemble matrix, using the 2-step process:
207        MatAssemblyBegin(), MatAssemblyEnd().
208      By placing code between these two statements, computations can be
209      done while messages are in transition.
210 
211      Also, restore vector.
212   */
213 
214   PetscCall(MatAssemblyBegin(jac, MAT_FINAL_ASSEMBLY));
215   PetscCall(DMDAVecRestoreArrayRead(da, x, &xx));
216   PetscCall(MatAssemblyEnd(jac, MAT_FINAL_ASSEMBLY));
217 
218   PetscFunctionReturn(PETSC_SUCCESS);
219 }
220 
221 int main(int argc, char **argv)
222 {
223   SNES           snes;       /* SNES context */
224   Mat            J;          /* Jacobian matrix */
225   ApplicationCtx ctx;        /* user-defined context */
226   Vec            x, r, U, F; /* vectors */
227   PetscScalar    none = -1.0;
228   PetscInt       its, N = 5, maxit, maxf;
229   PetscReal      abstol, rtol, stol, norm;
230   PetscBool      viewinitial = PETSC_FALSE;
231 
232   PetscFunctionBeginUser;
233   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
234   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL));
235   ctx.h = 1.0 / (N - 1);
236   PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_initial", &viewinitial, NULL));
237 
238   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
239      Create nonlinear solver context
240      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
241   PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
242 
243   /*
244      Create distributed array (DMDA) to manage parallel grid and vectors
245   */
246   PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, N, 1, 1, NULL, &ctx.da));
247   PetscCall(DMSetFromOptions(ctx.da));
248   PetscCall(DMSetUp(ctx.da));
249 
250   /*
251      Extract global and local vectors from DMDA; then duplicate for remaining
252      vectors that are the same types
253   */
254   PetscCall(DMCreateGlobalVector(ctx.da, &x));
255   PetscCall(PetscObjectSetName((PetscObject)x, "Approximate Solution"));
256   PetscCall(VecDuplicate(x, &r));
257   PetscCall(VecDuplicate(x, &F));
258   ctx.F = F;
259   PetscCall(PetscObjectSetName((PetscObject)F, "Forcing function"));
260   PetscCall(VecDuplicate(x, &U));
261   PetscCall(PetscObjectSetName((PetscObject)U, "Exact Solution"));
262 
263   /*
264      Set function evaluation routine and vector.  Whenever the nonlinear
265      solver needs to compute the nonlinear function, it will call this
266      routine.
267       - Note that the final routine argument is the user-defined
268         context that provides application-specific data for the
269         function evaluation routine.
270 
271      At the beginning, one can use a stub function that checks the Kokkos version
272      against the CPU version to quickly expose errors.
273      PetscCall(SNESSetFunction(snes,r,StubFunction,&ctx));
274   */
275   PetscCall(SNESSetFunction(snes, r, KokkosFunction, &ctx));
276 
277   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
278      Create matrix data structure; set Jacobian evaluation routine
279      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
280   PetscCall(DMCreateMatrix(ctx.da, &J));
281 
282   /*
283      Set Jacobian matrix data structure and default Jacobian evaluation
284      routine.  Whenever the nonlinear solver needs to compute the
285      Jacobian matrix, it will call this routine.
286       - Note that the final routine argument is the user-defined
287         context that provides application-specific data for the
288         Jacobian evaluation routine.
289   */
290   PetscCall(SNESSetJacobian(snes, J, J, FormJacobian, &ctx));
291   PetscCall(SNESSetFromOptions(snes));
292   PetscCall(SNESGetTolerances(snes, &abstol, &rtol, &stol, &maxit, &maxf));
293   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "atol=%g, rtol=%g, stol=%g, maxit=%" PetscInt_FMT ", maxf=%" PetscInt_FMT "\n", (double)abstol, (double)rtol, (double)stol, maxit, maxf));
294 
295   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
296      Initialize application:
297      Store forcing function of PDE and exact solution
298    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
299   {
300     PetscScalarKokkosOffsetView FF, UU;
301     PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, F, &FF));
302     PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, U, &UU));
303     Kokkos::parallel_for(
304       Kokkos::RangePolicy<>(FF.begin(0), FF.end(0)), KOKKOS_LAMBDA(int i) {
305         PetscReal xp = i * ctx.h;
306         FF(i)        = 6.0 * xp + pow(xp + 1.e-12, 6.0); /* +1.e-12 is to prevent 0^6 */
307         UU(i)        = xp * xp * xp;
308       });
309     PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, F, &FF));
310     PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, U, &UU));
311   }
312 
313   if (viewinitial) {
314     PetscCall(VecView(U, NULL));
315     PetscCall(VecView(F, NULL));
316   }
317 
318   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
319      Evaluate initial guess; then solve nonlinear system
320    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
321 
322   /*
323      Note: The user should initialize the vector, x, with the initial guess
324      for the nonlinear solver prior to calling SNESSolve().  In particular,
325      to employ an initial guess of zero, the user should explicitly set
326      this vector to zero by calling VecSet().
327   */
328   PetscCall(FormInitialGuess(x));
329   PetscCall(SNESSolve(snes, NULL, x));
330   PetscCall(SNESGetIterationNumber(snes, &its));
331   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of SNES iterations = %" PetscInt_FMT "\n", its));
332 
333   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
334      Check solution and clean up
335    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
336   /*
337      Check the error
338   */
339   PetscCall(VecAXPY(x, none, U));
340   PetscCall(VecNorm(x, NORM_2, &norm));
341   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)norm, its));
342 
343   /*
344      Free work space.  All PETSc objects should be destroyed when they
345      are no longer needed.
346   */
347   PetscCall(VecDestroy(&x));
348   PetscCall(VecDestroy(&r));
349   PetscCall(VecDestroy(&U));
350   PetscCall(VecDestroy(&F));
351   PetscCall(MatDestroy(&J));
352   PetscCall(SNESDestroy(&snes));
353   PetscCall(DMDestroy(&ctx.da));
354   PetscCall(PetscFinalize());
355   return 0;
356 }
357 
358 /*TEST
359 
360    build:
361      requires: kokkos_kernels
362 
363    test:
364      requires: kokkos_kernels !complex !single
365      nsize: 2
366      args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor
367      output_file: output/ex3k_1.out
368 
369 TEST*/
370