xref: /petsc/src/snes/tutorials/ex3k.kokkos.cxx (revision 40badf4fbc550ac1f60bd080eaff6de6d55b946d)
1 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n";
2 
3 #include <petscdmda_kokkos.hpp>
4 #include <petscsnes.h>
5 
6 /*
7    User-defined application context
8 */
9 typedef struct {
10   DM          da;      /* distributed array */
11   Vec         F;       /* right-hand-side of PDE */
12   PetscReal   h;       /* mesh spacing */
13 } ApplicationCtx;
14 
15 /* ------------------------------------------------------------------- */
16 /*
17    FormInitialGuess - Computes initial guess.
18 
19    Input/Output Parameter:
20 .  x - the solution vector
21 */
22 PetscErrorCode FormInitialGuess(Vec x)
23 {
24   PetscScalar    pfive = .50;
25 
26   PetscFunctionBeginUser;
27   CHKERRQ(VecSet(x,pfive));
28   PetscFunctionReturn(0);
29 }
30 
31 /* ------------------------------------------------------------------- */
32 /*
33    CpuFunction - Evaluates nonlinear function, F(x) on CPU
34 
35    Input Parameters:
36 .  snes - the SNES context
37 .  x - input vector
38 .  ctx - optional user-defined context, as set by SNESSetFunction()
39 
40    Output Parameter:
41 .  r - function vector
42 
43    Note:
44    The user-defined context can contain any application-specific
45    data needed for the function evaluation.
46 */
47 PetscErrorCode CpuFunction(SNES snes,Vec x,Vec r,void *ctx)
48 {
49   ApplicationCtx *user = (ApplicationCtx*) ctx;
50   DM             da    = user->da;
51   PetscScalar    *X,*R,*F,d;
52   PetscInt       i,M,xs,xm;
53   Vec            xl;
54 
55   PetscFunctionBeginUser;
56   CHKERRQ(DMGetLocalVector(da,&xl));
57   CHKERRQ(DMGlobalToLocal(da,x,INSERT_VALUES,xl));
58   CHKERRQ(DMDAVecGetArray(da,xl,&X));
59   CHKERRQ(DMDAVecGetArray(da,r,&R));
60   CHKERRQ(DMDAVecGetArray(da,user->F,&F));
61 
62   CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL));
63   CHKERRQ(DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL));
64 
65   if (xs == 0) { /* left boundary */
66     R[0] = X[0];
67     xs++;xm--;
68   }
69   if (xs+xm == M) {  /* right boundary */
70     R[xs+xm-1] = X[xs+xm-1] - 1.0;
71     xm--;
72   }
73   d = 1.0/(user->h*user->h);
74   for (i=xs; i<xs+xm; i++) R[i] = d*(X[i-1] - 2.0*X[i] + X[i+1]) + X[i]*X[i] - F[i];
75 
76   CHKERRQ(DMDAVecRestoreArray(da,xl,&X));
77   CHKERRQ(DMDAVecRestoreArray(da,r,&R));
78   CHKERRQ(DMDAVecRestoreArray(da,user->F,&F));
79   CHKERRQ(DMRestoreLocalVector(da,&xl));
80   PetscFunctionReturn(0);
81 }
82 
83 using DefaultExecutionSpace             = Kokkos::DefaultExecutionSpace;
84 using DefaultMemorySpace                = Kokkos::DefaultExecutionSpace::memory_space;
85 using PetscScalarKokkosOffsetView       = Kokkos::Experimental::OffsetView<PetscScalar*,DefaultMemorySpace>;
86 using ConstPetscScalarKokkosOffsetView  = Kokkos::Experimental::OffsetView<const PetscScalar*,DefaultMemorySpace>;
87 
88 PetscErrorCode KokkosFunction(SNES snes,Vec x,Vec r,void *ctx)
89 {
90   ApplicationCtx                       *user = (ApplicationCtx*) ctx;
91   DM                                   da = user->da;
92   PetscScalar                          d;
93   PetscInt                             M;
94   Vec                                  xl;
95   PetscScalarKokkosOffsetView          R;
96   ConstPetscScalarKokkosOffsetView     X,F;
97 
98   PetscFunctionBeginUser;
99   CHKERRQ(DMGetLocalVector(da,&xl));
100   CHKERRQ(DMGlobalToLocal(da,x,INSERT_VALUES,xl));
101   d    = 1.0/(user->h*user->h);
102   CHKERRQ(DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL));
103   CHKERRQ(DMDAVecGetKokkosOffsetView(da,xl,&X)); /* read only */
104   CHKERRQ(DMDAVecGetKokkosOffsetViewWrite(da,r,&R)); /* write only */
105   CHKERRQ(DMDAVecGetKokkosOffsetView(da,user->F,&F)); /* read only */
106   Kokkos:: parallel_for (Kokkos::RangePolicy<>(R.begin(0),R.end(0)),KOKKOS_LAMBDA (int i) {
107     if (i == 0)        R(0) = X(0);        /* left boundary */
108     else if (i == M-1) R(i) = X(i) - 1.0;  /* right boundary */
109     else               R(i) = d*(X(i-1) - 2.0*X(i) + X(i+1)) + X(i)*X(i) - F(i); /* interior */
110   });
111   CHKERRQ(DMDAVecRestoreKokkosOffsetView(da,xl,&X));
112   CHKERRQ(DMDAVecRestoreKokkosOffsetViewWrite(da,r,&R));
113   CHKERRQ(DMDAVecRestoreKokkosOffsetView(da,user->F,&F));
114   CHKERRQ(DMRestoreLocalVector(da,&xl));
115   PetscFunctionReturn(0);
116 }
117 
118 PetscErrorCode StubFunction(SNES snes ,Vec x,Vec r,void *ctx)
119 {
120   ApplicationCtx                       *user = (ApplicationCtx*) ctx;
121   DM                                   da = user->da;
122   Vec                                  rk;
123   PetscReal                            norm=0;
124 
125   PetscFunctionBeginUser;
126   CHKERRQ(DMGetGlobalVector(da,&rk));
127   CHKERRQ(CpuFunction(snes,x,r,ctx));
128   CHKERRQ(KokkosFunction(snes,x,rk,ctx));
129   CHKERRQ(VecAXPY(rk,-1.0,r));
130   CHKERRQ(VecNorm(rk,NORM_2,&norm));
131   CHKERRQ(DMRestoreGlobalVector(da,&rk));
132   PetscCheckFalse(norm > 1e-6,PETSC_COMM_SELF,PETSC_ERR_PLIB,"KokkosFunction() different from CpuFunction() with a diff norm = %g",norm);
133   PetscFunctionReturn(0);
134 }
135 /* ------------------------------------------------------------------- */
136 /*
137    FormJacobian - Evaluates Jacobian matrix.
138 
139    Input Parameters:
140 .  snes - the SNES context
141 .  x - input vector
142 .  dummy - optional user-defined context (not used here)
143 
144    Output Parameters:
145 .  jac - Jacobian matrix
146 .  B - optionally different preconditioning matrix
147 .  flag - flag indicating matrix structure
148 */
149 PetscErrorCode FormJacobian(SNES snes,Vec x,Mat jac,Mat B,void *ctx)
150 {
151   ApplicationCtx *user = (ApplicationCtx*) ctx;
152   PetscScalar    *xx,d,A[3];
153   PetscInt       i,j[3],M,xs,xm;
154   DM             da = user->da;
155 
156   PetscFunctionBeginUser;
157   /*
158      Get pointer to vector data
159   */
160   CHKERRQ(DMDAVecGetArrayRead(da,x,&xx));
161   CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL));
162 
163   /*
164     Get range of locally owned matrix
165   */
166   CHKERRQ(DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL));
167 
168   /*
169      Determine starting and ending local indices for interior grid points.
170      Set Jacobian entries for boundary points.
171   */
172 
173   if (xs == 0) {  /* left boundary */
174     i = 0; A[0] = 1.0;
175 
176     CHKERRQ(MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES));
177     xs++;xm--;
178   }
179   if (xs+xm == M) { /* right boundary */
180     i    = M-1;
181     A[0] = 1.0;
182     CHKERRQ(MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES));
183     xm--;
184   }
185 
186   /*
187      Interior grid points
188       - Note that in this case we set all elements for a particular
189         row at once.
190   */
191   d = 1.0/(user->h*user->h);
192   for (i=xs; i<xs+xm; i++) {
193     j[0] = i - 1; j[1] = i; j[2] = i + 1;
194     A[0] = A[2] = d; A[1] = -2.0*d + 2.0*xx[i];
195     CHKERRQ(MatSetValues(jac,1,&i,3,j,A,INSERT_VALUES));
196   }
197 
198   /*
199      Assemble matrix, using the 2-step process:
200        MatAssemblyBegin(), MatAssemblyEnd().
201      By placing code between these two statements, computations can be
202      done while messages are in transition.
203 
204      Also, restore vector.
205   */
206 
207   CHKERRQ(MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY));
208   CHKERRQ(DMDAVecRestoreArrayRead(da,x,&xx));
209   CHKERRQ(MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY));
210 
211   PetscFunctionReturn(0);
212 }
213 
214 int main(int argc,char **argv)
215 {
216   SNES                        snes;                 /* SNES context */
217   Mat                         J;                    /* Jacobian matrix */
218   ApplicationCtx              ctx;                  /* user-defined context */
219   Vec                         x,r,U,F;              /* vectors */
220   PetscScalar                 none = -1.0;
221   PetscErrorCode              ierr;
222   PetscInt                    its,N = 5,maxit,maxf;
223   PetscReal                   abstol,rtol,stol,norm;
224   PetscBool                   viewinitial = PETSC_FALSE;
225 
226   ierr  = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
227   CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-n",&N,NULL));
228   ctx.h = 1.0/(N-1);
229   CHKERRQ(PetscOptionsGetBool(NULL,NULL,"-view_initial",&viewinitial,NULL));
230 
231   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
232      Create nonlinear solver context
233      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
234   CHKERRQ(SNESCreate(PETSC_COMM_WORLD,&snes));
235 
236   /*
237      Create distributed array (DMDA) to manage parallel grid and vectors
238   */
239   CHKERRQ(DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,N,1,1,NULL,&ctx.da));
240   CHKERRQ(DMSetFromOptions(ctx.da));
241   CHKERRQ(DMSetUp(ctx.da));
242 
243   /*
244      Extract global and local vectors from DMDA; then duplicate for remaining
245      vectors that are the same types
246   */
247   CHKERRQ(DMCreateGlobalVector(ctx.da,&x));
248   CHKERRQ(PetscObjectSetName((PetscObject)x,"Approximate Solution"));
249   CHKERRQ(VecDuplicate(x,&r));
250   CHKERRQ(VecDuplicate(x,&F)); ctx.F = F;
251   CHKERRQ(PetscObjectSetName((PetscObject)F,"Forcing function"));
252   CHKERRQ(VecDuplicate(x,&U));
253   CHKERRQ(PetscObjectSetName((PetscObject)U,"Exact Solution"));
254 
255   /*
256      Set function evaluation routine and vector.  Whenever the nonlinear
257      solver needs to compute the nonlinear function, it will call this
258      routine.
259       - Note that the final routine argument is the user-defined
260         context that provides application-specific data for the
261         function evaluation routine.
262 
263      At the beginning, one can use a stub function that checks the Kokkos version
264      against the CPU version to quickly expose errors.
265      CHKERRQ(SNESSetFunction(snes,r,StubFunction,&ctx));
266   */
267   CHKERRQ(SNESSetFunction(snes,r,KokkosFunction,&ctx));
268 
269   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
270      Create matrix data structure; set Jacobian evaluation routine
271      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
272   CHKERRQ(DMCreateMatrix(ctx.da,&J));
273 
274   /*
275      Set Jacobian matrix data structure and default Jacobian evaluation
276      routine.  Whenever the nonlinear solver needs to compute the
277      Jacobian matrix, it will call this routine.
278       - Note that the final routine argument is the user-defined
279         context that provides application-specific data for the
280         Jacobian evaluation routine.
281   */
282   CHKERRQ(SNESSetJacobian(snes,J,J,FormJacobian,&ctx));
283   CHKERRQ(SNESSetFromOptions(snes));
284   CHKERRQ(SNESGetTolerances(snes,&abstol,&rtol,&stol,&maxit,&maxf));
285   CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"atol=%g, rtol=%g, stol=%g, maxit=%D, maxf=%D\n",(double)abstol,(double)rtol,(double)stol,maxit,maxf));
286 
287   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
288      Initialize application:
289      Store forcing function of PDE and exact solution
290    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
291   {
292     PetscScalarKokkosOffsetView FF,UU;
293     CHKERRQ(DMDAVecGetKokkosOffsetViewWrite(ctx.da,F,&FF));
294     CHKERRQ(DMDAVecGetKokkosOffsetViewWrite(ctx.da,U,&UU));
295     Kokkos:: parallel_for (Kokkos::RangePolicy<>(FF.begin(0),FF.end(0)),KOKKOS_LAMBDA (int i) {
296       PetscReal xp = i*ctx.h;
297       FF(i) = 6.0*xp + pow(xp+1.e-12,6.0); /* +1.e-12 is to prevent 0^6 */
298       UU(i) = xp*xp*xp;
299     });
300     CHKERRQ(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da,F,&FF));
301     CHKERRQ(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da,U,&UU));
302   }
303 
304   if (viewinitial) {
305     CHKERRQ(VecView(U,NULL));
306     CHKERRQ(VecView(F,NULL));
307   }
308 
309   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
310      Evaluate initial guess; then solve nonlinear system
311    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
312 
313   /*
314      Note: The user should initialize the vector, x, with the initial guess
315      for the nonlinear solver prior to calling SNESSolve().  In particular,
316      to employ an initial guess of zero, the user should explicitly set
317      this vector to zero by calling VecSet().
318   */
319   CHKERRQ(FormInitialGuess(x));
320   CHKERRQ(SNESSolve(snes,NULL,x));
321   CHKERRQ(SNESGetIterationNumber(snes,&its));
322   CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its));
323 
324   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
325      Check solution and clean up
326    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
327   /*
328      Check the error
329   */
330   CHKERRQ(VecAXPY(x,none,U));
331   CHKERRQ(VecNorm(x,NORM_2,&norm));
332   CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)norm,its));
333 
334   /*
335      Free work space.  All PETSc objects should be destroyed when they
336      are no longer needed.
337   */
338   CHKERRQ(VecDestroy(&x));
339   CHKERRQ(VecDestroy(&r));
340   CHKERRQ(VecDestroy(&U));
341   CHKERRQ(VecDestroy(&F));
342   CHKERRQ(MatDestroy(&J));
343   CHKERRQ(SNESDestroy(&snes));
344   CHKERRQ(DMDestroy(&ctx.da));
345   ierr = PetscFinalize();
346   return ierr;
347 }
348 
349 /*TEST
350 
351    build:
352      requires: kokkos_kernels
353 
354    test:
355      requires: kokkos_kernels !complex !single
356      nsize: 2
357      args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor
358      output_file: output/ex3k_1.out
359 
360 TEST*/
361