xref: /petsc/src/snes/tutorials/ex3k.kokkos.cxx (revision 030f984af8d8bb4c203755d35bded3c05b3d83ce)
1 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n";
2 
3 #include <petscdmda_kokkos.hpp>
4 #include <petscsnes.h>
5 
6 /*
7    User-defined application context
8 */
9 typedef struct {
10   DM          da;      /* distributed array */
11   Vec         F;       /* right-hand-side of PDE */
12   PetscReal   h;       /* mesh spacing */
13 } ApplicationCtx;
14 
15 /* ------------------------------------------------------------------- */
16 /*
17    FormInitialGuess - Computes initial guess.
18 
19    Input/Output Parameter:
20 .  x - the solution vector
21 */
22 PetscErrorCode FormInitialGuess(Vec x)
23 {
24   PetscErrorCode ierr;
25   PetscScalar    pfive = .50;
26 
27   PetscFunctionBeginUser;
28   ierr = VecSet(x,pfive);CHKERRQ(ierr);
29   PetscFunctionReturn(0);
30 }
31 
32 /* ------------------------------------------------------------------- */
33 /*
34    CpuFunction - Evaluates nonlinear function, F(x) on CPU
35 
36    Input Parameters:
37 .  snes - the SNES context
38 .  x - input vector
39 .  ctx - optional user-defined context, as set by SNESSetFunction()
40 
41    Output Parameter:
42 .  r - function vector
43 
44    Note:
45    The user-defined context can contain any application-specific
46    data needed for the function evaluation.
47 */
48 PetscErrorCode CpuFunction(SNES snes,Vec x,Vec r,void *ctx)
49 {
50   ApplicationCtx *user = (ApplicationCtx*) ctx;
51   DM             da    = user->da;
52   PetscScalar    *X,*R,*F,d;
53   PetscErrorCode ierr;
54   PetscInt       i,M,xs,xm;
55   Vec            xl;
56 
57   PetscFunctionBeginUser;
58   ierr = DMGetLocalVector(da,&xl);CHKERRQ(ierr);
59   ierr = DMGlobalToLocal(da,x,INSERT_VALUES,xl);CHKERRQ(ierr);
60   ierr = DMDAVecGetArray(da,xl,&X);CHKERRQ(ierr);
61   ierr = DMDAVecGetArray(da,r,&R);CHKERRQ(ierr);
62   ierr = DMDAVecGetArray(da,user->F,&F);CHKERRQ(ierr);
63 
64   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
65   ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
66 
67   if (xs == 0) { /* left boundary */
68     R[0] = X[0];
69     xs++;xm--;
70   }
71   if (xs+xm == M) {  /* right boundary */
72     R[xs+xm-1] = X[xs+xm-1] - 1.0;
73     xm--;
74   }
75   d = 1.0/(user->h*user->h);
76   for (i=xs; i<xs+xm; i++) R[i] = d*(X[i-1] - 2.0*X[i] + X[i+1]) + X[i]*X[i] - F[i];
77 
78   ierr = DMDAVecRestoreArray(da,xl,&X);CHKERRQ(ierr);
79   ierr = DMDAVecRestoreArray(da,r,&R);CHKERRQ(ierr);
80   ierr = DMDAVecRestoreArray(da,user->F,&F);CHKERRQ(ierr);
81   ierr = DMRestoreLocalVector(da,&xl);CHKERRQ(ierr);
82   PetscFunctionReturn(0);
83 }
84 
85 using DefaultExecutionSpace             = Kokkos::DefaultExecutionSpace;
86 using DefaultMemorySpace                = Kokkos::DefaultExecutionSpace::memory_space;
87 using PetscScalarKokkosOffsetView       = Kokkos::Experimental::OffsetView<PetscScalar*,DefaultMemorySpace>;
88 using ConstPetscScalarKokkosOffsetView  = Kokkos::Experimental::OffsetView<const PetscScalar*,DefaultMemorySpace>;
89 
90 PetscErrorCode KokkosFunction(SNES snes,Vec x,Vec r,void *ctx)
91 {
92   PetscErrorCode                       ierr;
93   ApplicationCtx                       *user = (ApplicationCtx*) ctx;
94   DM                                   da = user->da;
95   PetscScalar                          d;
96   PetscInt                             M;
97   Vec                                  xl;
98   PetscScalarKokkosOffsetView          R;
99   ConstPetscScalarKokkosOffsetView     X,F;
100 
101   PetscFunctionBeginUser;
102   ierr = DMGetLocalVector(da,&xl);CHKERRQ(ierr);
103   ierr = DMGlobalToLocal(da,x,INSERT_VALUES,xl);CHKERRQ(ierr);
104   d    = 1.0/(user->h*user->h);
105   ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
106   ierr = DMDAVecGetKokkosOffsetView(da,xl,&X);CHKERRQ(ierr); /* read only */
107   ierr = DMDAVecGetKokkosOffsetViewWrite(da,r,&R);CHKERRQ(ierr); /* write only */
108   ierr = DMDAVecGetKokkosOffsetView(da,user->F,&F);CHKERRQ(ierr); /* read only */
109   Kokkos:: parallel_for (Kokkos::RangePolicy<>(R.begin(0),R.end(0)),KOKKOS_LAMBDA (int i) {
110     if (i == 0)        R(0) = X(0);        /* left boundary */
111     else if (i == M-1) R(i) = X(i) - 1.0;  /* right boundary */
112     else               R(i) = d*(X(i-1) - 2.0*X(i) + X(i+1)) + X(i)*X(i) - F(i); /* interior */
113   });
114   ierr = DMDAVecRestoreKokkosOffsetView(da,xl,&X);CHKERRQ(ierr);
115   ierr = DMDAVecRestoreKokkosOffsetViewWrite(da,r,&R);CHKERRQ(ierr);
116   ierr = DMDAVecRestoreKokkosOffsetView(da,user->F,&F);CHKERRQ(ierr);
117   ierr = DMRestoreLocalVector(da,&xl);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 PetscErrorCode StubFunction(SNES snes ,Vec x,Vec r,void *ctx)
122 {
123   PetscErrorCode                       ierr;
124   ApplicationCtx                       *user = (ApplicationCtx*) ctx;
125   DM                                   da = user->da;
126   Vec                                  rk;
127   PetscReal                            norm=0;
128 
129   PetscFunctionBeginUser;
130   ierr = DMGetGlobalVector(da,&rk);CHKERRQ(ierr);
131   ierr = CpuFunction(snes,x,r,ctx);CHKERRQ(ierr);
132   ierr = KokkosFunction(snes,x,rk,ctx);CHKERRQ(ierr);
133   ierr = VecAXPY(rk,-1.0,r);CHKERRQ(ierr);
134   ierr = VecNorm(rk,NORM_2,&norm);CHKERRQ(ierr);
135   ierr = DMRestoreGlobalVector(da,&rk);CHKERRQ(ierr);
136   if (norm > 1e-6) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"KokkosFunction() different from CpuFunction() with a diff norm = %g\n",norm);
137   PetscFunctionReturn(0);
138 }
139 /* ------------------------------------------------------------------- */
140 /*
141    FormJacobian - Evaluates Jacobian matrix.
142 
143    Input Parameters:
144 .  snes - the SNES context
145 .  x - input vector
146 .  dummy - optional user-defined context (not used here)
147 
148    Output Parameters:
149 .  jac - Jacobian matrix
150 .  B - optionally different preconditioning matrix
151 .  flag - flag indicating matrix structure
152 */
153 PetscErrorCode FormJacobian(SNES snes,Vec x,Mat jac,Mat B,void *ctx)
154 {
155   ApplicationCtx *user = (ApplicationCtx*) ctx;
156   PetscScalar    *xx,d,A[3];
157   PetscErrorCode ierr;
158   PetscInt       i,j[3],M,xs,xm;
159   DM             da = user->da;
160 
161   PetscFunctionBeginUser;
162   /*
163      Get pointer to vector data
164   */
165   ierr = DMDAVecGetArrayRead(da,x,&xx);CHKERRQ(ierr);
166   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
167 
168   /*
169     Get range of locally owned matrix
170   */
171   ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
172 
173   /*
174      Determine starting and ending local indices for interior grid points.
175      Set Jacobian entries for boundary points.
176   */
177 
178   if (xs == 0) {  /* left boundary */
179     i = 0; A[0] = 1.0;
180 
181     ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr);
182     xs++;xm--;
183   }
184   if (xs+xm == M) { /* right boundary */
185     i    = M-1;
186     A[0] = 1.0;
187     ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr);
188     xm--;
189   }
190 
191   /*
192      Interior grid points
193       - Note that in this case we set all elements for a particular
194         row at once.
195   */
196   d = 1.0/(user->h*user->h);
197   for (i=xs; i<xs+xm; i++) {
198     j[0] = i - 1; j[1] = i; j[2] = i + 1;
199     A[0] = A[2] = d; A[1] = -2.0*d + 2.0*xx[i];
200     ierr = MatSetValues(jac,1,&i,3,j,A,INSERT_VALUES);CHKERRQ(ierr);
201   }
202 
203   /*
204      Assemble matrix, using the 2-step process:
205        MatAssemblyBegin(), MatAssemblyEnd().
206      By placing code between these two statements, computations can be
207      done while messages are in transition.
208 
209      Also, restore vector.
210   */
211 
212   ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
213   ierr = DMDAVecRestoreArrayRead(da,x,&xx);CHKERRQ(ierr);
214   ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
215 
216   PetscFunctionReturn(0);
217 }
218 
219 int main(int argc,char **argv)
220 {
221   SNES                        snes;                 /* SNES context */
222   Mat                         J;                    /* Jacobian matrix */
223   ApplicationCtx              ctx;                  /* user-defined context */
224   Vec                         x,r,U,F;              /* vectors */
225   PetscScalar                 none = -1.0;
226   PetscErrorCode              ierr;
227   PetscInt                    its,N = 5,maxit,maxf;
228   PetscReal                   abstol,rtol,stol,norm;
229   PetscBool                   viewinitial = PETSC_FALSE;
230 
231   ierr  = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
232   ierr  = PetscOptionsGetInt(NULL,NULL,"-n",&N,NULL);CHKERRQ(ierr);
233   ctx.h = 1.0/(N-1);
234   ierr  = PetscOptionsGetBool(NULL,NULL,"-view_initial",&viewinitial,NULL);CHKERRQ(ierr);
235 
236   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
237      Create nonlinear solver context
238      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
239   ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr);
240 
241   /*
242      Create distributed array (DMDA) to manage parallel grid and vectors
243   */
244   ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,N,1,1,NULL,&ctx.da);CHKERRQ(ierr);
245   ierr = DMSetFromOptions(ctx.da);CHKERRQ(ierr);
246   ierr = DMSetUp(ctx.da);CHKERRQ(ierr);
247 
248   /*
249      Extract global and local vectors from DMDA; then duplicate for remaining
250      vectors that are the same types
251   */
252   ierr = DMCreateGlobalVector(ctx.da,&x);CHKERRQ(ierr);
253   ierr = PetscObjectSetName((PetscObject)x,"Approximate Solution");CHKERRQ(ierr);
254   ierr = VecDuplicate(x,&r);CHKERRQ(ierr);
255   ierr = VecDuplicate(x,&F);CHKERRQ(ierr); ctx.F = F;
256   ierr = PetscObjectSetName((PetscObject)F,"Forcing function");CHKERRQ(ierr);
257   ierr = VecDuplicate(x,&U);CHKERRQ(ierr);
258   ierr = PetscObjectSetName((PetscObject)U,"Exact Solution");CHKERRQ(ierr);
259 
260   /*
261      Set function evaluation routine and vector.  Whenever the nonlinear
262      solver needs to compute the nonlinear function, it will call this
263      routine.
264       - Note that the final routine argument is the user-defined
265         context that provides application-specific data for the
266         function evaluation routine.
267 
268      At the begining, one can use a stub function that checks the Kokkos version
269      against the CPU version to quickly expose errors.
270      ierr = SNESSetFunction(snes,r,StubFunction,&ctx);CHKERRQ(ierr);
271   */
272   ierr = SNESSetFunction(snes,r,KokkosFunction,&ctx);CHKERRQ(ierr);
273 
274   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
275      Create matrix data structure; set Jacobian evaluation routine
276      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
277   ierr = DMCreateMatrix(ctx.da,&J);CHKERRQ(ierr);
278 
279   /*
280      Set Jacobian matrix data structure and default Jacobian evaluation
281      routine.  Whenever the nonlinear solver needs to compute the
282      Jacobian matrix, it will call this routine.
283       - Note that the final routine argument is the user-defined
284         context that provides application-specific data for the
285         Jacobian evaluation routine.
286   */
287   ierr = SNESSetJacobian(snes,J,J,FormJacobian,&ctx);CHKERRQ(ierr);
288   ierr = SNESSetFromOptions(snes);CHKERRQ(ierr);
289   ierr = SNESGetTolerances(snes,&abstol,&rtol,&stol,&maxit,&maxf);CHKERRQ(ierr);
290   ierr = PetscPrintf(PETSC_COMM_WORLD,"atol=%g, rtol=%g, stol=%g, maxit=%D, maxf=%D\n",(double)abstol,(double)rtol,(double)stol,maxit,maxf);CHKERRQ(ierr);
291 
292   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
293      Initialize application:
294      Store forcing function of PDE and exact solution
295    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
296   {
297     PetscScalarKokkosOffsetView FF,UU;
298     ierr = DMDAVecGetKokkosOffsetViewWrite(ctx.da,F,&FF);CHKERRQ(ierr);
299     ierr = DMDAVecGetKokkosOffsetViewWrite(ctx.da,U,&UU);CHKERRQ(ierr);
300     Kokkos:: parallel_for (Kokkos::RangePolicy<>(FF.begin(0),FF.end(0)),KOKKOS_LAMBDA (int i) {
301       PetscReal xp = i*ctx.h;
302       FF(i) = 6.0*xp + pow(xp+1.e-12,6.0); /* +1.e-12 is to prevent 0^6 */
303       UU(i) = xp*xp*xp;
304     });
305     ierr = DMDAVecRestoreKokkosOffsetViewWrite(ctx.da,F,&FF);CHKERRQ(ierr);
306     ierr = DMDAVecRestoreKokkosOffsetViewWrite(ctx.da,U,&UU);CHKERRQ(ierr);
307   }
308 
309   if (viewinitial) {
310     ierr = VecView(U,NULL);CHKERRQ(ierr);
311     ierr = VecView(F,NULL);CHKERRQ(ierr);
312   }
313 
314   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
315      Evaluate initial guess; then solve nonlinear system
316    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
317 
318   /*
319      Note: The user should initialize the vector, x, with the initial guess
320      for the nonlinear solver prior to calling SNESSolve().  In particular,
321      to employ an initial guess of zero, the user should explicitly set
322      this vector to zero by calling VecSet().
323   */
324   ierr = FormInitialGuess(x);CHKERRQ(ierr);
325   ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr);
326   ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr);
327   ierr = PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its);CHKERRQ(ierr);
328 
329   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
330      Check solution and clean up
331    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
332   /*
333      Check the error
334   */
335   ierr = VecAXPY(x,none,U);CHKERRQ(ierr);
336   ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr);
337   ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)norm,its);CHKERRQ(ierr);
338 
339   /*
340      Free work space.  All PETSc objects should be destroyed when they
341      are no longer needed.
342   */
343   ierr = VecDestroy(&x);CHKERRQ(ierr);
344   ierr = VecDestroy(&r);CHKERRQ(ierr);
345   ierr = VecDestroy(&U);CHKERRQ(ierr);
346   ierr = VecDestroy(&F);CHKERRQ(ierr);
347   ierr = MatDestroy(&J);CHKERRQ(ierr);
348   ierr = SNESDestroy(&snes);CHKERRQ(ierr);
349   ierr = DMDestroy(&ctx.da);CHKERRQ(ierr);
350   ierr = PetscFinalize();
351   return ierr;
352 }
353 
354 /*TEST
355 
356    build:
357      requires: kokkos_kernels
358 
359    test:
360      requires: kokkos_kernels !complex !single cuda
361      nsize: 2
362      args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor
363      output_file: output/ex3k_1.out
364 
365 TEST*/
366