xref: /petsc/src/snes/tutorials/ex3k.kokkos.cxx (revision a3d0cf85f51e7fdec4f84d2d39b29bdf0224b99b) !
1 
2 
3 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n";
4 
5 /*
6     This is a simplied version of ex3.c except it uses Kokkos to set the initial conditions
7 */
8 
9 /*
10    Include "petscdm.h" so that we can use data management objects (DMs)
11    Include "petscdmda.h" so that we can use distributed arrays (DMDAs).
12    Include "petscsnes.h" so that we can use SNES solvers.  Note that this
13 */
14 
15 #include <petscdm.h>
16 #include <petscdmda.h>
17 #include <petscsnes.h>
18 
19 /*
20     Include Kokkos files
21 
22 */
23 #include <Kokkos_Core.hpp>
24 #include <Kokkos_OffsetView.hpp>
25 
26 /*
27    Application-defined routines.
28 */
29 PetscErrorCode FormJacobian(SNES,Vec,Mat,Mat,void*);
30 PetscErrorCode FormFunction(SNES,Vec,Vec,void*);
31 PetscErrorCode FormInitialGuess(Vec);
32 
33 /*
34    User-defined application context
35 */
36 typedef struct {
37   DM          da;      /* distributed array */
38   Vec         F;       /* right-hand-side of PDE */
39   PetscReal   h;       /* mesh spacing */
40 } ApplicationCtx;
41 
42 int main(int argc,char **argv)
43 {
44   SNES           snes;                 /* SNES context */
45   Mat            J;                    /* Jacobian matrix */
46   ApplicationCtx ctx;                  /* user-defined context */
47   Vec            x,r,U,F;              /* vectors */
48   PetscScalar    *FF,*UU,none = -1.0;
49   PetscErrorCode ierr;
50   PetscInt       its,N = 5,maxit,maxf,xs,xm;
51   PetscReal      abstol,rtol,stol,norm;
52   PetscBool      viewinitial = PETSC_FALSE;
53   PetscBool      view_kokkos_configuration = PETSC_TRUE;
54 
55 
56   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
57   ierr  = PetscOptionsGetInt(NULL,NULL,"-n",&N,NULL);CHKERRQ(ierr);
58   ierr  = PetscOptionsGetBool(NULL,NULL,"-view_kokkos_configuration",&view_kokkos_configuration,NULL);CHKERRQ(ierr);
59   ctx.h = 1.0/(N-1);
60   ierr  = PetscOptionsGetBool(NULL,NULL,"-view_initial",&viewinitial,NULL);CHKERRQ(ierr);
61 
62   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
63      Create nonlinear solver context
64      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
65 
66   ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr);
67 
68   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
69      Create vector data structures; set function evaluation routine
70      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
71 
72   /*
73      Create distributed array (DMDA) to manage parallel grid and vectors
74   */
75   ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,N,1,1,NULL,&ctx.da);CHKERRQ(ierr);
76   ierr = DMSetFromOptions(ctx.da);CHKERRQ(ierr);
77   ierr = DMSetUp(ctx.da);CHKERRQ(ierr);
78 
79   /*
80      Extract global and local vectors from DMDA; then duplicate for remaining
81      vectors that are the same types
82   */
83   ierr = DMCreateGlobalVector(ctx.da,&x);CHKERRQ(ierr);
84   ierr = PetscObjectSetName((PetscObject)x,"Approximate Solution");CHKERRQ(ierr);
85   ierr = VecDuplicate(x,&r);CHKERRQ(ierr);
86   ierr = VecDuplicate(x,&F);CHKERRQ(ierr); ctx.F = F;
87   ierr = PetscObjectSetName((PetscObject)F,"Forcing function");CHKERRQ(ierr);
88   ierr = VecDuplicate(x,&U);CHKERRQ(ierr);
89   ierr = PetscObjectSetName((PetscObject)U,"Exact Solution");CHKERRQ(ierr);
90 
91   /*
92      Set function evaluation routine and vector.  Whenever the nonlinear
93      solver needs to compute the nonlinear function, it will call this
94      routine.
95       - Note that the final routine argument is the user-defined
96         context that provides application-specific data for the
97         function evaluation routine.
98   */
99   ierr = SNESSetFunction(snes,r,FormFunction,&ctx);CHKERRQ(ierr);
100 
101   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
102      Create matrix data structure; set Jacobian evaluation routine
103      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
104 
105   ierr = DMCreateMatrix(ctx.da,&J);CHKERRQ(ierr);
106 
107   /*
108      Set Jacobian matrix data structure and default Jacobian evaluation
109      routine.  Whenever the nonlinear solver needs to compute the
110      Jacobian matrix, it will call this routine.
111       - Note that the final routine argument is the user-defined
112         context that provides application-specific data for the
113         Jacobian evaluation routine.
114   */
115   ierr = SNESSetJacobian(snes,J,J,FormJacobian,&ctx);CHKERRQ(ierr);
116   ierr = SNESSetFromOptions(snes);CHKERRQ(ierr);
117   ierr = SNESGetTolerances(snes,&abstol,&rtol,&stol,&maxit,&maxf);CHKERRQ(ierr);
118   ierr = PetscPrintf(PETSC_COMM_WORLD,"atol=%g, rtol=%g, stol=%g, maxit=%D, maxf=%D\n",(double)abstol,(double)rtol,(double)stol,maxit,maxf);CHKERRQ(ierr);
119 
120   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
121      Initialize application:
122      Store forcing function of PDE and exact solution
123    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
124 
125   /*
126      Get local grid boundaries (for 1-dimensional DMDA):
127        xs, xm - starting grid index, width of local grid (no ghost points)
128   */
129   ierr = DMDAGetCorners(ctx.da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
130 
131   /*
132      Get pointers to vector data
133   */
134   ierr = VecGetArrayWrite(F,&FF);CHKERRQ(ierr);
135   ierr = VecGetArrayWrite(U,&UU);CHKERRQ(ierr);
136 
137   /* -------------------------------------------------------------------------------------------------- */
138   /*  ./configure --download-kokkos --download-hwloc [one of --with-openmp --with-pthread --with-cuda] */
139   /*  export OMP_PROC_BIND="threads"  export OMP_PROC_BIND="spread" */
140 
141   if (!getenv("KOKKOS_NUM_THREADS")) setenv("KOKKOS_NUM_THREADS","4",1);
142   ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr); /* Init Kokkos if not yet */
143 
144   if (view_kokkos_configuration) {
145     Kokkos::print_configuration(std::cout, true);
146   }
147 
148   /* introduce a view object; reference like object  */
149   Kokkos::Experimental::OffsetView<PetscScalar*> xFF(Kokkos::View<PetscScalar*>(FF,xm),{xs}), xUU(Kokkos::View<PetscScalar*>(UU,xm),{xs});
150 
151   PetscReal xpbase = xs*ctx.h;
152   Kokkos:: parallel_for (Kokkos::RangePolicy<> (xs,xm), KOKKOS_LAMBDA ( int j) {
153     PetscReal xp = xpbase + j*ctx.h;
154     xFF(j) = 6.0*xp + PetscPowScalar(xp+1.e-12,6.0); /* +1.e-12 is to prevent 0^6 */
155     xUU(j) = xp*xp*xp;
156   });
157 
158   /*
159      Restore vectors
160   */
161   ierr = VecRestoreArrayWrite(F,&FF);CHKERRQ(ierr);
162   ierr = VecRestoreArrayWrite(U,&UU);CHKERRQ(ierr);
163   if (viewinitial) {
164     ierr = VecView(U,NULL);CHKERRQ(ierr);
165     ierr = VecView(F,NULL);CHKERRQ(ierr);
166   }
167 
168   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
169      Evaluate initial guess; then solve nonlinear system
170    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
171 
172   /*
173      Note: The user should initialize the vector, x, with the initial guess
174      for the nonlinear solver prior to calling SNESSolve().  In particular,
175      to employ an initial guess of zero, the user should explicitly set
176      this vector to zero by calling VecSet().
177   */
178   ierr = FormInitialGuess(x);CHKERRQ(ierr);
179   ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr);
180   ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr);
181   ierr = PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its);CHKERRQ(ierr);
182 
183   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
184      Check solution and clean up
185    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
186 
187   /*
188      Check the error
189   */
190   ierr = VecAXPY(x,none,U);CHKERRQ(ierr);
191   ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr);
192   ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)norm,its);CHKERRQ(ierr);
193 
194   /*
195      Free work space.  All PETSc objects should be destroyed when they
196      are no longer needed.
197   */
198   ierr = VecDestroy(&x);CHKERRQ(ierr);
199   ierr = VecDestroy(&r);CHKERRQ(ierr);
200   ierr = VecDestroy(&U);CHKERRQ(ierr);
201   ierr = VecDestroy(&F);CHKERRQ(ierr);
202   ierr = MatDestroy(&J);CHKERRQ(ierr);
203   ierr = SNESDestroy(&snes);CHKERRQ(ierr);
204   ierr = DMDestroy(&ctx.da);CHKERRQ(ierr);
205   ierr = PetscFinalize();
206   return ierr;
207 }
208 
209 /* ------------------------------------------------------------------- */
210 /*
211    FormInitialGuess - Computes initial guess.
212 
213    Input/Output Parameter:
214 .  x - the solution vector
215 */
216 PetscErrorCode FormInitialGuess(Vec x)
217 {
218   PetscErrorCode ierr;
219   PetscScalar    pfive = .50;
220 
221   PetscFunctionBeginUser;
222   ierr = VecSet(x,pfive);CHKERRQ(ierr);
223   PetscFunctionReturn(0);
224 }
225 
226 /* ------------------------------------------------------------------- */
227 /*
228    FormFunction - Evaluates nonlinear function, F(x).
229 
230    Input Parameters:
231 .  snes - the SNES context
232 .  x - input vector
233 .  ctx - optional user-defined context, as set by SNESSetFunction()
234 
235    Output Parameter:
236 .  f - function vector
237 
238    Note:
239    The user-defined context can contain any application-specific
240    data needed for the function evaluation.
241 */
242 PetscErrorCode FormFunction(SNES snes,Vec x,Vec f,void *ctx)
243 {
244   ApplicationCtx *user = (ApplicationCtx*) ctx;
245   DM             da    = user->da;
246   PetscScalar    *xx,*ff,*FF,d;
247   PetscErrorCode ierr;
248   PetscInt       i,M,xs,xm;
249   Vec            xlocal;
250 
251   PetscFunctionBeginUser;
252   ierr = DMGetLocalVector(da,&xlocal);CHKERRQ(ierr);
253   /*
254      Scatter ghost points to local vector, using the 2-step process
255         DMGlobalToLocalBegin(), DMGlobalToLocalEnd().
256      By placing code between these two statements, computations can
257      be done while messages are in transition.
258   */
259   ierr = DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
260   ierr = DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
261 
262   /*
263      Get pointers to vector data.
264        - The vector xlocal includes ghost point; the vectors x and f do
265          NOT include ghost points.
266        - Using DMDAVecGetArray() allows accessing the values using global ordering
267   */
268   ierr = DMDAVecGetArray(da,xlocal,&xx);CHKERRQ(ierr);
269   ierr = DMDAVecGetArray(da,f,&ff);CHKERRQ(ierr);
270   ierr = DMDAVecGetArray(da,user->F,&FF);CHKERRQ(ierr);
271 
272   /*
273      Get local grid boundaries (for 1-dimensional DMDA):
274        xs, xm  - starting grid index, width of local grid (no ghost points)
275   */
276   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
277   ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
278 
279   /*
280      Set function values for boundary points; define local interior grid point range:
281         xsi - starting interior grid index
282         xei - ending interior grid index
283   */
284   if (xs == 0) { /* left boundary */
285     ff[0] = xx[0];
286     xs++;xm--;
287   }
288   if (xs+xm == M) {  /* right boundary */
289     ff[xs+xm-1] = xx[xs+xm-1] - 1.0;
290     xm--;
291   }
292 
293   /*
294      Compute function over locally owned part of the grid (interior points only)
295   */
296   d = 1.0/(user->h*user->h);
297   for (i=xs; i<xs+xm; i++) ff[i] = d*(xx[i-1] - 2.0*xx[i] + xx[i+1]) + xx[i]*xx[i] - FF[i];
298 
299   /*
300      Restore vectors
301   */
302   ierr = DMDAVecRestoreArray(da,xlocal,&xx);CHKERRQ(ierr);
303   ierr = DMDAVecRestoreArray(da,f,&ff);CHKERRQ(ierr);
304   ierr = DMDAVecRestoreArray(da,user->F,&FF);CHKERRQ(ierr);
305   ierr = DMRestoreLocalVector(da,&xlocal);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 /* ------------------------------------------------------------------- */
310 /*
311    FormJacobian - Evaluates Jacobian matrix.
312 
313    Input Parameters:
314 .  snes - the SNES context
315 .  x - input vector
316 .  dummy - optional user-defined context (not used here)
317 
318    Output Parameters:
319 .  jac - Jacobian matrix
320 .  B - optionally different preconditioning matrix
321 .  flag - flag indicating matrix structure
322 */
323 PetscErrorCode FormJacobian(SNES snes,Vec x,Mat jac,Mat B,void *ctx)
324 {
325   ApplicationCtx *user = (ApplicationCtx*) ctx;
326   PetscScalar    *xx,d,A[3];
327   PetscErrorCode ierr;
328   PetscInt       i,j[3],M,xs,xm;
329   DM             da = user->da;
330 
331   PetscFunctionBeginUser;
332   /*
333      Get pointer to vector data
334   */
335   ierr = DMDAVecGetArrayRead(da,x,&xx);CHKERRQ(ierr);
336   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
337 
338   /*
339     Get range of locally owned matrix
340   */
341   ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
342 
343   /*
344      Determine starting and ending local indices for interior grid points.
345      Set Jacobian entries for boundary points.
346   */
347 
348   if (xs == 0) {  /* left boundary */
349     i = 0; A[0] = 1.0;
350 
351     ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr);
352     xs++;xm--;
353   }
354   if (xs+xm == M) { /* right boundary */
355     i    = M-1;
356     A[0] = 1.0;
357     ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr);
358     xm--;
359   }
360 
361   /*
362      Interior grid points
363       - Note that in this case we set all elements for a particular
364         row at once.
365   */
366   d = 1.0/(user->h*user->h);
367   for (i=xs; i<xs+xm; i++) {
368     j[0] = i - 1; j[1] = i; j[2] = i + 1;
369     A[0] = A[2] = d; A[1] = -2.0*d + 2.0*xx[i];
370     ierr = MatSetValues(jac,1,&i,3,j,A,INSERT_VALUES);CHKERRQ(ierr);
371   }
372 
373   /*
374      Assemble matrix, using the 2-step process:
375        MatAssemblyBegin(), MatAssemblyEnd().
376      By placing code between these two statements, computations can be
377      done while messages are in transition.
378 
379      Also, restore vector.
380   */
381 
382   ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
383   ierr = DMDAVecRestoreArrayRead(da,x,&xx);CHKERRQ(ierr);
384   ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
385 
386   PetscFunctionReturn(0);
387 }
388 
389 /*
390      The first test works for Kokkos wtih OpenMP and PThreads, the second with CUDA.
391 
392      The second test requires -dm_vec_type cuda and -vec_pinned_memory_min 0 because Kokkos
393      assumes all memory it is given is allocated with cudaMallocHost() and can be used with
394      cudaMemcpy() without indicating it the memory is GPU or CPU. Note this is NOT unified memory.
395 
396      The third test requires -dm_mat_type aijcusparse to create the correct vectors for block Jacobi
397 */
398 
399 /*TEST
400 
401    build:
402      requires: kokkos
403 
404    test:
405      requires: kokkos double !complex !single !cuda
406      args: -view_initial -view_kokkos_configuration false -snes_monitor
407      filter: grep -v type:
408 
409    test:
410      suffix: 2
411      requires: kokkos double !complex !single cuda
412      args: -dm_vec_type cuda -vec_pinned_memory_min 0 -view_initial -view_kokkos_configuration false  -snes_monitor
413      output_file: output/ex3k_1.out
414      filter: grep -v type:
415 
416    test:
417      suffix: 3
418      TODO: broken
419      requires: kokkos double !complex !single cuda
420      nsize: 2
421      args: -dm_vec_type cuda -dm_mat_type aijcusparse -vec_pinned_memory_min 0 -view_initial -view_kokkos_configuration false  -snes_monitor
422      output_file: output/ex3k_1.out
423      filter: grep -v type:
424 
425 TEST*/
426