xref: /petsc/src/snes/tutorials/ex3k.kokkos.cxx (revision 066ea43f7f75752f012be6cd06b6107ebe84cc6d)
1 
2 
3 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n";
4 
5 /*
6     This is a simplied version of ex3.c except it uses Kokkos to set the initial conditions
7 */
8 
9 /*
10    Include "petscdm.h" so that we can use data management objects (DMs)
11    Include "petscdmda.h" so that we can use distributed arrays (DMDAs).
12    Include "petscsnes.h" so that we can use SNES solvers.  Note that this
13 */
14 
15 #include <petscdm.h>
16 #include <petscdmda.h>
17 #include <petscsnes.h>
18 
19 /*
20     Include Kokkos files
21 
22 */
23 #include <Kokkos_Core.hpp>
24 #include <Kokkos_OffsetView.hpp>
25 
26 /*
27    Application-defined routines.
28 */
29 PetscErrorCode FormJacobian(SNES,Vec,Mat,Mat,void*);
30 PetscErrorCode FormFunction(SNES,Vec,Vec,void*);
31 PetscErrorCode FormInitialGuess(Vec);
32 
33 /*
34    User-defined application context
35 */
36 typedef struct {
37   DM          da;      /* distributed array */
38   Vec         F;       /* right-hand-side of PDE */
39   PetscReal   h;       /* mesh spacing */
40 } ApplicationCtx;
41 
42 int main(int argc,char **argv)
43 {
44   SNES           snes;                 /* SNES context */
45   Mat            J;                    /* Jacobian matrix */
46   ApplicationCtx ctx;                  /* user-defined context */
47   Vec            x,r,U,F;              /* vectors */
48   PetscScalar    *FF,*UU,none = -1.0;
49   PetscErrorCode ierr;
50   PetscInt       its,N = 5,maxit,maxf,xs,xm;
51   PetscReal      abstol,rtol,stol,norm;
52   PetscBool      viewinitial = PETSC_FALSE;
53   PetscBool      view_kokkos_configuration = PETSC_TRUE;
54 
55 
56   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
57   ierr  = PetscOptionsGetInt(NULL,NULL,"-n",&N,NULL);CHKERRQ(ierr);
58   ierr  = PetscOptionsGetBool(NULL,NULL,"-view_kokkos_configuration",&view_kokkos_configuration,NULL);CHKERRQ(ierr);
59   ctx.h = 1.0/(N-1);
60   ierr  = PetscOptionsGetBool(NULL,NULL,"-view_initial",&viewinitial,NULL);CHKERRQ(ierr);
61 
62   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
63      Create nonlinear solver context
64      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
65 
66   ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr);
67 
68   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
69      Create vector data structures; set function evaluation routine
70      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
71 
72   /*
73      Create distributed array (DMDA) to manage parallel grid and vectors
74   */
75   ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,N,1,1,NULL,&ctx.da);CHKERRQ(ierr);
76   ierr = DMSetFromOptions(ctx.da);CHKERRQ(ierr);
77   ierr = DMSetUp(ctx.da);CHKERRQ(ierr);
78 
79   /*
80      Extract global and local vectors from DMDA; then duplicate for remaining
81      vectors that are the same types
82   */
83   ierr = DMCreateGlobalVector(ctx.da,&x);CHKERRQ(ierr);
84   ierr = PetscObjectSetName((PetscObject)x,"Approximate Solution");CHKERRQ(ierr);
85   ierr = VecDuplicate(x,&r);CHKERRQ(ierr);
86   ierr = VecDuplicate(x,&F);CHKERRQ(ierr); ctx.F = F;
87   ierr = PetscObjectSetName((PetscObject)F,"Forcing function");CHKERRQ(ierr);
88   ierr = VecDuplicate(x,&U);CHKERRQ(ierr);
89   ierr = PetscObjectSetName((PetscObject)U,"Exact Solution");CHKERRQ(ierr);
90 
91   /*
92      Set function evaluation routine and vector.  Whenever the nonlinear
93      solver needs to compute the nonlinear function, it will call this
94      routine.
95       - Note that the final routine argument is the user-defined
96         context that provides application-specific data for the
97         function evaluation routine.
98   */
99   ierr = SNESSetFunction(snes,r,FormFunction,&ctx);CHKERRQ(ierr);
100 
101   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
102      Create matrix data structure; set Jacobian evaluation routine
103      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
104 
105   ierr = DMCreateMatrix(ctx.da,&J);CHKERRQ(ierr);
106 
107   /*
108      Set Jacobian matrix data structure and default Jacobian evaluation
109      routine.  Whenever the nonlinear solver needs to compute the
110      Jacobian matrix, it will call this routine.
111       - Note that the final routine argument is the user-defined
112         context that provides application-specific data for the
113         Jacobian evaluation routine.
114   */
115   ierr = SNESSetJacobian(snes,J,J,FormJacobian,&ctx);CHKERRQ(ierr);
116   ierr = SNESSetFromOptions(snes);CHKERRQ(ierr);
117   ierr = SNESGetTolerances(snes,&abstol,&rtol,&stol,&maxit,&maxf);CHKERRQ(ierr);
118   ierr = PetscPrintf(PETSC_COMM_WORLD,"atol=%g, rtol=%g, stol=%g, maxit=%D, maxf=%D\n",(double)abstol,(double)rtol,(double)stol,maxit,maxf);CHKERRQ(ierr);
119 
120   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
121      Initialize application:
122      Store forcing function of PDE and exact solution
123    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
124 
125   /*
126      Get local grid boundaries (for 1-dimensional DMDA):
127        xs, xm - starting grid index, width of local grid (no ghost points)
128   */
129   ierr = DMDAGetCorners(ctx.da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
130 
131   /*
132      Get pointers to vector data
133   */
134   ierr = VecGetArrayWrite(F,&FF);CHKERRQ(ierr);
135   ierr = VecGetArrayWrite(U,&UU);CHKERRQ(ierr);
136 
137   /* -------------------------------------------------------------------------------------------------- */
138   /*  ./configure --download-kokkos --download-hwloc [one of --with-openmp --with-pthread --with-cuda] */
139   /*  export OMP_PROC_BIND="threads"  export OMP_PROC_BIND="spread" */
140 
141   if (!getenv("KOKKOS_NUM_THREADS")) setenv("KOKKOS_NUM_THREADS","4",1);
142   if (!view_kokkos_configuration) {
143     /* use private routine to turn off warnings about negative number of cores etc */
144     Kokkos::Impl::pre_initialize(Kokkos::InitArguments(-1, -1, -1, true));
145   }
146   Kokkos::initialize( argc, argv );
147   if (view_kokkos_configuration) {
148     Kokkos::print_configuration(std::cout, true);
149   }
150 
151   /* introduce a view object; reference like object  */
152   Kokkos::Experimental::OffsetView<PetscScalar*> xFF(Kokkos::View<PetscScalar*>(FF,xm),{xs}), xUU(Kokkos::View<PetscScalar*>(UU,xm),{xs});
153 
154   PetscReal xpbase = xs*ctx.h;
155   Kokkos:: parallel_for(Kokkos::RangePolicy<> (xs,xm), KOKKOS_LAMBDA ( int j ) {
156     PetscReal xp = xpbase + j*ctx.h;
157     xFF(j) = 6.0*xp + PetscPowScalar(xp+1.e-12,6.0); /* +1.e-12 is to prevent 0^6 */
158     xUU(j) = xp*xp*xp;
159   });
160 
161   Kokkos::finalize();
162 
163   /*
164      Restore vectors
165   */
166   ierr = VecRestoreArrayWrite(F,&FF);CHKERRQ(ierr);
167   ierr = VecRestoreArrayWrite(U,&UU);CHKERRQ(ierr);
168   if (viewinitial) {
169     ierr = VecView(U,NULL);CHKERRQ(ierr);
170     ierr = VecView(F,NULL);CHKERRQ(ierr);
171   }
172 
173   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
174      Evaluate initial guess; then solve nonlinear system
175    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
176 
177   /*
178      Note: The user should initialize the vector, x, with the initial guess
179      for the nonlinear solver prior to calling SNESSolve().  In particular,
180      to employ an initial guess of zero, the user should explicitly set
181      this vector to zero by calling VecSet().
182   */
183   ierr = FormInitialGuess(x);CHKERRQ(ierr);
184   ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr);
185   ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr);
186   ierr = PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its);CHKERRQ(ierr);
187 
188   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
189      Check solution and clean up
190    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
191 
192   /*
193      Check the error
194   */
195   ierr = VecAXPY(x,none,U);CHKERRQ(ierr);
196   ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr);
197   ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)norm,its);CHKERRQ(ierr);
198 
199   /*
200      Free work space.  All PETSc objects should be destroyed when they
201      are no longer needed.
202   */
203   ierr = VecDestroy(&x);CHKERRQ(ierr);
204   ierr = VecDestroy(&r);CHKERRQ(ierr);
205   ierr = VecDestroy(&U);CHKERRQ(ierr);
206   ierr = VecDestroy(&F);CHKERRQ(ierr);
207   ierr = MatDestroy(&J);CHKERRQ(ierr);
208   ierr = SNESDestroy(&snes);CHKERRQ(ierr);
209   ierr = DMDestroy(&ctx.da);CHKERRQ(ierr);
210   ierr = PetscFinalize();
211   return ierr;
212 }
213 
214 /* ------------------------------------------------------------------- */
215 /*
216    FormInitialGuess - Computes initial guess.
217 
218    Input/Output Parameter:
219 .  x - the solution vector
220 */
221 PetscErrorCode FormInitialGuess(Vec x)
222 {
223   PetscErrorCode ierr;
224   PetscScalar    pfive = .50;
225 
226   PetscFunctionBeginUser;
227   ierr = VecSet(x,pfive);CHKERRQ(ierr);
228   PetscFunctionReturn(0);
229 }
230 
231 /* ------------------------------------------------------------------- */
232 /*
233    FormFunction - Evaluates nonlinear function, F(x).
234 
235    Input Parameters:
236 .  snes - the SNES context
237 .  x - input vector
238 .  ctx - optional user-defined context, as set by SNESSetFunction()
239 
240    Output Parameter:
241 .  f - function vector
242 
243    Note:
244    The user-defined context can contain any application-specific
245    data needed for the function evaluation.
246 */
247 PetscErrorCode FormFunction(SNES snes,Vec x,Vec f,void *ctx)
248 {
249   ApplicationCtx *user = (ApplicationCtx*) ctx;
250   DM             da    = user->da;
251   PetscScalar    *xx,*ff,*FF,d;
252   PetscErrorCode ierr;
253   PetscInt       i,M,xs,xm;
254   Vec            xlocal;
255 
256   PetscFunctionBeginUser;
257   ierr = DMGetLocalVector(da,&xlocal);CHKERRQ(ierr);
258   /*
259      Scatter ghost points to local vector, using the 2-step process
260         DMGlobalToLocalBegin(), DMGlobalToLocalEnd().
261      By placing code between these two statements, computations can
262      be done while messages are in transition.
263   */
264   ierr = DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
265   ierr = DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
266 
267   /*
268      Get pointers to vector data.
269        - The vector xlocal includes ghost point; the vectors x and f do
270          NOT include ghost points.
271        - Using DMDAVecGetArray() allows accessing the values using global ordering
272   */
273   ierr = DMDAVecGetArray(da,xlocal,&xx);CHKERRQ(ierr);
274   ierr = DMDAVecGetArray(da,f,&ff);CHKERRQ(ierr);
275   ierr = DMDAVecGetArray(da,user->F,&FF);CHKERRQ(ierr);
276 
277   /*
278      Get local grid boundaries (for 1-dimensional DMDA):
279        xs, xm  - starting grid index, width of local grid (no ghost points)
280   */
281   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
282   ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
283 
284   /*
285      Set function values for boundary points; define local interior grid point range:
286         xsi - starting interior grid index
287         xei - ending interior grid index
288   */
289   if (xs == 0) { /* left boundary */
290     ff[0] = xx[0];
291     xs++;xm--;
292   }
293   if (xs+xm == M) {  /* right boundary */
294     ff[xs+xm-1] = xx[xs+xm-1] - 1.0;
295     xm--;
296   }
297 
298   /*
299      Compute function over locally owned part of the grid (interior points only)
300   */
301   d = 1.0/(user->h*user->h);
302   for (i=xs; i<xs+xm; i++) ff[i] = d*(xx[i-1] - 2.0*xx[i] + xx[i+1]) + xx[i]*xx[i] - FF[i];
303 
304   /*
305      Restore vectors
306   */
307   ierr = DMDAVecRestoreArray(da,xlocal,&xx);CHKERRQ(ierr);
308   ierr = DMDAVecRestoreArray(da,f,&ff);CHKERRQ(ierr);
309   ierr = DMDAVecRestoreArray(da,user->F,&FF);CHKERRQ(ierr);
310   ierr = DMRestoreLocalVector(da,&xlocal);CHKERRQ(ierr);
311   PetscFunctionReturn(0);
312 }
313 
314 /* ------------------------------------------------------------------- */
315 /*
316    FormJacobian - Evaluates Jacobian matrix.
317 
318    Input Parameters:
319 .  snes - the SNES context
320 .  x - input vector
321 .  dummy - optional user-defined context (not used here)
322 
323    Output Parameters:
324 .  jac - Jacobian matrix
325 .  B - optionally different preconditioning matrix
326 .  flag - flag indicating matrix structure
327 */
328 PetscErrorCode FormJacobian(SNES snes,Vec x,Mat jac,Mat B,void *ctx)
329 {
330   ApplicationCtx *user = (ApplicationCtx*) ctx;
331   PetscScalar    *xx,d,A[3];
332   PetscErrorCode ierr;
333   PetscInt       i,j[3],M,xs,xm;
334   DM             da = user->da;
335 
336   PetscFunctionBeginUser;
337   /*
338      Get pointer to vector data
339   */
340   ierr = DMDAVecGetArrayRead(da,x,&xx);CHKERRQ(ierr);
341   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
342 
343   /*
344     Get range of locally owned matrix
345   */
346   ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
347 
348   /*
349      Determine starting and ending local indices for interior grid points.
350      Set Jacobian entries for boundary points.
351   */
352 
353   if (xs == 0) {  /* left boundary */
354     i = 0; A[0] = 1.0;
355 
356     ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr);
357     xs++;xm--;
358   }
359   if (xs+xm == M) { /* right boundary */
360     i    = M-1;
361     A[0] = 1.0;
362     ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr);
363     xm--;
364   }
365 
366   /*
367      Interior grid points
368       - Note that in this case we set all elements for a particular
369         row at once.
370   */
371   d = 1.0/(user->h*user->h);
372   for (i=xs; i<xs+xm; i++) {
373     j[0] = i - 1; j[1] = i; j[2] = i + 1;
374     A[0] = A[2] = d; A[1] = -2.0*d + 2.0*xx[i];
375     ierr = MatSetValues(jac,1,&i,3,j,A,INSERT_VALUES);CHKERRQ(ierr);
376   }
377 
378   /*
379      Assemble matrix, using the 2-step process:
380        MatAssemblyBegin(), MatAssemblyEnd().
381      By placing code between these two statements, computations can be
382      done while messages are in transition.
383 
384      Also, restore vector.
385   */
386 
387   ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
388   ierr = DMDAVecRestoreArrayRead(da,x,&xx);CHKERRQ(ierr);
389   ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
390 
391   PetscFunctionReturn(0);
392 }
393 
394 /*
395      The first test works for Kokkos wtih OpenMP and PThreads, the second with CUDA.
396 
397      The second test requires -dm_vec_type cuda and -vec_pinned_memory_min 0 because Kokkos
398      assumes all memory it is given is allocated with cudaMallocHost() and can be used with
399      cudaMemcpy() without indicating it the memory is GPU or CPU. Note this is NOT unified memory.
400 
401      The third test requires -dm_mat_type aijcusparse to create the correct vectors for block Jacobi
402 */
403 
404 /*TEST
405 
406    build:
407      requires: kokkos
408 
409    test:
410      requires: kokkos double !complex !single !cuda
411      args: -view_initial -view_kokkos_configuration false -snes_monitor
412      filter: grep -v type:
413 
414    test:
415      suffix: 2
416      requires: kokkos double !complex !single cuda
417      args: -dm_vec_type cuda -vec_pinned_memory_min 0 -view_initial -view_kokkos_configuration false  -snes_monitor
418      output_file: output/ex3k_1.out
419      filter: grep -v type:
420 
421    test:
422      suffix: 3
423      requires: kokkos double !complex !single defined(PETSC_HAVE_cusparseCreateSolveAnalysisInfo) cuda
424      nsize: 2
425      args: -dm_vec_type cuda -dm_mat_type aijcusparse -vec_pinned_memory_min 0 -view_initial -view_kokkos_configuration false  -snes_monitor
426      output_file: output/ex3k_1.out
427      filter: grep -v type:
428 
429 TEST*/
430