xref: /petsc/src/snes/tutorials/ex3k.kokkos.cxx (revision efbe7e8a80d07327753dbe0b33efee01e046af3f)
1 
2 
3 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n";
4 
5 /*
6     This is a simplied version of ex3.c except it uses Kokkos to set the initial conditions
7 */
8 
9 /*
10    Include "petscdm.h" so that we can use data management objects (DMs)
11    Include "petscdmda.h" so that we can use distributed arrays (DMDAs).
12    Include "petscsnes.h" so that we can use SNES solvers.  Note that this
13 */
14 
15 #include <petscdm.h>
16 #include <petscdmda.h>
17 #include <petscsnes.h>
18 
19 /*
20     Include Kokkos files
21 
22 */
23 #include <Kokkos_Core.hpp>
24 #include <Kokkos_OffsetView.hpp>
25 
26 /*
27    Application-defined routines.
28 */
29 PetscErrorCode FormJacobian(SNES,Vec,Mat,Mat,void*);
30 PetscErrorCode FormFunction(SNES,Vec,Vec,void*);
31 PetscErrorCode FormInitialGuess(Vec);
32 
33 /*
34    User-defined application context
35 */
36 typedef struct {
37   DM          da;      /* distributed array */
38   Vec         F;       /* right-hand-side of PDE */
39   PetscReal   h;       /* mesh spacing */
40 } ApplicationCtx;
41 
42 int main(int argc,char **argv)
43 {
44   SNES           snes;                 /* SNES context */
45   Mat            J;                    /* Jacobian matrix */
46   ApplicationCtx ctx;                  /* user-defined context */
47   Vec            x,r,U,F;              /* vectors */
48   PetscScalar    *FF,*UU,none = -1.0;
49   PetscErrorCode ierr;
50   PetscInt       its,N = 5,maxit,maxf,xs,xm;
51   PetscReal      abstol,rtol,stol,norm;
52   PetscBool      viewinitial = PETSC_FALSE;
53   PetscBool      view_kokkos_configuration = PETSC_TRUE;
54 
55 
56   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
57   ierr  = PetscOptionsGetInt(NULL,NULL,"-n",&N,NULL);CHKERRQ(ierr);
58   ierr  = PetscOptionsGetBool(NULL,NULL,"-view_kokkos_configuration",&view_kokkos_configuration,NULL);CHKERRQ(ierr);
59   ctx.h = 1.0/(N-1);
60   ierr  = PetscOptionsGetBool(NULL,NULL,"-view_initial",&viewinitial,NULL);CHKERRQ(ierr);
61 
62   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
63      Create nonlinear solver context
64      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
65 
66   ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr);
67 
68   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
69      Create vector data structures; set function evaluation routine
70      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
71 
72   /*
73      Create distributed array (DMDA) to manage parallel grid and vectors
74   */
75   ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,N,1,1,NULL,&ctx.da);CHKERRQ(ierr);
76   ierr = DMSetFromOptions(ctx.da);CHKERRQ(ierr);
77   ierr = DMSetUp(ctx.da);CHKERRQ(ierr);
78 
79   /*
80      Extract global and local vectors from DMDA; then duplicate for remaining
81      vectors that are the same types
82   */
83   ierr = DMCreateGlobalVector(ctx.da,&x);CHKERRQ(ierr);
84   ierr = PetscObjectSetName((PetscObject)x,"Approximate Solution");CHKERRQ(ierr);
85   ierr = VecDuplicate(x,&r);CHKERRQ(ierr);
86   ierr = VecDuplicate(x,&F);CHKERRQ(ierr); ctx.F = F;
87   ierr = PetscObjectSetName((PetscObject)F,"Forcing function");CHKERRQ(ierr);
88   ierr = VecDuplicate(x,&U);CHKERRQ(ierr);
89   ierr = PetscObjectSetName((PetscObject)U,"Exact Solution");CHKERRQ(ierr);
90 
91   /*
92      Set function evaluation routine and vector.  Whenever the nonlinear
93      solver needs to compute the nonlinear function, it will call this
94      routine.
95       - Note that the final routine argument is the user-defined
96         context that provides application-specific data for the
97         function evaluation routine.
98   */
99   ierr = SNESSetFunction(snes,r,FormFunction,&ctx);CHKERRQ(ierr);
100 
101   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
102      Create matrix data structure; set Jacobian evaluation routine
103      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
104 
105   ierr = DMCreateMatrix(ctx.da,&J);CHKERRQ(ierr);
106 
107   /*
108      Set Jacobian matrix data structure and default Jacobian evaluation
109      routine.  Whenever the nonlinear solver needs to compute the
110      Jacobian matrix, it will call this routine.
111       - Note that the final routine argument is the user-defined
112         context that provides application-specific data for the
113         Jacobian evaluation routine.
114   */
115   ierr = SNESSetJacobian(snes,J,J,FormJacobian,&ctx);CHKERRQ(ierr);
116   ierr = SNESSetFromOptions(snes);CHKERRQ(ierr);
117   ierr = SNESGetTolerances(snes,&abstol,&rtol,&stol,&maxit,&maxf);CHKERRQ(ierr);
118   ierr = PetscPrintf(PETSC_COMM_WORLD,"atol=%g, rtol=%g, stol=%g, maxit=%D, maxf=%D\n",(double)abstol,(double)rtol,(double)stol,maxit,maxf);CHKERRQ(ierr);
119 
120   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
121      Initialize application:
122      Store forcing function of PDE and exact solution
123    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
124 
125   /*
126      Get local grid boundaries (for 1-dimensional DMDA):
127        xs, xm - starting grid index, width of local grid (no ghost points)
128   */
129   ierr = DMDAGetCorners(ctx.da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
130 
131   /*
132      Get pointers to vector data
133   */
134   ierr = VecGetArrayWrite(F,&FF);CHKERRQ(ierr);
135   ierr = VecGetArrayWrite(U,&UU);CHKERRQ(ierr);
136 
137   /* -------------------------------------------------------------------------------------------------- */
138   /*  ./configure --download-kokkos --download-hwloc [one of --with-openmp --with-pthread --with-cuda] */
139   /*  export OMP_PROC_BIND="threads"  export OMP_PROC_BIND="spread" */
140 
141   if (!getenv("KOKKOS_NUM_THREADS")) setenv("KOKKOS_NUM_THREADS","4",1);
142 
143   if (view_kokkos_configuration) {
144     Kokkos::print_configuration(std::cout, true);
145   }
146 
147   /* introduce a view object; reference like object  */
148   Kokkos::Experimental::OffsetView<PetscScalar*> xFF(Kokkos::View<PetscScalar*>(FF,xm),{xs}), xUU(Kokkos::View<PetscScalar*>(UU,xm),{xs});
149 
150   PetscReal xpbase = xs*ctx.h;
151   Kokkos:: parallel_for (Kokkos::RangePolicy<> (xs,xm), KOKKOS_LAMBDA ( int j) {
152     PetscReal xp = xpbase + j*ctx.h;
153     xFF(j) = 6.0*xp + PetscPowScalar(xp+1.e-12,6.0); /* +1.e-12 is to prevent 0^6 */
154     xUU(j) = xp*xp*xp;
155   });
156 
157   /*
158      Restore vectors
159   */
160   ierr = VecRestoreArrayWrite(F,&FF);CHKERRQ(ierr);
161   ierr = VecRestoreArrayWrite(U,&UU);CHKERRQ(ierr);
162   if (viewinitial) {
163     ierr = VecView(U,NULL);CHKERRQ(ierr);
164     ierr = VecView(F,NULL);CHKERRQ(ierr);
165   }
166 
167   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
168      Evaluate initial guess; then solve nonlinear system
169    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
170 
171   /*
172      Note: The user should initialize the vector, x, with the initial guess
173      for the nonlinear solver prior to calling SNESSolve().  In particular,
174      to employ an initial guess of zero, the user should explicitly set
175      this vector to zero by calling VecSet().
176   */
177   ierr = FormInitialGuess(x);CHKERRQ(ierr);
178   ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr);
179   ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr);
180   ierr = PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its);CHKERRQ(ierr);
181 
182   /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
183      Check solution and clean up
184    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
185 
186   /*
187      Check the error
188   */
189   ierr = VecAXPY(x,none,U);CHKERRQ(ierr);
190   ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr);
191   ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)norm,its);CHKERRQ(ierr);
192 
193   /*
194      Free work space.  All PETSc objects should be destroyed when they
195      are no longer needed.
196   */
197   ierr = VecDestroy(&x);CHKERRQ(ierr);
198   ierr = VecDestroy(&r);CHKERRQ(ierr);
199   ierr = VecDestroy(&U);CHKERRQ(ierr);
200   ierr = VecDestroy(&F);CHKERRQ(ierr);
201   ierr = MatDestroy(&J);CHKERRQ(ierr);
202   ierr = SNESDestroy(&snes);CHKERRQ(ierr);
203   ierr = DMDestroy(&ctx.da);CHKERRQ(ierr);
204   ierr = PetscFinalize();
205   return ierr;
206 }
207 
208 /* ------------------------------------------------------------------- */
209 /*
210    FormInitialGuess - Computes initial guess.
211 
212    Input/Output Parameter:
213 .  x - the solution vector
214 */
215 PetscErrorCode FormInitialGuess(Vec x)
216 {
217   PetscErrorCode ierr;
218   PetscScalar    pfive = .50;
219 
220   PetscFunctionBeginUser;
221   ierr = VecSet(x,pfive);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 /* ------------------------------------------------------------------- */
226 /*
227    FormFunction - Evaluates nonlinear function, F(x).
228 
229    Input Parameters:
230 .  snes - the SNES context
231 .  x - input vector
232 .  ctx - optional user-defined context, as set by SNESSetFunction()
233 
234    Output Parameter:
235 .  f - function vector
236 
237    Note:
238    The user-defined context can contain any application-specific
239    data needed for the function evaluation.
240 */
241 PetscErrorCode FormFunction(SNES snes,Vec x,Vec f,void *ctx)
242 {
243   ApplicationCtx *user = (ApplicationCtx*) ctx;
244   DM             da    = user->da;
245   PetscScalar    *xx,*ff,*FF,d;
246   PetscErrorCode ierr;
247   PetscInt       i,M,xs,xm;
248   Vec            xlocal;
249 
250   PetscFunctionBeginUser;
251   ierr = DMGetLocalVector(da,&xlocal);CHKERRQ(ierr);
252   /*
253      Scatter ghost points to local vector, using the 2-step process
254         DMGlobalToLocalBegin(), DMGlobalToLocalEnd().
255      By placing code between these two statements, computations can
256      be done while messages are in transition.
257   */
258   ierr = DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
259   ierr = DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
260 
261   /*
262      Get pointers to vector data.
263        - The vector xlocal includes ghost point; the vectors x and f do
264          NOT include ghost points.
265        - Using DMDAVecGetArray() allows accessing the values using global ordering
266   */
267   ierr = DMDAVecGetArray(da,xlocal,&xx);CHKERRQ(ierr);
268   ierr = DMDAVecGetArray(da,f,&ff);CHKERRQ(ierr);
269   ierr = DMDAVecGetArray(da,user->F,&FF);CHKERRQ(ierr);
270 
271   /*
272      Get local grid boundaries (for 1-dimensional DMDA):
273        xs, xm  - starting grid index, width of local grid (no ghost points)
274   */
275   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
276   ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
277 
278   /*
279      Set function values for boundary points; define local interior grid point range:
280         xsi - starting interior grid index
281         xei - ending interior grid index
282   */
283   if (xs == 0) { /* left boundary */
284     ff[0] = xx[0];
285     xs++;xm--;
286   }
287   if (xs+xm == M) {  /* right boundary */
288     ff[xs+xm-1] = xx[xs+xm-1] - 1.0;
289     xm--;
290   }
291 
292   /*
293      Compute function over locally owned part of the grid (interior points only)
294   */
295   d = 1.0/(user->h*user->h);
296   for (i=xs; i<xs+xm; i++) ff[i] = d*(xx[i-1] - 2.0*xx[i] + xx[i+1]) + xx[i]*xx[i] - FF[i];
297 
298   /*
299      Restore vectors
300   */
301   ierr = DMDAVecRestoreArray(da,xlocal,&xx);CHKERRQ(ierr);
302   ierr = DMDAVecRestoreArray(da,f,&ff);CHKERRQ(ierr);
303   ierr = DMDAVecRestoreArray(da,user->F,&FF);CHKERRQ(ierr);
304   ierr = DMRestoreLocalVector(da,&xlocal);CHKERRQ(ierr);
305   PetscFunctionReturn(0);
306 }
307 
308 /* ------------------------------------------------------------------- */
309 /*
310    FormJacobian - Evaluates Jacobian matrix.
311 
312    Input Parameters:
313 .  snes - the SNES context
314 .  x - input vector
315 .  dummy - optional user-defined context (not used here)
316 
317    Output Parameters:
318 .  jac - Jacobian matrix
319 .  B - optionally different preconditioning matrix
320 .  flag - flag indicating matrix structure
321 */
322 PetscErrorCode FormJacobian(SNES snes,Vec x,Mat jac,Mat B,void *ctx)
323 {
324   ApplicationCtx *user = (ApplicationCtx*) ctx;
325   PetscScalar    *xx,d,A[3];
326   PetscErrorCode ierr;
327   PetscInt       i,j[3],M,xs,xm;
328   DM             da = user->da;
329 
330   PetscFunctionBeginUser;
331   /*
332      Get pointer to vector data
333   */
334   ierr = DMDAVecGetArrayRead(da,x,&xx);CHKERRQ(ierr);
335   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
336 
337   /*
338     Get range of locally owned matrix
339   */
340   ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr);
341 
342   /*
343      Determine starting and ending local indices for interior grid points.
344      Set Jacobian entries for boundary points.
345   */
346 
347   if (xs == 0) {  /* left boundary */
348     i = 0; A[0] = 1.0;
349 
350     ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr);
351     xs++;xm--;
352   }
353   if (xs+xm == M) { /* right boundary */
354     i    = M-1;
355     A[0] = 1.0;
356     ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr);
357     xm--;
358   }
359 
360   /*
361      Interior grid points
362       - Note that in this case we set all elements for a particular
363         row at once.
364   */
365   d = 1.0/(user->h*user->h);
366   for (i=xs; i<xs+xm; i++) {
367     j[0] = i - 1; j[1] = i; j[2] = i + 1;
368     A[0] = A[2] = d; A[1] = -2.0*d + 2.0*xx[i];
369     ierr = MatSetValues(jac,1,&i,3,j,A,INSERT_VALUES);CHKERRQ(ierr);
370   }
371 
372   /*
373      Assemble matrix, using the 2-step process:
374        MatAssemblyBegin(), MatAssemblyEnd().
375      By placing code between these two statements, computations can be
376      done while messages are in transition.
377 
378      Also, restore vector.
379   */
380 
381   ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
382   ierr = DMDAVecRestoreArrayRead(da,x,&xx);CHKERRQ(ierr);
383   ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
384 
385   PetscFunctionReturn(0);
386 }
387 
388 /*
389      The first test works for Kokkos wtih OpenMP and PThreads, the second with CUDA.
390 
391      The second test requires -dm_vec_type cuda and -vec_pinned_memory_min 0 because Kokkos
392      assumes all memory it is given is allocated with cudaMallocHost() and can be used with
393      cudaMemcpy() without indicating it the memory is GPU or CPU. Note this is NOT unified memory.
394 
395      The third test requires -dm_mat_type aijcusparse to create the correct vectors for block Jacobi
396 */
397 
398 /*TEST
399 
400    build:
401      requires: kokkos
402 
403    test:
404      requires: kokkos double !complex !single !cuda
405      args: -view_initial -view_kokkos_configuration false -snes_monitor
406      filter: grep -v type:
407 
408    test:
409      suffix: 2
410      requires: kokkos double !complex !single cuda
411      args: -dm_vec_type cuda -vec_pinned_memory_min 0 -view_initial -view_kokkos_configuration false  -snes_monitor
412      output_file: output/ex3k_1.out
413      filter: grep -v type:
414 
415    test:
416      suffix: 3
417      requires: kokkos double !complex !single define(PETSC_HAVE_cusparseCreateSolveAnalysisInfo) cuda
418      nsize: 2
419      args: -dm_vec_type cuda -dm_mat_type aijcusparse -vec_pinned_memory_min 0 -view_initial -view_kokkos_configuration false  -snes_monitor
420      output_file: output/ex3k_1.out
421      filter: grep -v type:
422 
423 TEST*/
424