xref: /petsc/src/snes/tutorials/ex47cu.cu (revision ebead697dbf761eb322f829370bbe90b3bd93fa3)
1 static char help[] = "Solves -Laplacian u - exp(u) = 0,  0 < x < 1 using GPU\n\n";
2 /*
3    Same as ex47.c except it also uses the GPU to evaluate the function
4 */
5 
6 #include <petscdm.h>
7 #include <petscdmda.h>
8 #include <petscsnes.h>
9 
10 #include <thrust/device_ptr.h>
11 #include <thrust/for_each.h>
12 #include <thrust/tuple.h>
13 #include <thrust/iterator/constant_iterator.h>
14 #include <thrust/iterator/counting_iterator.h>
15 #include <thrust/iterator/zip_iterator.h>
16 
17 extern PetscErrorCode ComputeFunction(SNES,Vec,Vec,void*), ComputeJacobian(SNES,Vec,Mat,Mat,void*);
18 PetscBool useCUDA = PETSC_FALSE;
19 
20 int main(int argc,char **argv)
21 {
22   SNES           snes;
23   Vec            x,f;
24   Mat            J;
25   DM             da;
26   char           *tmp,typeName[256];
27   PetscBool      flg;
28 
29   PetscFunctionBeginUser;
30   PetscCall(PetscInitialize(&argc,&argv,(char*)0,help));
31   PetscCall(PetscOptionsGetString(NULL,NULL,"-dm_vec_type",typeName,sizeof(typeName),&flg));
32   if (flg) {
33     PetscCall(PetscStrstr(typeName,"cuda",&tmp));
34     if (tmp) useCUDA = PETSC_TRUE;
35   }
36 
37   PetscCall(DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,8,1,1,NULL,&da));
38   PetscCall(DMSetFromOptions(da));
39   PetscCall(DMSetUp(da));
40   PetscCall(DMCreateGlobalVector(da,&x); VecDuplicate(x,&f));
41   PetscCall(DMCreateMatrix(da,&J));
42 
43   PetscCall(SNESCreate(PETSC_COMM_WORLD,&snes));
44   PetscCall(SNESSetFunction(snes,f,ComputeFunction,da));
45   PetscCall(SNESSetJacobian(snes,J,J,ComputeJacobian,da));
46   PetscCall(SNESSetFromOptions(snes));
47   PetscCall(SNESSolve(snes,NULL,x));
48 
49   PetscCall(MatDestroy(&J));
50   PetscCall(VecDestroy(&x));
51   PetscCall(VecDestroy(&f));
52   PetscCall(SNESDestroy(&snes));
53   PetscCall(DMDestroy(&da));
54 
55   PetscCall(PetscFinalize());
56   return 0;
57 }
58 
59 struct ApplyStencil
60 {
61   template <typename Tuple>
62   __host__ __device__
63   void operator()(Tuple t)
64   {
65     /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */
66     thrust::get<0>(t) = 1;
67     if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t)-1)) {
68       thrust::get<0>(t) = (((PetscScalar)2.0)*thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t))*exp(thrust::get<1>(t));
69     } else if (thrust::get<4>(t) == 0) {
70       thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
71     } else if (thrust::get<4>(t) == thrust::get<5>(t)-1) {
72       thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
73     }
74   }
75 };
76 
77 PetscErrorCode ComputeFunction(SNES snes,Vec x,Vec f,void *ctx)
78 {
79   PetscInt          i,Mx,xs,xm,xstartshift,xendshift,fstart,lsize;
80   PetscScalar       *xx,*ff,hx;
81   DM                da = (DM) ctx;
82   Vec               xlocal;
83   PetscMPIInt       rank,size;
84   MPI_Comm          comm;
85   PetscScalar const *xarray;
86   PetscScalar       *farray;
87 
88   PetscCall(DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE));
89   hx   = 1.0/(PetscReal)(Mx-1);
90   PetscCall(DMGetLocalVector(da,&xlocal));
91   PetscCall(DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal));
92   PetscCall(DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal));
93 
94   if (useCUDA) {
95     PetscCall(PetscObjectGetComm((PetscObject)da,&comm));
96     PetscCallMPI(MPI_Comm_size(comm,&size));
97     PetscCallMPI(MPI_Comm_rank(comm,&rank));
98     PetscCall(VecCUDAGetArrayRead(xlocal,&xarray));
99     PetscCall(VecCUDAGetArrayWrite(f,&farray));
100     if (rank) xstartshift = 1;
101     else xstartshift = 0;
102     if (rank != size-1) xendshift = 1;
103     else xendshift = 0;
104     PetscCall(VecGetOwnershipRange(f,&fstart,NULL));
105     PetscCall(VecGetLocalSize(x,&lsize));
106     try {
107       thrust::for_each(
108         thrust::make_zip_iterator(
109           thrust::make_tuple(
110             thrust::device_ptr<PetscScalar>(farray),
111             thrust::device_ptr<const PetscScalar>(xarray + xstartshift),
112             thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1),
113             thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1),
114             thrust::counting_iterator<int>(fstart),
115             thrust::constant_iterator<int>(Mx),
116             thrust::constant_iterator<PetscScalar>(hx))),
117         thrust::make_zip_iterator(
118           thrust::make_tuple(
119             thrust::device_ptr<PetscScalar>(farray + lsize),
120             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift),
121             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1),
122             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1),
123             thrust::counting_iterator<int>(fstart) + lsize,
124             thrust::constant_iterator<int>(Mx),
125             thrust::constant_iterator<PetscScalar>(hx))),
126         ApplyStencil());
127     }
128     catch (char *all) {
129       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n"));
130     }
131     PetscCall(VecCUDARestoreArrayRead(xlocal,&xarray));
132     PetscCall(VecCUDARestoreArrayWrite(f,&farray));
133   } else {
134     PetscCall(DMDAVecGetArray(da,xlocal,&xx));
135     PetscCall(DMDAVecGetArray(da,f,&ff));
136     PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL));
137 
138     for (i=xs; i<xs+xm; i++) {
139       if (i == 0 || i == Mx-1) ff[i] = xx[i]/hx;
140       else ff[i] =  (2.0*xx[i] - xx[i-1] - xx[i+1])/hx - hx*PetscExpScalar(xx[i]);
141     }
142     PetscCall(DMDAVecRestoreArray(da,xlocal,&xx));
143     PetscCall(DMDAVecRestoreArray(da,f,&ff));
144   }
145   PetscCall(DMRestoreLocalVector(da,&xlocal));
146   return 0;
147 
148 }
149 PetscErrorCode ComputeJacobian(SNES snes,Vec x,Mat J,Mat B,void *ctx)
150 {
151   DM             da = (DM) ctx;
152   PetscInt       i,Mx,xm,xs;
153   PetscScalar    hx,*xx;
154   Vec            xlocal;
155 
156   PetscCall(DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE));
157   hx   = 1.0/(PetscReal)(Mx-1);
158   PetscCall(DMGetLocalVector(da,&xlocal));
159   PetscCall(DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal));
160   PetscCall(DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal));
161   PetscCall(DMDAVecGetArray(da,xlocal,&xx));
162   PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL));
163 
164   for (i=xs; i<xs+xm; i++) {
165     if (i == 0 || i == Mx-1) {
166       PetscCall(MatSetValue(J,i,i,1.0/hx,INSERT_VALUES));
167     } else {
168       PetscCall(MatSetValue(J,i,i-1,-1.0/hx,INSERT_VALUES));
169       PetscCall(MatSetValue(J,i,i,2.0/hx - hx*PetscExpScalar(xx[i]),INSERT_VALUES));
170       PetscCall(MatSetValue(J,i,i+1,-1.0/hx,INSERT_VALUES));
171     }
172   }
173   PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY));
174   PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY));
175   PetscCall(DMDAVecRestoreArray(da,xlocal,&xx));
176   PetscCall(DMRestoreLocalVector(da,&xlocal));
177   return 0;
178 }
179 
180 /*TEST
181 
182    build:
183       requires: cuda
184 
185    testset:
186       args: -snes_monitor_short -dm_mat_type aijcusparse -dm_vec_type cuda
187       output_file: output/ex47cu_1.out
188       test:
189         suffix: 1
190         nsize:  1
191       test:
192         suffix: 2
193         nsize:  2
194 
195 TEST*/
196