xref: /petsc/src/snes/tutorials/ex47cu.cu (revision f0fc11cebb1bb284829732915f9e84cabc170c2f)
1 static char help[] = "Solves -Laplacian u - exp(u) = 0,  0 < x < 1 using GPU\n\n";
2 /*
3    Same as ex47.c except it also uses the GPU to evaluate the function
4 */
5 
6 #include <petscdm.h>
7 #include <petscdmda.h>
8 #include <petscsnes.h>
9 
10 #include <thrust/device_ptr.h>
11 #include <thrust/for_each.h>
12 #include <thrust/tuple.h>
13 #include <thrust/iterator/constant_iterator.h>
14 #include <thrust/iterator/counting_iterator.h>
15 #include <thrust/iterator/zip_iterator.h>
16 
17 extern PetscErrorCode ComputeFunction(SNES,Vec,Vec,void*), ComputeJacobian(SNES,Vec,Mat,Mat,void*);
18 PetscBool useCUDA = PETSC_FALSE;
19 
20 int main(int argc,char **argv)
21 {
22   SNES           snes;
23   Vec            x,f;
24   Mat            J;
25   DM             da;
26   PetscErrorCode ierr;
27   char           *tmp,typeName[256];
28   PetscBool      flg;
29 
30   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
31   ierr = PetscOptionsGetString(NULL,NULL,"-dm_vec_type",typeName,256,&flg);CHKERRQ(ierr);
32   if (flg) {
33     ierr = PetscStrstr(typeName,"cuda",&tmp);CHKERRQ(ierr);
34     if (tmp) useCUDA = PETSC_TRUE;
35   }
36 
37   ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,8,1,1,NULL,&da);CHKERRQ(ierr);
38   ierr = DMSetFromOptions(da);CHKERRQ(ierr);
39   ierr = DMSetUp(da);CHKERRQ(ierr);
40   ierr = DMCreateGlobalVector(da,&x); VecDuplicate(x,&f);CHKERRQ(ierr);
41   ierr = DMSetMatType(da,MATAIJ);CHKERRQ(ierr);
42   ierr = DMCreateMatrix(da,&J);CHKERRQ(ierr);
43 
44   ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr);
45   ierr = SNESSetFunction(snes,f,ComputeFunction,da);CHKERRQ(ierr);
46   ierr = SNESSetJacobian(snes,J,J,ComputeJacobian,da);CHKERRQ(ierr);
47   ierr = SNESSetFromOptions(snes);CHKERRQ(ierr);
48   ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr);
49 
50   ierr = MatDestroy(&J);CHKERRQ(ierr);
51   ierr = VecDestroy(&x);CHKERRQ(ierr);
52   ierr = VecDestroy(&f);CHKERRQ(ierr);
53   ierr = SNESDestroy(&snes);CHKERRQ(ierr);
54   ierr = DMDestroy(&da);CHKERRQ(ierr);
55 
56   ierr = PetscFinalize();
57   return ierr;
58 }
59 
60 struct ApplyStencil
61 {
62   template <typename Tuple>
63   __host__ __device__
64   void operator()(Tuple t)
65   {
66     /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */
67     thrust::get<0>(t) = 1;
68     if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t)-1)) {
69       thrust::get<0>(t) = (2.0*thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t))*exp(thrust::get<1>(t));
70     } else if (thrust::get<4>(t) == 0) {
71       thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
72     } else if (thrust::get<4>(t) == thrust::get<5>(t)-1) {
73       thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
74     }
75   }
76 };
77 
78 PetscErrorCode ComputeFunction(SNES snes,Vec x,Vec f,void *ctx)
79 {
80   PetscInt          i,Mx,xs,xm,xstartshift,xendshift,fstart,lsize;
81   PetscScalar       *xx,*ff,hx;
82   DM                da = (DM) ctx;
83   Vec               xlocal;
84   PetscErrorCode    ierr;
85   PetscMPIInt       rank,size;
86   MPI_Comm          comm;
87   PetscScalar const *xarray;
88   PetscScalar       *farray;
89 
90   ierr = DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);CHKERRQ(ierr);
91   hx   = 1.0/(PetscReal)(Mx-1);
92   ierr = DMGetLocalVector(da,&xlocal);CHKERRQ(ierr);
93   ierr = DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
94   ierr = DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
95 
96   if (useCUDA) {
97     ierr = VecCUDAGetArrayRead(xlocal,&xarray);CHKERRQ(ierr);
98     ierr = VecCUDAGetArrayWrite(f,&farray);CHKERRQ(ierr);
99     ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr);
100     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
101     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
102     if (rank) xstartshift = 1;
103     else xstartshift = 0;
104     if (rank != size-1) xendshift = 1;
105     else xendshift = 0;
106     ierr = VecGetOwnershipRange(f,&fstart,NULL);CHKERRQ(ierr);
107     ierr = VecGetLocalSize(x,&lsize);CHKERRQ(ierr);
108     try {
109       thrust::for_each(
110         thrust::make_zip_iterator(
111           thrust::make_tuple(
112             thrust::device_ptr<PetscScalar>(farray),
113             thrust::device_ptr<const PetscScalar>(xarray + xstartshift),
114             thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1),
115             thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1),
116             thrust::counting_iterator<int>(fstart),
117             thrust::constant_iterator<int>(Mx),
118             thrust::constant_iterator<PetscScalar>(hx))),
119         thrust::make_zip_iterator(
120           thrust::make_tuple(
121             thrust::device_ptr<PetscScalar>(farray + lsize),
122             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift),
123             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1),
124             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1),
125             thrust::counting_iterator<int>(fstart) + lsize,
126             thrust::constant_iterator<int>(Mx),
127             thrust::constant_iterator<PetscScalar>(hx))),
128         ApplyStencil());
129     }
130     catch (char *all) {
131       ierr = PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n");CHKERRQ(ierr);
132     }
133     ierr = VecCUDARestoreArrayRead(xlocal,&xarray);CHKERRQ(ierr);
134     ierr = VecCUDARestoreArrayWrite(f,&farray);CHKERRQ(ierr);
135   } else {
136     ierr = DMDAVecGetArray(da,xlocal,&xx);CHKERRQ(ierr);
137     ierr = DMDAVecGetArray(da,f,&ff);CHKERRQ(ierr);
138     ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
139 
140     for (i=xs; i<xs+xm; i++) {
141       if (i == 0 || i == Mx-1) ff[i] = xx[i]/hx;
142       else ff[i] =  (2.0*xx[i] - xx[i-1] - xx[i+1])/hx - hx*PetscExpScalar(xx[i]);
143     }
144     ierr = DMDAVecRestoreArray(da,xlocal,&xx);CHKERRQ(ierr);
145     ierr = DMDAVecRestoreArray(da,f,&ff);CHKERRQ(ierr);
146   }
147   ierr = DMRestoreLocalVector(da,&xlocal);CHKERRQ(ierr);
148   //  VecView(x,0);printf("f\n");
149   //  VecView(f,0);
150   return 0;
151 
152 }
153 PetscErrorCode ComputeJacobian(SNES snes,Vec x,Mat J,Mat B,void *ctx)
154 {
155   DM             da = (DM) ctx;
156   PetscInt       i,Mx,xm,xs;
157   PetscScalar    hx,*xx;
158   Vec            xlocal;
159   PetscErrorCode ierr;
160 
161   ierr = DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);CHKERRQ(ierr);
162   hx   = 1.0/(PetscReal)(Mx-1);
163   ierr = DMGetLocalVector(da,&xlocal);DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
164   ierr = DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
165   ierr = DMDAVecGetArray(da,xlocal,&xx);CHKERRQ(ierr);
166   ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);
167 
168   for (i=xs; i<xs+xm; i++) {
169     if (i == 0 || i == Mx-1) {
170       ierr = MatSetValue(J,i,i,1.0/hx,INSERT_VALUES);CHKERRQ(ierr);
171     } else {
172       ierr = MatSetValue(J,i,i-1,-1.0/hx,INSERT_VALUES);CHKERRQ(ierr);
173       ierr = MatSetValue(J,i,i,2.0/hx - hx*PetscExpScalar(xx[i]),INSERT_VALUES);CHKERRQ(ierr);
174       ierr = MatSetValue(J,i,i+1,-1.0/hx,INSERT_VALUES);CHKERRQ(ierr);
175     }
176   }
177   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
178   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
179   ierr  = DMDAVecRestoreArray(da,xlocal,&xx);CHKERRQ(ierr);
180   ierr  = DMRestoreLocalVector(da,&xlocal);CHKERRQ(ierr);
181   return 0;
182 }
183 
184 
185 
186 /*TEST
187 
188    build:
189       requires: cuda
190 
191    test:
192       args: -snes_monitor_short -dm_vec_type cuda
193 
194 TEST*/
195