xref: /petsc/src/snes/tutorials/ex47cu.cu (revision d5b43468fb8780a8feea140ccd6fa3e6a50411cc)
1 static char help[] = "Solves -Laplacian u - exp(u) = 0,  0 < x < 1 using GPU\n\n";
2 /*
3    Same as ex47.c except it also uses the GPU to evaluate the function
4 */
5 
6 #include <petscdm.h>
7 #include <petscdmda.h>
8 #include <petscsnes.h>
9 
10 #include <thrust/device_ptr.h>
11 #include <thrust/for_each.h>
12 #include <thrust/tuple.h>
13 #include <thrust/iterator/constant_iterator.h>
14 #include <thrust/iterator/counting_iterator.h>
15 #include <thrust/iterator/zip_iterator.h>
16 
17 extern PetscErrorCode ComputeFunction(SNES, Vec, Vec, void *), ComputeJacobian(SNES, Vec, Mat, Mat, void *);
18 PetscBool             useCUDA = PETSC_FALSE;
19 
20 int main(int argc, char **argv)
21 {
22   SNES      snes;
23   Vec       x, f;
24   Mat       J;
25   DM        da;
26   char     *tmp, typeName[256];
27   PetscBool flg;
28 
29   PetscFunctionBeginUser;
30   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
31   PetscCall(PetscOptionsGetString(NULL, NULL, "-dm_vec_type", typeName, sizeof(typeName), &flg));
32   if (flg) {
33     PetscCall(PetscStrstr(typeName, "cuda", &tmp));
34     if (tmp) useCUDA = PETSC_TRUE;
35   }
36 
37   PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, 8, 1, 1, NULL, &da));
38   PetscCall(DMSetFromOptions(da));
39   PetscCall(DMSetUp(da));
40   PetscCall(DMCreateGlobalVector(da, &x));
41   PetscCall(VecDuplicate(x, &f));
42   PetscCall(DMCreateMatrix(da, &J));
43 
44   PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
45   PetscCall(SNESSetFunction(snes, f, ComputeFunction, da));
46   PetscCall(SNESSetJacobian(snes, J, J, ComputeJacobian, da));
47   PetscCall(SNESSetFromOptions(snes));
48   PetscCall(SNESSolve(snes, NULL, x));
49 
50   PetscCall(MatDestroy(&J));
51   PetscCall(VecDestroy(&x));
52   PetscCall(VecDestroy(&f));
53   PetscCall(SNESDestroy(&snes));
54   PetscCall(DMDestroy(&da));
55 
56   PetscCall(PetscFinalize());
57   return 0;
58 }
59 
60 struct ApplyStencil {
61   template <typename Tuple>
62   __host__ __device__ void operator()(Tuple t)
63   {
64     /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */
65     thrust::get<0>(t) = 1;
66     if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t) - 1)) {
67       thrust::get<0>(t) = (((PetscScalar)2.0) * thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t)) * exp(thrust::get<1>(t));
68     } else if (thrust::get<4>(t) == 0) {
69       thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
70     } else if (thrust::get<4>(t) == thrust::get<5>(t) - 1) {
71       thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
72     }
73   }
74 };
75 
76 PetscErrorCode ComputeFunction(SNES snes, Vec x, Vec f, void *ctx)
77 {
78   PetscInt           i, Mx, xs, xm, xstartshift, xendshift, fstart, lsize;
79   PetscScalar       *xx, *ff, hx;
80   DM                 da = (DM)ctx;
81   Vec                xlocal;
82   PetscMPIInt        rank, size;
83   MPI_Comm           comm;
84   PetscScalar const *xarray;
85   PetscScalar       *farray;
86 
87   PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE));
88   hx = 1.0 / (PetscReal)(Mx - 1);
89   PetscCall(DMGetLocalVector(da, &xlocal));
90   PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal));
91   PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal));
92 
93   if (useCUDA) {
94     PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
95     PetscCallMPI(MPI_Comm_size(comm, &size));
96     PetscCallMPI(MPI_Comm_rank(comm, &rank));
97     PetscCall(VecCUDAGetArrayRead(xlocal, &xarray));
98     PetscCall(VecCUDAGetArrayWrite(f, &farray));
99     if (rank) xstartshift = 1;
100     else xstartshift = 0;
101     if (rank != size - 1) xendshift = 1;
102     else xendshift = 0;
103     PetscCall(VecGetOwnershipRange(f, &fstart, NULL));
104     PetscCall(VecGetLocalSize(x, &lsize));
105     // clang-format off
106     try {
107       thrust::for_each(
108         thrust::make_zip_iterator(
109           thrust::make_tuple(
110             thrust::device_ptr<PetscScalar>(farray),
111             thrust::device_ptr<const PetscScalar>(xarray + xstartshift),
112             thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1),
113             thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1),
114             thrust::counting_iterator<int>(fstart),
115             thrust::constant_iterator<int>(Mx),
116             thrust::constant_iterator<PetscScalar>(hx))),
117         thrust::make_zip_iterator(
118           thrust::make_tuple(
119             thrust::device_ptr<PetscScalar>(farray + lsize),
120             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift),
121             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1),
122             thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1),
123             thrust::counting_iterator<int>(fstart) + lsize,
124             thrust::constant_iterator<int>(Mx),
125             thrust::constant_iterator<PetscScalar>(hx))),
126         ApplyStencil());
127     }
128     // clang-format on
129     catch (char *all) {
130       PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n"));
131     }
132     PetscCall(VecCUDARestoreArrayRead(xlocal, &xarray));
133     PetscCall(VecCUDARestoreArrayWrite(f, &farray));
134   } else {
135     PetscCall(DMDAVecGetArray(da, xlocal, &xx));
136     PetscCall(DMDAVecGetArray(da, f, &ff));
137     PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
138 
139     for (i = xs; i < xs + xm; i++) {
140       if (i == 0 || i == Mx - 1) ff[i] = xx[i] / hx;
141       else ff[i] = (2.0 * xx[i] - xx[i - 1] - xx[i + 1]) / hx - hx * PetscExpScalar(xx[i]);
142     }
143     PetscCall(DMDAVecRestoreArray(da, xlocal, &xx));
144     PetscCall(DMDAVecRestoreArray(da, f, &ff));
145   }
146   PetscCall(DMRestoreLocalVector(da, &xlocal));
147   return 0;
148 }
149 PetscErrorCode ComputeJacobian(SNES snes, Vec x, Mat J, Mat B, void *ctx)
150 {
151   DM          da = (DM)ctx;
152   PetscInt    i, Mx, xm, xs;
153   PetscScalar hx, *xx;
154   Vec         xlocal;
155 
156   PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE));
157   hx = 1.0 / (PetscReal)(Mx - 1);
158   PetscCall(DMGetLocalVector(da, &xlocal));
159   PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal));
160   PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal));
161   PetscCall(DMDAVecGetArray(da, xlocal, &xx));
162   PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
163 
164   for (i = xs; i < xs + xm; i++) {
165     if (i == 0 || i == Mx - 1) {
166       PetscCall(MatSetValue(J, i, i, 1.0 / hx, INSERT_VALUES));
167     } else {
168       PetscCall(MatSetValue(J, i, i - 1, -1.0 / hx, INSERT_VALUES));
169       PetscCall(MatSetValue(J, i, i, 2.0 / hx - hx * PetscExpScalar(xx[i]), INSERT_VALUES));
170       PetscCall(MatSetValue(J, i, i + 1, -1.0 / hx, INSERT_VALUES));
171     }
172   }
173   PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
174   PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
175   PetscCall(DMDAVecRestoreArray(da, xlocal, &xx));
176   PetscCall(DMRestoreLocalVector(da, &xlocal));
177   return 0;
178 }
179 
180 /*TEST
181 
182    build:
183       requires: cuda
184 
185    testset:
186       args: -snes_monitor_short -dm_mat_type aijcusparse -dm_vec_type cuda
187       output_file: output/ex47cu_1.out
188       test:
189         suffix: 1
190         nsize:  1
191       test:
192         suffix: 2
193         nsize:  2
194 
195 TEST*/
196