1 static char help[] = "Solves -Laplacian u - exp(u) = 0, 0 < x < 1 using GPU\n\n";
2 /*
3 Same as ex47.c except it also uses the GPU to evaluate the function
4 */
5
6 #include <petscdm.h>
7 #include <petscdmda.h>
8 #include <petscsnes.h>
9
10 #include <thrust/device_ptr.h>
11 #include <thrust/for_each.h>
12 #include <thrust/tuple.h>
13 #include <thrust/iterator/constant_iterator.h>
14 #include <thrust/iterator/counting_iterator.h>
15 #include <thrust/iterator/zip_iterator.h>
16
17 extern PetscErrorCode ComputeFunction(SNES, Vec, Vec, void *), ComputeJacobian(SNES, Vec, Mat, Mat, void *);
18 PetscBool useCUDA = PETSC_FALSE;
19
main(int argc,char ** argv)20 int main(int argc, char **argv)
21 {
22 SNES snes;
23 Vec x, f;
24 Mat J;
25 DM da;
26 char *tmp, typeName[256];
27 PetscBool flg;
28
29 PetscFunctionBeginUser;
30 PetscCall(PetscInitialize(&argc, &argv, NULL, help));
31 PetscCall(PetscOptionsGetString(NULL, NULL, "-dm_vec_type", typeName, sizeof(typeName), &flg));
32 if (flg) {
33 PetscCall(PetscStrstr(typeName, "cuda", &tmp));
34 if (tmp) useCUDA = PETSC_TRUE;
35 }
36
37 PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, 8, 1, 1, NULL, &da));
38 PetscCall(DMSetFromOptions(da));
39 PetscCall(DMSetUp(da));
40 PetscCall(DMCreateGlobalVector(da, &x));
41 PetscCall(VecDuplicate(x, &f));
42 PetscCall(DMCreateMatrix(da, &J));
43
44 PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
45 PetscCall(SNESSetFunction(snes, f, ComputeFunction, da));
46 PetscCall(SNESSetJacobian(snes, J, J, ComputeJacobian, da));
47 PetscCall(SNESSetFromOptions(snes));
48 PetscCall(SNESSolve(snes, NULL, x));
49
50 PetscCall(MatDestroy(&J));
51 PetscCall(VecDestroy(&x));
52 PetscCall(VecDestroy(&f));
53 PetscCall(SNESDestroy(&snes));
54 PetscCall(DMDestroy(&da));
55
56 PetscCall(PetscFinalize());
57 return 0;
58 }
59
60 struct ApplyStencil {
61 template <typename Tuple>
operator ()ApplyStencil62 __host__ __device__ void operator()(Tuple t)
63 {
64 /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */
65 thrust::get<0>(t) = 1;
66 if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t) - 1)) {
67 thrust::get<0>(t) = (((PetscScalar)2.0) * thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t)) * exp(thrust::get<1>(t));
68 } else if (thrust::get<4>(t) == 0) {
69 thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
70 } else if (thrust::get<4>(t) == thrust::get<5>(t) - 1) {
71 thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
72 }
73 }
74 };
75
ComputeFunction(SNES,Vec x,Vec f,PetscCtx ctx)76 PetscErrorCode ComputeFunction(SNES, Vec x, Vec f, PetscCtx ctx)
77 {
78 PetscInt i, Mx, xs, xm, xstartshift, xendshift, fstart, lsize;
79 PetscScalar *xx, *ff, hx;
80 DM da = (DM)ctx;
81 Vec xlocal;
82 PetscMPIInt rank, size;
83 MPI_Comm comm;
84 PetscScalar const *xarray;
85 PetscScalar *farray;
86
87 PetscFunctionBeginUser;
88 PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE));
89 hx = 1.0 / (PetscReal)(Mx - 1);
90 PetscCall(DMGetLocalVector(da, &xlocal));
91 PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal));
92 PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal));
93
94 if (useCUDA) {
95 PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
96 PetscCallMPI(MPI_Comm_size(comm, &size));
97 PetscCallMPI(MPI_Comm_rank(comm, &rank));
98 PetscCall(VecCUDAGetArrayRead(xlocal, &xarray));
99 PetscCall(VecCUDAGetArrayWrite(f, &farray));
100 if (rank) xstartshift = 1;
101 else xstartshift = 0;
102 if (rank != size - 1) xendshift = 1;
103 else xendshift = 0;
104 PetscCall(VecGetOwnershipRange(f, &fstart, NULL));
105 PetscCall(VecGetLocalSize(x, &lsize));
106 // clang-format off
107 try {
108 thrust::for_each(
109 thrust::make_zip_iterator(
110 thrust::make_tuple(
111 thrust::device_ptr<PetscScalar>(farray),
112 thrust::device_ptr<const PetscScalar>(xarray + xstartshift),
113 thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1),
114 thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1),
115 thrust::counting_iterator<int>(fstart),
116 thrust::constant_iterator<int>(Mx),
117 thrust::constant_iterator<PetscScalar>(hx))),
118 thrust::make_zip_iterator(
119 thrust::make_tuple(
120 thrust::device_ptr<PetscScalar>(farray + lsize),
121 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift),
122 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1),
123 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1),
124 thrust::counting_iterator<int>(fstart) + lsize,
125 thrust::constant_iterator<int>(Mx),
126 thrust::constant_iterator<PetscScalar>(hx))),
127 ApplyStencil());
128 }
129 // clang-format on
130 catch (char *all) {
131 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n"));
132 }
133 PetscCall(VecCUDARestoreArrayRead(xlocal, &xarray));
134 PetscCall(VecCUDARestoreArrayWrite(f, &farray));
135 } else {
136 PetscCall(DMDAVecGetArray(da, xlocal, &xx));
137 PetscCall(DMDAVecGetArray(da, f, &ff));
138 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
139
140 for (i = xs; i < xs + xm; i++) {
141 if (i == 0 || i == Mx - 1) ff[i] = xx[i] / hx;
142 else ff[i] = (2.0 * xx[i] - xx[i - 1] - xx[i + 1]) / hx - hx * PetscExpScalar(xx[i]);
143 }
144 PetscCall(DMDAVecRestoreArray(da, xlocal, &xx));
145 PetscCall(DMDAVecRestoreArray(da, f, &ff));
146 }
147 PetscCall(DMRestoreLocalVector(da, &xlocal));
148 PetscFunctionReturn(PETSC_SUCCESS);
149 }
ComputeJacobian(SNES,Vec x,Mat J,Mat,PetscCtx ctx)150 PetscErrorCode ComputeJacobian(SNES, Vec x, Mat J, Mat, PetscCtx ctx)
151 {
152 DM da = (DM)ctx;
153 PetscInt i, Mx, xm, xs;
154 PetscScalar hx, *xx;
155 Vec xlocal;
156
157 PetscFunctionBeginUser;
158 PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE));
159 hx = 1.0 / (PetscReal)(Mx - 1);
160 PetscCall(DMGetLocalVector(da, &xlocal));
161 PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal));
162 PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal));
163 PetscCall(DMDAVecGetArray(da, xlocal, &xx));
164 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
165
166 for (i = xs; i < xs + xm; i++) {
167 if (i == 0 || i == Mx - 1) {
168 PetscCall(MatSetValue(J, i, i, 1.0 / hx, INSERT_VALUES));
169 } else {
170 PetscCall(MatSetValue(J, i, i - 1, -1.0 / hx, INSERT_VALUES));
171 PetscCall(MatSetValue(J, i, i, 2.0 / hx - hx * PetscExpScalar(xx[i]), INSERT_VALUES));
172 PetscCall(MatSetValue(J, i, i + 1, -1.0 / hx, INSERT_VALUES));
173 }
174 }
175 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
176 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
177 PetscCall(DMDAVecRestoreArray(da, xlocal, &xx));
178 PetscCall(DMRestoreLocalVector(da, &xlocal));
179 PetscFunctionReturn(PETSC_SUCCESS);
180 }
181
182 /*TEST
183
184 build:
185 requires: cuda
186
187 testset:
188 args: -snes_monitor_short -dm_mat_type aijcusparse -dm_vec_type cuda
189 output_file: output/ex47cu_1.out
190 test:
191 suffix: 1
192 nsize: 1
193 test:
194 suffix: 2
195 nsize: 2
196
197 TEST*/
198