1c4762a1bSJed Brown static char help[] = "Solves -Laplacian u - exp(u) = 0, 0 < x < 1 using GPU\n\n";
2c4762a1bSJed Brown /*
3c4762a1bSJed Brown Same as ex47.c except it also uses the GPU to evaluate the function
4c4762a1bSJed Brown */
5c4762a1bSJed Brown
6c4762a1bSJed Brown #include <petscdm.h>
7c4762a1bSJed Brown #include <petscdmda.h>
8c4762a1bSJed Brown #include <petscsnes.h>
9c4762a1bSJed Brown
10c4762a1bSJed Brown #include <thrust/device_ptr.h>
11c4762a1bSJed Brown #include <thrust/for_each.h>
12c4762a1bSJed Brown #include <thrust/tuple.h>
13c4762a1bSJed Brown #include <thrust/iterator/constant_iterator.h>
14c4762a1bSJed Brown #include <thrust/iterator/counting_iterator.h>
15c4762a1bSJed Brown #include <thrust/iterator/zip_iterator.h>
16c4762a1bSJed Brown
17c4762a1bSJed Brown extern PetscErrorCode ComputeFunction(SNES, Vec, Vec, void *), ComputeJacobian(SNES, Vec, Mat, Mat, void *);
18c4762a1bSJed Brown PetscBool useCUDA = PETSC_FALSE;
19c4762a1bSJed Brown
main(int argc,char ** argv)20d71ae5a4SJacob Faibussowitsch int main(int argc, char **argv)
21d71ae5a4SJacob Faibussowitsch {
22c4762a1bSJed Brown SNES snes;
23c4762a1bSJed Brown Vec x, f;
24c4762a1bSJed Brown Mat J;
25c4762a1bSJed Brown DM da;
26c4762a1bSJed Brown char *tmp, typeName[256];
27c4762a1bSJed Brown PetscBool flg;
28c4762a1bSJed Brown
29327415f7SBarry Smith PetscFunctionBeginUser;
30c8025a54SPierre Jolivet PetscCall(PetscInitialize(&argc, &argv, NULL, help));
319566063dSJacob Faibussowitsch PetscCall(PetscOptionsGetString(NULL, NULL, "-dm_vec_type", typeName, sizeof(typeName), &flg));
32c4762a1bSJed Brown if (flg) {
339566063dSJacob Faibussowitsch PetscCall(PetscStrstr(typeName, "cuda", &tmp));
34c4762a1bSJed Brown if (tmp) useCUDA = PETSC_TRUE;
35c4762a1bSJed Brown }
36c4762a1bSJed Brown
379566063dSJacob Faibussowitsch PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, 8, 1, 1, NULL, &da));
389566063dSJacob Faibussowitsch PetscCall(DMSetFromOptions(da));
399566063dSJacob Faibussowitsch PetscCall(DMSetUp(da));
40e09e07f2SBarry Smith PetscCall(DMCreateGlobalVector(da, &x));
41e09e07f2SBarry Smith PetscCall(VecDuplicate(x, &f));
429566063dSJacob Faibussowitsch PetscCall(DMCreateMatrix(da, &J));
43c4762a1bSJed Brown
449566063dSJacob Faibussowitsch PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
459566063dSJacob Faibussowitsch PetscCall(SNESSetFunction(snes, f, ComputeFunction, da));
469566063dSJacob Faibussowitsch PetscCall(SNESSetJacobian(snes, J, J, ComputeJacobian, da));
479566063dSJacob Faibussowitsch PetscCall(SNESSetFromOptions(snes));
489566063dSJacob Faibussowitsch PetscCall(SNESSolve(snes, NULL, x));
49c4762a1bSJed Brown
509566063dSJacob Faibussowitsch PetscCall(MatDestroy(&J));
519566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x));
529566063dSJacob Faibussowitsch PetscCall(VecDestroy(&f));
539566063dSJacob Faibussowitsch PetscCall(SNESDestroy(&snes));
549566063dSJacob Faibussowitsch PetscCall(DMDestroy(&da));
55c4762a1bSJed Brown
569566063dSJacob Faibussowitsch PetscCall(PetscFinalize());
57b122ec5aSJacob Faibussowitsch return 0;
58c4762a1bSJed Brown }
59c4762a1bSJed Brown
609371c9d4SSatish Balay struct ApplyStencil {
61c4762a1bSJed Brown template <typename Tuple>
operator ()ApplyStencil62d71ae5a4SJacob Faibussowitsch __host__ __device__ void operator()(Tuple t)
63d71ae5a4SJacob Faibussowitsch {
64c4762a1bSJed Brown /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */
65c4762a1bSJed Brown thrust::get<0>(t) = 1;
66c4762a1bSJed Brown if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t) - 1)) {
6788e3df94SJunchao Zhang thrust::get<0>(t) = (((PetscScalar)2.0) * thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t)) * exp(thrust::get<1>(t));
68c4762a1bSJed Brown } else if (thrust::get<4>(t) == 0) {
69c4762a1bSJed Brown thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
70c4762a1bSJed Brown } else if (thrust::get<4>(t) == thrust::get<5>(t) - 1) {
71c4762a1bSJed Brown thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t));
72c4762a1bSJed Brown }
73c4762a1bSJed Brown }
74c4762a1bSJed Brown };
75c4762a1bSJed Brown
ComputeFunction(SNES,Vec x,Vec f,PetscCtx ctx)76*2a8381b2SBarry Smith PetscErrorCode ComputeFunction(SNES, Vec x, Vec f, PetscCtx ctx)
77d71ae5a4SJacob Faibussowitsch {
78c4762a1bSJed Brown PetscInt i, Mx, xs, xm, xstartshift, xendshift, fstart, lsize;
79c4762a1bSJed Brown PetscScalar *xx, *ff, hx;
80c4762a1bSJed Brown DM da = (DM)ctx;
81c4762a1bSJed Brown Vec xlocal;
82c4762a1bSJed Brown PetscMPIInt rank, size;
83c4762a1bSJed Brown MPI_Comm comm;
84c4762a1bSJed Brown PetscScalar const *xarray;
85c4762a1bSJed Brown PetscScalar *farray;
86c4762a1bSJed Brown
873ba16761SJacob Faibussowitsch PetscFunctionBeginUser;
889566063dSJacob Faibussowitsch PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE));
89c4762a1bSJed Brown hx = 1.0 / (PetscReal)(Mx - 1);
909566063dSJacob Faibussowitsch PetscCall(DMGetLocalVector(da, &xlocal));
919566063dSJacob Faibussowitsch PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal));
929566063dSJacob Faibussowitsch PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal));
93c4762a1bSJed Brown
94c4762a1bSJed Brown if (useCUDA) {
959566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)da, &comm));
969566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size));
979566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank));
9806cf5b03SBarry Smith PetscCall(VecCUDAGetArrayRead(xlocal, &xarray));
9906cf5b03SBarry Smith PetscCall(VecCUDAGetArrayWrite(f, &farray));
100c4762a1bSJed Brown if (rank) xstartshift = 1;
101c4762a1bSJed Brown else xstartshift = 0;
102c4762a1bSJed Brown if (rank != size - 1) xendshift = 1;
103c4762a1bSJed Brown else xendshift = 0;
1049566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(f, &fstart, NULL));
1059566063dSJacob Faibussowitsch PetscCall(VecGetLocalSize(x, &lsize));
10611486bccSBarry Smith // clang-format off
107c4762a1bSJed Brown try {
108c4762a1bSJed Brown thrust::for_each(
109c4762a1bSJed Brown thrust::make_zip_iterator(
110c4762a1bSJed Brown thrust::make_tuple(
111c4762a1bSJed Brown thrust::device_ptr<PetscScalar>(farray),
112c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + xstartshift),
113c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1),
114c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1),
115c4762a1bSJed Brown thrust::counting_iterator<int>(fstart),
116c4762a1bSJed Brown thrust::constant_iterator<int>(Mx),
117c4762a1bSJed Brown thrust::constant_iterator<PetscScalar>(hx))),
118c4762a1bSJed Brown thrust::make_zip_iterator(
119c4762a1bSJed Brown thrust::make_tuple(
120c4762a1bSJed Brown thrust::device_ptr<PetscScalar>(farray + lsize),
121c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift),
122c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1),
123c4762a1bSJed Brown thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1),
124c4762a1bSJed Brown thrust::counting_iterator<int>(fstart) + lsize,
125c4762a1bSJed Brown thrust::constant_iterator<int>(Mx),
126c4762a1bSJed Brown thrust::constant_iterator<PetscScalar>(hx))),
127c4762a1bSJed Brown ApplyStencil());
128c4762a1bSJed Brown }
12911486bccSBarry Smith // clang-format on
130c4762a1bSJed Brown catch (char *all) {
1319566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n"));
132c4762a1bSJed Brown }
1339566063dSJacob Faibussowitsch PetscCall(VecCUDARestoreArrayRead(xlocal, &xarray));
1349566063dSJacob Faibussowitsch PetscCall(VecCUDARestoreArrayWrite(f, &farray));
135c4762a1bSJed Brown } else {
1369566063dSJacob Faibussowitsch PetscCall(DMDAVecGetArray(da, xlocal, &xx));
1379566063dSJacob Faibussowitsch PetscCall(DMDAVecGetArray(da, f, &ff));
1389566063dSJacob Faibussowitsch PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
139c4762a1bSJed Brown
140c4762a1bSJed Brown for (i = xs; i < xs + xm; i++) {
141c4762a1bSJed Brown if (i == 0 || i == Mx - 1) ff[i] = xx[i] / hx;
142c4762a1bSJed Brown else ff[i] = (2.0 * xx[i] - xx[i - 1] - xx[i + 1]) / hx - hx * PetscExpScalar(xx[i]);
143c4762a1bSJed Brown }
1449566063dSJacob Faibussowitsch PetscCall(DMDAVecRestoreArray(da, xlocal, &xx));
1459566063dSJacob Faibussowitsch PetscCall(DMDAVecRestoreArray(da, f, &ff));
146c4762a1bSJed Brown }
1479566063dSJacob Faibussowitsch PetscCall(DMRestoreLocalVector(da, &xlocal));
1483ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
149c4762a1bSJed Brown }
ComputeJacobian(SNES,Vec x,Mat J,Mat,PetscCtx ctx)150*2a8381b2SBarry Smith PetscErrorCode ComputeJacobian(SNES, Vec x, Mat J, Mat, PetscCtx ctx)
151d71ae5a4SJacob Faibussowitsch {
152c4762a1bSJed Brown DM da = (DM)ctx;
153c4762a1bSJed Brown PetscInt i, Mx, xm, xs;
154c4762a1bSJed Brown PetscScalar hx, *xx;
155c4762a1bSJed Brown Vec xlocal;
156c4762a1bSJed Brown
1573ba16761SJacob Faibussowitsch PetscFunctionBeginUser;
1589566063dSJacob Faibussowitsch PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE));
159c4762a1bSJed Brown hx = 1.0 / (PetscReal)(Mx - 1);
1609566063dSJacob Faibussowitsch PetscCall(DMGetLocalVector(da, &xlocal));
1619566063dSJacob Faibussowitsch PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal));
1629566063dSJacob Faibussowitsch PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal));
1639566063dSJacob Faibussowitsch PetscCall(DMDAVecGetArray(da, xlocal, &xx));
1649566063dSJacob Faibussowitsch PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL));
165c4762a1bSJed Brown
166c4762a1bSJed Brown for (i = xs; i < xs + xm; i++) {
167c4762a1bSJed Brown if (i == 0 || i == Mx - 1) {
1689566063dSJacob Faibussowitsch PetscCall(MatSetValue(J, i, i, 1.0 / hx, INSERT_VALUES));
169c4762a1bSJed Brown } else {
1709566063dSJacob Faibussowitsch PetscCall(MatSetValue(J, i, i - 1, -1.0 / hx, INSERT_VALUES));
1719566063dSJacob Faibussowitsch PetscCall(MatSetValue(J, i, i, 2.0 / hx - hx * PetscExpScalar(xx[i]), INSERT_VALUES));
1729566063dSJacob Faibussowitsch PetscCall(MatSetValue(J, i, i + 1, -1.0 / hx, INSERT_VALUES));
173c4762a1bSJed Brown }
174c4762a1bSJed Brown }
1759566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
1769566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
1779566063dSJacob Faibussowitsch PetscCall(DMDAVecRestoreArray(da, xlocal, &xx));
1789566063dSJacob Faibussowitsch PetscCall(DMRestoreLocalVector(da, &xlocal));
1793ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
180c4762a1bSJed Brown }
181c4762a1bSJed Brown
182c4762a1bSJed Brown /*TEST
183c4762a1bSJed Brown
184c4762a1bSJed Brown build:
185c4762a1bSJed Brown requires: cuda
186c4762a1bSJed Brown
187499ce956SJunchao Zhang testset:
188499ce956SJunchao Zhang args: -snes_monitor_short -dm_mat_type aijcusparse -dm_vec_type cuda
189499ce956SJunchao Zhang output_file: output/ex47cu_1.out
190c4762a1bSJed Brown test:
191499ce956SJunchao Zhang suffix: 1
192499ce956SJunchao Zhang nsize: 1
193499ce956SJunchao Zhang test:
194499ce956SJunchao Zhang suffix: 2
195499ce956SJunchao Zhang nsize: 2
196c4762a1bSJed Brown
197c4762a1bSJed Brown TEST*/
198