1 static char help[] = "Solves -Laplacian u - exp(u) = 0, 0 < x < 1 using GPU\n\n"; 2 /* 3 Same as ex47.c except it also uses the GPU to evaluate the function 4 */ 5 6 #include <petscdm.h> 7 #include <petscdmda.h> 8 #include <petscsnes.h> 9 10 #include <thrust/device_ptr.h> 11 #include <thrust/for_each.h> 12 #include <thrust/tuple.h> 13 #include <thrust/iterator/constant_iterator.h> 14 #include <thrust/iterator/counting_iterator.h> 15 #include <thrust/iterator/zip_iterator.h> 16 17 extern PetscErrorCode ComputeFunction(SNES, Vec, Vec, void *), ComputeJacobian(SNES, Vec, Mat, Mat, void *); 18 PetscBool useCUDA = PETSC_FALSE; 19 20 int main(int argc, char **argv) { 21 SNES snes; 22 Vec x, f; 23 Mat J; 24 DM da; 25 char *tmp, typeName[256]; 26 PetscBool flg; 27 28 PetscFunctionBeginUser; 29 PetscCall(PetscInitialize(&argc, &argv, (char *)0, help)); 30 PetscCall(PetscOptionsGetString(NULL, NULL, "-dm_vec_type", typeName, sizeof(typeName), &flg)); 31 if (flg) { 32 PetscCall(PetscStrstr(typeName, "cuda", &tmp)); 33 if (tmp) useCUDA = PETSC_TRUE; 34 } 35 36 PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, 8, 1, 1, NULL, &da)); 37 PetscCall(DMSetFromOptions(da)); 38 PetscCall(DMSetUp(da)); 39 PetscCall(DMCreateGlobalVector(da, &x)); 40 PetscCall(VecDuplicate(x, &f)); 41 PetscCall(DMCreateMatrix(da, &J)); 42 43 PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes)); 44 PetscCall(SNESSetFunction(snes, f, ComputeFunction, da)); 45 PetscCall(SNESSetJacobian(snes, J, J, ComputeJacobian, da)); 46 PetscCall(SNESSetFromOptions(snes)); 47 PetscCall(SNESSolve(snes, NULL, x)); 48 49 PetscCall(MatDestroy(&J)); 50 PetscCall(VecDestroy(&x)); 51 PetscCall(VecDestroy(&f)); 52 PetscCall(SNESDestroy(&snes)); 53 PetscCall(DMDestroy(&da)); 54 55 PetscCall(PetscFinalize()); 56 return 0; 57 } 58 59 struct ApplyStencil { 60 template <typename Tuple> 61 __host__ __device__ void operator()(Tuple t) { 62 /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */ 63 thrust::get<0>(t) = 1; 64 if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t) - 1)) { 65 thrust::get<0>(t) = (((PetscScalar)2.0) * thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t)) * exp(thrust::get<1>(t)); 66 } else if (thrust::get<4>(t) == 0) { 67 thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t)); 68 } else if (thrust::get<4>(t) == thrust::get<5>(t) - 1) { 69 thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t)); 70 } 71 } 72 }; 73 74 PetscErrorCode ComputeFunction(SNES snes, Vec x, Vec f, void *ctx) { 75 PetscInt i, Mx, xs, xm, xstartshift, xendshift, fstart, lsize; 76 PetscScalar *xx, *ff, hx; 77 DM da = (DM)ctx; 78 Vec xlocal; 79 PetscMPIInt rank, size; 80 MPI_Comm comm; 81 PetscScalar const *xarray; 82 PetscScalar *farray; 83 84 PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE)); 85 hx = 1.0 / (PetscReal)(Mx - 1); 86 PetscCall(DMGetLocalVector(da, &xlocal)); 87 PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal)); 88 PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal)); 89 90 if (useCUDA) { 91 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 92 PetscCallMPI(MPI_Comm_size(comm, &size)); 93 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 94 PetscCall(VecCUDAGetArrayRead(xlocal, &xarray)); 95 PetscCall(VecCUDAGetArrayWrite(f, &farray)); 96 if (rank) xstartshift = 1; 97 else xstartshift = 0; 98 if (rank != size - 1) xendshift = 1; 99 else xendshift = 0; 100 PetscCall(VecGetOwnershipRange(f, &fstart, NULL)); 101 PetscCall(VecGetLocalSize(x, &lsize)); 102 // clang-format off 103 try { 104 thrust::for_each( 105 thrust::make_zip_iterator( 106 thrust::make_tuple( 107 thrust::device_ptr<PetscScalar>(farray), 108 thrust::device_ptr<const PetscScalar>(xarray + xstartshift), 109 thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1), 110 thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1), 111 thrust::counting_iterator<int>(fstart), 112 thrust::constant_iterator<int>(Mx), 113 thrust::constant_iterator<PetscScalar>(hx))), 114 thrust::make_zip_iterator( 115 thrust::make_tuple( 116 thrust::device_ptr<PetscScalar>(farray + lsize), 117 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift), 118 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1), 119 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1), 120 thrust::counting_iterator<int>(fstart) + lsize, 121 thrust::constant_iterator<int>(Mx), 122 thrust::constant_iterator<PetscScalar>(hx))), 123 ApplyStencil()); 124 } 125 // clang-format on 126 catch (char *all) { 127 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n")); 128 } 129 PetscCall(VecCUDARestoreArrayRead(xlocal, &xarray)); 130 PetscCall(VecCUDARestoreArrayWrite(f, &farray)); 131 } else { 132 PetscCall(DMDAVecGetArray(da, xlocal, &xx)); 133 PetscCall(DMDAVecGetArray(da, f, &ff)); 134 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL)); 135 136 for (i = xs; i < xs + xm; i++) { 137 if (i == 0 || i == Mx - 1) ff[i] = xx[i] / hx; 138 else ff[i] = (2.0 * xx[i] - xx[i - 1] - xx[i + 1]) / hx - hx * PetscExpScalar(xx[i]); 139 } 140 PetscCall(DMDAVecRestoreArray(da, xlocal, &xx)); 141 PetscCall(DMDAVecRestoreArray(da, f, &ff)); 142 } 143 PetscCall(DMRestoreLocalVector(da, &xlocal)); 144 return 0; 145 } 146 PetscErrorCode ComputeJacobian(SNES snes, Vec x, Mat J, Mat B, void *ctx) { 147 DM da = (DM)ctx; 148 PetscInt i, Mx, xm, xs; 149 PetscScalar hx, *xx; 150 Vec xlocal; 151 152 PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE)); 153 hx = 1.0 / (PetscReal)(Mx - 1); 154 PetscCall(DMGetLocalVector(da, &xlocal)); 155 PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal)); 156 PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal)); 157 PetscCall(DMDAVecGetArray(da, xlocal, &xx)); 158 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL)); 159 160 for (i = xs; i < xs + xm; i++) { 161 if (i == 0 || i == Mx - 1) { 162 PetscCall(MatSetValue(J, i, i, 1.0 / hx, INSERT_VALUES)); 163 } else { 164 PetscCall(MatSetValue(J, i, i - 1, -1.0 / hx, INSERT_VALUES)); 165 PetscCall(MatSetValue(J, i, i, 2.0 / hx - hx * PetscExpScalar(xx[i]), INSERT_VALUES)); 166 PetscCall(MatSetValue(J, i, i + 1, -1.0 / hx, INSERT_VALUES)); 167 } 168 } 169 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 170 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 171 PetscCall(DMDAVecRestoreArray(da, xlocal, &xx)); 172 PetscCall(DMRestoreLocalVector(da, &xlocal)); 173 return 0; 174 } 175 176 /*TEST 177 178 build: 179 requires: cuda 180 181 testset: 182 args: -snes_monitor_short -dm_mat_type aijcusparse -dm_vec_type cuda 183 output_file: output/ex47cu_1.out 184 test: 185 suffix: 1 186 nsize: 1 187 test: 188 suffix: 2 189 nsize: 2 190 191 TEST*/ 192