1 static char help[] = "Solves -Laplacian u - exp(u) = 0, 0 < x < 1 using GPU\n\n"; 2 /* 3 Same as ex47.c except it also uses the GPU to evaluate the function 4 */ 5 6 #include <petscdm.h> 7 #include <petscdmda.h> 8 #include <petscsnes.h> 9 10 #include <thrust/device_ptr.h> 11 #include <thrust/for_each.h> 12 #include <thrust/tuple.h> 13 #include <thrust/iterator/constant_iterator.h> 14 #include <thrust/iterator/counting_iterator.h> 15 #include <thrust/iterator/zip_iterator.h> 16 17 extern PetscErrorCode ComputeFunction(SNES, Vec, Vec, void *), ComputeJacobian(SNES, Vec, Mat, Mat, void *); 18 PetscBool useCUDA = PETSC_FALSE; 19 20 int main(int argc, char **argv) 21 { 22 SNES snes; 23 Vec x, f; 24 Mat J; 25 DM da; 26 char *tmp, typeName[256]; 27 PetscBool flg; 28 29 PetscFunctionBeginUser; 30 PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 31 PetscCall(PetscOptionsGetString(NULL, NULL, "-dm_vec_type", typeName, sizeof(typeName), &flg)); 32 if (flg) { 33 PetscCall(PetscStrstr(typeName, "cuda", &tmp)); 34 if (tmp) useCUDA = PETSC_TRUE; 35 } 36 37 PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, 8, 1, 1, NULL, &da)); 38 PetscCall(DMSetFromOptions(da)); 39 PetscCall(DMSetUp(da)); 40 PetscCall(DMCreateGlobalVector(da, &x)); 41 PetscCall(VecDuplicate(x, &f)); 42 PetscCall(DMCreateMatrix(da, &J)); 43 44 PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes)); 45 PetscCall(SNESSetFunction(snes, f, ComputeFunction, da)); 46 PetscCall(SNESSetJacobian(snes, J, J, ComputeJacobian, da)); 47 PetscCall(SNESSetFromOptions(snes)); 48 PetscCall(SNESSolve(snes, NULL, x)); 49 50 PetscCall(MatDestroy(&J)); 51 PetscCall(VecDestroy(&x)); 52 PetscCall(VecDestroy(&f)); 53 PetscCall(SNESDestroy(&snes)); 54 PetscCall(DMDestroy(&da)); 55 56 PetscCall(PetscFinalize()); 57 return 0; 58 } 59 60 struct ApplyStencil { 61 template <typename Tuple> 62 __host__ __device__ void operator()(Tuple t) 63 { 64 /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */ 65 thrust::get<0>(t) = 1; 66 if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t) - 1)) { 67 thrust::get<0>(t) = (((PetscScalar)2.0) * thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t)) * exp(thrust::get<1>(t)); 68 } else if (thrust::get<4>(t) == 0) { 69 thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t)); 70 } else if (thrust::get<4>(t) == thrust::get<5>(t) - 1) { 71 thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t)); 72 } 73 } 74 }; 75 76 PetscErrorCode ComputeFunction(SNES, Vec x, Vec f, PetscCtx ctx) 77 { 78 PetscInt i, Mx, xs, xm, xstartshift, xendshift, fstart, lsize; 79 PetscScalar *xx, *ff, hx; 80 DM da = (DM)ctx; 81 Vec xlocal; 82 PetscMPIInt rank, size; 83 MPI_Comm comm; 84 PetscScalar const *xarray; 85 PetscScalar *farray; 86 87 PetscFunctionBeginUser; 88 PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE)); 89 hx = 1.0 / (PetscReal)(Mx - 1); 90 PetscCall(DMGetLocalVector(da, &xlocal)); 91 PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal)); 92 PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal)); 93 94 if (useCUDA) { 95 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 96 PetscCallMPI(MPI_Comm_size(comm, &size)); 97 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 98 PetscCall(VecCUDAGetArrayRead(xlocal, &xarray)); 99 PetscCall(VecCUDAGetArrayWrite(f, &farray)); 100 if (rank) xstartshift = 1; 101 else xstartshift = 0; 102 if (rank != size - 1) xendshift = 1; 103 else xendshift = 0; 104 PetscCall(VecGetOwnershipRange(f, &fstart, NULL)); 105 PetscCall(VecGetLocalSize(x, &lsize)); 106 // clang-format off 107 try { 108 thrust::for_each( 109 thrust::make_zip_iterator( 110 thrust::make_tuple( 111 thrust::device_ptr<PetscScalar>(farray), 112 thrust::device_ptr<const PetscScalar>(xarray + xstartshift), 113 thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1), 114 thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1), 115 thrust::counting_iterator<int>(fstart), 116 thrust::constant_iterator<int>(Mx), 117 thrust::constant_iterator<PetscScalar>(hx))), 118 thrust::make_zip_iterator( 119 thrust::make_tuple( 120 thrust::device_ptr<PetscScalar>(farray + lsize), 121 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift), 122 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1), 123 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1), 124 thrust::counting_iterator<int>(fstart) + lsize, 125 thrust::constant_iterator<int>(Mx), 126 thrust::constant_iterator<PetscScalar>(hx))), 127 ApplyStencil()); 128 } 129 // clang-format on 130 catch (char *all) { 131 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n")); 132 } 133 PetscCall(VecCUDARestoreArrayRead(xlocal, &xarray)); 134 PetscCall(VecCUDARestoreArrayWrite(f, &farray)); 135 } else { 136 PetscCall(DMDAVecGetArray(da, xlocal, &xx)); 137 PetscCall(DMDAVecGetArray(da, f, &ff)); 138 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL)); 139 140 for (i = xs; i < xs + xm; i++) { 141 if (i == 0 || i == Mx - 1) ff[i] = xx[i] / hx; 142 else ff[i] = (2.0 * xx[i] - xx[i - 1] - xx[i + 1]) / hx - hx * PetscExpScalar(xx[i]); 143 } 144 PetscCall(DMDAVecRestoreArray(da, xlocal, &xx)); 145 PetscCall(DMDAVecRestoreArray(da, f, &ff)); 146 } 147 PetscCall(DMRestoreLocalVector(da, &xlocal)); 148 PetscFunctionReturn(PETSC_SUCCESS); 149 } 150 PetscErrorCode ComputeJacobian(SNES, Vec x, Mat J, Mat, PetscCtx ctx) 151 { 152 DM da = (DM)ctx; 153 PetscInt i, Mx, xm, xs; 154 PetscScalar hx, *xx; 155 Vec xlocal; 156 157 PetscFunctionBeginUser; 158 PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE)); 159 hx = 1.0 / (PetscReal)(Mx - 1); 160 PetscCall(DMGetLocalVector(da, &xlocal)); 161 PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal)); 162 PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal)); 163 PetscCall(DMDAVecGetArray(da, xlocal, &xx)); 164 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL)); 165 166 for (i = xs; i < xs + xm; i++) { 167 if (i == 0 || i == Mx - 1) { 168 PetscCall(MatSetValue(J, i, i, 1.0 / hx, INSERT_VALUES)); 169 } else { 170 PetscCall(MatSetValue(J, i, i - 1, -1.0 / hx, INSERT_VALUES)); 171 PetscCall(MatSetValue(J, i, i, 2.0 / hx - hx * PetscExpScalar(xx[i]), INSERT_VALUES)); 172 PetscCall(MatSetValue(J, i, i + 1, -1.0 / hx, INSERT_VALUES)); 173 } 174 } 175 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 176 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 177 PetscCall(DMDAVecRestoreArray(da, xlocal, &xx)); 178 PetscCall(DMRestoreLocalVector(da, &xlocal)); 179 PetscFunctionReturn(PETSC_SUCCESS); 180 } 181 182 /*TEST 183 184 build: 185 requires: cuda 186 187 testset: 188 args: -snes_monitor_short -dm_mat_type aijcusparse -dm_vec_type cuda 189 output_file: output/ex47cu_1.out 190 test: 191 suffix: 1 192 nsize: 1 193 test: 194 suffix: 2 195 nsize: 2 196 197 TEST*/ 198