1 static char help[] = "Solves -Laplacian u - exp(u) = 0, 0 < x < 1 using GPU\n\n"; 2 /* 3 Same as ex47.c except it also uses the GPU to evaluate the function 4 */ 5 6 #include <petscdm.h> 7 #include <petscdmda.h> 8 #include <petscsnes.h> 9 10 #include <thrust/device_ptr.h> 11 #include <thrust/for_each.h> 12 #include <thrust/tuple.h> 13 #include <thrust/iterator/constant_iterator.h> 14 #include <thrust/iterator/counting_iterator.h> 15 #include <thrust/iterator/zip_iterator.h> 16 17 extern PetscErrorCode ComputeFunction(SNES, Vec, Vec, void *), ComputeJacobian(SNES, Vec, Mat, Mat, void *); 18 PetscBool useCUDA = PETSC_FALSE; 19 20 int main(int argc, char **argv) 21 { 22 SNES snes; 23 Vec x, f; 24 Mat J; 25 DM da; 26 char *tmp, typeName[256]; 27 PetscBool flg; 28 29 PetscFunctionBeginUser; 30 PetscCall(PetscInitialize(&argc, &argv, (char *)0, help)); 31 PetscCall(PetscOptionsGetString(NULL, NULL, "-dm_vec_type", typeName, sizeof(typeName), &flg)); 32 if (flg) { 33 PetscCall(PetscStrstr(typeName, "cuda", &tmp)); 34 if (tmp) useCUDA = PETSC_TRUE; 35 } 36 37 PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, 8, 1, 1, NULL, &da)); 38 PetscCall(DMSetFromOptions(da)); 39 PetscCall(DMSetUp(da)); 40 PetscCall(DMCreateGlobalVector(da, &x)); 41 PetscCall(VecDuplicate(x, &f)); 42 PetscCall(DMCreateMatrix(da, &J)); 43 44 PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes)); 45 PetscCall(SNESSetFunction(snes, f, ComputeFunction, da)); 46 PetscCall(SNESSetJacobian(snes, J, J, ComputeJacobian, da)); 47 PetscCall(SNESSetFromOptions(snes)); 48 PetscCall(SNESSolve(snes, NULL, x)); 49 50 PetscCall(MatDestroy(&J)); 51 PetscCall(VecDestroy(&x)); 52 PetscCall(VecDestroy(&f)); 53 PetscCall(SNESDestroy(&snes)); 54 PetscCall(DMDestroy(&da)); 55 56 PetscCall(PetscFinalize()); 57 return 0; 58 } 59 60 struct ApplyStencil { 61 template <typename Tuple> 62 __host__ __device__ void operator()(Tuple t) 63 { 64 /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */ 65 thrust::get<0>(t) = 1; 66 if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t) - 1)) { 67 thrust::get<0>(t) = (((PetscScalar)2.0) * thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t)) * exp(thrust::get<1>(t)); 68 } else if (thrust::get<4>(t) == 0) { 69 thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t)); 70 } else if (thrust::get<4>(t) == thrust::get<5>(t) - 1) { 71 thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t)); 72 } 73 } 74 }; 75 76 PetscErrorCode ComputeFunction(SNES snes, Vec x, Vec f, void *ctx) 77 { 78 PetscInt i, Mx, xs, xm, xstartshift, xendshift, fstart, lsize; 79 PetscScalar *xx, *ff, hx; 80 DM da = (DM)ctx; 81 Vec xlocal; 82 PetscMPIInt rank, size; 83 MPI_Comm comm; 84 PetscScalar const *xarray; 85 PetscScalar *farray; 86 87 PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE)); 88 hx = 1.0 / (PetscReal)(Mx - 1); 89 PetscCall(DMGetLocalVector(da, &xlocal)); 90 PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal)); 91 PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal)); 92 93 if (useCUDA) { 94 PetscCall(PetscObjectGetComm((PetscObject)da, &comm)); 95 PetscCallMPI(MPI_Comm_size(comm, &size)); 96 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 97 PetscCall(VecCUDAGetArrayRead(xlocal, &xarray)); 98 PetscCall(VecCUDAGetArrayWrite(f, &farray)); 99 if (rank) xstartshift = 1; 100 else xstartshift = 0; 101 if (rank != size - 1) xendshift = 1; 102 else xendshift = 0; 103 PetscCall(VecGetOwnershipRange(f, &fstart, NULL)); 104 PetscCall(VecGetLocalSize(x, &lsize)); 105 // clang-format off 106 try { 107 thrust::for_each( 108 thrust::make_zip_iterator( 109 thrust::make_tuple( 110 thrust::device_ptr<PetscScalar>(farray), 111 thrust::device_ptr<const PetscScalar>(xarray + xstartshift), 112 thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1), 113 thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1), 114 thrust::counting_iterator<int>(fstart), 115 thrust::constant_iterator<int>(Mx), 116 thrust::constant_iterator<PetscScalar>(hx))), 117 thrust::make_zip_iterator( 118 thrust::make_tuple( 119 thrust::device_ptr<PetscScalar>(farray + lsize), 120 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift), 121 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1), 122 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1), 123 thrust::counting_iterator<int>(fstart) + lsize, 124 thrust::constant_iterator<int>(Mx), 125 thrust::constant_iterator<PetscScalar>(hx))), 126 ApplyStencil()); 127 } 128 // clang-format on 129 catch (char *all) { 130 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n")); 131 } 132 PetscCall(VecCUDARestoreArrayRead(xlocal, &xarray)); 133 PetscCall(VecCUDARestoreArrayWrite(f, &farray)); 134 } else { 135 PetscCall(DMDAVecGetArray(da, xlocal, &xx)); 136 PetscCall(DMDAVecGetArray(da, f, &ff)); 137 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL)); 138 139 for (i = xs; i < xs + xm; i++) { 140 if (i == 0 || i == Mx - 1) ff[i] = xx[i] / hx; 141 else ff[i] = (2.0 * xx[i] - xx[i - 1] - xx[i + 1]) / hx - hx * PetscExpScalar(xx[i]); 142 } 143 PetscCall(DMDAVecRestoreArray(da, xlocal, &xx)); 144 PetscCall(DMDAVecRestoreArray(da, f, &ff)); 145 } 146 PetscCall(DMRestoreLocalVector(da, &xlocal)); 147 return 0; 148 } 149 PetscErrorCode ComputeJacobian(SNES snes, Vec x, Mat J, Mat B, void *ctx) 150 { 151 DM da = (DM)ctx; 152 PetscInt i, Mx, xm, xs; 153 PetscScalar hx, *xx; 154 Vec xlocal; 155 156 PetscCall(DMDAGetInfo(da, PETSC_IGNORE, &Mx, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE, PETSC_IGNORE)); 157 hx = 1.0 / (PetscReal)(Mx - 1); 158 PetscCall(DMGetLocalVector(da, &xlocal)); 159 PetscCall(DMGlobalToLocalBegin(da, x, INSERT_VALUES, xlocal)); 160 PetscCall(DMGlobalToLocalEnd(da, x, INSERT_VALUES, xlocal)); 161 PetscCall(DMDAVecGetArray(da, xlocal, &xx)); 162 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL)); 163 164 for (i = xs; i < xs + xm; i++) { 165 if (i == 0 || i == Mx - 1) { 166 PetscCall(MatSetValue(J, i, i, 1.0 / hx, INSERT_VALUES)); 167 } else { 168 PetscCall(MatSetValue(J, i, i - 1, -1.0 / hx, INSERT_VALUES)); 169 PetscCall(MatSetValue(J, i, i, 2.0 / hx - hx * PetscExpScalar(xx[i]), INSERT_VALUES)); 170 PetscCall(MatSetValue(J, i, i + 1, -1.0 / hx, INSERT_VALUES)); 171 } 172 } 173 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 174 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 175 PetscCall(DMDAVecRestoreArray(da, xlocal, &xx)); 176 PetscCall(DMRestoreLocalVector(da, &xlocal)); 177 return 0; 178 } 179 180 /*TEST 181 182 build: 183 requires: cuda 184 185 testset: 186 args: -snes_monitor_short -dm_mat_type aijcusparse -dm_vec_type cuda 187 output_file: output/ex47cu_1.out 188 test: 189 suffix: 1 190 nsize: 1 191 test: 192 suffix: 2 193 nsize: 2 194 195 TEST*/ 196