1 static char help[] = "Solves -Laplacian u - exp(u) = 0, 0 < x < 1 using GPU\n\n"; 2 /* 3 Same as ex47.c except it also uses the GPU to evaluate the function 4 */ 5 6 #include <petscdm.h> 7 #include <petscdmda.h> 8 #include <petscsnes.h> 9 10 #include <thrust/device_ptr.h> 11 #include <thrust/for_each.h> 12 #include <thrust/tuple.h> 13 #include <thrust/iterator/constant_iterator.h> 14 #include <thrust/iterator/counting_iterator.h> 15 #include <thrust/iterator/zip_iterator.h> 16 17 extern PetscErrorCode ComputeFunction(SNES,Vec,Vec,void*), ComputeJacobian(SNES,Vec,Mat,Mat,void*); 18 PetscBool useCUDA = PETSC_FALSE; 19 20 int main(int argc,char **argv) 21 { 22 SNES snes; 23 Vec x,f; 24 Mat J; 25 DM da; 26 char *tmp,typeName[256]; 27 PetscBool flg; 28 29 PetscCall(PetscInitialize(&argc,&argv,(char*)0,help)); 30 PetscCall(PetscOptionsGetString(NULL,NULL,"-dm_vec_type",typeName,sizeof(typeName),&flg)); 31 if (flg) { 32 PetscCall(PetscStrstr(typeName,"cuda",&tmp)); 33 if (tmp) useCUDA = PETSC_TRUE; 34 } 35 36 PetscCall(DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,8,1,1,NULL,&da)); 37 PetscCall(DMSetFromOptions(da)); 38 PetscCall(DMSetUp(da)); 39 PetscCall(DMCreateGlobalVector(da,&x); VecDuplicate(x,&f)); 40 PetscCall(DMCreateMatrix(da,&J)); 41 42 PetscCall(SNESCreate(PETSC_COMM_WORLD,&snes)); 43 PetscCall(SNESSetFunction(snes,f,ComputeFunction,da)); 44 PetscCall(SNESSetJacobian(snes,J,J,ComputeJacobian,da)); 45 PetscCall(SNESSetFromOptions(snes)); 46 PetscCall(SNESSolve(snes,NULL,x)); 47 48 PetscCall(MatDestroy(&J)); 49 PetscCall(VecDestroy(&x)); 50 PetscCall(VecDestroy(&f)); 51 PetscCall(SNESDestroy(&snes)); 52 PetscCall(DMDestroy(&da)); 53 54 PetscCall(PetscFinalize()); 55 return 0; 56 } 57 58 struct ApplyStencil 59 { 60 template <typename Tuple> 61 __host__ __device__ 62 void operator()(Tuple t) 63 { 64 /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */ 65 thrust::get<0>(t) = 1; 66 if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t)-1)) { 67 thrust::get<0>(t) = (((PetscScalar)2.0)*thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t))*exp(thrust::get<1>(t)); 68 } else if (thrust::get<4>(t) == 0) { 69 thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t)); 70 } else if (thrust::get<4>(t) == thrust::get<5>(t)-1) { 71 thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t)); 72 } 73 } 74 }; 75 76 PetscErrorCode ComputeFunction(SNES snes,Vec x,Vec f,void *ctx) 77 { 78 PetscInt i,Mx,xs,xm,xstartshift,xendshift,fstart,lsize; 79 PetscScalar *xx,*ff,hx; 80 DM da = (DM) ctx; 81 Vec xlocal; 82 PetscMPIInt rank,size; 83 MPI_Comm comm; 84 PetscScalar const *xarray; 85 PetscScalar *farray; 86 87 PetscCall(DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE)); 88 hx = 1.0/(PetscReal)(Mx-1); 89 PetscCall(DMGetLocalVector(da,&xlocal)); 90 PetscCall(DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal)); 91 PetscCall(DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal)); 92 93 if (useCUDA) { 94 PetscCall(VecCUDAGetArrayRead(xlocal,&xarray)); 95 PetscCall(VecCUDAGetArrayWrite(f,&farray)); 96 PetscCall(PetscObjectGetComm((PetscObject)da,&comm)); 97 PetscCallMPI(MPI_Comm_size(comm,&size)); 98 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 99 if (rank) xstartshift = 1; 100 else xstartshift = 0; 101 if (rank != size-1) xendshift = 1; 102 else xendshift = 0; 103 PetscCall(VecGetOwnershipRange(f,&fstart,NULL)); 104 PetscCall(VecGetLocalSize(x,&lsize)); 105 try { 106 thrust::for_each( 107 thrust::make_zip_iterator( 108 thrust::make_tuple( 109 thrust::device_ptr<PetscScalar>(farray), 110 thrust::device_ptr<const PetscScalar>(xarray + xstartshift), 111 thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1), 112 thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1), 113 thrust::counting_iterator<int>(fstart), 114 thrust::constant_iterator<int>(Mx), 115 thrust::constant_iterator<PetscScalar>(hx))), 116 thrust::make_zip_iterator( 117 thrust::make_tuple( 118 thrust::device_ptr<PetscScalar>(farray + lsize), 119 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift), 120 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1), 121 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1), 122 thrust::counting_iterator<int>(fstart) + lsize, 123 thrust::constant_iterator<int>(Mx), 124 thrust::constant_iterator<PetscScalar>(hx))), 125 ApplyStencil()); 126 } 127 catch (char *all) { 128 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n")); 129 } 130 PetscCall(VecCUDARestoreArrayRead(xlocal,&xarray)); 131 PetscCall(VecCUDARestoreArrayWrite(f,&farray)); 132 } else { 133 PetscCall(DMDAVecGetArray(da,xlocal,&xx)); 134 PetscCall(DMDAVecGetArray(da,f,&ff)); 135 PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL)); 136 137 for (i=xs; i<xs+xm; i++) { 138 if (i == 0 || i == Mx-1) ff[i] = xx[i]/hx; 139 else ff[i] = (2.0*xx[i] - xx[i-1] - xx[i+1])/hx - hx*PetscExpScalar(xx[i]); 140 } 141 PetscCall(DMDAVecRestoreArray(da,xlocal,&xx)); 142 PetscCall(DMDAVecRestoreArray(da,f,&ff)); 143 } 144 PetscCall(DMRestoreLocalVector(da,&xlocal)); 145 return 0; 146 147 } 148 PetscErrorCode ComputeJacobian(SNES snes,Vec x,Mat J,Mat B,void *ctx) 149 { 150 DM da = (DM) ctx; 151 PetscInt i,Mx,xm,xs; 152 PetscScalar hx,*xx; 153 Vec xlocal; 154 155 PetscCall(DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE)); 156 hx = 1.0/(PetscReal)(Mx-1); 157 PetscCall(DMGetLocalVector(da,&xlocal)); 158 PetscCall(DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal)); 159 PetscCall(DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal)); 160 PetscCall(DMDAVecGetArray(da,xlocal,&xx)); 161 PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL)); 162 163 for (i=xs; i<xs+xm; i++) { 164 if (i == 0 || i == Mx-1) { 165 PetscCall(MatSetValue(J,i,i,1.0/hx,INSERT_VALUES)); 166 } else { 167 PetscCall(MatSetValue(J,i,i-1,-1.0/hx,INSERT_VALUES)); 168 PetscCall(MatSetValue(J,i,i,2.0/hx - hx*PetscExpScalar(xx[i]),INSERT_VALUES)); 169 PetscCall(MatSetValue(J,i,i+1,-1.0/hx,INSERT_VALUES)); 170 } 171 } 172 PetscCall(MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY)); 173 PetscCall(MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY)); 174 PetscCall(DMDAVecRestoreArray(da,xlocal,&xx)); 175 PetscCall(DMRestoreLocalVector(da,&xlocal)); 176 return 0; 177 } 178 179 /*TEST 180 181 build: 182 requires: cuda 183 184 testset: 185 args: -snes_monitor_short -dm_mat_type aijcusparse -dm_vec_type cuda 186 output_file: output/ex47cu_1.out 187 test: 188 suffix: 1 189 nsize: 1 190 test: 191 suffix: 2 192 nsize: 2 193 194 TEST*/ 195