1 static char help[] = "Solves -Laplacian u - exp(u) = 0, 0 < x < 1 using GPU\n\n"; 2 /* 3 Same as ex47.c except it also uses the GPU to evaluate the function 4 */ 5 6 #include <petscdm.h> 7 #include <petscdmda.h> 8 #include <petscsnes.h> 9 10 #include <thrust/device_ptr.h> 11 #include <thrust/for_each.h> 12 #include <thrust/tuple.h> 13 #include <thrust/iterator/constant_iterator.h> 14 #include <thrust/iterator/counting_iterator.h> 15 #include <thrust/iterator/zip_iterator.h> 16 17 extern PetscErrorCode ComputeFunction(SNES,Vec,Vec,void*), ComputeJacobian(SNES,Vec,Mat,Mat,void*); 18 PetscBool useCUDA = PETSC_FALSE; 19 20 int main(int argc,char **argv) 21 { 22 SNES snes; 23 Vec x,f; 24 Mat J; 25 DM da; 26 PetscErrorCode ierr; 27 char *tmp,typeName[256]; 28 PetscBool flg; 29 30 ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 31 ierr = PetscOptionsGetString(NULL,NULL,"-dm_vec_type",typeName,256,&flg);CHKERRQ(ierr); 32 if (flg) { 33 ierr = PetscStrstr(typeName,"cuda",&tmp);CHKERRQ(ierr); 34 if (tmp) useCUDA = PETSC_TRUE; 35 } 36 37 ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,8,1,1,NULL,&da);CHKERRQ(ierr); 38 ierr = DMSetFromOptions(da);CHKERRQ(ierr); 39 ierr = DMSetUp(da);CHKERRQ(ierr); 40 ierr = DMCreateGlobalVector(da,&x); VecDuplicate(x,&f);CHKERRQ(ierr); 41 ierr = DMSetMatType(da,MATAIJ);CHKERRQ(ierr); 42 ierr = DMCreateMatrix(da,&J);CHKERRQ(ierr); 43 44 ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr); 45 ierr = SNESSetFunction(snes,f,ComputeFunction,da);CHKERRQ(ierr); 46 ierr = SNESSetJacobian(snes,J,J,ComputeJacobian,da);CHKERRQ(ierr); 47 ierr = SNESSetFromOptions(snes);CHKERRQ(ierr); 48 ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr); 49 50 ierr = MatDestroy(&J);CHKERRQ(ierr); 51 ierr = VecDestroy(&x);CHKERRQ(ierr); 52 ierr = VecDestroy(&f);CHKERRQ(ierr); 53 ierr = SNESDestroy(&snes);CHKERRQ(ierr); 54 ierr = DMDestroy(&da);CHKERRQ(ierr); 55 56 ierr = PetscFinalize(); 57 return ierr; 58 } 59 60 struct ApplyStencil 61 { 62 template <typename Tuple> 63 __host__ __device__ 64 void operator()(Tuple t) 65 { 66 /* f = (2*x_i - x_(i+1) - x_(i-1))/h - h*exp(x_i) */ 67 thrust::get<0>(t) = 1; 68 if ((thrust::get<4>(t) > 0) && (thrust::get<4>(t) < thrust::get<5>(t)-1)) { 69 thrust::get<0>(t) = (2.0*thrust::get<1>(t) - thrust::get<2>(t) - thrust::get<3>(t)) / (thrust::get<6>(t)) - (thrust::get<6>(t))*exp(thrust::get<1>(t)); 70 } else if (thrust::get<4>(t) == 0) { 71 thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t)); 72 } else if (thrust::get<4>(t) == thrust::get<5>(t)-1) { 73 thrust::get<0>(t) = thrust::get<1>(t) / (thrust::get<6>(t)); 74 } 75 } 76 }; 77 78 PetscErrorCode ComputeFunction(SNES snes,Vec x,Vec f,void *ctx) 79 { 80 PetscInt i,Mx,xs,xm,xstartshift,xendshift,fstart,lsize; 81 PetscScalar *xx,*ff,hx; 82 DM da = (DM) ctx; 83 Vec xlocal; 84 PetscErrorCode ierr; 85 PetscMPIInt rank,size; 86 MPI_Comm comm; 87 PetscScalar const *xarray; 88 PetscScalar *farray; 89 90 ierr = DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);CHKERRQ(ierr); 91 hx = 1.0/(PetscReal)(Mx-1); 92 ierr = DMGetLocalVector(da,&xlocal);CHKERRQ(ierr); 93 ierr = DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr); 94 ierr = DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr); 95 96 if (useCUDA) { 97 ierr = VecCUDAGetArrayRead(xlocal,&xarray);CHKERRQ(ierr); 98 ierr = VecCUDAGetArrayWrite(f,&farray);CHKERRQ(ierr); 99 ierr = PetscObjectGetComm((PetscObject)da,&comm);CHKERRQ(ierr); 100 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 101 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 102 if (rank) xstartshift = 1; 103 else xstartshift = 0; 104 if (rank != size-1) xendshift = 1; 105 else xendshift = 0; 106 ierr = VecGetOwnershipRange(f,&fstart,NULL);CHKERRQ(ierr); 107 ierr = VecGetLocalSize(x,&lsize);CHKERRQ(ierr); 108 try { 109 thrust::for_each( 110 thrust::make_zip_iterator( 111 thrust::make_tuple( 112 thrust::device_ptr<PetscScalar>(farray), 113 thrust::device_ptr<const PetscScalar>(xarray + xstartshift), 114 thrust::device_ptr<const PetscScalar>(xarray + xstartshift + 1), 115 thrust::device_ptr<const PetscScalar>(xarray + xstartshift - 1), 116 thrust::counting_iterator<int>(fstart), 117 thrust::constant_iterator<int>(Mx), 118 thrust::constant_iterator<PetscScalar>(hx))), 119 thrust::make_zip_iterator( 120 thrust::make_tuple( 121 thrust::device_ptr<PetscScalar>(farray + lsize), 122 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift), 123 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift + 1), 124 thrust::device_ptr<const PetscScalar>(xarray + lsize - xendshift - 1), 125 thrust::counting_iterator<int>(fstart) + lsize, 126 thrust::constant_iterator<int>(Mx), 127 thrust::constant_iterator<PetscScalar>(hx))), 128 ApplyStencil()); 129 } 130 catch (char *all) { 131 ierr = PetscPrintf(PETSC_COMM_WORLD, "Thrust is not working\n");CHKERRQ(ierr); 132 } 133 ierr = VecCUDARestoreArrayRead(xlocal,&xarray);CHKERRQ(ierr); 134 ierr = VecCUDARestoreArrayWrite(f,&farray);CHKERRQ(ierr); 135 } else { 136 ierr = DMDAVecGetArray(da,xlocal,&xx);CHKERRQ(ierr); 137 ierr = DMDAVecGetArray(da,f,&ff);CHKERRQ(ierr); 138 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 139 140 for (i=xs; i<xs+xm; i++) { 141 if (i == 0 || i == Mx-1) ff[i] = xx[i]/hx; 142 else ff[i] = (2.0*xx[i] - xx[i-1] - xx[i+1])/hx - hx*PetscExpScalar(xx[i]); 143 } 144 ierr = DMDAVecRestoreArray(da,xlocal,&xx);CHKERRQ(ierr); 145 ierr = DMDAVecRestoreArray(da,f,&ff);CHKERRQ(ierr); 146 } 147 ierr = DMRestoreLocalVector(da,&xlocal);CHKERRQ(ierr); 148 // VecView(x,0);printf("f\n"); 149 // VecView(f,0); 150 return 0; 151 152 } 153 PetscErrorCode ComputeJacobian(SNES snes,Vec x,Mat J,Mat B,void *ctx) 154 { 155 DM da = (DM) ctx; 156 PetscInt i,Mx,xm,xs; 157 PetscScalar hx,*xx; 158 Vec xlocal; 159 PetscErrorCode ierr; 160 161 ierr = DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);CHKERRQ(ierr); 162 hx = 1.0/(PetscReal)(Mx-1); 163 ierr = DMGetLocalVector(da,&xlocal);DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr); 164 ierr = DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr); 165 ierr = DMDAVecGetArray(da,xlocal,&xx);CHKERRQ(ierr); 166 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 167 168 for (i=xs; i<xs+xm; i++) { 169 if (i == 0 || i == Mx-1) { 170 ierr = MatSetValue(J,i,i,1.0/hx,INSERT_VALUES);CHKERRQ(ierr); 171 } else { 172 ierr = MatSetValue(J,i,i-1,-1.0/hx,INSERT_VALUES);CHKERRQ(ierr); 173 ierr = MatSetValue(J,i,i,2.0/hx - hx*PetscExpScalar(xx[i]),INSERT_VALUES);CHKERRQ(ierr); 174 ierr = MatSetValue(J,i,i+1,-1.0/hx,INSERT_VALUES);CHKERRQ(ierr); 175 } 176 } 177 ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 178 ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 179 ierr = DMDAVecRestoreArray(da,xlocal,&xx);CHKERRQ(ierr); 180 ierr = DMRestoreLocalVector(da,&xlocal);CHKERRQ(ierr); 181 return 0; 182 } 183 184 185 186 /*TEST 187 188 build: 189 requires: cuda 190 191 test: 192 args: -snes_monitor_short -dm_vec_type cuda 193 194 TEST*/ 195