1 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n"; 2 3 #include <petscdmda_kokkos.hpp> 4 #include <petscsnes.h> 5 6 /* 7 User-defined application context 8 */ 9 typedef struct { 10 DM da; /* distributed array */ 11 Vec F; /* right-hand-side of PDE */ 12 PetscReal h; /* mesh spacing */ 13 } ApplicationCtx; 14 15 /* ------------------------------------------------------------------- */ 16 /* 17 FormInitialGuess - Computes initial guess. 18 19 Input/Output Parameter: 20 . x - the solution vector 21 */ 22 PetscErrorCode FormInitialGuess(Vec x) 23 { 24 PetscErrorCode ierr; 25 PetscScalar pfive = .50; 26 27 PetscFunctionBeginUser; 28 ierr = VecSet(x,pfive);CHKERRQ(ierr); 29 PetscFunctionReturn(0); 30 } 31 32 /* ------------------------------------------------------------------- */ 33 /* 34 CpuFunction - Evaluates nonlinear function, F(x) on CPU 35 36 Input Parameters: 37 . snes - the SNES context 38 . x - input vector 39 . ctx - optional user-defined context, as set by SNESSetFunction() 40 41 Output Parameter: 42 . r - function vector 43 44 Note: 45 The user-defined context can contain any application-specific 46 data needed for the function evaluation. 47 */ 48 PetscErrorCode CpuFunction(SNES snes,Vec x,Vec r,void *ctx) 49 { 50 ApplicationCtx *user = (ApplicationCtx*) ctx; 51 DM da = user->da; 52 PetscScalar *X,*R,*F,d; 53 PetscErrorCode ierr; 54 PetscInt i,M,xs,xm; 55 Vec xl; 56 57 PetscFunctionBeginUser; 58 ierr = DMGetLocalVector(da,&xl);CHKERRQ(ierr); 59 ierr = DMGlobalToLocal(da,x,INSERT_VALUES,xl);CHKERRQ(ierr); 60 ierr = DMDAVecGetArray(da,xl,&X);CHKERRQ(ierr); 61 ierr = DMDAVecGetArray(da,r,&R);CHKERRQ(ierr); 62 ierr = DMDAVecGetArray(da,user->F,&F);CHKERRQ(ierr); 63 64 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 65 ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 66 67 if (xs == 0) { /* left boundary */ 68 R[0] = X[0]; 69 xs++;xm--; 70 } 71 if (xs+xm == M) { /* right boundary */ 72 R[xs+xm-1] = X[xs+xm-1] - 1.0; 73 xm--; 74 } 75 d = 1.0/(user->h*user->h); 76 for (i=xs; i<xs+xm; i++) R[i] = d*(X[i-1] - 2.0*X[i] + X[i+1]) + X[i]*X[i] - F[i]; 77 78 ierr = DMDAVecRestoreArray(da,xl,&X);CHKERRQ(ierr); 79 ierr = DMDAVecRestoreArray(da,r,&R);CHKERRQ(ierr); 80 ierr = DMDAVecRestoreArray(da,user->F,&F);CHKERRQ(ierr); 81 ierr = DMRestoreLocalVector(da,&xl);CHKERRQ(ierr); 82 PetscFunctionReturn(0); 83 } 84 85 using DefaultExecutionSpace = Kokkos::DefaultExecutionSpace; 86 using DefaultMemorySpace = Kokkos::DefaultExecutionSpace::memory_space; 87 using PetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<PetscScalar*,DefaultMemorySpace>; 88 using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar*,DefaultMemorySpace>; 89 90 PetscErrorCode KokkosFunction(SNES snes,Vec x,Vec r,void *ctx) 91 { 92 PetscErrorCode ierr; 93 ApplicationCtx *user = (ApplicationCtx*) ctx; 94 DM da = user->da; 95 PetscScalar d; 96 PetscInt M; 97 Vec xl; 98 PetscScalarKokkosOffsetView R; 99 ConstPetscScalarKokkosOffsetView X,F; 100 101 PetscFunctionBeginUser; 102 ierr = DMGetLocalVector(da,&xl);CHKERRQ(ierr); 103 ierr = DMGlobalToLocal(da,x,INSERT_VALUES,xl);CHKERRQ(ierr); 104 d = 1.0/(user->h*user->h); 105 ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 106 ierr = DMDAVecGetKokkosOffsetView(da,xl,&X);CHKERRQ(ierr); /* read only */ 107 ierr = DMDAVecGetKokkosOffsetViewWrite(da,r,&R);CHKERRQ(ierr); /* write only */ 108 ierr = DMDAVecGetKokkosOffsetView(da,user->F,&F);CHKERRQ(ierr); /* read only */ 109 Kokkos:: parallel_for (Kokkos::RangePolicy<>(R.begin(0),R.end(0)),KOKKOS_LAMBDA (int i) { 110 if (i == 0) R(0) = X(0); /* left boundary */ 111 else if (i == M-1) R(i) = X(i) - 1.0; /* right boundary */ 112 else R(i) = d*(X(i-1) - 2.0*X(i) + X(i+1)) + X(i)*X(i) - F(i); /* interior */ 113 }); 114 ierr = DMDAVecRestoreKokkosOffsetView(da,xl,&X);CHKERRQ(ierr); 115 ierr = DMDAVecRestoreKokkosOffsetViewWrite(da,r,&R);CHKERRQ(ierr); 116 ierr = DMDAVecRestoreKokkosOffsetView(da,user->F,&F);CHKERRQ(ierr); 117 ierr = DMRestoreLocalVector(da,&xl);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode StubFunction(SNES snes ,Vec x,Vec r,void *ctx) 122 { 123 PetscErrorCode ierr; 124 ApplicationCtx *user = (ApplicationCtx*) ctx; 125 DM da = user->da; 126 Vec rk; 127 PetscReal norm=0; 128 129 PetscFunctionBeginUser; 130 ierr = DMGetGlobalVector(da,&rk);CHKERRQ(ierr); 131 ierr = CpuFunction(snes,x,r,ctx);CHKERRQ(ierr); 132 ierr = KokkosFunction(snes,x,rk,ctx);CHKERRQ(ierr); 133 ierr = VecAXPY(rk,-1.0,r);CHKERRQ(ierr); 134 ierr = VecNorm(rk,NORM_2,&norm);CHKERRQ(ierr); 135 ierr = DMRestoreGlobalVector(da,&rk);CHKERRQ(ierr); 136 if (norm > 1e-6) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"KokkosFunction() different from CpuFunction() with a diff norm = %g\n",norm); 137 PetscFunctionReturn(0); 138 } 139 /* ------------------------------------------------------------------- */ 140 /* 141 FormJacobian - Evaluates Jacobian matrix. 142 143 Input Parameters: 144 . snes - the SNES context 145 . x - input vector 146 . dummy - optional user-defined context (not used here) 147 148 Output Parameters: 149 . jac - Jacobian matrix 150 . B - optionally different preconditioning matrix 151 . flag - flag indicating matrix structure 152 */ 153 PetscErrorCode FormJacobian(SNES snes,Vec x,Mat jac,Mat B,void *ctx) 154 { 155 ApplicationCtx *user = (ApplicationCtx*) ctx; 156 PetscScalar *xx,d,A[3]; 157 PetscErrorCode ierr; 158 PetscInt i,j[3],M,xs,xm; 159 DM da = user->da; 160 161 PetscFunctionBeginUser; 162 /* 163 Get pointer to vector data 164 */ 165 ierr = DMDAVecGetArrayRead(da,x,&xx);CHKERRQ(ierr); 166 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 167 168 /* 169 Get range of locally owned matrix 170 */ 171 ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 172 173 /* 174 Determine starting and ending local indices for interior grid points. 175 Set Jacobian entries for boundary points. 176 */ 177 178 if (xs == 0) { /* left boundary */ 179 i = 0; A[0] = 1.0; 180 181 ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr); 182 xs++;xm--; 183 } 184 if (xs+xm == M) { /* right boundary */ 185 i = M-1; 186 A[0] = 1.0; 187 ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr); 188 xm--; 189 } 190 191 /* 192 Interior grid points 193 - Note that in this case we set all elements for a particular 194 row at once. 195 */ 196 d = 1.0/(user->h*user->h); 197 for (i=xs; i<xs+xm; i++) { 198 j[0] = i - 1; j[1] = i; j[2] = i + 1; 199 A[0] = A[2] = d; A[1] = -2.0*d + 2.0*xx[i]; 200 ierr = MatSetValues(jac,1,&i,3,j,A,INSERT_VALUES);CHKERRQ(ierr); 201 } 202 203 /* 204 Assemble matrix, using the 2-step process: 205 MatAssemblyBegin(), MatAssemblyEnd(). 206 By placing code between these two statements, computations can be 207 done while messages are in transition. 208 209 Also, restore vector. 210 */ 211 212 ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 213 ierr = DMDAVecRestoreArrayRead(da,x,&xx);CHKERRQ(ierr); 214 ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 215 216 PetscFunctionReturn(0); 217 } 218 219 int main(int argc,char **argv) 220 { 221 SNES snes; /* SNES context */ 222 Mat J; /* Jacobian matrix */ 223 ApplicationCtx ctx; /* user-defined context */ 224 Vec x,r,U,F; /* vectors */ 225 PetscScalar none = -1.0; 226 PetscErrorCode ierr; 227 PetscInt its,N = 5,maxit,maxf; 228 PetscReal abstol,rtol,stol,norm; 229 PetscBool viewinitial = PETSC_FALSE; 230 231 ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 232 ierr = PetscOptionsGetInt(NULL,NULL,"-n",&N,NULL);CHKERRQ(ierr); 233 ctx.h = 1.0/(N-1); 234 ierr = PetscOptionsGetBool(NULL,NULL,"-view_initial",&viewinitial,NULL);CHKERRQ(ierr); 235 236 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 237 Create nonlinear solver context 238 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 239 ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr); 240 241 /* 242 Create distributed array (DMDA) to manage parallel grid and vectors 243 */ 244 ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,N,1,1,NULL,&ctx.da);CHKERRQ(ierr); 245 ierr = DMSetFromOptions(ctx.da);CHKERRQ(ierr); 246 ierr = DMSetUp(ctx.da);CHKERRQ(ierr); 247 248 /* 249 Extract global and local vectors from DMDA; then duplicate for remaining 250 vectors that are the same types 251 */ 252 ierr = DMCreateGlobalVector(ctx.da,&x);CHKERRQ(ierr); 253 ierr = PetscObjectSetName((PetscObject)x,"Approximate Solution");CHKERRQ(ierr); 254 ierr = VecDuplicate(x,&r);CHKERRQ(ierr); 255 ierr = VecDuplicate(x,&F);CHKERRQ(ierr); ctx.F = F; 256 ierr = PetscObjectSetName((PetscObject)F,"Forcing function");CHKERRQ(ierr); 257 ierr = VecDuplicate(x,&U);CHKERRQ(ierr); 258 ierr = PetscObjectSetName((PetscObject)U,"Exact Solution");CHKERRQ(ierr); 259 260 /* 261 Set function evaluation routine and vector. Whenever the nonlinear 262 solver needs to compute the nonlinear function, it will call this 263 routine. 264 - Note that the final routine argument is the user-defined 265 context that provides application-specific data for the 266 function evaluation routine. 267 268 At the begining, one can use a stub function that checks the Kokkos version 269 against the CPU version to quickly expose errors. 270 ierr = SNESSetFunction(snes,r,StubFunction,&ctx);CHKERRQ(ierr); 271 */ 272 ierr = SNESSetFunction(snes,r,KokkosFunction,&ctx);CHKERRQ(ierr); 273 274 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 275 Create matrix data structure; set Jacobian evaluation routine 276 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 277 ierr = DMCreateMatrix(ctx.da,&J);CHKERRQ(ierr); 278 279 /* 280 Set Jacobian matrix data structure and default Jacobian evaluation 281 routine. Whenever the nonlinear solver needs to compute the 282 Jacobian matrix, it will call this routine. 283 - Note that the final routine argument is the user-defined 284 context that provides application-specific data for the 285 Jacobian evaluation routine. 286 */ 287 ierr = SNESSetJacobian(snes,J,J,FormJacobian,&ctx);CHKERRQ(ierr); 288 ierr = SNESSetFromOptions(snes);CHKERRQ(ierr); 289 ierr = SNESGetTolerances(snes,&abstol,&rtol,&stol,&maxit,&maxf);CHKERRQ(ierr); 290 ierr = PetscPrintf(PETSC_COMM_WORLD,"atol=%g, rtol=%g, stol=%g, maxit=%D, maxf=%D\n",(double)abstol,(double)rtol,(double)stol,maxit,maxf);CHKERRQ(ierr); 291 292 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 293 Initialize application: 294 Store forcing function of PDE and exact solution 295 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 296 { 297 PetscScalarKokkosOffsetView FF,UU; 298 ierr = DMDAVecGetKokkosOffsetViewWrite(ctx.da,F,&FF);CHKERRQ(ierr); 299 ierr = DMDAVecGetKokkosOffsetViewWrite(ctx.da,U,&UU);CHKERRQ(ierr); 300 Kokkos:: parallel_for (Kokkos::RangePolicy<>(FF.begin(0),FF.end(0)),KOKKOS_LAMBDA (int i) { 301 PetscReal xp = i*ctx.h; 302 FF(i) = 6.0*xp + pow(xp+1.e-12,6.0); /* +1.e-12 is to prevent 0^6 */ 303 UU(i) = xp*xp*xp; 304 }); 305 ierr = DMDAVecRestoreKokkosOffsetViewWrite(ctx.da,F,&FF);CHKERRQ(ierr); 306 ierr = DMDAVecRestoreKokkosOffsetViewWrite(ctx.da,U,&UU);CHKERRQ(ierr); 307 } 308 309 if (viewinitial) { 310 ierr = VecView(U,NULL);CHKERRQ(ierr); 311 ierr = VecView(F,NULL);CHKERRQ(ierr); 312 } 313 314 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 315 Evaluate initial guess; then solve nonlinear system 316 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 317 318 /* 319 Note: The user should initialize the vector, x, with the initial guess 320 for the nonlinear solver prior to calling SNESSolve(). In particular, 321 to employ an initial guess of zero, the user should explicitly set 322 this vector to zero by calling VecSet(). 323 */ 324 ierr = FormInitialGuess(x);CHKERRQ(ierr); 325 ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr); 326 ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr); 327 ierr = PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its);CHKERRQ(ierr); 328 329 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 330 Check solution and clean up 331 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 332 /* 333 Check the error 334 */ 335 ierr = VecAXPY(x,none,U);CHKERRQ(ierr); 336 ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); 337 ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)norm,its);CHKERRQ(ierr); 338 339 /* 340 Free work space. All PETSc objects should be destroyed when they 341 are no longer needed. 342 */ 343 ierr = VecDestroy(&x);CHKERRQ(ierr); 344 ierr = VecDestroy(&r);CHKERRQ(ierr); 345 ierr = VecDestroy(&U);CHKERRQ(ierr); 346 ierr = VecDestroy(&F);CHKERRQ(ierr); 347 ierr = MatDestroy(&J);CHKERRQ(ierr); 348 ierr = SNESDestroy(&snes);CHKERRQ(ierr); 349 ierr = DMDestroy(&ctx.da);CHKERRQ(ierr); 350 ierr = PetscFinalize(); 351 return ierr; 352 } 353 354 /*TEST 355 356 build: 357 requires: kokkos_kernels 358 359 test: 360 requires: kokkos_kernels !complex !single cuda 361 nsize: 2 362 args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor 363 output_file: output/ex3k_1.out 364 365 TEST*/ 366