1 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n"; 2 3 #include <petscdmda_kokkos.hpp> 4 #include <petscsnes.h> 5 6 /* 7 User-defined application context 8 */ 9 typedef struct { 10 DM da; /* distributed array */ 11 Vec F; /* right-hand-side of PDE */ 12 PetscReal h; /* mesh spacing */ 13 } ApplicationCtx; 14 15 /* ------------------------------------------------------------------- */ 16 /* 17 FormInitialGuess - Computes initial guess. 18 19 Input/Output Parameter: 20 . x - the solution vector 21 */ 22 PetscErrorCode FormInitialGuess(Vec x) 23 { 24 PetscErrorCode ierr; 25 PetscScalar pfive = .50; 26 27 PetscFunctionBeginUser; 28 ierr = VecSet(x,pfive);CHKERRQ(ierr); 29 PetscFunctionReturn(0); 30 } 31 32 /* ------------------------------------------------------------------- */ 33 /* 34 CpuFunction - Evaluates nonlinear function, F(x) on CPU 35 36 Input Parameters: 37 . snes - the SNES context 38 . x - input vector 39 . ctx - optional user-defined context, as set by SNESSetFunction() 40 41 Output Parameter: 42 . r - function vector 43 44 Note: 45 The user-defined context can contain any application-specific 46 data needed for the function evaluation. 47 */ 48 PetscErrorCode CpuFunction(SNES snes,Vec x,Vec r,void *ctx) 49 { 50 ApplicationCtx *user = (ApplicationCtx*) ctx; 51 DM da = user->da; 52 PetscScalar *X,*R,*F,d; 53 PetscErrorCode ierr; 54 PetscInt i,M,xs,xm; 55 Vec xl; 56 57 PetscFunctionBeginUser; 58 ierr = DMGetLocalVector(da,&xl);CHKERRQ(ierr); 59 ierr = DMGlobalToLocal(da,x,INSERT_VALUES,xl);CHKERRQ(ierr); 60 ierr = DMDAVecGetArray(da,xl,&X);CHKERRQ(ierr); 61 ierr = DMDAVecGetArray(da,r,&R);CHKERRQ(ierr); 62 ierr = DMDAVecGetArray(da,user->F,&F);CHKERRQ(ierr); 63 64 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 65 ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 66 67 if (xs == 0) { /* left boundary */ 68 R[0] = X[0]; 69 xs++;xm--; 70 } 71 if (xs+xm == M) { /* right boundary */ 72 R[xs+xm-1] = X[xs+xm-1] - 1.0; 73 xm--; 74 } 75 d = 1.0/(user->h*user->h); 76 for (i=xs; i<xs+xm; i++) R[i] = d*(X[i-1] - 2.0*X[i] + X[i+1]) + X[i]*X[i] - F[i]; 77 78 ierr = DMDAVecRestoreArray(da,xl,&X);CHKERRQ(ierr); 79 ierr = DMDAVecRestoreArray(da,r,&R);CHKERRQ(ierr); 80 ierr = DMDAVecRestoreArray(da,user->F,&F);CHKERRQ(ierr); 81 ierr = DMRestoreLocalVector(da,&xl);CHKERRQ(ierr); 82 PetscFunctionReturn(0); 83 } 84 85 using DefaultExecutionSpace = Kokkos::DefaultExecutionSpace; 86 using DefaultMemorySpace = Kokkos::DefaultExecutionSpace::memory_space; 87 using PetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<PetscScalar*,DefaultMemorySpace>; 88 using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar*,DefaultMemorySpace>; 89 90 PetscErrorCode KokkosFunction(SNES snes,Vec x,Vec r,void *ctx) 91 { 92 PetscErrorCode ierr; 93 ApplicationCtx *user = (ApplicationCtx*) ctx; 94 DM da = user->da; 95 PetscScalar d; 96 PetscInt M; 97 Vec xl; 98 PetscScalarKokkosOffsetView R; 99 ConstPetscScalarKokkosOffsetView X,F; 100 101 PetscFunctionBeginUser; 102 ierr = DMGetLocalVector(da,&xl);CHKERRQ(ierr); 103 ierr = DMGlobalToLocal(da,x,INSERT_VALUES,xl);CHKERRQ(ierr); 104 d = 1.0/(user->h*user->h); 105 ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 106 ierr = DMDAVecGetKokkosOffsetView(da,xl,&X);CHKERRQ(ierr); /* read only */ 107 ierr = DMDAVecGetKokkosOffsetViewWrite(da,r,&R);CHKERRQ(ierr); /* write only */ 108 ierr = DMDAVecGetKokkosOffsetView(da,user->F,&F);CHKERRQ(ierr); /* read only */ 109 Kokkos:: parallel_for (Kokkos::RangePolicy<>(R.begin(0),R.end(0)),KOKKOS_LAMBDA (int i) { 110 if (i == 0) R(0) = X(0); /* left boundary */ 111 else if (i == M-1) R(i) = X(i) - 1.0; /* right boundary */ 112 else R(i) = d*(X(i-1) - 2.0*X(i) + X(i+1)) + X(i)*X(i) - F(i); /* interior */ 113 }); 114 ierr = DMDAVecRestoreKokkosOffsetView(da,xl,&X);CHKERRQ(ierr); 115 ierr = DMDAVecRestoreKokkosOffsetViewWrite(da,r,&R);CHKERRQ(ierr); 116 ierr = DMDAVecRestoreKokkosOffsetView(da,user->F,&F);CHKERRQ(ierr); 117 ierr = DMRestoreLocalVector(da,&xl);CHKERRQ(ierr); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode StubFunction(SNES snes ,Vec x,Vec r,void *ctx) 122 { 123 PetscErrorCode ierr; 124 ApplicationCtx *user = (ApplicationCtx*) ctx; 125 DM da = user->da; 126 Vec rk; 127 PetscReal norm=0; 128 129 PetscFunctionBeginUser; 130 ierr = DMGetGlobalVector(da,&rk);CHKERRQ(ierr); 131 ierr = CpuFunction(snes,x,r,ctx);CHKERRQ(ierr); 132 ierr = KokkosFunction(snes,x,rk,ctx);CHKERRQ(ierr); 133 ierr = VecAXPY(rk,-1.0,r);CHKERRQ(ierr); 134 ierr = VecNorm(rk,NORM_2,&norm);CHKERRQ(ierr); 135 ierr = DMRestoreGlobalVector(da,&rk);CHKERRQ(ierr); 136 if (norm > 1e-6) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"KokkosFunction() different from CpuFunction() with a diff norm = %g\n",norm); 137 PetscFunctionReturn(0); 138 } 139 /* ------------------------------------------------------------------- */ 140 /* 141 FormJacobian - Evaluates Jacobian matrix. 142 143 Input Parameters: 144 . snes - the SNES context 145 . x - input vector 146 . dummy - optional user-defined context (not used here) 147 148 Output Parameters: 149 . jac - Jacobian matrix 150 . B - optionally different preconditioning matrix 151 . flag - flag indicating matrix structure 152 */ 153 PetscErrorCode FormJacobian(SNES snes,Vec x,Mat jac,Mat B,void *ctx) 154 { 155 ApplicationCtx *user = (ApplicationCtx*) ctx; 156 PetscScalar *xx,d,A[3]; 157 PetscErrorCode ierr; 158 PetscInt i,j[3],M,xs,xm; 159 DM da = user->da; 160 161 PetscFunctionBeginUser; 162 /* 163 Get pointer to vector data 164 */ 165 ierr = DMDAVecGetArrayRead(da,x,&xx);CHKERRQ(ierr); 166 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 167 168 /* 169 Get range of locally owned matrix 170 */ 171 ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 172 173 /* 174 Determine starting and ending local indices for interior grid points. 175 Set Jacobian entries for boundary points. 176 */ 177 178 if (xs == 0) { /* left boundary */ 179 i = 0; A[0] = 1.0; 180 181 ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr); 182 xs++;xm--; 183 } 184 if (xs+xm == M) { /* right boundary */ 185 i = M-1; 186 A[0] = 1.0; 187 ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr); 188 xm--; 189 } 190 191 /* 192 Interior grid points 193 - Note that in this case we set all elements for a particular 194 row at once. 195 */ 196 d = 1.0/(user->h*user->h); 197 for (i=xs; i<xs+xm; i++) { 198 j[0] = i - 1; j[1] = i; j[2] = i + 1; 199 A[0] = A[2] = d; A[1] = -2.0*d + 2.0*xx[i]; 200 ierr = MatSetValues(jac,1,&i,3,j,A,INSERT_VALUES);CHKERRQ(ierr); 201 } 202 203 /* 204 Assemble matrix, using the 2-step process: 205 MatAssemblyBegin(), MatAssemblyEnd(). 206 By placing code between these two statements, computations can be 207 done while messages are in transition. 208 209 Also, restore vector. 210 */ 211 212 ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 213 ierr = DMDAVecRestoreArrayRead(da,x,&xx);CHKERRQ(ierr); 214 ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 215 216 PetscFunctionReturn(0); 217 } 218 219 int main(int argc,char **argv) 220 { 221 SNES snes; /* SNES context */ 222 Mat J; /* Jacobian matrix */ 223 ApplicationCtx ctx; /* user-defined context */ 224 Vec x,r,U,F; /* vectors */ 225 PetscScalar none = -1.0; 226 PetscErrorCode ierr; 227 PetscInt its,N = 5,maxit,maxf; 228 PetscReal abstol,rtol,stol,norm; 229 PetscBool viewinitial = PETSC_FALSE; 230 PetscScalarKokkosOffsetView FF,UU; 231 232 ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 233 ierr = PetscOptionsGetInt(NULL,NULL,"-n",&N,NULL);CHKERRQ(ierr); 234 ctx.h = 1.0/(N-1); 235 ierr = PetscOptionsGetBool(NULL,NULL,"-view_initial",&viewinitial,NULL);CHKERRQ(ierr); 236 237 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 238 Create nonlinear solver context 239 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 240 ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr); 241 242 /* 243 Create distributed array (DMDA) to manage parallel grid and vectors 244 */ 245 ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,N,1,1,NULL,&ctx.da);CHKERRQ(ierr); 246 ierr = DMSetFromOptions(ctx.da);CHKERRQ(ierr); 247 ierr = DMSetUp(ctx.da);CHKERRQ(ierr); 248 249 /* 250 Extract global and local vectors from DMDA; then duplicate for remaining 251 vectors that are the same types 252 */ 253 ierr = DMCreateGlobalVector(ctx.da,&x);CHKERRQ(ierr); 254 ierr = PetscObjectSetName((PetscObject)x,"Approximate Solution");CHKERRQ(ierr); 255 ierr = VecDuplicate(x,&r);CHKERRQ(ierr); 256 ierr = VecDuplicate(x,&F);CHKERRQ(ierr); ctx.F = F; 257 ierr = PetscObjectSetName((PetscObject)F,"Forcing function");CHKERRQ(ierr); 258 ierr = VecDuplicate(x,&U);CHKERRQ(ierr); 259 ierr = PetscObjectSetName((PetscObject)U,"Exact Solution");CHKERRQ(ierr); 260 261 /* 262 Set function evaluation routine and vector. Whenever the nonlinear 263 solver needs to compute the nonlinear function, it will call this 264 routine. 265 - Note that the final routine argument is the user-defined 266 context that provides application-specific data for the 267 function evaluation routine. 268 269 At the begining, one can use a stub function that checks the Kokkos version 270 against the CPU version to quickly expose errors. 271 ierr = SNESSetFunction(snes,r,StubFunction,&ctx);CHKERRQ(ierr); 272 */ 273 ierr = SNESSetFunction(snes,r,KokkosFunction,&ctx);CHKERRQ(ierr); 274 275 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 276 Create matrix data structure; set Jacobian evaluation routine 277 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 278 ierr = DMCreateMatrix(ctx.da,&J);CHKERRQ(ierr); 279 280 /* 281 Set Jacobian matrix data structure and default Jacobian evaluation 282 routine. Whenever the nonlinear solver needs to compute the 283 Jacobian matrix, it will call this routine. 284 - Note that the final routine argument is the user-defined 285 context that provides application-specific data for the 286 Jacobian evaluation routine. 287 */ 288 ierr = SNESSetJacobian(snes,J,J,FormJacobian,&ctx);CHKERRQ(ierr); 289 ierr = SNESSetFromOptions(snes);CHKERRQ(ierr); 290 ierr = SNESGetTolerances(snes,&abstol,&rtol,&stol,&maxit,&maxf);CHKERRQ(ierr); 291 ierr = PetscPrintf(PETSC_COMM_WORLD,"atol=%g, rtol=%g, stol=%g, maxit=%D, maxf=%D\n",(double)abstol,(double)rtol,(double)stol,maxit,maxf);CHKERRQ(ierr); 292 293 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 294 Initialize application: 295 Store forcing function of PDE and exact solution 296 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 297 ierr = DMDAVecGetKokkosOffsetViewWrite(ctx.da,F,&FF);CHKERRQ(ierr); 298 ierr = DMDAVecGetKokkosOffsetViewWrite(ctx.da,U,&UU);CHKERRQ(ierr); 299 Kokkos:: parallel_for (Kokkos::RangePolicy<>(FF.begin(0),FF.end(0)),KOKKOS_LAMBDA (int i) { 300 PetscReal xp = i*ctx.h; 301 FF(i) = 6.0*xp + pow(xp+1.e-12,6.0); /* +1.e-12 is to prevent 0^6 */ 302 UU(i) = xp*xp*xp; 303 }); 304 ierr = DMDAVecRestoreKokkosOffsetViewWrite(ctx.da,F,&FF);CHKERRQ(ierr); 305 ierr = DMDAVecRestoreKokkosOffsetViewWrite(ctx.da,U,&UU);CHKERRQ(ierr); 306 307 if (viewinitial) { 308 ierr = VecView(U,NULL);CHKERRQ(ierr); 309 ierr = VecView(F,NULL);CHKERRQ(ierr); 310 } 311 312 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 313 Evaluate initial guess; then solve nonlinear system 314 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 315 316 /* 317 Note: The user should initialize the vector, x, with the initial guess 318 for the nonlinear solver prior to calling SNESSolve(). In particular, 319 to employ an initial guess of zero, the user should explicitly set 320 this vector to zero by calling VecSet(). 321 */ 322 ierr = FormInitialGuess(x);CHKERRQ(ierr); 323 ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr); 324 ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr); 325 ierr = PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its);CHKERRQ(ierr); 326 327 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 328 Check solution and clean up 329 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 330 /* 331 Check the error 332 */ 333 ierr = VecAXPY(x,none,U);CHKERRQ(ierr); 334 ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); 335 ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)norm,its);CHKERRQ(ierr); 336 337 /* 338 Free work space. All PETSc objects should be destroyed when they 339 are no longer needed. 340 */ 341 ierr = VecDestroy(&x);CHKERRQ(ierr); 342 ierr = VecDestroy(&r);CHKERRQ(ierr); 343 ierr = VecDestroy(&U);CHKERRQ(ierr); 344 ierr = VecDestroy(&F);CHKERRQ(ierr); 345 ierr = MatDestroy(&J);CHKERRQ(ierr); 346 ierr = SNESDestroy(&snes);CHKERRQ(ierr); 347 ierr = DMDestroy(&ctx.da);CHKERRQ(ierr); 348 ierr = PetscFinalize(); 349 return ierr; 350 } 351 352 /*TEST 353 354 build: 355 requires: kokkos_kernels 356 357 test: 358 requires: kokkos_kernels !complex !single cuda 359 nsize: 2 360 args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor 361 output_file: output/ex3k_1.out 362 363 TEST*/