1 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n"; 2 3 #include <petscdmda_kokkos.hpp> 4 #include <petscsnes.h> 5 6 /* 7 User-defined application context 8 */ 9 typedef struct { 10 DM da; /* distributed array */ 11 Vec F; /* right-hand-side of PDE */ 12 PetscReal h; /* mesh spacing */ 13 } ApplicationCtx; 14 15 /* ------------------------------------------------------------------- */ 16 /* 17 FormInitialGuess - Computes initial guess. 18 19 Input/Output Parameter: 20 . x - the solution vector 21 */ 22 PetscErrorCode FormInitialGuess(Vec x) 23 { 24 PetscScalar pfive = .50; 25 26 PetscFunctionBeginUser; 27 CHKERRQ(VecSet(x,pfive)); 28 PetscFunctionReturn(0); 29 } 30 31 /* ------------------------------------------------------------------- */ 32 /* 33 CpuFunction - Evaluates nonlinear function, F(x) on CPU 34 35 Input Parameters: 36 . snes - the SNES context 37 . x - input vector 38 . ctx - optional user-defined context, as set by SNESSetFunction() 39 40 Output Parameter: 41 . r - function vector 42 43 Note: 44 The user-defined context can contain any application-specific 45 data needed for the function evaluation. 46 */ 47 PetscErrorCode CpuFunction(SNES snes,Vec x,Vec r,void *ctx) 48 { 49 ApplicationCtx *user = (ApplicationCtx*) ctx; 50 DM da = user->da; 51 PetscScalar *X,*R,*F,d; 52 PetscInt i,M,xs,xm; 53 Vec xl; 54 55 PetscFunctionBeginUser; 56 CHKERRQ(DMGetLocalVector(da,&xl)); 57 CHKERRQ(DMGlobalToLocal(da,x,INSERT_VALUES,xl)); 58 CHKERRQ(DMDAVecGetArray(da,xl,&X)); 59 CHKERRQ(DMDAVecGetArray(da,r,&R)); 60 CHKERRQ(DMDAVecGetArray(da,user->F,&F)); 61 62 CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL)); 63 CHKERRQ(DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)); 64 65 if (xs == 0) { /* left boundary */ 66 R[0] = X[0]; 67 xs++;xm--; 68 } 69 if (xs+xm == M) { /* right boundary */ 70 R[xs+xm-1] = X[xs+xm-1] - 1.0; 71 xm--; 72 } 73 d = 1.0/(user->h*user->h); 74 for (i=xs; i<xs+xm; i++) R[i] = d*(X[i-1] - 2.0*X[i] + X[i+1]) + X[i]*X[i] - F[i]; 75 76 CHKERRQ(DMDAVecRestoreArray(da,xl,&X)); 77 CHKERRQ(DMDAVecRestoreArray(da,r,&R)); 78 CHKERRQ(DMDAVecRestoreArray(da,user->F,&F)); 79 CHKERRQ(DMRestoreLocalVector(da,&xl)); 80 PetscFunctionReturn(0); 81 } 82 83 using DefaultExecutionSpace = Kokkos::DefaultExecutionSpace; 84 using DefaultMemorySpace = Kokkos::DefaultExecutionSpace::memory_space; 85 using PetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<PetscScalar*,DefaultMemorySpace>; 86 using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar*,DefaultMemorySpace>; 87 88 PetscErrorCode KokkosFunction(SNES snes,Vec x,Vec r,void *ctx) 89 { 90 ApplicationCtx *user = (ApplicationCtx*) ctx; 91 DM da = user->da; 92 PetscScalar d; 93 PetscInt M; 94 Vec xl; 95 PetscScalarKokkosOffsetView R; 96 ConstPetscScalarKokkosOffsetView X,F; 97 98 PetscFunctionBeginUser; 99 CHKERRQ(DMGetLocalVector(da,&xl)); 100 CHKERRQ(DMGlobalToLocal(da,x,INSERT_VALUES,xl)); 101 d = 1.0/(user->h*user->h); 102 CHKERRQ(DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)); 103 CHKERRQ(DMDAVecGetKokkosOffsetView(da,xl,&X)); /* read only */ 104 CHKERRQ(DMDAVecGetKokkosOffsetViewWrite(da,r,&R)); /* write only */ 105 CHKERRQ(DMDAVecGetKokkosOffsetView(da,user->F,&F)); /* read only */ 106 Kokkos:: parallel_for (Kokkos::RangePolicy<>(R.begin(0),R.end(0)),KOKKOS_LAMBDA (int i) { 107 if (i == 0) R(0) = X(0); /* left boundary */ 108 else if (i == M-1) R(i) = X(i) - 1.0; /* right boundary */ 109 else R(i) = d*(X(i-1) - 2.0*X(i) + X(i+1)) + X(i)*X(i) - F(i); /* interior */ 110 }); 111 CHKERRQ(DMDAVecRestoreKokkosOffsetView(da,xl,&X)); 112 CHKERRQ(DMDAVecRestoreKokkosOffsetViewWrite(da,r,&R)); 113 CHKERRQ(DMDAVecRestoreKokkosOffsetView(da,user->F,&F)); 114 CHKERRQ(DMRestoreLocalVector(da,&xl)); 115 PetscFunctionReturn(0); 116 } 117 118 PetscErrorCode StubFunction(SNES snes ,Vec x,Vec r,void *ctx) 119 { 120 ApplicationCtx *user = (ApplicationCtx*) ctx; 121 DM da = user->da; 122 Vec rk; 123 PetscReal norm=0; 124 125 PetscFunctionBeginUser; 126 CHKERRQ(DMGetGlobalVector(da,&rk)); 127 CHKERRQ(CpuFunction(snes,x,r,ctx)); 128 CHKERRQ(KokkosFunction(snes,x,rk,ctx)); 129 CHKERRQ(VecAXPY(rk,-1.0,r)); 130 CHKERRQ(VecNorm(rk,NORM_2,&norm)); 131 CHKERRQ(DMRestoreGlobalVector(da,&rk)); 132 PetscCheckFalse(norm > 1e-6,PETSC_COMM_SELF,PETSC_ERR_PLIB,"KokkosFunction() different from CpuFunction() with a diff norm = %g",norm); 133 PetscFunctionReturn(0); 134 } 135 /* ------------------------------------------------------------------- */ 136 /* 137 FormJacobian - Evaluates Jacobian matrix. 138 139 Input Parameters: 140 . snes - the SNES context 141 . x - input vector 142 . dummy - optional user-defined context (not used here) 143 144 Output Parameters: 145 . jac - Jacobian matrix 146 . B - optionally different preconditioning matrix 147 . flag - flag indicating matrix structure 148 */ 149 PetscErrorCode FormJacobian(SNES snes,Vec x,Mat jac,Mat B,void *ctx) 150 { 151 ApplicationCtx *user = (ApplicationCtx*) ctx; 152 PetscScalar *xx,d,A[3]; 153 PetscInt i,j[3],M,xs,xm; 154 DM da = user->da; 155 156 PetscFunctionBeginUser; 157 /* 158 Get pointer to vector data 159 */ 160 CHKERRQ(DMDAVecGetArrayRead(da,x,&xx)); 161 CHKERRQ(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL)); 162 163 /* 164 Get range of locally owned matrix 165 */ 166 CHKERRQ(DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)); 167 168 /* 169 Determine starting and ending local indices for interior grid points. 170 Set Jacobian entries for boundary points. 171 */ 172 173 if (xs == 0) { /* left boundary */ 174 i = 0; A[0] = 1.0; 175 176 CHKERRQ(MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES)); 177 xs++;xm--; 178 } 179 if (xs+xm == M) { /* right boundary */ 180 i = M-1; 181 A[0] = 1.0; 182 CHKERRQ(MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES)); 183 xm--; 184 } 185 186 /* 187 Interior grid points 188 - Note that in this case we set all elements for a particular 189 row at once. 190 */ 191 d = 1.0/(user->h*user->h); 192 for (i=xs; i<xs+xm; i++) { 193 j[0] = i - 1; j[1] = i; j[2] = i + 1; 194 A[0] = A[2] = d; A[1] = -2.0*d + 2.0*xx[i]; 195 CHKERRQ(MatSetValues(jac,1,&i,3,j,A,INSERT_VALUES)); 196 } 197 198 /* 199 Assemble matrix, using the 2-step process: 200 MatAssemblyBegin(), MatAssemblyEnd(). 201 By placing code between these two statements, computations can be 202 done while messages are in transition. 203 204 Also, restore vector. 205 */ 206 207 CHKERRQ(MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY)); 208 CHKERRQ(DMDAVecRestoreArrayRead(da,x,&xx)); 209 CHKERRQ(MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY)); 210 211 PetscFunctionReturn(0); 212 } 213 214 int main(int argc,char **argv) 215 { 216 SNES snes; /* SNES context */ 217 Mat J; /* Jacobian matrix */ 218 ApplicationCtx ctx; /* user-defined context */ 219 Vec x,r,U,F; /* vectors */ 220 PetscScalar none = -1.0; 221 PetscErrorCode ierr; 222 PetscInt its,N = 5,maxit,maxf; 223 PetscReal abstol,rtol,stol,norm; 224 PetscBool viewinitial = PETSC_FALSE; 225 226 ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 227 CHKERRQ(PetscOptionsGetInt(NULL,NULL,"-n",&N,NULL)); 228 ctx.h = 1.0/(N-1); 229 CHKERRQ(PetscOptionsGetBool(NULL,NULL,"-view_initial",&viewinitial,NULL)); 230 231 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 232 Create nonlinear solver context 233 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 234 CHKERRQ(SNESCreate(PETSC_COMM_WORLD,&snes)); 235 236 /* 237 Create distributed array (DMDA) to manage parallel grid and vectors 238 */ 239 CHKERRQ(DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,N,1,1,NULL,&ctx.da)); 240 CHKERRQ(DMSetFromOptions(ctx.da)); 241 CHKERRQ(DMSetUp(ctx.da)); 242 243 /* 244 Extract global and local vectors from DMDA; then duplicate for remaining 245 vectors that are the same types 246 */ 247 CHKERRQ(DMCreateGlobalVector(ctx.da,&x)); 248 CHKERRQ(PetscObjectSetName((PetscObject)x,"Approximate Solution")); 249 CHKERRQ(VecDuplicate(x,&r)); 250 CHKERRQ(VecDuplicate(x,&F)); ctx.F = F; 251 CHKERRQ(PetscObjectSetName((PetscObject)F,"Forcing function")); 252 CHKERRQ(VecDuplicate(x,&U)); 253 CHKERRQ(PetscObjectSetName((PetscObject)U,"Exact Solution")); 254 255 /* 256 Set function evaluation routine and vector. Whenever the nonlinear 257 solver needs to compute the nonlinear function, it will call this 258 routine. 259 - Note that the final routine argument is the user-defined 260 context that provides application-specific data for the 261 function evaluation routine. 262 263 At the beginning, one can use a stub function that checks the Kokkos version 264 against the CPU version to quickly expose errors. 265 CHKERRQ(SNESSetFunction(snes,r,StubFunction,&ctx)); 266 */ 267 CHKERRQ(SNESSetFunction(snes,r,KokkosFunction,&ctx)); 268 269 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 270 Create matrix data structure; set Jacobian evaluation routine 271 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 272 CHKERRQ(DMCreateMatrix(ctx.da,&J)); 273 274 /* 275 Set Jacobian matrix data structure and default Jacobian evaluation 276 routine. Whenever the nonlinear solver needs to compute the 277 Jacobian matrix, it will call this routine. 278 - Note that the final routine argument is the user-defined 279 context that provides application-specific data for the 280 Jacobian evaluation routine. 281 */ 282 CHKERRQ(SNESSetJacobian(snes,J,J,FormJacobian,&ctx)); 283 CHKERRQ(SNESSetFromOptions(snes)); 284 CHKERRQ(SNESGetTolerances(snes,&abstol,&rtol,&stol,&maxit,&maxf)); 285 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"atol=%g, rtol=%g, stol=%g, maxit=%D, maxf=%D\n",(double)abstol,(double)rtol,(double)stol,maxit,maxf)); 286 287 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 288 Initialize application: 289 Store forcing function of PDE and exact solution 290 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 291 { 292 PetscScalarKokkosOffsetView FF,UU; 293 CHKERRQ(DMDAVecGetKokkosOffsetViewWrite(ctx.da,F,&FF)); 294 CHKERRQ(DMDAVecGetKokkosOffsetViewWrite(ctx.da,U,&UU)); 295 Kokkos:: parallel_for (Kokkos::RangePolicy<>(FF.begin(0),FF.end(0)),KOKKOS_LAMBDA (int i) { 296 PetscReal xp = i*ctx.h; 297 FF(i) = 6.0*xp + pow(xp+1.e-12,6.0); /* +1.e-12 is to prevent 0^6 */ 298 UU(i) = xp*xp*xp; 299 }); 300 CHKERRQ(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da,F,&FF)); 301 CHKERRQ(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da,U,&UU)); 302 } 303 304 if (viewinitial) { 305 CHKERRQ(VecView(U,NULL)); 306 CHKERRQ(VecView(F,NULL)); 307 } 308 309 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 310 Evaluate initial guess; then solve nonlinear system 311 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 312 313 /* 314 Note: The user should initialize the vector, x, with the initial guess 315 for the nonlinear solver prior to calling SNESSolve(). In particular, 316 to employ an initial guess of zero, the user should explicitly set 317 this vector to zero by calling VecSet(). 318 */ 319 CHKERRQ(FormInitialGuess(x)); 320 CHKERRQ(SNESSolve(snes,NULL,x)); 321 CHKERRQ(SNESGetIterationNumber(snes,&its)); 322 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its)); 323 324 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 325 Check solution and clean up 326 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 327 /* 328 Check the error 329 */ 330 CHKERRQ(VecAXPY(x,none,U)); 331 CHKERRQ(VecNorm(x,NORM_2,&norm)); 332 CHKERRQ(PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)norm,its)); 333 334 /* 335 Free work space. All PETSc objects should be destroyed when they 336 are no longer needed. 337 */ 338 CHKERRQ(VecDestroy(&x)); 339 CHKERRQ(VecDestroy(&r)); 340 CHKERRQ(VecDestroy(&U)); 341 CHKERRQ(VecDestroy(&F)); 342 CHKERRQ(MatDestroy(&J)); 343 CHKERRQ(SNESDestroy(&snes)); 344 CHKERRQ(DMDestroy(&ctx.da)); 345 ierr = PetscFinalize(); 346 return ierr; 347 } 348 349 /*TEST 350 351 build: 352 requires: kokkos_kernels 353 354 test: 355 requires: kokkos_kernels !complex !single 356 nsize: 2 357 args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor 358 output_file: output/ex3k_1.out 359 360 TEST*/ 361