1 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n"; 2 3 #include <petscdmda_kokkos.hpp> 4 #include <petscsnes.h> 5 6 /* 7 User-defined application context 8 */ 9 typedef struct { 10 DM da; /* distributed array */ 11 Vec F; /* right-hand-side of PDE */ 12 PetscReal h; /* mesh spacing */ 13 } ApplicationCtx; 14 15 /* ------------------------------------------------------------------- */ 16 /* 17 FormInitialGuess - Computes initial guess. 18 19 Input/Output Parameter: 20 . x - the solution vector 21 */ 22 PetscErrorCode FormInitialGuess(Vec x) 23 { 24 PetscScalar pfive = .50; 25 26 PetscFunctionBeginUser; 27 PetscCall(VecSet(x,pfive)); 28 PetscFunctionReturn(0); 29 } 30 31 /* ------------------------------------------------------------------- */ 32 /* 33 CpuFunction - Evaluates nonlinear function, F(x) on CPU 34 35 Input Parameters: 36 . snes - the SNES context 37 . x - input vector 38 . ctx - optional user-defined context, as set by SNESSetFunction() 39 40 Output Parameter: 41 . r - function vector 42 43 Note: 44 The user-defined context can contain any application-specific 45 data needed for the function evaluation. 46 */ 47 PetscErrorCode CpuFunction(SNES snes,Vec x,Vec r,void *ctx) 48 { 49 ApplicationCtx *user = (ApplicationCtx*) ctx; 50 DM da = user->da; 51 PetscScalar *X,*R,*F,d; 52 PetscInt i,M,xs,xm; 53 Vec xl; 54 55 PetscFunctionBeginUser; 56 PetscCall(DMGetLocalVector(da,&xl)); 57 PetscCall(DMGlobalToLocal(da,x,INSERT_VALUES,xl)); 58 PetscCall(DMDAVecGetArray(da,xl,&X)); 59 PetscCall(DMDAVecGetArray(da,r,&R)); 60 PetscCall(DMDAVecGetArray(da,user->F,&F)); 61 62 PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL)); 63 PetscCall(DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)); 64 65 if (xs == 0) { /* left boundary */ 66 R[0] = X[0]; 67 xs++;xm--; 68 } 69 if (xs+xm == M) { /* right boundary */ 70 R[xs+xm-1] = X[xs+xm-1] - 1.0; 71 xm--; 72 } 73 d = 1.0/(user->h*user->h); 74 for (i=xs; i<xs+xm; i++) R[i] = d*(X[i-1] - 2.0*X[i] + X[i+1]) + X[i]*X[i] - F[i]; 75 76 PetscCall(DMDAVecRestoreArray(da,xl,&X)); 77 PetscCall(DMDAVecRestoreArray(da,r,&R)); 78 PetscCall(DMDAVecRestoreArray(da,user->F,&F)); 79 PetscCall(DMRestoreLocalVector(da,&xl)); 80 PetscFunctionReturn(0); 81 } 82 83 using DefaultExecutionSpace = Kokkos::DefaultExecutionSpace; 84 using DefaultMemorySpace = Kokkos::DefaultExecutionSpace::memory_space; 85 using PetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<PetscScalar*,DefaultMemorySpace>; 86 using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar*,DefaultMemorySpace>; 87 88 PetscErrorCode KokkosFunction(SNES snes,Vec x,Vec r,void *ctx) 89 { 90 ApplicationCtx *user = (ApplicationCtx*) ctx; 91 DM da = user->da; 92 PetscScalar d; 93 PetscInt M; 94 Vec xl; 95 PetscScalarKokkosOffsetView R; 96 ConstPetscScalarKokkosOffsetView X,F; 97 98 PetscFunctionBeginUser; 99 PetscCall(DMGetLocalVector(da,&xl)); 100 PetscCall(DMGlobalToLocal(da,x,INSERT_VALUES,xl)); 101 d = 1.0/(user->h*user->h); 102 PetscCall(DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)); 103 PetscCall(DMDAVecGetKokkosOffsetView(da,xl,&X)); /* read only */ 104 PetscCall(DMDAVecGetKokkosOffsetViewWrite(da,r,&R)); /* write only */ 105 PetscCall(DMDAVecGetKokkosOffsetView(da,user->F,&F)); /* read only */ 106 Kokkos:: parallel_for (Kokkos::RangePolicy<>(R.begin(0),R.end(0)),KOKKOS_LAMBDA (int i) { 107 if (i == 0) R(0) = X(0); /* left boundary */ 108 else if (i == M-1) R(i) = X(i) - 1.0; /* right boundary */ 109 else R(i) = d*(X(i-1) - 2.0*X(i) + X(i+1)) + X(i)*X(i) - F(i); /* interior */ 110 }); 111 PetscCall(DMDAVecRestoreKokkosOffsetView(da,xl,&X)); 112 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(da,r,&R)); 113 PetscCall(DMDAVecRestoreKokkosOffsetView(da,user->F,&F)); 114 PetscCall(DMRestoreLocalVector(da,&xl)); 115 PetscFunctionReturn(0); 116 } 117 118 PetscErrorCode StubFunction(SNES snes ,Vec x,Vec r,void *ctx) 119 { 120 ApplicationCtx *user = (ApplicationCtx*) ctx; 121 DM da = user->da; 122 Vec rk; 123 PetscReal norm=0; 124 125 PetscFunctionBeginUser; 126 PetscCall(DMGetGlobalVector(da,&rk)); 127 PetscCall(CpuFunction(snes,x,r,ctx)); 128 PetscCall(KokkosFunction(snes,x,rk,ctx)); 129 PetscCall(VecAXPY(rk,-1.0,r)); 130 PetscCall(VecNorm(rk,NORM_2,&norm)); 131 PetscCall(DMRestoreGlobalVector(da,&rk)); 132 PetscCheckFalse(norm > 1e-6,PETSC_COMM_SELF,PETSC_ERR_PLIB,"KokkosFunction() different from CpuFunction() with a diff norm = %g",norm); 133 PetscFunctionReturn(0); 134 } 135 /* ------------------------------------------------------------------- */ 136 /* 137 FormJacobian - Evaluates Jacobian matrix. 138 139 Input Parameters: 140 . snes - the SNES context 141 . x - input vector 142 . dummy - optional user-defined context (not used here) 143 144 Output Parameters: 145 . jac - Jacobian matrix 146 . B - optionally different preconditioning matrix 147 . flag - flag indicating matrix structure 148 */ 149 PetscErrorCode FormJacobian(SNES snes,Vec x,Mat jac,Mat B,void *ctx) 150 { 151 ApplicationCtx *user = (ApplicationCtx*) ctx; 152 PetscScalar *xx,d,A[3]; 153 PetscInt i,j[3],M,xs,xm; 154 DM da = user->da; 155 156 PetscFunctionBeginUser; 157 /* 158 Get pointer to vector data 159 */ 160 PetscCall(DMDAVecGetArrayRead(da,x,&xx)); 161 PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL)); 162 163 /* 164 Get range of locally owned matrix 165 */ 166 PetscCall(DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)); 167 168 /* 169 Determine starting and ending local indices for interior grid points. 170 Set Jacobian entries for boundary points. 171 */ 172 173 if (xs == 0) { /* left boundary */ 174 i = 0; A[0] = 1.0; 175 176 PetscCall(MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES)); 177 xs++;xm--; 178 } 179 if (xs+xm == M) { /* right boundary */ 180 i = M-1; 181 A[0] = 1.0; 182 PetscCall(MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES)); 183 xm--; 184 } 185 186 /* 187 Interior grid points 188 - Note that in this case we set all elements for a particular 189 row at once. 190 */ 191 d = 1.0/(user->h*user->h); 192 for (i=xs; i<xs+xm; i++) { 193 j[0] = i - 1; j[1] = i; j[2] = i + 1; 194 A[0] = A[2] = d; A[1] = -2.0*d + 2.0*xx[i]; 195 PetscCall(MatSetValues(jac,1,&i,3,j,A,INSERT_VALUES)); 196 } 197 198 /* 199 Assemble matrix, using the 2-step process: 200 MatAssemblyBegin(), MatAssemblyEnd(). 201 By placing code between these two statements, computations can be 202 done while messages are in transition. 203 204 Also, restore vector. 205 */ 206 207 PetscCall(MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY)); 208 PetscCall(DMDAVecRestoreArrayRead(da,x,&xx)); 209 PetscCall(MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY)); 210 211 PetscFunctionReturn(0); 212 } 213 214 int main(int argc,char **argv) 215 { 216 SNES snes; /* SNES context */ 217 Mat J; /* Jacobian matrix */ 218 ApplicationCtx ctx; /* user-defined context */ 219 Vec x,r,U,F; /* vectors */ 220 PetscScalar none = -1.0; 221 PetscInt its,N = 5,maxit,maxf; 222 PetscReal abstol,rtol,stol,norm; 223 PetscBool viewinitial = PETSC_FALSE; 224 225 PetscCall(PetscInitialize(&argc,&argv,(char*)0,help)); 226 PetscCall(PetscOptionsGetInt(NULL,NULL,"-n",&N,NULL)); 227 ctx.h = 1.0/(N-1); 228 PetscCall(PetscOptionsGetBool(NULL,NULL,"-view_initial",&viewinitial,NULL)); 229 230 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 231 Create nonlinear solver context 232 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 233 PetscCall(SNESCreate(PETSC_COMM_WORLD,&snes)); 234 235 /* 236 Create distributed array (DMDA) to manage parallel grid and vectors 237 */ 238 PetscCall(DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,N,1,1,NULL,&ctx.da)); 239 PetscCall(DMSetFromOptions(ctx.da)); 240 PetscCall(DMSetUp(ctx.da)); 241 242 /* 243 Extract global and local vectors from DMDA; then duplicate for remaining 244 vectors that are the same types 245 */ 246 PetscCall(DMCreateGlobalVector(ctx.da,&x)); 247 PetscCall(PetscObjectSetName((PetscObject)x,"Approximate Solution")); 248 PetscCall(VecDuplicate(x,&r)); 249 PetscCall(VecDuplicate(x,&F)); ctx.F = F; 250 PetscCall(PetscObjectSetName((PetscObject)F,"Forcing function")); 251 PetscCall(VecDuplicate(x,&U)); 252 PetscCall(PetscObjectSetName((PetscObject)U,"Exact Solution")); 253 254 /* 255 Set function evaluation routine and vector. Whenever the nonlinear 256 solver needs to compute the nonlinear function, it will call this 257 routine. 258 - Note that the final routine argument is the user-defined 259 context that provides application-specific data for the 260 function evaluation routine. 261 262 At the beginning, one can use a stub function that checks the Kokkos version 263 against the CPU version to quickly expose errors. 264 PetscCall(SNESSetFunction(snes,r,StubFunction,&ctx)); 265 */ 266 PetscCall(SNESSetFunction(snes,r,KokkosFunction,&ctx)); 267 268 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 269 Create matrix data structure; set Jacobian evaluation routine 270 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 271 PetscCall(DMCreateMatrix(ctx.da,&J)); 272 273 /* 274 Set Jacobian matrix data structure and default Jacobian evaluation 275 routine. Whenever the nonlinear solver needs to compute the 276 Jacobian matrix, it will call this routine. 277 - Note that the final routine argument is the user-defined 278 context that provides application-specific data for the 279 Jacobian evaluation routine. 280 */ 281 PetscCall(SNESSetJacobian(snes,J,J,FormJacobian,&ctx)); 282 PetscCall(SNESSetFromOptions(snes)); 283 PetscCall(SNESGetTolerances(snes,&abstol,&rtol,&stol,&maxit,&maxf)); 284 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"atol=%g, rtol=%g, stol=%g, maxit=%D, maxf=%D\n",(double)abstol,(double)rtol,(double)stol,maxit,maxf)); 285 286 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 287 Initialize application: 288 Store forcing function of PDE and exact solution 289 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 290 { 291 PetscScalarKokkosOffsetView FF,UU; 292 PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da,F,&FF)); 293 PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da,U,&UU)); 294 Kokkos:: parallel_for (Kokkos::RangePolicy<>(FF.begin(0),FF.end(0)),KOKKOS_LAMBDA (int i) { 295 PetscReal xp = i*ctx.h; 296 FF(i) = 6.0*xp + pow(xp+1.e-12,6.0); /* +1.e-12 is to prevent 0^6 */ 297 UU(i) = xp*xp*xp; 298 }); 299 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da,F,&FF)); 300 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da,U,&UU)); 301 } 302 303 if (viewinitial) { 304 PetscCall(VecView(U,NULL)); 305 PetscCall(VecView(F,NULL)); 306 } 307 308 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 309 Evaluate initial guess; then solve nonlinear system 310 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 311 312 /* 313 Note: The user should initialize the vector, x, with the initial guess 314 for the nonlinear solver prior to calling SNESSolve(). In particular, 315 to employ an initial guess of zero, the user should explicitly set 316 this vector to zero by calling VecSet(). 317 */ 318 PetscCall(FormInitialGuess(x)); 319 PetscCall(SNESSolve(snes,NULL,x)); 320 PetscCall(SNESGetIterationNumber(snes,&its)); 321 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its)); 322 323 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 324 Check solution and clean up 325 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 326 /* 327 Check the error 328 */ 329 PetscCall(VecAXPY(x,none,U)); 330 PetscCall(VecNorm(x,NORM_2,&norm)); 331 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)norm,its)); 332 333 /* 334 Free work space. All PETSc objects should be destroyed when they 335 are no longer needed. 336 */ 337 PetscCall(VecDestroy(&x)); 338 PetscCall(VecDestroy(&r)); 339 PetscCall(VecDestroy(&U)); 340 PetscCall(VecDestroy(&F)); 341 PetscCall(MatDestroy(&J)); 342 PetscCall(SNESDestroy(&snes)); 343 PetscCall(DMDestroy(&ctx.da)); 344 PetscCall(PetscFinalize()); 345 return 0; 346 } 347 348 /*TEST 349 350 build: 351 requires: kokkos_kernels 352 353 test: 354 requires: kokkos_kernels !complex !single 355 nsize: 2 356 args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor 357 output_file: output/ex3k_1.out 358 359 TEST*/ 360