1 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n"; 2 3 #include <petscdmda_kokkos.hpp> 4 #include <petscsnes.h> 5 6 /* 7 User-defined application context 8 */ 9 typedef struct { 10 DM da; /* distributed array */ 11 Vec F; /* right-hand-side of PDE */ 12 PetscReal h; /* mesh spacing */ 13 } ApplicationCtx; 14 15 /* ------------------------------------------------------------------- */ 16 /* 17 FormInitialGuess - Computes initial guess. 18 19 Input/Output Parameter: 20 . x - the solution vector 21 */ 22 PetscErrorCode FormInitialGuess(Vec x) 23 { 24 PetscScalar pfive = .50; 25 26 PetscFunctionBeginUser; 27 PetscCall(VecSet(x,pfive)); 28 PetscFunctionReturn(0); 29 } 30 31 /* ------------------------------------------------------------------- */ 32 /* 33 CpuFunction - Evaluates nonlinear function, F(x) on CPU 34 35 Input Parameters: 36 . snes - the SNES context 37 . x - input vector 38 . ctx - optional user-defined context, as set by SNESSetFunction() 39 40 Output Parameter: 41 . r - function vector 42 43 Note: 44 The user-defined context can contain any application-specific 45 data needed for the function evaluation. 46 */ 47 PetscErrorCode CpuFunction(SNES snes,Vec x,Vec r,void *ctx) 48 { 49 ApplicationCtx *user = (ApplicationCtx*) ctx; 50 DM da = user->da; 51 PetscScalar *X,*R,*F,d; 52 PetscInt i,M,xs,xm; 53 Vec xl; 54 55 PetscFunctionBeginUser; 56 PetscCall(DMGetLocalVector(da,&xl)); 57 PetscCall(DMGlobalToLocal(da,x,INSERT_VALUES,xl)); 58 PetscCall(DMDAVecGetArray(da,xl,&X)); 59 PetscCall(DMDAVecGetArray(da,r,&R)); 60 PetscCall(DMDAVecGetArray(da,user->F,&F)); 61 62 PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL)); 63 PetscCall(DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)); 64 65 if (xs == 0) { /* left boundary */ 66 R[0] = X[0]; 67 xs++;xm--; 68 } 69 if (xs+xm == M) { /* right boundary */ 70 R[xs+xm-1] = X[xs+xm-1] - 1.0; 71 xm--; 72 } 73 d = 1.0/(user->h*user->h); 74 for (i=xs; i<xs+xm; i++) R[i] = d*(X[i-1] - 2.0*X[i] + X[i+1]) + X[i]*X[i] - F[i]; 75 76 PetscCall(DMDAVecRestoreArray(da,xl,&X)); 77 PetscCall(DMDAVecRestoreArray(da,r,&R)); 78 PetscCall(DMDAVecRestoreArray(da,user->F,&F)); 79 PetscCall(DMRestoreLocalVector(da,&xl)); 80 PetscFunctionReturn(0); 81 } 82 83 using DefaultExecutionSpace = Kokkos::DefaultExecutionSpace; 84 using DefaultMemorySpace = Kokkos::DefaultExecutionSpace::memory_space; 85 using PetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<PetscScalar*,DefaultMemorySpace>; 86 using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar*,DefaultMemorySpace>; 87 88 PetscErrorCode KokkosFunction(SNES snes,Vec x,Vec r,void *ctx) 89 { 90 ApplicationCtx *user = (ApplicationCtx*) ctx; 91 DM da = user->da; 92 PetscScalar d; 93 PetscInt M; 94 Vec xl; 95 PetscScalarKokkosOffsetView R; 96 ConstPetscScalarKokkosOffsetView X,F; 97 98 PetscFunctionBeginUser; 99 PetscCall(DMGetLocalVector(da,&xl)); 100 PetscCall(DMGlobalToLocal(da,x,INSERT_VALUES,xl)); 101 d = 1.0/(user->h*user->h); 102 PetscCall(DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)); 103 PetscCall(DMDAVecGetKokkosOffsetView(da,xl,&X)); /* read only */ 104 PetscCall(DMDAVecGetKokkosOffsetViewWrite(da,r,&R)); /* write only */ 105 PetscCall(DMDAVecGetKokkosOffsetView(da,user->F,&F)); /* read only */ 106 Kokkos:: parallel_for (Kokkos::RangePolicy<>(R.begin(0),R.end(0)),KOKKOS_LAMBDA (int i) { 107 if (i == 0) R(0) = X(0); /* left boundary */ 108 else if (i == M-1) R(i) = X(i) - 1.0; /* right boundary */ 109 else R(i) = d*(X(i-1) - 2.0*X(i) + X(i+1)) + X(i)*X(i) - F(i); /* interior */ 110 }); 111 PetscCall(DMDAVecRestoreKokkosOffsetView(da,xl,&X)); 112 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(da,r,&R)); 113 PetscCall(DMDAVecRestoreKokkosOffsetView(da,user->F,&F)); 114 PetscCall(DMRestoreLocalVector(da,&xl)); 115 PetscFunctionReturn(0); 116 } 117 118 PetscErrorCode StubFunction(SNES snes ,Vec x,Vec r,void *ctx) 119 { 120 ApplicationCtx *user = (ApplicationCtx*) ctx; 121 DM da = user->da; 122 Vec rk; 123 PetscReal norm=0; 124 125 PetscFunctionBeginUser; 126 PetscCall(DMGetGlobalVector(da,&rk)); 127 PetscCall(CpuFunction(snes,x,r,ctx)); 128 PetscCall(KokkosFunction(snes,x,rk,ctx)); 129 PetscCall(VecAXPY(rk,-1.0,r)); 130 PetscCall(VecNorm(rk,NORM_2,&norm)); 131 PetscCall(DMRestoreGlobalVector(da,&rk)); 132 PetscCheck(norm <= 1e-6,PETSC_COMM_SELF,PETSC_ERR_PLIB,"KokkosFunction() different from CpuFunction() with a diff norm = %g",(double)norm); 133 PetscFunctionReturn(0); 134 } 135 /* ------------------------------------------------------------------- */ 136 /* 137 FormJacobian - Evaluates Jacobian matrix. 138 139 Input Parameters: 140 . snes - the SNES context 141 . x - input vector 142 . dummy - optional user-defined context (not used here) 143 144 Output Parameters: 145 . jac - Jacobian matrix 146 . B - optionally different preconditioning matrix 147 . flag - flag indicating matrix structure 148 */ 149 PetscErrorCode FormJacobian(SNES snes,Vec x,Mat jac,Mat B,void *ctx) 150 { 151 ApplicationCtx *user = (ApplicationCtx*) ctx; 152 PetscScalar *xx,d,A[3]; 153 PetscInt i,j[3],M,xs,xm; 154 DM da = user->da; 155 156 PetscFunctionBeginUser; 157 /* 158 Get pointer to vector data 159 */ 160 PetscCall(DMDAVecGetArrayRead(da,x,&xx)); 161 PetscCall(DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL)); 162 163 /* 164 Get range of locally owned matrix 165 */ 166 PetscCall(DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL)); 167 168 /* 169 Determine starting and ending local indices for interior grid points. 170 Set Jacobian entries for boundary points. 171 */ 172 173 if (xs == 0) { /* left boundary */ 174 i = 0; A[0] = 1.0; 175 176 PetscCall(MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES)); 177 xs++;xm--; 178 } 179 if (xs+xm == M) { /* right boundary */ 180 i = M-1; 181 A[0] = 1.0; 182 PetscCall(MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES)); 183 xm--; 184 } 185 186 /* 187 Interior grid points 188 - Note that in this case we set all elements for a particular 189 row at once. 190 */ 191 d = 1.0/(user->h*user->h); 192 for (i=xs; i<xs+xm; i++) { 193 j[0] = i - 1; j[1] = i; j[2] = i + 1; 194 A[0] = A[2] = d; A[1] = -2.0*d + 2.0*xx[i]; 195 PetscCall(MatSetValues(jac,1,&i,3,j,A,INSERT_VALUES)); 196 } 197 198 /* 199 Assemble matrix, using the 2-step process: 200 MatAssemblyBegin(), MatAssemblyEnd(). 201 By placing code between these two statements, computations can be 202 done while messages are in transition. 203 204 Also, restore vector. 205 */ 206 207 PetscCall(MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY)); 208 PetscCall(DMDAVecRestoreArrayRead(da,x,&xx)); 209 PetscCall(MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY)); 210 211 PetscFunctionReturn(0); 212 } 213 214 int main(int argc,char **argv) 215 { 216 SNES snes; /* SNES context */ 217 Mat J; /* Jacobian matrix */ 218 ApplicationCtx ctx; /* user-defined context */ 219 Vec x,r,U,F; /* vectors */ 220 PetscScalar none = -1.0; 221 PetscInt its,N = 5,maxit,maxf; 222 PetscReal abstol,rtol,stol,norm; 223 PetscBool viewinitial = PETSC_FALSE; 224 225 PetscFunctionBeginUser; 226 PetscCall(PetscInitialize(&argc,&argv,(char*)0,help)); 227 PetscCall(PetscOptionsGetInt(NULL,NULL,"-n",&N,NULL)); 228 ctx.h = 1.0/(N-1); 229 PetscCall(PetscOptionsGetBool(NULL,NULL,"-view_initial",&viewinitial,NULL)); 230 231 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 232 Create nonlinear solver context 233 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 234 PetscCall(SNESCreate(PETSC_COMM_WORLD,&snes)); 235 236 /* 237 Create distributed array (DMDA) to manage parallel grid and vectors 238 */ 239 PetscCall(DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,N,1,1,NULL,&ctx.da)); 240 PetscCall(DMSetFromOptions(ctx.da)); 241 PetscCall(DMSetUp(ctx.da)); 242 243 /* 244 Extract global and local vectors from DMDA; then duplicate for remaining 245 vectors that are the same types 246 */ 247 PetscCall(DMCreateGlobalVector(ctx.da,&x)); 248 PetscCall(PetscObjectSetName((PetscObject)x,"Approximate Solution")); 249 PetscCall(VecDuplicate(x,&r)); 250 PetscCall(VecDuplicate(x,&F)); ctx.F = F; 251 PetscCall(PetscObjectSetName((PetscObject)F,"Forcing function")); 252 PetscCall(VecDuplicate(x,&U)); 253 PetscCall(PetscObjectSetName((PetscObject)U,"Exact Solution")); 254 255 /* 256 Set function evaluation routine and vector. Whenever the nonlinear 257 solver needs to compute the nonlinear function, it will call this 258 routine. 259 - Note that the final routine argument is the user-defined 260 context that provides application-specific data for the 261 function evaluation routine. 262 263 At the beginning, one can use a stub function that checks the Kokkos version 264 against the CPU version to quickly expose errors. 265 PetscCall(SNESSetFunction(snes,r,StubFunction,&ctx)); 266 */ 267 PetscCall(SNESSetFunction(snes,r,KokkosFunction,&ctx)); 268 269 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 270 Create matrix data structure; set Jacobian evaluation routine 271 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 272 PetscCall(DMCreateMatrix(ctx.da,&J)); 273 274 /* 275 Set Jacobian matrix data structure and default Jacobian evaluation 276 routine. Whenever the nonlinear solver needs to compute the 277 Jacobian matrix, it will call this routine. 278 - Note that the final routine argument is the user-defined 279 context that provides application-specific data for the 280 Jacobian evaluation routine. 281 */ 282 PetscCall(SNESSetJacobian(snes,J,J,FormJacobian,&ctx)); 283 PetscCall(SNESSetFromOptions(snes)); 284 PetscCall(SNESGetTolerances(snes,&abstol,&rtol,&stol,&maxit,&maxf)); 285 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"atol=%g, rtol=%g, stol=%g, maxit=%" PetscInt_FMT ", maxf=%" PetscInt_FMT "\n",(double)abstol,(double)rtol,(double)stol,maxit,maxf)); 286 287 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 288 Initialize application: 289 Store forcing function of PDE and exact solution 290 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 291 { 292 PetscScalarKokkosOffsetView FF,UU; 293 PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da,F,&FF)); 294 PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da,U,&UU)); 295 Kokkos:: parallel_for (Kokkos::RangePolicy<>(FF.begin(0),FF.end(0)),KOKKOS_LAMBDA (int i) { 296 PetscReal xp = i*ctx.h; 297 FF(i) = 6.0*xp + pow(xp+1.e-12,6.0); /* +1.e-12 is to prevent 0^6 */ 298 UU(i) = xp*xp*xp; 299 }); 300 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da,F,&FF)); 301 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da,U,&UU)); 302 } 303 304 if (viewinitial) { 305 PetscCall(VecView(U,NULL)); 306 PetscCall(VecView(F,NULL)); 307 } 308 309 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 310 Evaluate initial guess; then solve nonlinear system 311 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 312 313 /* 314 Note: The user should initialize the vector, x, with the initial guess 315 for the nonlinear solver prior to calling SNESSolve(). In particular, 316 to employ an initial guess of zero, the user should explicitly set 317 this vector to zero by calling VecSet(). 318 */ 319 PetscCall(FormInitialGuess(x)); 320 PetscCall(SNESSolve(snes,NULL,x)); 321 PetscCall(SNESGetIterationNumber(snes,&its)); 322 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %" PetscInt_FMT "\n",its)); 323 324 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 325 Check solution and clean up 326 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 327 /* 328 Check the error 329 */ 330 PetscCall(VecAXPY(x,none,U)); 331 PetscCall(VecNorm(x,NORM_2,&norm)); 332 PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %" PetscInt_FMT "\n",(double)norm,its)); 333 334 /* 335 Free work space. All PETSc objects should be destroyed when they 336 are no longer needed. 337 */ 338 PetscCall(VecDestroy(&x)); 339 PetscCall(VecDestroy(&r)); 340 PetscCall(VecDestroy(&U)); 341 PetscCall(VecDestroy(&F)); 342 PetscCall(MatDestroy(&J)); 343 PetscCall(SNESDestroy(&snes)); 344 PetscCall(DMDestroy(&ctx.da)); 345 PetscCall(PetscFinalize()); 346 return 0; 347 } 348 349 /*TEST 350 351 build: 352 requires: kokkos_kernels 353 354 test: 355 requires: kokkos_kernels !complex !single 356 nsize: 2 357 args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor 358 output_file: output/ex3k_1.out 359 360 TEST*/ 361