1 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n"; 2 3 #include <petscdmda_kokkos.hpp> 4 #include <petscsnes.h> 5 6 /* 7 User-defined application context 8 */ 9 typedef struct { 10 DM da; /* distributed array */ 11 Vec F; /* right-hand-side of PDE */ 12 PetscReal h; /* mesh spacing */ 13 } ApplicationCtx; 14 15 /* ------------------------------------------------------------------- */ 16 /* 17 FormInitialGuess - Computes initial guess. 18 19 Input/Output Parameter: 20 . x - the solution vector 21 */ 22 PetscErrorCode FormInitialGuess(Vec x) 23 { 24 PetscScalar pfive = .50; 25 26 PetscFunctionBeginUser; 27 PetscCall(VecSet(x, pfive)); 28 PetscFunctionReturn(PETSC_SUCCESS); 29 } 30 31 /* ------------------------------------------------------------------- */ 32 /* 33 CpuFunction - Evaluates nonlinear function, F(x) on CPU 34 35 Input Parameters: 36 . snes - the SNES context 37 . x - input vector 38 . ctx - optional user-defined context, as set by SNESSetFunction() 39 40 Output Parameter: 41 . r - function vector 42 43 Note: 44 The user-defined context can contain any application-specific 45 data needed for the function evaluation. 46 */ 47 PetscErrorCode CpuFunction(SNES snes, Vec x, Vec r, void *ctx) 48 { 49 ApplicationCtx *user = (ApplicationCtx *)ctx; 50 DM da = user->da; 51 PetscScalar *X, *R, *F, d; 52 PetscInt i, M, xs, xm; 53 Vec xl; 54 55 PetscFunctionBeginUser; 56 PetscCall(DMGetLocalVector(da, &xl)); 57 PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl)); 58 PetscCall(DMDAVecGetArray(da, xl, &X)); 59 PetscCall(DMDAVecGetArray(da, r, &R)); 60 PetscCall(DMDAVecGetArray(da, user->F, &F)); 61 62 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL)); 63 PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)); 64 65 if (xs == 0) { /* left boundary */ 66 R[0] = X[0]; 67 xs++; 68 xm--; 69 } 70 if (xs + xm == M) { /* right boundary */ 71 R[xs + xm - 1] = X[xs + xm - 1] - 1.0; 72 xm--; 73 } 74 d = 1.0 / (user->h * user->h); 75 for (i = xs; i < xs + xm; i++) R[i] = d * (X[i - 1] - 2.0 * X[i] + X[i + 1]) + X[i] * X[i] - F[i]; 76 77 PetscCall(DMDAVecRestoreArray(da, xl, &X)); 78 PetscCall(DMDAVecRestoreArray(da, r, &R)); 79 PetscCall(DMDAVecRestoreArray(da, user->F, &F)); 80 PetscCall(DMRestoreLocalVector(da, &xl)); 81 PetscFunctionReturn(PETSC_SUCCESS); 82 } 83 84 using DefaultExecutionSpace = Kokkos::DefaultExecutionSpace; 85 using DefaultMemorySpace = Kokkos::DefaultExecutionSpace::memory_space; 86 using PetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<PetscScalar *, DefaultMemorySpace>; 87 using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar *, DefaultMemorySpace>; 88 89 PetscErrorCode KokkosFunction(SNES snes, Vec x, Vec r, void *ctx) 90 { 91 ApplicationCtx *user = (ApplicationCtx *)ctx; 92 DM da = user->da; 93 PetscScalar d; 94 PetscInt M; 95 Vec xl; 96 PetscScalarKokkosOffsetView R; 97 ConstPetscScalarKokkosOffsetView X, F; 98 99 PetscFunctionBeginUser; 100 PetscCall(DMGetLocalVector(da, &xl)); 101 PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl)); 102 d = 1.0 / (user->h * user->h); 103 PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)); 104 PetscCall(DMDAVecGetKokkosOffsetView(da, xl, &X)); /* read only */ 105 PetscCall(DMDAVecGetKokkosOffsetViewWrite(da, r, &R)); /* write only */ 106 PetscCall(DMDAVecGetKokkosOffsetView(da, user->F, &F)); /* read only */ 107 Kokkos::parallel_for( 108 Kokkos::RangePolicy<>(R.begin(0), R.end(0)), KOKKOS_LAMBDA(int i) { 109 if (i == 0) R(0) = X(0); /* left boundary */ 110 else if (i == M - 1) R(i) = X(i) - 1.0; /* right boundary */ 111 else R(i) = d * (X(i - 1) - 2.0 * X(i) + X(i + 1)) + X(i) * X(i) - F(i); /* interior */ 112 }); 113 PetscCall(DMDAVecRestoreKokkosOffsetView(da, xl, &X)); 114 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(da, r, &R)); 115 PetscCall(DMDAVecRestoreKokkosOffsetView(da, user->F, &F)); 116 PetscCall(DMRestoreLocalVector(da, &xl)); 117 PetscFunctionReturn(PETSC_SUCCESS); 118 } 119 120 PetscErrorCode StubFunction(SNES snes, Vec x, Vec r, void *ctx) 121 { 122 ApplicationCtx *user = (ApplicationCtx *)ctx; 123 DM da = user->da; 124 Vec rk; 125 PetscReal norm = 0; 126 127 PetscFunctionBeginUser; 128 PetscCall(DMGetGlobalVector(da, &rk)); 129 PetscCall(CpuFunction(snes, x, r, ctx)); 130 PetscCall(KokkosFunction(snes, x, rk, ctx)); 131 PetscCall(VecAXPY(rk, -1.0, r)); 132 PetscCall(VecNorm(rk, NORM_2, &norm)); 133 PetscCall(DMRestoreGlobalVector(da, &rk)); 134 PetscCheck(norm <= 1e-6, PETSC_COMM_SELF, PETSC_ERR_PLIB, "KokkosFunction() different from CpuFunction() with a diff norm = %g", (double)norm); 135 PetscFunctionReturn(PETSC_SUCCESS); 136 } 137 /* ------------------------------------------------------------------- */ 138 /* 139 FormJacobian - Evaluates Jacobian matrix. 140 141 Input Parameters: 142 . snes - the SNES context 143 . x - input vector 144 . dummy - optional user-defined context (not used here) 145 146 Output Parameters: 147 . jac - Jacobian matrix 148 . B - optionally different preconditioning matrix 149 . flag - flag indicating matrix structure 150 */ 151 PetscErrorCode FormJacobian(SNES snes, Vec x, Mat jac, Mat B, void *ctx) 152 { 153 ApplicationCtx *user = (ApplicationCtx *)ctx; 154 PetscScalar *xx, d, A[3]; 155 PetscInt i, j[3], M, xs, xm; 156 DM da = user->da; 157 158 PetscFunctionBeginUser; 159 /* 160 Get pointer to vector data 161 */ 162 PetscCall(DMDAVecGetArrayRead(da, x, &xx)); 163 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL)); 164 165 /* 166 Get range of locally owned matrix 167 */ 168 PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)); 169 170 /* 171 Determine starting and ending local indices for interior grid points. 172 Set Jacobian entries for boundary points. 173 */ 174 175 if (xs == 0) { /* left boundary */ 176 i = 0; 177 A[0] = 1.0; 178 179 PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES)); 180 xs++; 181 xm--; 182 } 183 if (xs + xm == M) { /* right boundary */ 184 i = M - 1; 185 A[0] = 1.0; 186 PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES)); 187 xm--; 188 } 189 190 /* 191 Interior grid points 192 - Note that in this case we set all elements for a particular 193 row at once. 194 */ 195 d = 1.0 / (user->h * user->h); 196 for (i = xs; i < xs + xm; i++) { 197 j[0] = i - 1; 198 j[1] = i; 199 j[2] = i + 1; 200 A[0] = A[2] = d; 201 A[1] = -2.0 * d + 2.0 * xx[i]; 202 PetscCall(MatSetValues(jac, 1, &i, 3, j, A, INSERT_VALUES)); 203 } 204 205 /* 206 Assemble matrix, using the 2-step process: 207 MatAssemblyBegin(), MatAssemblyEnd(). 208 By placing code between these two statements, computations can be 209 done while messages are in transition. 210 211 Also, restore vector. 212 */ 213 214 PetscCall(MatAssemblyBegin(jac, MAT_FINAL_ASSEMBLY)); 215 PetscCall(DMDAVecRestoreArrayRead(da, x, &xx)); 216 PetscCall(MatAssemblyEnd(jac, MAT_FINAL_ASSEMBLY)); 217 218 PetscFunctionReturn(PETSC_SUCCESS); 219 } 220 221 int main(int argc, char **argv) 222 { 223 SNES snes; /* SNES context */ 224 Mat J; /* Jacobian matrix */ 225 ApplicationCtx ctx; /* user-defined context */ 226 Vec x, r, U, F; /* vectors */ 227 PetscScalar none = -1.0; 228 PetscInt its, N = 5, maxit, maxf; 229 PetscReal abstol, rtol, stol, norm; 230 PetscBool viewinitial = PETSC_FALSE; 231 232 PetscFunctionBeginUser; 233 PetscCall(PetscInitialize(&argc, &argv, (char *)0, help)); 234 PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL)); 235 ctx.h = 1.0 / (N - 1); 236 PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_initial", &viewinitial, NULL)); 237 238 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 239 Create nonlinear solver context 240 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 241 PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes)); 242 243 /* 244 Create distributed array (DMDA) to manage parallel grid and vectors 245 */ 246 PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, N, 1, 1, NULL, &ctx.da)); 247 PetscCall(DMSetFromOptions(ctx.da)); 248 PetscCall(DMSetUp(ctx.da)); 249 250 /* 251 Extract global and local vectors from DMDA; then duplicate for remaining 252 vectors that are the same types 253 */ 254 PetscCall(DMCreateGlobalVector(ctx.da, &x)); 255 PetscCall(PetscObjectSetName((PetscObject)x, "Approximate Solution")); 256 PetscCall(VecDuplicate(x, &r)); 257 PetscCall(VecDuplicate(x, &F)); 258 ctx.F = F; 259 PetscCall(PetscObjectSetName((PetscObject)F, "Forcing function")); 260 PetscCall(VecDuplicate(x, &U)); 261 PetscCall(PetscObjectSetName((PetscObject)U, "Exact Solution")); 262 263 /* 264 Set function evaluation routine and vector. Whenever the nonlinear 265 solver needs to compute the nonlinear function, it will call this 266 routine. 267 - Note that the final routine argument is the user-defined 268 context that provides application-specific data for the 269 function evaluation routine. 270 271 At the beginning, one can use a stub function that checks the Kokkos version 272 against the CPU version to quickly expose errors. 273 PetscCall(SNESSetFunction(snes,r,StubFunction,&ctx)); 274 */ 275 PetscCall(SNESSetFunction(snes, r, KokkosFunction, &ctx)); 276 277 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 278 Create matrix data structure; set Jacobian evaluation routine 279 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 280 PetscCall(DMCreateMatrix(ctx.da, &J)); 281 282 /* 283 Set Jacobian matrix data structure and default Jacobian evaluation 284 routine. Whenever the nonlinear solver needs to compute the 285 Jacobian matrix, it will call this routine. 286 - Note that the final routine argument is the user-defined 287 context that provides application-specific data for the 288 Jacobian evaluation routine. 289 */ 290 PetscCall(SNESSetJacobian(snes, J, J, FormJacobian, &ctx)); 291 PetscCall(SNESSetFromOptions(snes)); 292 PetscCall(SNESGetTolerances(snes, &abstol, &rtol, &stol, &maxit, &maxf)); 293 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "atol=%g, rtol=%g, stol=%g, maxit=%" PetscInt_FMT ", maxf=%" PetscInt_FMT "\n", (double)abstol, (double)rtol, (double)stol, maxit, maxf)); 294 295 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 296 Initialize application: 297 Store forcing function of PDE and exact solution 298 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 299 { 300 PetscScalarKokkosOffsetView FF, UU; 301 PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, F, &FF)); 302 PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, U, &UU)); 303 Kokkos::parallel_for( 304 Kokkos::RangePolicy<>(FF.begin(0), FF.end(0)), KOKKOS_LAMBDA(int i) { 305 PetscReal xp = i * ctx.h; 306 FF(i) = 6.0 * xp + pow(xp + 1.e-12, 6.0); /* +1.e-12 is to prevent 0^6 */ 307 UU(i) = xp * xp * xp; 308 }); 309 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, F, &FF)); 310 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, U, &UU)); 311 } 312 313 if (viewinitial) { 314 PetscCall(VecView(U, NULL)); 315 PetscCall(VecView(F, NULL)); 316 } 317 318 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 319 Evaluate initial guess; then solve nonlinear system 320 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 321 322 /* 323 Note: The user should initialize the vector, x, with the initial guess 324 for the nonlinear solver prior to calling SNESSolve(). In particular, 325 to employ an initial guess of zero, the user should explicitly set 326 this vector to zero by calling VecSet(). 327 */ 328 PetscCall(FormInitialGuess(x)); 329 PetscCall(SNESSolve(snes, NULL, x)); 330 PetscCall(SNESGetIterationNumber(snes, &its)); 331 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of SNES iterations = %" PetscInt_FMT "\n", its)); 332 333 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 334 Check solution and clean up 335 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 336 /* 337 Check the error 338 */ 339 PetscCall(VecAXPY(x, none, U)); 340 PetscCall(VecNorm(x, NORM_2, &norm)); 341 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)norm, its)); 342 343 /* 344 Free work space. All PETSc objects should be destroyed when they 345 are no longer needed. 346 */ 347 PetscCall(VecDestroy(&x)); 348 PetscCall(VecDestroy(&r)); 349 PetscCall(VecDestroy(&U)); 350 PetscCall(VecDestroy(&F)); 351 PetscCall(MatDestroy(&J)); 352 PetscCall(SNESDestroy(&snes)); 353 PetscCall(DMDestroy(&ctx.da)); 354 PetscCall(PetscFinalize()); 355 return 0; 356 } 357 358 /*TEST 359 360 build: 361 requires: kokkos_kernels 362 363 test: 364 requires: kokkos_kernels !complex !single 365 nsize: 2 366 args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor 367 output_file: output/ex3k_1.out 368 369 TEST*/ 370