1 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n"; 2 3 #include <petscdmda_kokkos.hpp> 4 #include <petscsnes.h> 5 6 /* 7 User-defined application context 8 */ 9 typedef struct 10 { 11 DM da; /* distributed array */ 12 Vec F; /* right-hand-side of PDE */ 13 PetscReal h; /* mesh spacing */ 14 } ApplicationCtx; 15 16 /* ------------------------------------------------------------------- */ 17 /* 18 FormInitialGuess - Computes initial guess. 19 20 Input/Output Parameter: 21 . x - the solution vector 22 */ 23 PetscErrorCode FormInitialGuess(Vec x) 24 { 25 PetscScalar pfive = .50; 26 27 PetscFunctionBeginUser; 28 PetscCall(VecSet(x, pfive)); 29 PetscFunctionReturn(0); 30 } 31 32 /* ------------------------------------------------------------------- */ 33 /* 34 CpuFunction - Evaluates nonlinear function, F(x) on CPU 35 36 Input Parameters: 37 . snes - the SNES context 38 . x - input vector 39 . ctx - optional user-defined context, as set by SNESSetFunction() 40 41 Output Parameter: 42 . r - function vector 43 44 Note: 45 The user-defined context can contain any application-specific 46 data needed for the function evaluation. 47 */ 48 PetscErrorCode CpuFunction(SNES snes, Vec x, Vec r, void *ctx) 49 { 50 ApplicationCtx *user = (ApplicationCtx *)ctx; 51 DM da = user->da; 52 PetscScalar *X, *R, *F, d; 53 PetscInt i, M, xs, xm; 54 Vec xl; 55 56 PetscFunctionBeginUser; 57 PetscCall(DMGetLocalVector(da, &xl)); 58 PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl)); 59 PetscCall(DMDAVecGetArray(da, xl, &X)); 60 PetscCall(DMDAVecGetArray(da, r, &R)); 61 PetscCall(DMDAVecGetArray(da, user->F, &F)); 62 63 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL)); 64 PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)); 65 66 if (xs == 0) { /* left boundary */ 67 R[0] = X[0]; 68 xs++; 69 xm--; 70 } 71 if (xs + xm == M) { /* right boundary */ 72 R[xs + xm - 1] = X[xs + xm - 1] - 1.0; 73 xm--; 74 } 75 d = 1.0 / (user->h * user->h); 76 for (i = xs; i < xs + xm; i++) R[i] = d * (X[i - 1] - 2.0 * X[i] + X[i + 1]) + X[i] * X[i] - F[i]; 77 78 PetscCall(DMDAVecRestoreArray(da, xl, &X)); 79 PetscCall(DMDAVecRestoreArray(da, r, &R)); 80 PetscCall(DMDAVecRestoreArray(da, user->F, &F)); 81 PetscCall(DMRestoreLocalVector(da, &xl)); 82 PetscFunctionReturn(0); 83 } 84 85 using DefaultExecutionSpace = Kokkos::DefaultExecutionSpace; 86 using DefaultMemorySpace = Kokkos::DefaultExecutionSpace::memory_space; 87 using PetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<PetscScalar *, DefaultMemorySpace>; 88 using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar *, DefaultMemorySpace>; 89 90 PetscErrorCode KokkosFunction(SNES snes, Vec x, Vec r, void *ctx) 91 { 92 ApplicationCtx *user = (ApplicationCtx *)ctx; 93 DM da = user->da; 94 PetscScalar d; 95 PetscInt M; 96 Vec xl; 97 PetscScalarKokkosOffsetView R; 98 ConstPetscScalarKokkosOffsetView X, F; 99 100 PetscFunctionBeginUser; 101 PetscCall(DMGetLocalVector(da, &xl)); 102 PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl)); 103 d = 1.0 / (user->h * user->h); 104 PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)); 105 PetscCall(DMDAVecGetKokkosOffsetView(da, xl, &X)); /* read only */ 106 PetscCall(DMDAVecGetKokkosOffsetViewWrite(da, r, &R)); /* write only */ 107 PetscCall(DMDAVecGetKokkosOffsetView(da, user->F, &F)); /* read only */ 108 Kokkos::parallel_for( 109 Kokkos::RangePolicy<>(R.begin(0), R.end(0)), KOKKOS_LAMBDA(int i) { 110 if (i == 0) R(0) = X(0); /* left boundary */ 111 else if (i == M - 1) R(i) = X(i) - 1.0; /* right boundary */ 112 else R(i) = d * (X(i - 1) - 2.0 * X(i) + X(i + 1)) + X(i) * X(i) - F(i); /* interior */ 113 }); 114 PetscCall(DMDAVecRestoreKokkosOffsetView(da, xl, &X)); 115 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(da, r, &R)); 116 PetscCall(DMDAVecRestoreKokkosOffsetView(da, user->F, &F)); 117 PetscCall(DMRestoreLocalVector(da, &xl)); 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode StubFunction(SNES snes, Vec x, Vec r, void *ctx) 122 { 123 ApplicationCtx *user = (ApplicationCtx *)ctx; 124 DM da = user->da; 125 Vec rk; 126 PetscReal norm = 0; 127 128 PetscFunctionBeginUser; 129 PetscCall(DMGetGlobalVector(da, &rk)); 130 PetscCall(CpuFunction(snes, x, r, ctx)); 131 PetscCall(KokkosFunction(snes, x, rk, ctx)); 132 PetscCall(VecAXPY(rk, -1.0, r)); 133 PetscCall(VecNorm(rk, NORM_2, &norm)); 134 PetscCall(DMRestoreGlobalVector(da, &rk)); 135 PetscCheck(norm <= 1e-6, PETSC_COMM_SELF, PETSC_ERR_PLIB, "KokkosFunction() different from CpuFunction() with a diff norm = %g", (double)norm); 136 PetscFunctionReturn(0); 137 } 138 /* ------------------------------------------------------------------- */ 139 /* 140 FormJacobian - Evaluates Jacobian matrix. 141 142 Input Parameters: 143 . snes - the SNES context 144 . x - input vector 145 . dummy - optional user-defined context (not used here) 146 147 Output Parameters: 148 . jac - Jacobian matrix 149 . B - optionally different preconditioning matrix 150 . flag - flag indicating matrix structure 151 */ 152 PetscErrorCode FormJacobian(SNES snes, Vec x, Mat jac, Mat B, void *ctx) 153 { 154 ApplicationCtx *user = (ApplicationCtx *)ctx; 155 PetscScalar *xx, d, A[3]; 156 PetscInt i, j[3], M, xs, xm; 157 DM da = user->da; 158 159 PetscFunctionBeginUser; 160 /* 161 Get pointer to vector data 162 */ 163 PetscCall(DMDAVecGetArrayRead(da, x, &xx)); 164 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL)); 165 166 /* 167 Get range of locally owned matrix 168 */ 169 PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)); 170 171 /* 172 Determine starting and ending local indices for interior grid points. 173 Set Jacobian entries for boundary points. 174 */ 175 176 if (xs == 0) { /* left boundary */ 177 i = 0; 178 A[0] = 1.0; 179 180 PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES)); 181 xs++; 182 xm--; 183 } 184 if (xs + xm == M) { /* right boundary */ 185 i = M - 1; 186 A[0] = 1.0; 187 PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES)); 188 xm--; 189 } 190 191 /* 192 Interior grid points 193 - Note that in this case we set all elements for a particular 194 row at once. 195 */ 196 d = 1.0 / (user->h * user->h); 197 for (i = xs; i < xs + xm; i++) { 198 j[0] = i - 1; 199 j[1] = i; 200 j[2] = i + 1; 201 A[0] = A[2] = d; 202 A[1] = -2.0 * d + 2.0 * xx[i]; 203 PetscCall(MatSetValues(jac, 1, &i, 3, j, A, INSERT_VALUES)); 204 } 205 206 /* 207 Assemble matrix, using the 2-step process: 208 MatAssemblyBegin(), MatAssemblyEnd(). 209 By placing code between these two statements, computations can be 210 done while messages are in transition. 211 212 Also, restore vector. 213 */ 214 215 PetscCall(MatAssemblyBegin(jac, MAT_FINAL_ASSEMBLY)); 216 PetscCall(DMDAVecRestoreArrayRead(da, x, &xx)); 217 PetscCall(MatAssemblyEnd(jac, MAT_FINAL_ASSEMBLY)); 218 219 PetscFunctionReturn(0); 220 } 221 222 int main(int argc, char **argv) 223 { 224 SNES snes; /* SNES context */ 225 Mat J; /* Jacobian matrix */ 226 ApplicationCtx ctx; /* user-defined context */ 227 Vec x, r, U, F; /* vectors */ 228 PetscScalar none = -1.0; 229 PetscInt its, N = 5, maxit, maxf; 230 PetscReal abstol, rtol, stol, norm; 231 PetscBool viewinitial = PETSC_FALSE; 232 233 PetscFunctionBeginUser; 234 PetscCall(PetscInitialize(&argc, &argv, (char *)0, help)); 235 PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL)); 236 ctx.h = 1.0 / (N - 1); 237 PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_initial", &viewinitial, NULL)); 238 239 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 240 Create nonlinear solver context 241 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 242 PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes)); 243 244 /* 245 Create distributed array (DMDA) to manage parallel grid and vectors 246 */ 247 PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, N, 1, 1, NULL, &ctx.da)); 248 PetscCall(DMSetFromOptions(ctx.da)); 249 PetscCall(DMSetUp(ctx.da)); 250 251 /* 252 Extract global and local vectors from DMDA; then duplicate for remaining 253 vectors that are the same types 254 */ 255 PetscCall(DMCreateGlobalVector(ctx.da, &x)); 256 PetscCall(PetscObjectSetName((PetscObject)x, "Approximate Solution")); 257 PetscCall(VecDuplicate(x, &r)); 258 PetscCall(VecDuplicate(x, &F)); 259 ctx.F = F; 260 PetscCall(PetscObjectSetName((PetscObject)F, "Forcing function")); 261 PetscCall(VecDuplicate(x, &U)); 262 PetscCall(PetscObjectSetName((PetscObject)U, "Exact Solution")); 263 264 /* 265 Set function evaluation routine and vector. Whenever the nonlinear 266 solver needs to compute the nonlinear function, it will call this 267 routine. 268 - Note that the final routine argument is the user-defined 269 context that provides application-specific data for the 270 function evaluation routine. 271 272 At the beginning, one can use a stub function that checks the Kokkos version 273 against the CPU version to quickly expose errors. 274 PetscCall(SNESSetFunction(snes,r,StubFunction,&ctx)); 275 */ 276 PetscCall(SNESSetFunction(snes, r, KokkosFunction, &ctx)); 277 278 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 279 Create matrix data structure; set Jacobian evaluation routine 280 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 281 PetscCall(DMCreateMatrix(ctx.da, &J)); 282 283 /* 284 Set Jacobian matrix data structure and default Jacobian evaluation 285 routine. Whenever the nonlinear solver needs to compute the 286 Jacobian matrix, it will call this routine. 287 - Note that the final routine argument is the user-defined 288 context that provides application-specific data for the 289 Jacobian evaluation routine. 290 */ 291 PetscCall(SNESSetJacobian(snes, J, J, FormJacobian, &ctx)); 292 PetscCall(SNESSetFromOptions(snes)); 293 PetscCall(SNESGetTolerances(snes, &abstol, &rtol, &stol, &maxit, &maxf)); 294 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "atol=%g, rtol=%g, stol=%g, maxit=%" PetscInt_FMT ", maxf=%" PetscInt_FMT "\n", (double)abstol, (double)rtol, (double)stol, maxit, maxf)); 295 296 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 297 Initialize application: 298 Store forcing function of PDE and exact solution 299 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 300 { 301 PetscScalarKokkosOffsetView FF, UU; 302 PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, F, &FF)); 303 PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, U, &UU)); 304 Kokkos::parallel_for( 305 Kokkos::RangePolicy<>(FF.begin(0), FF.end(0)), KOKKOS_LAMBDA(int i) { 306 PetscReal xp = i * ctx.h; 307 FF(i) = 6.0 * xp + pow(xp + 1.e-12, 6.0); /* +1.e-12 is to prevent 0^6 */ 308 UU(i) = xp * xp * xp; 309 }); 310 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, F, &FF)); 311 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, U, &UU)); 312 } 313 314 if (viewinitial) { 315 PetscCall(VecView(U, NULL)); 316 PetscCall(VecView(F, NULL)); 317 } 318 319 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 320 Evaluate initial guess; then solve nonlinear system 321 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 322 323 /* 324 Note: The user should initialize the vector, x, with the initial guess 325 for the nonlinear solver prior to calling SNESSolve(). In particular, 326 to employ an initial guess of zero, the user should explicitly set 327 this vector to zero by calling VecSet(). 328 */ 329 PetscCall(FormInitialGuess(x)); 330 PetscCall(SNESSolve(snes, NULL, x)); 331 PetscCall(SNESGetIterationNumber(snes, &its)); 332 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of SNES iterations = %" PetscInt_FMT "\n", its)); 333 334 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 335 Check solution and clean up 336 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 337 /* 338 Check the error 339 */ 340 PetscCall(VecAXPY(x, none, U)); 341 PetscCall(VecNorm(x, NORM_2, &norm)); 342 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)norm, its)); 343 344 /* 345 Free work space. All PETSc objects should be destroyed when they 346 are no longer needed. 347 */ 348 PetscCall(VecDestroy(&x)); 349 PetscCall(VecDestroy(&r)); 350 PetscCall(VecDestroy(&U)); 351 PetscCall(VecDestroy(&F)); 352 PetscCall(MatDestroy(&J)); 353 PetscCall(SNESDestroy(&snes)); 354 PetscCall(DMDestroy(&ctx.da)); 355 PetscCall(PetscFinalize()); 356 return 0; 357 } 358 359 /*TEST 360 361 build: 362 requires: kokkos_kernels 363 364 test: 365 requires: kokkos_kernels !complex !single 366 nsize: 2 367 args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor 368 output_file: output/ex3k_1.out 369 370 TEST*/ 371