1 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n"; 2 3 #include <petscdmda_kokkos.hpp> 4 #include <petscsnes.h> 5 6 /* 7 User-defined application context 8 */ 9 typedef struct { 10 DM da; /* distributed array */ 11 Vec F; /* right-hand side of PDE */ 12 PetscReal h; /* mesh spacing */ 13 } ApplicationCtx; 14 15 /* ------------------------------------------------------------------- */ 16 /* 17 FormInitialGuess - Computes initial guess. 18 19 Input/Output Parameter: 20 . x - the solution vector 21 */ 22 PetscErrorCode FormInitialGuess(Vec x) 23 { 24 PetscScalar pfive = .50; 25 26 PetscFunctionBeginUser; 27 PetscCall(VecSet(x, pfive)); 28 PetscFunctionReturn(PETSC_SUCCESS); 29 } 30 31 /* ------------------------------------------------------------------- */ 32 /* 33 CpuFunction - Evaluates nonlinear function, F(x) on CPU 34 35 Input Parameters: 36 . snes - the SNES context 37 . x - input vector 38 . ctx - optional user-defined context, as set by SNESSetFunction() 39 40 Output Parameter: 41 . r - function vector 42 43 Note: 44 The user-defined context can contain any application-specific 45 data needed for the function evaluation. 46 */ 47 PetscErrorCode CpuFunction(SNES snes, Vec x, Vec r, void *ctx) 48 { 49 ApplicationCtx *user = (ApplicationCtx *)ctx; 50 DM da = user->da; 51 PetscScalar *X, *R, *F, d; 52 PetscInt i, M, xs, xm; 53 Vec xl; 54 55 PetscFunctionBeginUser; 56 PetscCall(DMGetLocalVector(da, &xl)); 57 PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl)); 58 PetscCall(DMDAVecGetArray(da, xl, &X)); 59 PetscCall(DMDAVecGetArray(da, r, &R)); 60 PetscCall(DMDAVecGetArray(da, user->F, &F)); 61 62 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL)); 63 PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)); 64 65 if (xs == 0) { /* left boundary */ 66 R[0] = X[0]; 67 xs++; 68 xm--; 69 } 70 if (xs + xm == M) { /* right boundary */ 71 R[xs + xm - 1] = X[xs + xm - 1] - 1.0; 72 xm--; 73 } 74 d = 1.0 / (user->h * user->h); 75 for (i = xs; i < xs + xm; i++) R[i] = d * (X[i - 1] - 2.0 * X[i] + X[i + 1]) + X[i] * X[i] - F[i]; 76 77 PetscCall(DMDAVecRestoreArray(da, xl, &X)); 78 PetscCall(DMDAVecRestoreArray(da, r, &R)); 79 PetscCall(DMDAVecRestoreArray(da, user->F, &F)); 80 PetscCall(DMRestoreLocalVector(da, &xl)); 81 PetscFunctionReturn(PETSC_SUCCESS); 82 } 83 84 using DefaultExecutionSpace = Kokkos::DefaultExecutionSpace; 85 using DefaultMemorySpace = Kokkos::DefaultExecutionSpace::memory_space; 86 using PetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<PetscScalar *, DefaultMemorySpace>; 87 using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar *, DefaultMemorySpace>; 88 89 PetscErrorCode KokkosFunction(SNES snes, Vec x, Vec r, void *ctx) 90 { 91 ApplicationCtx *user = (ApplicationCtx *)ctx; 92 DM da = user->da; 93 PetscScalar d; 94 PetscInt M; 95 Vec xl; 96 PetscScalarKokkosOffsetView R; 97 ConstPetscScalarKokkosOffsetView X, F; 98 99 PetscFunctionBeginUser; 100 PetscCall(DMGetLocalVector(da, &xl)); 101 PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl)); 102 d = 1.0 / (user->h * user->h); 103 PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)); 104 PetscCall(DMDAVecGetKokkosOffsetView(da, xl, &X)); /* read only */ 105 PetscCall(DMDAVecGetKokkosOffsetViewWrite(da, r, &R)); /* write only */ 106 PetscCall(DMDAVecGetKokkosOffsetView(da, user->F, &F)); /* read only */ 107 Kokkos::parallel_for( 108 Kokkos::RangePolicy<>(R.begin(0), R.end(0)), KOKKOS_LAMBDA(int i) { 109 if (i == 0) R(0) = X(0); /* left boundary */ 110 else if (i == M - 1) R(i) = X(i) - 1.0; /* right boundary */ 111 else R(i) = d * (X(i - 1) - 2.0 * X(i) + X(i + 1)) + X(i) * X(i) - F(i); /* interior */ 112 }); 113 PetscCall(DMDAVecRestoreKokkosOffsetView(da, xl, &X)); 114 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(da, r, &R)); 115 PetscCall(DMDAVecRestoreKokkosOffsetView(da, user->F, &F)); 116 PetscCall(DMRestoreLocalVector(da, &xl)); 117 PetscFunctionReturn(PETSC_SUCCESS); 118 } 119 120 PetscErrorCode StubFunction(SNES snes, Vec x, Vec r, void *ctx) 121 { 122 ApplicationCtx *user = (ApplicationCtx *)ctx; 123 DM da = user->da; 124 Vec rk; 125 PetscReal norm = 0; 126 127 PetscFunctionBeginUser; 128 PetscCall(DMGetGlobalVector(da, &rk)); 129 PetscCall(CpuFunction(snes, x, r, ctx)); 130 PetscCall(KokkosFunction(snes, x, rk, ctx)); 131 PetscCall(VecAXPY(rk, -1.0, r)); 132 PetscCall(VecNorm(rk, NORM_2, &norm)); 133 PetscCall(DMRestoreGlobalVector(da, &rk)); 134 PetscCheck(norm <= 1e-6, PETSC_COMM_SELF, PETSC_ERR_PLIB, "KokkosFunction() different from CpuFunction() with a diff norm = %g", (double)norm); 135 PetscFunctionReturn(PETSC_SUCCESS); 136 } 137 /* ------------------------------------------------------------------- */ 138 /* 139 FormJacobian - Evaluates Jacobian matrix. 140 141 Input Parameters: 142 . snes - the SNES context 143 . x - input vector 144 . dummy - optional user-defined context (not used here) 145 146 Output Parameters: 147 . jac - Jacobian matrix 148 . B - optionally different preconditioning matrix 149 . flag - flag indicating matrix structure 150 */ 151 PetscErrorCode FormJacobian(SNES snes, Vec x, Mat jac, Mat B, void *ctx) 152 { 153 ApplicationCtx *user = (ApplicationCtx *)ctx; 154 PetscScalar *xx, d, A[3]; 155 PetscInt i, j[3], M, xs, xm; 156 DM da = user->da; 157 158 PetscFunctionBeginUser; 159 /* 160 Get pointer to vector data 161 */ 162 PetscCall(DMDAVecGetArrayRead(da, x, &xx)); 163 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL)); 164 165 /* 166 Get range of locally owned matrix 167 */ 168 PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)); 169 170 /* 171 Determine starting and ending local indices for interior grid points. 172 Set Jacobian entries for boundary points. 173 */ 174 175 if (xs == 0) { /* left boundary */ 176 i = 0; 177 A[0] = 1.0; 178 179 PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES)); 180 xs++; 181 xm--; 182 } 183 if (xs + xm == M) { /* right boundary */ 184 i = M - 1; 185 A[0] = 1.0; 186 PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES)); 187 xm--; 188 } 189 190 /* 191 Interior grid points 192 - Note that in this case we set all elements for a particular 193 row at once. 194 */ 195 d = 1.0 / (user->h * user->h); 196 for (i = xs; i < xs + xm; i++) { 197 j[0] = i - 1; 198 j[1] = i; 199 j[2] = i + 1; 200 A[0] = A[2] = d; 201 A[1] = -2.0 * d + 2.0 * xx[i]; 202 PetscCall(MatSetValues(jac, 1, &i, 3, j, A, INSERT_VALUES)); 203 } 204 205 /* 206 Assemble matrix, using the 2-step process: 207 MatAssemblyBegin(), MatAssemblyEnd(). 208 By placing code between these two statements, computations can be 209 done while messages are in transition. 210 211 Also, restore vector. 212 */ 213 214 PetscCall(MatAssemblyBegin(jac, MAT_FINAL_ASSEMBLY)); 215 PetscCall(DMDAVecRestoreArrayRead(da, x, &xx)); 216 PetscCall(MatAssemblyEnd(jac, MAT_FINAL_ASSEMBLY)); 217 PetscFunctionReturn(PETSC_SUCCESS); 218 } 219 220 int main(int argc, char **argv) 221 { 222 SNES snes; /* SNES context */ 223 Mat J; /* Jacobian matrix */ 224 ApplicationCtx ctx; /* user-defined context */ 225 Vec x, r, U, F; /* vectors */ 226 PetscScalar none = -1.0; 227 PetscInt its, N = 5, maxit, maxf; 228 PetscReal abstol, rtol, stol, norm; 229 PetscBool viewinitial = PETSC_FALSE; 230 231 PetscFunctionBeginUser; 232 PetscCall(PetscInitialize(&argc, &argv, (char *)0, help)); 233 PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL)); 234 ctx.h = 1.0 / (N - 1); 235 PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_initial", &viewinitial, NULL)); 236 237 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 238 Create nonlinear solver context 239 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 240 PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes)); 241 242 /* 243 Create distributed array (DMDA) to manage parallel grid and vectors 244 */ 245 PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, N, 1, 1, NULL, &ctx.da)); 246 PetscCall(DMSetFromOptions(ctx.da)); 247 PetscCall(DMSetUp(ctx.da)); 248 249 /* 250 Extract global and local vectors from DMDA; then duplicate for remaining 251 vectors that are the same types 252 */ 253 PetscCall(DMCreateGlobalVector(ctx.da, &x)); 254 PetscCall(PetscObjectSetName((PetscObject)x, "Approximate Solution")); 255 PetscCall(VecDuplicate(x, &r)); 256 PetscCall(VecDuplicate(x, &F)); 257 ctx.F = F; 258 PetscCall(PetscObjectSetName((PetscObject)F, "Forcing function")); 259 PetscCall(VecDuplicate(x, &U)); 260 PetscCall(PetscObjectSetName((PetscObject)U, "Exact Solution")); 261 262 /* 263 Set function evaluation routine and vector. Whenever the nonlinear 264 solver needs to compute the nonlinear function, it will call this 265 routine. 266 - Note that the final routine argument is the user-defined 267 context that provides application-specific data for the 268 function evaluation routine. 269 270 At the beginning, one can use a stub function that checks the Kokkos version 271 against the CPU version to quickly expose errors. 272 PetscCall(SNESSetFunction(snes,r,StubFunction,&ctx)); 273 */ 274 PetscCall(SNESSetFunction(snes, r, KokkosFunction, &ctx)); 275 276 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 277 Create matrix data structure; set Jacobian evaluation routine 278 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 279 PetscCall(DMCreateMatrix(ctx.da, &J)); 280 281 /* 282 Set Jacobian matrix data structure and default Jacobian evaluation 283 routine. Whenever the nonlinear solver needs to compute the 284 Jacobian matrix, it will call this routine. 285 - Note that the final routine argument is the user-defined 286 context that provides application-specific data for the 287 Jacobian evaluation routine. 288 */ 289 PetscCall(SNESSetJacobian(snes, J, J, FormJacobian, &ctx)); 290 PetscCall(SNESSetFromOptions(snes)); 291 PetscCall(SNESGetTolerances(snes, &abstol, &rtol, &stol, &maxit, &maxf)); 292 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "atol=%g, rtol=%g, stol=%g, maxit=%" PetscInt_FMT ", maxf=%" PetscInt_FMT "\n", (double)abstol, (double)rtol, (double)stol, maxit, maxf)); 293 294 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 295 Initialize application: 296 Store forcing function of PDE and exact solution 297 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 298 { 299 PetscScalarKokkosOffsetView FF, UU; 300 PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, F, &FF)); 301 PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, U, &UU)); 302 Kokkos::parallel_for( 303 Kokkos::RangePolicy<>(FF.begin(0), FF.end(0)), KOKKOS_LAMBDA(int i) { 304 PetscReal xp = i * ctx.h; 305 FF(i) = 6.0 * xp + pow(xp + 1.e-12, 6.0); /* +1.e-12 is to prevent 0^6 */ 306 UU(i) = xp * xp * xp; 307 }); 308 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, F, &FF)); 309 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, U, &UU)); 310 } 311 312 if (viewinitial) { 313 PetscCall(VecView(U, NULL)); 314 PetscCall(VecView(F, NULL)); 315 } 316 317 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 318 Evaluate initial guess; then solve nonlinear system 319 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 320 321 /* 322 Note: The user should initialize the vector, x, with the initial guess 323 for the nonlinear solver prior to calling SNESSolve(). In particular, 324 to employ an initial guess of zero, the user should explicitly set 325 this vector to zero by calling VecSet(). 326 */ 327 PetscCall(FormInitialGuess(x)); 328 PetscCall(SNESSolve(snes, NULL, x)); 329 PetscCall(SNESGetIterationNumber(snes, &its)); 330 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of SNES iterations = %" PetscInt_FMT "\n", its)); 331 332 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 333 Check solution and clean up 334 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 335 /* 336 Check the error 337 */ 338 PetscCall(VecAXPY(x, none, U)); 339 PetscCall(VecNorm(x, NORM_2, &norm)); 340 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)norm, its)); 341 342 /* 343 Free work space. All PETSc objects should be destroyed when they 344 are no longer needed. 345 */ 346 PetscCall(VecDestroy(&x)); 347 PetscCall(VecDestroy(&r)); 348 PetscCall(VecDestroy(&U)); 349 PetscCall(VecDestroy(&F)); 350 PetscCall(MatDestroy(&J)); 351 PetscCall(SNESDestroy(&snes)); 352 PetscCall(DMDestroy(&ctx.da)); 353 PetscCall(PetscFinalize()); 354 return 0; 355 } 356 357 /*TEST 358 359 build: 360 requires: kokkos_kernels 361 362 test: 363 requires: kokkos_kernels !complex !single 364 nsize: 2 365 args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor 366 output_file: output/ex3k_1.out 367 368 TEST*/ 369