1 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n"; 2 3 #include <petscdmda_kokkos.hpp> 4 #include <petscsnes.h> 5 6 /* 7 User-defined application context 8 */ 9 typedef struct { 10 DM da; /* distributed array */ 11 Vec F; /* right-hand-side of PDE */ 12 PetscReal h; /* mesh spacing */ 13 } ApplicationCtx; 14 15 /* ------------------------------------------------------------------- */ 16 /* 17 FormInitialGuess - Computes initial guess. 18 19 Input/Output Parameter: 20 . x - the solution vector 21 */ 22 PetscErrorCode FormInitialGuess(Vec x) { 23 PetscScalar pfive = .50; 24 25 PetscFunctionBeginUser; 26 PetscCall(VecSet(x, pfive)); 27 PetscFunctionReturn(0); 28 } 29 30 /* ------------------------------------------------------------------- */ 31 /* 32 CpuFunction - Evaluates nonlinear function, F(x) on CPU 33 34 Input Parameters: 35 . snes - the SNES context 36 . x - input vector 37 . ctx - optional user-defined context, as set by SNESSetFunction() 38 39 Output Parameter: 40 . r - function vector 41 42 Note: 43 The user-defined context can contain any application-specific 44 data needed for the function evaluation. 45 */ 46 PetscErrorCode CpuFunction(SNES snes, Vec x, Vec r, void *ctx) { 47 ApplicationCtx *user = (ApplicationCtx *)ctx; 48 DM da = user->da; 49 PetscScalar *X, *R, *F, d; 50 PetscInt i, M, xs, xm; 51 Vec xl; 52 53 PetscFunctionBeginUser; 54 PetscCall(DMGetLocalVector(da, &xl)); 55 PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl)); 56 PetscCall(DMDAVecGetArray(da, xl, &X)); 57 PetscCall(DMDAVecGetArray(da, r, &R)); 58 PetscCall(DMDAVecGetArray(da, user->F, &F)); 59 60 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL)); 61 PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)); 62 63 if (xs == 0) { /* left boundary */ 64 R[0] = X[0]; 65 xs++; 66 xm--; 67 } 68 if (xs + xm == M) { /* right boundary */ 69 R[xs + xm - 1] = X[xs + xm - 1] - 1.0; 70 xm--; 71 } 72 d = 1.0 / (user->h * user->h); 73 for (i = xs; i < xs + xm; i++) R[i] = d * (X[i - 1] - 2.0 * X[i] + X[i + 1]) + X[i] * X[i] - F[i]; 74 75 PetscCall(DMDAVecRestoreArray(da, xl, &X)); 76 PetscCall(DMDAVecRestoreArray(da, r, &R)); 77 PetscCall(DMDAVecRestoreArray(da, user->F, &F)); 78 PetscCall(DMRestoreLocalVector(da, &xl)); 79 PetscFunctionReturn(0); 80 } 81 82 using DefaultExecutionSpace = Kokkos::DefaultExecutionSpace; 83 using DefaultMemorySpace = Kokkos::DefaultExecutionSpace::memory_space; 84 using PetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<PetscScalar *, DefaultMemorySpace>; 85 using ConstPetscScalarKokkosOffsetView = Kokkos::Experimental::OffsetView<const PetscScalar *, DefaultMemorySpace>; 86 87 PetscErrorCode KokkosFunction(SNES snes, Vec x, Vec r, void *ctx) { 88 ApplicationCtx *user = (ApplicationCtx *)ctx; 89 DM da = user->da; 90 PetscScalar d; 91 PetscInt M; 92 Vec xl; 93 PetscScalarKokkosOffsetView R; 94 ConstPetscScalarKokkosOffsetView X, F; 95 96 PetscFunctionBeginUser; 97 PetscCall(DMGetLocalVector(da, &xl)); 98 PetscCall(DMGlobalToLocal(da, x, INSERT_VALUES, xl)); 99 d = 1.0 / (user->h * user->h); 100 PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)); 101 PetscCall(DMDAVecGetKokkosOffsetView(da, xl, &X)); /* read only */ 102 PetscCall(DMDAVecGetKokkosOffsetViewWrite(da, r, &R)); /* write only */ 103 PetscCall(DMDAVecGetKokkosOffsetView(da, user->F, &F)); /* read only */ 104 Kokkos::parallel_for( 105 Kokkos::RangePolicy<>(R.begin(0), R.end(0)), KOKKOS_LAMBDA(int i) { 106 if (i == 0) R(0) = X(0); /* left boundary */ 107 else if (i == M - 1) R(i) = X(i) - 1.0; /* right boundary */ 108 else R(i) = d * (X(i - 1) - 2.0 * X(i) + X(i + 1)) + X(i) * X(i) - F(i); /* interior */ 109 }); 110 PetscCall(DMDAVecRestoreKokkosOffsetView(da, xl, &X)); 111 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(da, r, &R)); 112 PetscCall(DMDAVecRestoreKokkosOffsetView(da, user->F, &F)); 113 PetscCall(DMRestoreLocalVector(da, &xl)); 114 PetscFunctionReturn(0); 115 } 116 117 PetscErrorCode StubFunction(SNES snes, Vec x, Vec r, void *ctx) { 118 ApplicationCtx *user = (ApplicationCtx *)ctx; 119 DM da = user->da; 120 Vec rk; 121 PetscReal norm = 0; 122 123 PetscFunctionBeginUser; 124 PetscCall(DMGetGlobalVector(da, &rk)); 125 PetscCall(CpuFunction(snes, x, r, ctx)); 126 PetscCall(KokkosFunction(snes, x, rk, ctx)); 127 PetscCall(VecAXPY(rk, -1.0, r)); 128 PetscCall(VecNorm(rk, NORM_2, &norm)); 129 PetscCall(DMRestoreGlobalVector(da, &rk)); 130 PetscCheck(norm <= 1e-6, PETSC_COMM_SELF, PETSC_ERR_PLIB, "KokkosFunction() different from CpuFunction() with a diff norm = %g", (double)norm); 131 PetscFunctionReturn(0); 132 } 133 /* ------------------------------------------------------------------- */ 134 /* 135 FormJacobian - Evaluates Jacobian matrix. 136 137 Input Parameters: 138 . snes - the SNES context 139 . x - input vector 140 . dummy - optional user-defined context (not used here) 141 142 Output Parameters: 143 . jac - Jacobian matrix 144 . B - optionally different preconditioning matrix 145 . flag - flag indicating matrix structure 146 */ 147 PetscErrorCode FormJacobian(SNES snes, Vec x, Mat jac, Mat B, void *ctx) { 148 ApplicationCtx *user = (ApplicationCtx *)ctx; 149 PetscScalar *xx, d, A[3]; 150 PetscInt i, j[3], M, xs, xm; 151 DM da = user->da; 152 153 PetscFunctionBeginUser; 154 /* 155 Get pointer to vector data 156 */ 157 PetscCall(DMDAVecGetArrayRead(da, x, &xx)); 158 PetscCall(DMDAGetCorners(da, &xs, NULL, NULL, &xm, NULL, NULL)); 159 160 /* 161 Get range of locally owned matrix 162 */ 163 PetscCall(DMDAGetInfo(da, NULL, &M, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)); 164 165 /* 166 Determine starting and ending local indices for interior grid points. 167 Set Jacobian entries for boundary points. 168 */ 169 170 if (xs == 0) { /* left boundary */ 171 i = 0; 172 A[0] = 1.0; 173 174 PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES)); 175 xs++; 176 xm--; 177 } 178 if (xs + xm == M) { /* right boundary */ 179 i = M - 1; 180 A[0] = 1.0; 181 PetscCall(MatSetValues(jac, 1, &i, 1, &i, A, INSERT_VALUES)); 182 xm--; 183 } 184 185 /* 186 Interior grid points 187 - Note that in this case we set all elements for a particular 188 row at once. 189 */ 190 d = 1.0 / (user->h * user->h); 191 for (i = xs; i < xs + xm; i++) { 192 j[0] = i - 1; 193 j[1] = i; 194 j[2] = i + 1; 195 A[0] = A[2] = d; 196 A[1] = -2.0 * d + 2.0 * xx[i]; 197 PetscCall(MatSetValues(jac, 1, &i, 3, j, A, INSERT_VALUES)); 198 } 199 200 /* 201 Assemble matrix, using the 2-step process: 202 MatAssemblyBegin(), MatAssemblyEnd(). 203 By placing code between these two statements, computations can be 204 done while messages are in transition. 205 206 Also, restore vector. 207 */ 208 209 PetscCall(MatAssemblyBegin(jac, MAT_FINAL_ASSEMBLY)); 210 PetscCall(DMDAVecRestoreArrayRead(da, x, &xx)); 211 PetscCall(MatAssemblyEnd(jac, MAT_FINAL_ASSEMBLY)); 212 213 PetscFunctionReturn(0); 214 } 215 216 int main(int argc, char **argv) { 217 SNES snes; /* SNES context */ 218 Mat J; /* Jacobian matrix */ 219 ApplicationCtx ctx; /* user-defined context */ 220 Vec x, r, U, F; /* vectors */ 221 PetscScalar none = -1.0; 222 PetscInt its, N = 5, maxit, maxf; 223 PetscReal abstol, rtol, stol, norm; 224 PetscBool viewinitial = PETSC_FALSE; 225 226 PetscFunctionBeginUser; 227 PetscCall(PetscInitialize(&argc, &argv, (char *)0, help)); 228 PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL)); 229 ctx.h = 1.0 / (N - 1); 230 PetscCall(PetscOptionsGetBool(NULL, NULL, "-view_initial", &viewinitial, NULL)); 231 232 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 233 Create nonlinear solver context 234 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 235 PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes)); 236 237 /* 238 Create distributed array (DMDA) to manage parallel grid and vectors 239 */ 240 PetscCall(DMDACreate1d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, N, 1, 1, NULL, &ctx.da)); 241 PetscCall(DMSetFromOptions(ctx.da)); 242 PetscCall(DMSetUp(ctx.da)); 243 244 /* 245 Extract global and local vectors from DMDA; then duplicate for remaining 246 vectors that are the same types 247 */ 248 PetscCall(DMCreateGlobalVector(ctx.da, &x)); 249 PetscCall(PetscObjectSetName((PetscObject)x, "Approximate Solution")); 250 PetscCall(VecDuplicate(x, &r)); 251 PetscCall(VecDuplicate(x, &F)); 252 ctx.F = F; 253 PetscCall(PetscObjectSetName((PetscObject)F, "Forcing function")); 254 PetscCall(VecDuplicate(x, &U)); 255 PetscCall(PetscObjectSetName((PetscObject)U, "Exact Solution")); 256 257 /* 258 Set function evaluation routine and vector. Whenever the nonlinear 259 solver needs to compute the nonlinear function, it will call this 260 routine. 261 - Note that the final routine argument is the user-defined 262 context that provides application-specific data for the 263 function evaluation routine. 264 265 At the beginning, one can use a stub function that checks the Kokkos version 266 against the CPU version to quickly expose errors. 267 PetscCall(SNESSetFunction(snes,r,StubFunction,&ctx)); 268 */ 269 PetscCall(SNESSetFunction(snes, r, KokkosFunction, &ctx)); 270 271 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 272 Create matrix data structure; set Jacobian evaluation routine 273 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 274 PetscCall(DMCreateMatrix(ctx.da, &J)); 275 276 /* 277 Set Jacobian matrix data structure and default Jacobian evaluation 278 routine. Whenever the nonlinear solver needs to compute the 279 Jacobian matrix, it will call this routine. 280 - Note that the final routine argument is the user-defined 281 context that provides application-specific data for the 282 Jacobian evaluation routine. 283 */ 284 PetscCall(SNESSetJacobian(snes, J, J, FormJacobian, &ctx)); 285 PetscCall(SNESSetFromOptions(snes)); 286 PetscCall(SNESGetTolerances(snes, &abstol, &rtol, &stol, &maxit, &maxf)); 287 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "atol=%g, rtol=%g, stol=%g, maxit=%" PetscInt_FMT ", maxf=%" PetscInt_FMT "\n", (double)abstol, (double)rtol, (double)stol, maxit, maxf)); 288 289 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 290 Initialize application: 291 Store forcing function of PDE and exact solution 292 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 293 { 294 PetscScalarKokkosOffsetView FF, UU; 295 PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, F, &FF)); 296 PetscCall(DMDAVecGetKokkosOffsetViewWrite(ctx.da, U, &UU)); 297 Kokkos::parallel_for( 298 Kokkos::RangePolicy<>(FF.begin(0), FF.end(0)), KOKKOS_LAMBDA(int i) { 299 PetscReal xp = i * ctx.h; 300 FF(i) = 6.0 * xp + pow(xp + 1.e-12, 6.0); /* +1.e-12 is to prevent 0^6 */ 301 UU(i) = xp * xp * xp; 302 }); 303 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, F, &FF)); 304 PetscCall(DMDAVecRestoreKokkosOffsetViewWrite(ctx.da, U, &UU)); 305 } 306 307 if (viewinitial) { 308 PetscCall(VecView(U, NULL)); 309 PetscCall(VecView(F, NULL)); 310 } 311 312 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 313 Evaluate initial guess; then solve nonlinear system 314 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 315 316 /* 317 Note: The user should initialize the vector, x, with the initial guess 318 for the nonlinear solver prior to calling SNESSolve(). In particular, 319 to employ an initial guess of zero, the user should explicitly set 320 this vector to zero by calling VecSet(). 321 */ 322 PetscCall(FormInitialGuess(x)); 323 PetscCall(SNESSolve(snes, NULL, x)); 324 PetscCall(SNESGetIterationNumber(snes, &its)); 325 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number of SNES iterations = %" PetscInt_FMT "\n", its)); 326 327 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 328 Check solution and clean up 329 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 330 /* 331 Check the error 332 */ 333 PetscCall(VecAXPY(x, none, U)); 334 PetscCall(VecNorm(x, NORM_2, &norm)); 335 PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Norm of error %g Iterations %" PetscInt_FMT "\n", (double)norm, its)); 336 337 /* 338 Free work space. All PETSc objects should be destroyed when they 339 are no longer needed. 340 */ 341 PetscCall(VecDestroy(&x)); 342 PetscCall(VecDestroy(&r)); 343 PetscCall(VecDestroy(&U)); 344 PetscCall(VecDestroy(&F)); 345 PetscCall(MatDestroy(&J)); 346 PetscCall(SNESDestroy(&snes)); 347 PetscCall(DMDestroy(&ctx.da)); 348 PetscCall(PetscFinalize()); 349 return 0; 350 } 351 352 /*TEST 353 354 build: 355 requires: kokkos_kernels 356 357 test: 358 requires: kokkos_kernels !complex !single 359 nsize: 2 360 args: -dm_vec_type kokkos -dm_mat_type aijkokkos -view_initial -snes_monitor 361 output_file: output/ex3k_1.out 362 363 TEST*/ 364