1 2 3 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n"; 4 5 /* 6 This is a simplied version of ex3.c except it uses Kokkos to set the initial conditions 7 */ 8 9 /* 10 Include "petscdm.h" so that we can use data management objects (DMs) 11 Include "petscdmda.h" so that we can use distributed arrays (DMDAs). 12 Include "petscsnes.h" so that we can use SNES solvers. Note that this 13 */ 14 15 #include <petscdm.h> 16 #include <petscdmda.h> 17 #include <petscsnes.h> 18 19 /* 20 Include Kokkos files 21 22 */ 23 #include <Kokkos_Core.hpp> 24 #include <Kokkos_OffsetView.hpp> 25 26 /* 27 Application-defined routines. 28 */ 29 PetscErrorCode FormJacobian(SNES,Vec,Mat,Mat,void*); 30 PetscErrorCode FormFunction(SNES,Vec,Vec,void*); 31 PetscErrorCode FormInitialGuess(Vec); 32 33 /* 34 User-defined application context 35 */ 36 typedef struct { 37 DM da; /* distributed array */ 38 Vec F; /* right-hand-side of PDE */ 39 PetscReal h; /* mesh spacing */ 40 } ApplicationCtx; 41 42 int main(int argc,char **argv) 43 { 44 SNES snes; /* SNES context */ 45 Mat J; /* Jacobian matrix */ 46 ApplicationCtx ctx; /* user-defined context */ 47 Vec x,r,U,F; /* vectors */ 48 PetscScalar *FF,*UU,none = -1.0; 49 PetscErrorCode ierr; 50 PetscInt its,N = 5,maxit,maxf,xs,xm; 51 PetscReal abstol,rtol,stol,norm; 52 PetscBool viewinitial = PETSC_FALSE; 53 PetscBool view_kokkos_configuration = PETSC_TRUE; 54 55 56 ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 57 ierr = PetscOptionsGetInt(NULL,NULL,"-n",&N,NULL);CHKERRQ(ierr); 58 ierr = PetscOptionsGetBool(NULL,NULL,"-view_kokkos_configuration",&view_kokkos_configuration,NULL);CHKERRQ(ierr); 59 ctx.h = 1.0/(N-1); 60 ierr = PetscOptionsGetBool(NULL,NULL,"-view_initial",&viewinitial,NULL);CHKERRQ(ierr); 61 62 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 63 Create nonlinear solver context 64 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 65 66 ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr); 67 68 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 69 Create vector data structures; set function evaluation routine 70 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 71 72 /* 73 Create distributed array (DMDA) to manage parallel grid and vectors 74 */ 75 ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,N,1,1,NULL,&ctx.da);CHKERRQ(ierr); 76 ierr = DMSetFromOptions(ctx.da);CHKERRQ(ierr); 77 ierr = DMSetUp(ctx.da);CHKERRQ(ierr); 78 79 /* 80 Extract global and local vectors from DMDA; then duplicate for remaining 81 vectors that are the same types 82 */ 83 ierr = DMCreateGlobalVector(ctx.da,&x);CHKERRQ(ierr); 84 ierr = PetscObjectSetName((PetscObject)x,"Approximate Solution");CHKERRQ(ierr); 85 ierr = VecDuplicate(x,&r);CHKERRQ(ierr); 86 ierr = VecDuplicate(x,&F);CHKERRQ(ierr); ctx.F = F; 87 ierr = PetscObjectSetName((PetscObject)F,"Forcing function");CHKERRQ(ierr); 88 ierr = VecDuplicate(x,&U);CHKERRQ(ierr); 89 ierr = PetscObjectSetName((PetscObject)U,"Exact Solution");CHKERRQ(ierr); 90 91 /* 92 Set function evaluation routine and vector. Whenever the nonlinear 93 solver needs to compute the nonlinear function, it will call this 94 routine. 95 - Note that the final routine argument is the user-defined 96 context that provides application-specific data for the 97 function evaluation routine. 98 */ 99 ierr = SNESSetFunction(snes,r,FormFunction,&ctx);CHKERRQ(ierr); 100 101 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 102 Create matrix data structure; set Jacobian evaluation routine 103 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 104 105 ierr = DMCreateMatrix(ctx.da,&J);CHKERRQ(ierr); 106 107 /* 108 Set Jacobian matrix data structure and default Jacobian evaluation 109 routine. Whenever the nonlinear solver needs to compute the 110 Jacobian matrix, it will call this routine. 111 - Note that the final routine argument is the user-defined 112 context that provides application-specific data for the 113 Jacobian evaluation routine. 114 */ 115 ierr = SNESSetJacobian(snes,J,J,FormJacobian,&ctx);CHKERRQ(ierr); 116 ierr = SNESSetFromOptions(snes);CHKERRQ(ierr); 117 ierr = SNESGetTolerances(snes,&abstol,&rtol,&stol,&maxit,&maxf);CHKERRQ(ierr); 118 ierr = PetscPrintf(PETSC_COMM_WORLD,"atol=%g, rtol=%g, stol=%g, maxit=%D, maxf=%D\n",(double)abstol,(double)rtol,(double)stol,maxit,maxf);CHKERRQ(ierr); 119 120 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 121 Initialize application: 122 Store forcing function of PDE and exact solution 123 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 124 125 /* 126 Get local grid boundaries (for 1-dimensional DMDA): 127 xs, xm - starting grid index, width of local grid (no ghost points) 128 */ 129 ierr = DMDAGetCorners(ctx.da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 130 131 /* 132 Get pointers to vector data 133 */ 134 ierr = VecGetArrayWrite(F,&FF);CHKERRQ(ierr); 135 ierr = VecGetArrayWrite(U,&UU);CHKERRQ(ierr); 136 137 /* -------------------------------------------------------------------------------------------------- */ 138 /* ./configure --download-kokkos --download-hwloc [one of --with-openmp --with-pthread --with-cuda] */ 139 /* export OMP_PROC_BIND="threads" export OMP_PROC_BIND="spread" */ 140 141 if (!getenv("KOKKOS_NUM_THREADS")) setenv("KOKKOS_NUM_THREADS","4",1); 142 ierr = PetscKokkosInitializeCheck();CHKERRQ(ierr); /* Init Kokkos if not yet */ 143 144 if (view_kokkos_configuration) { 145 Kokkos::print_configuration(std::cout, true); 146 } 147 148 /* introduce a view object; reference like object */ 149 Kokkos::Experimental::OffsetView<PetscScalar*> xFF(Kokkos::View<PetscScalar*>(FF,xm),{xs}), xUU(Kokkos::View<PetscScalar*>(UU,xm),{xs}); 150 151 PetscReal xpbase = xs*ctx.h; 152 Kokkos:: parallel_for (Kokkos::RangePolicy<> (xs,xm), KOKKOS_LAMBDA ( int j) { 153 PetscReal xp = xpbase + j*ctx.h; 154 xFF(j) = 6.0*xp + PetscPowScalar(xp+1.e-12,6.0); /* +1.e-12 is to prevent 0^6 */ 155 xUU(j) = xp*xp*xp; 156 }); 157 158 /* 159 Restore vectors 160 */ 161 ierr = VecRestoreArrayWrite(F,&FF);CHKERRQ(ierr); 162 ierr = VecRestoreArrayWrite(U,&UU);CHKERRQ(ierr); 163 if (viewinitial) { 164 ierr = VecView(U,NULL);CHKERRQ(ierr); 165 ierr = VecView(F,NULL);CHKERRQ(ierr); 166 } 167 168 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 169 Evaluate initial guess; then solve nonlinear system 170 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 171 172 /* 173 Note: The user should initialize the vector, x, with the initial guess 174 for the nonlinear solver prior to calling SNESSolve(). In particular, 175 to employ an initial guess of zero, the user should explicitly set 176 this vector to zero by calling VecSet(). 177 */ 178 ierr = FormInitialGuess(x);CHKERRQ(ierr); 179 ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr); 180 ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr); 181 ierr = PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its);CHKERRQ(ierr); 182 183 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 184 Check solution and clean up 185 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 186 187 /* 188 Check the error 189 */ 190 ierr = VecAXPY(x,none,U);CHKERRQ(ierr); 191 ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); 192 ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)norm,its);CHKERRQ(ierr); 193 194 /* 195 Free work space. All PETSc objects should be destroyed when they 196 are no longer needed. 197 */ 198 ierr = VecDestroy(&x);CHKERRQ(ierr); 199 ierr = VecDestroy(&r);CHKERRQ(ierr); 200 ierr = VecDestroy(&U);CHKERRQ(ierr); 201 ierr = VecDestroy(&F);CHKERRQ(ierr); 202 ierr = MatDestroy(&J);CHKERRQ(ierr); 203 ierr = SNESDestroy(&snes);CHKERRQ(ierr); 204 ierr = DMDestroy(&ctx.da);CHKERRQ(ierr); 205 ierr = PetscFinalize(); 206 return ierr; 207 } 208 209 /* ------------------------------------------------------------------- */ 210 /* 211 FormInitialGuess - Computes initial guess. 212 213 Input/Output Parameter: 214 . x - the solution vector 215 */ 216 PetscErrorCode FormInitialGuess(Vec x) 217 { 218 PetscErrorCode ierr; 219 PetscScalar pfive = .50; 220 221 PetscFunctionBeginUser; 222 ierr = VecSet(x,pfive);CHKERRQ(ierr); 223 PetscFunctionReturn(0); 224 } 225 226 /* ------------------------------------------------------------------- */ 227 /* 228 FormFunction - Evaluates nonlinear function, F(x). 229 230 Input Parameters: 231 . snes - the SNES context 232 . x - input vector 233 . ctx - optional user-defined context, as set by SNESSetFunction() 234 235 Output Parameter: 236 . f - function vector 237 238 Note: 239 The user-defined context can contain any application-specific 240 data needed for the function evaluation. 241 */ 242 PetscErrorCode FormFunction(SNES snes,Vec x,Vec f,void *ctx) 243 { 244 ApplicationCtx *user = (ApplicationCtx*) ctx; 245 DM da = user->da; 246 PetscScalar *xx,*ff,*FF,d; 247 PetscErrorCode ierr; 248 PetscInt i,M,xs,xm; 249 Vec xlocal; 250 251 PetscFunctionBeginUser; 252 ierr = DMGetLocalVector(da,&xlocal);CHKERRQ(ierr); 253 /* 254 Scatter ghost points to local vector, using the 2-step process 255 DMGlobalToLocalBegin(), DMGlobalToLocalEnd(). 256 By placing code between these two statements, computations can 257 be done while messages are in transition. 258 */ 259 ierr = DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr); 260 ierr = DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr); 261 262 /* 263 Get pointers to vector data. 264 - The vector xlocal includes ghost point; the vectors x and f do 265 NOT include ghost points. 266 - Using DMDAVecGetArray() allows accessing the values using global ordering 267 */ 268 ierr = DMDAVecGetArray(da,xlocal,&xx);CHKERRQ(ierr); 269 ierr = DMDAVecGetArray(da,f,&ff);CHKERRQ(ierr); 270 ierr = DMDAVecGetArray(da,user->F,&FF);CHKERRQ(ierr); 271 272 /* 273 Get local grid boundaries (for 1-dimensional DMDA): 274 xs, xm - starting grid index, width of local grid (no ghost points) 275 */ 276 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 277 ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 278 279 /* 280 Set function values for boundary points; define local interior grid point range: 281 xsi - starting interior grid index 282 xei - ending interior grid index 283 */ 284 if (xs == 0) { /* left boundary */ 285 ff[0] = xx[0]; 286 xs++;xm--; 287 } 288 if (xs+xm == M) { /* right boundary */ 289 ff[xs+xm-1] = xx[xs+xm-1] - 1.0; 290 xm--; 291 } 292 293 /* 294 Compute function over locally owned part of the grid (interior points only) 295 */ 296 d = 1.0/(user->h*user->h); 297 for (i=xs; i<xs+xm; i++) ff[i] = d*(xx[i-1] - 2.0*xx[i] + xx[i+1]) + xx[i]*xx[i] - FF[i]; 298 299 /* 300 Restore vectors 301 */ 302 ierr = DMDAVecRestoreArray(da,xlocal,&xx);CHKERRQ(ierr); 303 ierr = DMDAVecRestoreArray(da,f,&ff);CHKERRQ(ierr); 304 ierr = DMDAVecRestoreArray(da,user->F,&FF);CHKERRQ(ierr); 305 ierr = DMRestoreLocalVector(da,&xlocal);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 /* ------------------------------------------------------------------- */ 310 /* 311 FormJacobian - Evaluates Jacobian matrix. 312 313 Input Parameters: 314 . snes - the SNES context 315 . x - input vector 316 . dummy - optional user-defined context (not used here) 317 318 Output Parameters: 319 . jac - Jacobian matrix 320 . B - optionally different preconditioning matrix 321 . flag - flag indicating matrix structure 322 */ 323 PetscErrorCode FormJacobian(SNES snes,Vec x,Mat jac,Mat B,void *ctx) 324 { 325 ApplicationCtx *user = (ApplicationCtx*) ctx; 326 PetscScalar *xx,d,A[3]; 327 PetscErrorCode ierr; 328 PetscInt i,j[3],M,xs,xm; 329 DM da = user->da; 330 331 PetscFunctionBeginUser; 332 /* 333 Get pointer to vector data 334 */ 335 ierr = DMDAVecGetArrayRead(da,x,&xx);CHKERRQ(ierr); 336 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 337 338 /* 339 Get range of locally owned matrix 340 */ 341 ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 342 343 /* 344 Determine starting and ending local indices for interior grid points. 345 Set Jacobian entries for boundary points. 346 */ 347 348 if (xs == 0) { /* left boundary */ 349 i = 0; A[0] = 1.0; 350 351 ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr); 352 xs++;xm--; 353 } 354 if (xs+xm == M) { /* right boundary */ 355 i = M-1; 356 A[0] = 1.0; 357 ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr); 358 xm--; 359 } 360 361 /* 362 Interior grid points 363 - Note that in this case we set all elements for a particular 364 row at once. 365 */ 366 d = 1.0/(user->h*user->h); 367 for (i=xs; i<xs+xm; i++) { 368 j[0] = i - 1; j[1] = i; j[2] = i + 1; 369 A[0] = A[2] = d; A[1] = -2.0*d + 2.0*xx[i]; 370 ierr = MatSetValues(jac,1,&i,3,j,A,INSERT_VALUES);CHKERRQ(ierr); 371 } 372 373 /* 374 Assemble matrix, using the 2-step process: 375 MatAssemblyBegin(), MatAssemblyEnd(). 376 By placing code between these two statements, computations can be 377 done while messages are in transition. 378 379 Also, restore vector. 380 */ 381 382 ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 383 ierr = DMDAVecRestoreArrayRead(da,x,&xx);CHKERRQ(ierr); 384 ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 385 386 PetscFunctionReturn(0); 387 } 388 389 /* 390 The first test works for Kokkos wtih OpenMP and PThreads, the second with CUDA. 391 392 The second test requires -dm_vec_type cuda and -vec_pinned_memory_min 0 because Kokkos 393 assumes all memory it is given is allocated with cudaMallocHost() and can be used with 394 cudaMemcpy() without indicating it the memory is GPU or CPU. Note this is NOT unified memory. 395 396 The third test requires -dm_mat_type aijcusparse to create the correct vectors for block Jacobi 397 */ 398 399 /*TEST 400 401 build: 402 requires: kokkos 403 404 test: 405 requires: kokkos double !complex !single !cuda 406 args: -view_initial -view_kokkos_configuration false -snes_monitor 407 filter: grep -v type: 408 409 test: 410 suffix: 2 411 requires: kokkos double !complex !single cuda 412 args: -dm_vec_type cuda -vec_pinned_memory_min 0 -view_initial -view_kokkos_configuration false -snes_monitor 413 output_file: output/ex3k_1.out 414 filter: grep -v type: 415 416 test: 417 suffix: 3 418 TODO: broken 419 requires: kokkos double !complex !single cuda 420 nsize: 2 421 args: -dm_vec_type cuda -dm_mat_type aijcusparse -vec_pinned_memory_min 0 -view_initial -view_kokkos_configuration false -snes_monitor 422 output_file: output/ex3k_1.out 423 filter: grep -v type: 424 425 TEST*/ 426