1 2 3 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n"; 4 5 /* 6 This is a simplied version of ex3.c except it uses Kokkos to set the initial conditions 7 */ 8 9 /* 10 Include "petscdm.h" so that we can use data management objects (DMs) 11 Include "petscdmda.h" so that we can use distributed arrays (DMDAs). 12 Include "petscsnes.h" so that we can use SNES solvers. Note that this 13 */ 14 15 #include <petscdm.h> 16 #include <petscdmda.h> 17 #include <petscsnes.h> 18 19 /* 20 Include Kokkos files 21 22 */ 23 #include <Kokkos_Core.hpp> 24 #include <Kokkos_OffsetView.hpp> 25 26 /* 27 Application-defined routines. 28 */ 29 PetscErrorCode FormJacobian(SNES,Vec,Mat,Mat,void*); 30 PetscErrorCode FormFunction(SNES,Vec,Vec,void*); 31 PetscErrorCode FormInitialGuess(Vec); 32 33 /* 34 User-defined application context 35 */ 36 typedef struct { 37 DM da; /* distributed array */ 38 Vec F; /* right-hand-side of PDE */ 39 PetscReal h; /* mesh spacing */ 40 } ApplicationCtx; 41 42 int main(int argc,char **argv) 43 { 44 SNES snes; /* SNES context */ 45 Mat J; /* Jacobian matrix */ 46 ApplicationCtx ctx; /* user-defined context */ 47 Vec x,r,U,F; /* vectors */ 48 PetscScalar *FF,*UU,none = -1.0; 49 PetscErrorCode ierr; 50 PetscInt its,N = 5,maxit,maxf,xs,xm; 51 PetscReal abstol,rtol,stol,norm; 52 PetscBool viewinitial = PETSC_FALSE; 53 PetscBool view_kokkos_configuration = PETSC_TRUE; 54 55 56 ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 57 ierr = PetscOptionsGetInt(NULL,NULL,"-n",&N,NULL);CHKERRQ(ierr); 58 ierr = PetscOptionsGetBool(NULL,NULL,"-view_kokkos_configuration",&view_kokkos_configuration,NULL);CHKERRQ(ierr); 59 ctx.h = 1.0/(N-1); 60 ierr = PetscOptionsGetBool(NULL,NULL,"-view_initial",&viewinitial,NULL);CHKERRQ(ierr); 61 62 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 63 Create nonlinear solver context 64 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 65 66 ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr); 67 68 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 69 Create vector data structures; set function evaluation routine 70 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 71 72 /* 73 Create distributed array (DMDA) to manage parallel grid and vectors 74 */ 75 ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,N,1,1,NULL,&ctx.da);CHKERRQ(ierr); 76 ierr = DMSetFromOptions(ctx.da);CHKERRQ(ierr); 77 ierr = DMSetUp(ctx.da);CHKERRQ(ierr); 78 79 /* 80 Extract global and local vectors from DMDA; then duplicate for remaining 81 vectors that are the same types 82 */ 83 ierr = DMCreateGlobalVector(ctx.da,&x);CHKERRQ(ierr); 84 ierr = PetscObjectSetName((PetscObject)x,"Approximate Solution");CHKERRQ(ierr); 85 ierr = VecDuplicate(x,&r);CHKERRQ(ierr); 86 ierr = VecDuplicate(x,&F);CHKERRQ(ierr); ctx.F = F; 87 ierr = PetscObjectSetName((PetscObject)F,"Forcing function");CHKERRQ(ierr); 88 ierr = VecDuplicate(x,&U);CHKERRQ(ierr); 89 ierr = PetscObjectSetName((PetscObject)U,"Exact Solution");CHKERRQ(ierr); 90 91 /* 92 Set function evaluation routine and vector. Whenever the nonlinear 93 solver needs to compute the nonlinear function, it will call this 94 routine. 95 - Note that the final routine argument is the user-defined 96 context that provides application-specific data for the 97 function evaluation routine. 98 */ 99 ierr = SNESSetFunction(snes,r,FormFunction,&ctx);CHKERRQ(ierr); 100 101 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 102 Create matrix data structure; set Jacobian evaluation routine 103 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 104 105 ierr = DMCreateMatrix(ctx.da,&J);CHKERRQ(ierr); 106 107 /* 108 Set Jacobian matrix data structure and default Jacobian evaluation 109 routine. Whenever the nonlinear solver needs to compute the 110 Jacobian matrix, it will call this routine. 111 - Note that the final routine argument is the user-defined 112 context that provides application-specific data for the 113 Jacobian evaluation routine. 114 */ 115 ierr = SNESSetJacobian(snes,J,J,FormJacobian,&ctx);CHKERRQ(ierr); 116 ierr = SNESSetFromOptions(snes);CHKERRQ(ierr); 117 ierr = SNESGetTolerances(snes,&abstol,&rtol,&stol,&maxit,&maxf);CHKERRQ(ierr); 118 ierr = PetscPrintf(PETSC_COMM_WORLD,"atol=%g, rtol=%g, stol=%g, maxit=%D, maxf=%D\n",(double)abstol,(double)rtol,(double)stol,maxit,maxf);CHKERRQ(ierr); 119 120 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 121 Initialize application: 122 Store forcing function of PDE and exact solution 123 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 124 125 /* 126 Get local grid boundaries (for 1-dimensional DMDA): 127 xs, xm - starting grid index, width of local grid (no ghost points) 128 */ 129 ierr = DMDAGetCorners(ctx.da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 130 131 /* 132 Get pointers to vector data 133 */ 134 ierr = VecGetArrayWrite(F,&FF);CHKERRQ(ierr); 135 ierr = VecGetArrayWrite(U,&UU);CHKERRQ(ierr); 136 137 /* -------------------------------------------------------------------------------------------------- */ 138 /* ./configure --download-kokkos --download-hwloc [one of --with-openmp --with-pthread --with-cuda] */ 139 /* export OMP_PROC_BIND="threads" export OMP_PROC_BIND="spread" */ 140 141 if (!getenv("KOKKOS_NUM_THREADS")) setenv("KOKKOS_NUM_THREADS","4",1); 142 if (!view_kokkos_configuration) { 143 /* use private routine to turn off warnings about negative number of cores etc */ 144 Kokkos::Impl::pre_initialize(Kokkos::InitArguments(-1, -1, -1, true)); 145 } 146 Kokkos::initialize( argc, argv ); 147 if (view_kokkos_configuration) { 148 Kokkos::print_configuration(std::cout, true); 149 } 150 151 /* introduce a view object; reference like object */ 152 Kokkos::Experimental::OffsetView<PetscScalar*> xFF(Kokkos::View<PetscScalar*>(FF,xm),{xs}), xUU(Kokkos::View<PetscScalar*>(UU,xm),{xs}); 153 154 PetscReal xpbase = xs*ctx.h; 155 Kokkos:: parallel_for(Kokkos::RangePolicy<> (xs,xm), KOKKOS_LAMBDA ( int j ) { 156 PetscReal xp = xpbase + j*ctx.h; 157 xFF(j) = 6.0*xp + PetscPowScalar(xp+1.e-12,6.0); /* +1.e-12 is to prevent 0^6 */ 158 xUU(j) = xp*xp*xp; 159 }); 160 161 Kokkos::finalize(); 162 163 /* 164 Restore vectors 165 */ 166 ierr = VecRestoreArrayWrite(F,&FF);CHKERRQ(ierr); 167 ierr = VecRestoreArrayWrite(U,&UU);CHKERRQ(ierr); 168 if (viewinitial) { 169 ierr = VecView(U,NULL);CHKERRQ(ierr); 170 ierr = VecView(F,NULL);CHKERRQ(ierr); 171 } 172 173 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 174 Evaluate initial guess; then solve nonlinear system 175 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 176 177 /* 178 Note: The user should initialize the vector, x, with the initial guess 179 for the nonlinear solver prior to calling SNESSolve(). In particular, 180 to employ an initial guess of zero, the user should explicitly set 181 this vector to zero by calling VecSet(). 182 */ 183 ierr = FormInitialGuess(x);CHKERRQ(ierr); 184 ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr); 185 ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr); 186 ierr = PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its);CHKERRQ(ierr); 187 188 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 189 Check solution and clean up 190 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 191 192 /* 193 Check the error 194 */ 195 ierr = VecAXPY(x,none,U);CHKERRQ(ierr); 196 ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); 197 ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)norm,its);CHKERRQ(ierr); 198 199 /* 200 Free work space. All PETSc objects should be destroyed when they 201 are no longer needed. 202 */ 203 ierr = VecDestroy(&x);CHKERRQ(ierr); 204 ierr = VecDestroy(&r);CHKERRQ(ierr); 205 ierr = VecDestroy(&U);CHKERRQ(ierr); 206 ierr = VecDestroy(&F);CHKERRQ(ierr); 207 ierr = MatDestroy(&J);CHKERRQ(ierr); 208 ierr = SNESDestroy(&snes);CHKERRQ(ierr); 209 ierr = DMDestroy(&ctx.da);CHKERRQ(ierr); 210 ierr = PetscFinalize(); 211 return ierr; 212 } 213 214 /* ------------------------------------------------------------------- */ 215 /* 216 FormInitialGuess - Computes initial guess. 217 218 Input/Output Parameter: 219 . x - the solution vector 220 */ 221 PetscErrorCode FormInitialGuess(Vec x) 222 { 223 PetscErrorCode ierr; 224 PetscScalar pfive = .50; 225 226 PetscFunctionBeginUser; 227 ierr = VecSet(x,pfive);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 /* ------------------------------------------------------------------- */ 232 /* 233 FormFunction - Evaluates nonlinear function, F(x). 234 235 Input Parameters: 236 . snes - the SNES context 237 . x - input vector 238 . ctx - optional user-defined context, as set by SNESSetFunction() 239 240 Output Parameter: 241 . f - function vector 242 243 Note: 244 The user-defined context can contain any application-specific 245 data needed for the function evaluation. 246 */ 247 PetscErrorCode FormFunction(SNES snes,Vec x,Vec f,void *ctx) 248 { 249 ApplicationCtx *user = (ApplicationCtx*) ctx; 250 DM da = user->da; 251 PetscScalar *xx,*ff,*FF,d; 252 PetscErrorCode ierr; 253 PetscInt i,M,xs,xm; 254 Vec xlocal; 255 256 PetscFunctionBeginUser; 257 ierr = DMGetLocalVector(da,&xlocal);CHKERRQ(ierr); 258 /* 259 Scatter ghost points to local vector, using the 2-step process 260 DMGlobalToLocalBegin(), DMGlobalToLocalEnd(). 261 By placing code between these two statements, computations can 262 be done while messages are in transition. 263 */ 264 ierr = DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr); 265 ierr = DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr); 266 267 /* 268 Get pointers to vector data. 269 - The vector xlocal includes ghost point; the vectors x and f do 270 NOT include ghost points. 271 - Using DMDAVecGetArray() allows accessing the values using global ordering 272 */ 273 ierr = DMDAVecGetArray(da,xlocal,&xx);CHKERRQ(ierr); 274 ierr = DMDAVecGetArray(da,f,&ff);CHKERRQ(ierr); 275 ierr = DMDAVecGetArray(da,user->F,&FF);CHKERRQ(ierr); 276 277 /* 278 Get local grid boundaries (for 1-dimensional DMDA): 279 xs, xm - starting grid index, width of local grid (no ghost points) 280 */ 281 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 282 ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 283 284 /* 285 Set function values for boundary points; define local interior grid point range: 286 xsi - starting interior grid index 287 xei - ending interior grid index 288 */ 289 if (xs == 0) { /* left boundary */ 290 ff[0] = xx[0]; 291 xs++;xm--; 292 } 293 if (xs+xm == M) { /* right boundary */ 294 ff[xs+xm-1] = xx[xs+xm-1] - 1.0; 295 xm--; 296 } 297 298 /* 299 Compute function over locally owned part of the grid (interior points only) 300 */ 301 d = 1.0/(user->h*user->h); 302 for (i=xs; i<xs+xm; i++) ff[i] = d*(xx[i-1] - 2.0*xx[i] + xx[i+1]) + xx[i]*xx[i] - FF[i]; 303 304 /* 305 Restore vectors 306 */ 307 ierr = DMDAVecRestoreArray(da,xlocal,&xx);CHKERRQ(ierr); 308 ierr = DMDAVecRestoreArray(da,f,&ff);CHKERRQ(ierr); 309 ierr = DMDAVecRestoreArray(da,user->F,&FF);CHKERRQ(ierr); 310 ierr = DMRestoreLocalVector(da,&xlocal);CHKERRQ(ierr); 311 PetscFunctionReturn(0); 312 } 313 314 /* ------------------------------------------------------------------- */ 315 /* 316 FormJacobian - Evaluates Jacobian matrix. 317 318 Input Parameters: 319 . snes - the SNES context 320 . x - input vector 321 . dummy - optional user-defined context (not used here) 322 323 Output Parameters: 324 . jac - Jacobian matrix 325 . B - optionally different preconditioning matrix 326 . flag - flag indicating matrix structure 327 */ 328 PetscErrorCode FormJacobian(SNES snes,Vec x,Mat jac,Mat B,void *ctx) 329 { 330 ApplicationCtx *user = (ApplicationCtx*) ctx; 331 PetscScalar *xx,d,A[3]; 332 PetscErrorCode ierr; 333 PetscInt i,j[3],M,xs,xm; 334 DM da = user->da; 335 336 PetscFunctionBeginUser; 337 /* 338 Get pointer to vector data 339 */ 340 ierr = DMDAVecGetArrayRead(da,x,&xx);CHKERRQ(ierr); 341 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 342 343 /* 344 Get range of locally owned matrix 345 */ 346 ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 347 348 /* 349 Determine starting and ending local indices for interior grid points. 350 Set Jacobian entries for boundary points. 351 */ 352 353 if (xs == 0) { /* left boundary */ 354 i = 0; A[0] = 1.0; 355 356 ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr); 357 xs++;xm--; 358 } 359 if (xs+xm == M) { /* right boundary */ 360 i = M-1; 361 A[0] = 1.0; 362 ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr); 363 xm--; 364 } 365 366 /* 367 Interior grid points 368 - Note that in this case we set all elements for a particular 369 row at once. 370 */ 371 d = 1.0/(user->h*user->h); 372 for (i=xs; i<xs+xm; i++) { 373 j[0] = i - 1; j[1] = i; j[2] = i + 1; 374 A[0] = A[2] = d; A[1] = -2.0*d + 2.0*xx[i]; 375 ierr = MatSetValues(jac,1,&i,3,j,A,INSERT_VALUES);CHKERRQ(ierr); 376 } 377 378 /* 379 Assemble matrix, using the 2-step process: 380 MatAssemblyBegin(), MatAssemblyEnd(). 381 By placing code between these two statements, computations can be 382 done while messages are in transition. 383 384 Also, restore vector. 385 */ 386 387 ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 388 ierr = DMDAVecRestoreArrayRead(da,x,&xx);CHKERRQ(ierr); 389 ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 390 391 PetscFunctionReturn(0); 392 } 393 394 /* 395 The first test works for Kokkos wtih OpenMP and PThreads, the second with CUDA. 396 397 The second test requires -dm_vec_type cuda and -vec_pinned_memory_min 0 because Kokkos 398 assumes all memory it is given is allocated with cudaMallocHost() and can be used with 399 cudaMemcpy() without indicating it the memory is GPU or CPU. Note this is NOT unified memory. 400 401 The third test requires -dm_mat_type aijcusparse to create the correct vectors for block Jacobi 402 */ 403 404 /*TEST 405 406 build: 407 requires: kokkos 408 409 test: 410 requires: kokkos double !complex !single !cuda 411 args: -view_initial -view_kokkos_configuration false -snes_monitor 412 filter: grep -v type: 413 414 test: 415 suffix: 2 416 requires: kokkos double !complex !single cuda 417 args: -dm_vec_type cuda -vec_pinned_memory_min 0 -view_initial -view_kokkos_configuration false -snes_monitor 418 output_file: output/ex3k_1.out 419 filter: grep -v type: 420 421 test: 422 suffix: 3 423 requires: kokkos double !complex !single defined(PETSC_HAVE_cusparseCreateSolveAnalysisInfo) cuda 424 nsize: 2 425 args: -dm_vec_type cuda -dm_mat_type aijcusparse -vec_pinned_memory_min 0 -view_initial -view_kokkos_configuration false -snes_monitor 426 output_file: output/ex3k_1.out 427 filter: grep -v type: 428 429 TEST*/ 430