1 2 3 static char help[] = "Newton methods to solve u'' + u^{2} = f in parallel. Uses Kokkos\n\\n"; 4 5 /* 6 This is a simplied version of ex3.c except it uses Kokkos to set the initial conditions 7 */ 8 9 /* 10 Include "petscdm.h" so that we can use data management objects (DMs) 11 Include "petscdmda.h" so that we can use distributed arrays (DMDAs). 12 Include "petscsnes.h" so that we can use SNES solvers. Note that this 13 */ 14 15 #include <petscdm.h> 16 #include <petscdmda.h> 17 #include <petscsnes.h> 18 19 /* 20 Include Kokkos files 21 22 */ 23 #include <Kokkos_Core.hpp> 24 #include <Kokkos_OffsetView.hpp> 25 26 /* 27 Application-defined routines. 28 */ 29 PetscErrorCode FormJacobian(SNES,Vec,Mat,Mat,void*); 30 PetscErrorCode FormFunction(SNES,Vec,Vec,void*); 31 PetscErrorCode FormInitialGuess(Vec); 32 33 /* 34 User-defined application context 35 */ 36 typedef struct { 37 DM da; /* distributed array */ 38 Vec F; /* right-hand-side of PDE */ 39 PetscReal h; /* mesh spacing */ 40 } ApplicationCtx; 41 42 int main(int argc,char **argv) 43 { 44 SNES snes; /* SNES context */ 45 Mat J; /* Jacobian matrix */ 46 ApplicationCtx ctx; /* user-defined context */ 47 Vec x,r,U,F; /* vectors */ 48 PetscScalar *FF,*UU,none = -1.0; 49 PetscErrorCode ierr; 50 PetscInt its,N = 5,maxit,maxf,xs,xm; 51 PetscReal abstol,rtol,stol,norm; 52 PetscBool viewinitial = PETSC_FALSE; 53 PetscBool view_kokkos_configuration = PETSC_TRUE; 54 55 56 ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr; 57 ierr = PetscOptionsGetInt(NULL,NULL,"-n",&N,NULL);CHKERRQ(ierr); 58 ierr = PetscOptionsGetBool(NULL,NULL,"-view_kokkos_configuration",&view_kokkos_configuration,NULL);CHKERRQ(ierr); 59 ctx.h = 1.0/(N-1); 60 ierr = PetscOptionsGetBool(NULL,NULL,"-view_initial",&viewinitial,NULL);CHKERRQ(ierr); 61 62 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 63 Create nonlinear solver context 64 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 65 66 ierr = SNESCreate(PETSC_COMM_WORLD,&snes);CHKERRQ(ierr); 67 68 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 69 Create vector data structures; set function evaluation routine 70 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 71 72 /* 73 Create distributed array (DMDA) to manage parallel grid and vectors 74 */ 75 ierr = DMDACreate1d(PETSC_COMM_WORLD,DM_BOUNDARY_NONE,N,1,1,NULL,&ctx.da);CHKERRQ(ierr); 76 ierr = DMSetFromOptions(ctx.da);CHKERRQ(ierr); 77 ierr = DMSetUp(ctx.da);CHKERRQ(ierr); 78 79 /* 80 Extract global and local vectors from DMDA; then duplicate for remaining 81 vectors that are the same types 82 */ 83 ierr = DMCreateGlobalVector(ctx.da,&x);CHKERRQ(ierr); 84 ierr = PetscObjectSetName((PetscObject)x,"Approximate Solution");CHKERRQ(ierr); 85 ierr = VecDuplicate(x,&r);CHKERRQ(ierr); 86 ierr = VecDuplicate(x,&F);CHKERRQ(ierr); ctx.F = F; 87 ierr = PetscObjectSetName((PetscObject)F,"Forcing function");CHKERRQ(ierr); 88 ierr = VecDuplicate(x,&U);CHKERRQ(ierr); 89 ierr = PetscObjectSetName((PetscObject)U,"Exact Solution");CHKERRQ(ierr); 90 91 /* 92 Set function evaluation routine and vector. Whenever the nonlinear 93 solver needs to compute the nonlinear function, it will call this 94 routine. 95 - Note that the final routine argument is the user-defined 96 context that provides application-specific data for the 97 function evaluation routine. 98 */ 99 ierr = SNESSetFunction(snes,r,FormFunction,&ctx);CHKERRQ(ierr); 100 101 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 102 Create matrix data structure; set Jacobian evaluation routine 103 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 104 105 ierr = DMCreateMatrix(ctx.da,&J);CHKERRQ(ierr); 106 107 /* 108 Set Jacobian matrix data structure and default Jacobian evaluation 109 routine. Whenever the nonlinear solver needs to compute the 110 Jacobian matrix, it will call this routine. 111 - Note that the final routine argument is the user-defined 112 context that provides application-specific data for the 113 Jacobian evaluation routine. 114 */ 115 ierr = SNESSetJacobian(snes,J,J,FormJacobian,&ctx);CHKERRQ(ierr); 116 ierr = SNESSetFromOptions(snes);CHKERRQ(ierr); 117 ierr = SNESGetTolerances(snes,&abstol,&rtol,&stol,&maxit,&maxf);CHKERRQ(ierr); 118 ierr = PetscPrintf(PETSC_COMM_WORLD,"atol=%g, rtol=%g, stol=%g, maxit=%D, maxf=%D\n",(double)abstol,(double)rtol,(double)stol,maxit,maxf);CHKERRQ(ierr); 119 120 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 121 Initialize application: 122 Store forcing function of PDE and exact solution 123 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 124 125 /* 126 Get local grid boundaries (for 1-dimensional DMDA): 127 xs, xm - starting grid index, width of local grid (no ghost points) 128 */ 129 ierr = DMDAGetCorners(ctx.da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 130 131 /* 132 Get pointers to vector data 133 */ 134 ierr = VecGetArrayWrite(F,&FF);CHKERRQ(ierr); 135 ierr = VecGetArrayWrite(U,&UU);CHKERRQ(ierr); 136 137 /* -------------------------------------------------------------------------------------------------- */ 138 /* ./configure --download-kokkos --download-hwloc [one of --with-openmp --with-pthread --with-cuda] */ 139 /* export OMP_PROC_BIND="threads" export OMP_PROC_BIND="spread" */ 140 141 if (!getenv("KOKKOS_NUM_THREADS")) setenv("KOKKOS_NUM_THREADS","4",1); 142 143 if (view_kokkos_configuration) { 144 Kokkos::print_configuration(std::cout, true); 145 } 146 147 /* introduce a view object; reference like object */ 148 Kokkos::Experimental::OffsetView<PetscScalar*> xFF(Kokkos::View<PetscScalar*>(FF,xm),{xs}), xUU(Kokkos::View<PetscScalar*>(UU,xm),{xs}); 149 150 PetscReal xpbase = xs*ctx.h; 151 Kokkos:: parallel_for (Kokkos::RangePolicy<> (xs,xm), KOKKOS_LAMBDA ( int j) { 152 PetscReal xp = xpbase + j*ctx.h; 153 xFF(j) = 6.0*xp + PetscPowScalar(xp+1.e-12,6.0); /* +1.e-12 is to prevent 0^6 */ 154 xUU(j) = xp*xp*xp; 155 }); 156 157 /* 158 Restore vectors 159 */ 160 ierr = VecRestoreArrayWrite(F,&FF);CHKERRQ(ierr); 161 ierr = VecRestoreArrayWrite(U,&UU);CHKERRQ(ierr); 162 if (viewinitial) { 163 ierr = VecView(U,NULL);CHKERRQ(ierr); 164 ierr = VecView(F,NULL);CHKERRQ(ierr); 165 } 166 167 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 168 Evaluate initial guess; then solve nonlinear system 169 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 170 171 /* 172 Note: The user should initialize the vector, x, with the initial guess 173 for the nonlinear solver prior to calling SNESSolve(). In particular, 174 to employ an initial guess of zero, the user should explicitly set 175 this vector to zero by calling VecSet(). 176 */ 177 ierr = FormInitialGuess(x);CHKERRQ(ierr); 178 ierr = SNESSolve(snes,NULL,x);CHKERRQ(ierr); 179 ierr = SNESGetIterationNumber(snes,&its);CHKERRQ(ierr); 180 ierr = PetscPrintf(PETSC_COMM_WORLD,"Number of SNES iterations = %D\n",its);CHKERRQ(ierr); 181 182 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 183 Check solution and clean up 184 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ 185 186 /* 187 Check the error 188 */ 189 ierr = VecAXPY(x,none,U);CHKERRQ(ierr); 190 ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); 191 ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g Iterations %D\n",(double)norm,its);CHKERRQ(ierr); 192 193 /* 194 Free work space. All PETSc objects should be destroyed when they 195 are no longer needed. 196 */ 197 ierr = VecDestroy(&x);CHKERRQ(ierr); 198 ierr = VecDestroy(&r);CHKERRQ(ierr); 199 ierr = VecDestroy(&U);CHKERRQ(ierr); 200 ierr = VecDestroy(&F);CHKERRQ(ierr); 201 ierr = MatDestroy(&J);CHKERRQ(ierr); 202 ierr = SNESDestroy(&snes);CHKERRQ(ierr); 203 ierr = DMDestroy(&ctx.da);CHKERRQ(ierr); 204 ierr = PetscFinalize(); 205 return ierr; 206 } 207 208 /* ------------------------------------------------------------------- */ 209 /* 210 FormInitialGuess - Computes initial guess. 211 212 Input/Output Parameter: 213 . x - the solution vector 214 */ 215 PetscErrorCode FormInitialGuess(Vec x) 216 { 217 PetscErrorCode ierr; 218 PetscScalar pfive = .50; 219 220 PetscFunctionBeginUser; 221 ierr = VecSet(x,pfive);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 /* ------------------------------------------------------------------- */ 226 /* 227 FormFunction - Evaluates nonlinear function, F(x). 228 229 Input Parameters: 230 . snes - the SNES context 231 . x - input vector 232 . ctx - optional user-defined context, as set by SNESSetFunction() 233 234 Output Parameter: 235 . f - function vector 236 237 Note: 238 The user-defined context can contain any application-specific 239 data needed for the function evaluation. 240 */ 241 PetscErrorCode FormFunction(SNES snes,Vec x,Vec f,void *ctx) 242 { 243 ApplicationCtx *user = (ApplicationCtx*) ctx; 244 DM da = user->da; 245 PetscScalar *xx,*ff,*FF,d; 246 PetscErrorCode ierr; 247 PetscInt i,M,xs,xm; 248 Vec xlocal; 249 250 PetscFunctionBeginUser; 251 ierr = DMGetLocalVector(da,&xlocal);CHKERRQ(ierr); 252 /* 253 Scatter ghost points to local vector, using the 2-step process 254 DMGlobalToLocalBegin(), DMGlobalToLocalEnd(). 255 By placing code between these two statements, computations can 256 be done while messages are in transition. 257 */ 258 ierr = DMGlobalToLocalBegin(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr); 259 ierr = DMGlobalToLocalEnd(da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr); 260 261 /* 262 Get pointers to vector data. 263 - The vector xlocal includes ghost point; the vectors x and f do 264 NOT include ghost points. 265 - Using DMDAVecGetArray() allows accessing the values using global ordering 266 */ 267 ierr = DMDAVecGetArray(da,xlocal,&xx);CHKERRQ(ierr); 268 ierr = DMDAVecGetArray(da,f,&ff);CHKERRQ(ierr); 269 ierr = DMDAVecGetArray(da,user->F,&FF);CHKERRQ(ierr); 270 271 /* 272 Get local grid boundaries (for 1-dimensional DMDA): 273 xs, xm - starting grid index, width of local grid (no ghost points) 274 */ 275 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 276 ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 277 278 /* 279 Set function values for boundary points; define local interior grid point range: 280 xsi - starting interior grid index 281 xei - ending interior grid index 282 */ 283 if (xs == 0) { /* left boundary */ 284 ff[0] = xx[0]; 285 xs++;xm--; 286 } 287 if (xs+xm == M) { /* right boundary */ 288 ff[xs+xm-1] = xx[xs+xm-1] - 1.0; 289 xm--; 290 } 291 292 /* 293 Compute function over locally owned part of the grid (interior points only) 294 */ 295 d = 1.0/(user->h*user->h); 296 for (i=xs; i<xs+xm; i++) ff[i] = d*(xx[i-1] - 2.0*xx[i] + xx[i+1]) + xx[i]*xx[i] - FF[i]; 297 298 /* 299 Restore vectors 300 */ 301 ierr = DMDAVecRestoreArray(da,xlocal,&xx);CHKERRQ(ierr); 302 ierr = DMDAVecRestoreArray(da,f,&ff);CHKERRQ(ierr); 303 ierr = DMDAVecRestoreArray(da,user->F,&FF);CHKERRQ(ierr); 304 ierr = DMRestoreLocalVector(da,&xlocal);CHKERRQ(ierr); 305 PetscFunctionReturn(0); 306 } 307 308 /* ------------------------------------------------------------------- */ 309 /* 310 FormJacobian - Evaluates Jacobian matrix. 311 312 Input Parameters: 313 . snes - the SNES context 314 . x - input vector 315 . dummy - optional user-defined context (not used here) 316 317 Output Parameters: 318 . jac - Jacobian matrix 319 . B - optionally different preconditioning matrix 320 . flag - flag indicating matrix structure 321 */ 322 PetscErrorCode FormJacobian(SNES snes,Vec x,Mat jac,Mat B,void *ctx) 323 { 324 ApplicationCtx *user = (ApplicationCtx*) ctx; 325 PetscScalar *xx,d,A[3]; 326 PetscErrorCode ierr; 327 PetscInt i,j[3],M,xs,xm; 328 DM da = user->da; 329 330 PetscFunctionBeginUser; 331 /* 332 Get pointer to vector data 333 */ 334 ierr = DMDAVecGetArrayRead(da,x,&xx);CHKERRQ(ierr); 335 ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr); 336 337 /* 338 Get range of locally owned matrix 339 */ 340 ierr = DMDAGetInfo(da,NULL,&M,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 341 342 /* 343 Determine starting and ending local indices for interior grid points. 344 Set Jacobian entries for boundary points. 345 */ 346 347 if (xs == 0) { /* left boundary */ 348 i = 0; A[0] = 1.0; 349 350 ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr); 351 xs++;xm--; 352 } 353 if (xs+xm == M) { /* right boundary */ 354 i = M-1; 355 A[0] = 1.0; 356 ierr = MatSetValues(jac,1,&i,1,&i,A,INSERT_VALUES);CHKERRQ(ierr); 357 xm--; 358 } 359 360 /* 361 Interior grid points 362 - Note that in this case we set all elements for a particular 363 row at once. 364 */ 365 d = 1.0/(user->h*user->h); 366 for (i=xs; i<xs+xm; i++) { 367 j[0] = i - 1; j[1] = i; j[2] = i + 1; 368 A[0] = A[2] = d; A[1] = -2.0*d + 2.0*xx[i]; 369 ierr = MatSetValues(jac,1,&i,3,j,A,INSERT_VALUES);CHKERRQ(ierr); 370 } 371 372 /* 373 Assemble matrix, using the 2-step process: 374 MatAssemblyBegin(), MatAssemblyEnd(). 375 By placing code between these two statements, computations can be 376 done while messages are in transition. 377 378 Also, restore vector. 379 */ 380 381 ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 382 ierr = DMDAVecRestoreArrayRead(da,x,&xx);CHKERRQ(ierr); 383 ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 384 385 PetscFunctionReturn(0); 386 } 387 388 /* 389 The first test works for Kokkos wtih OpenMP and PThreads, the second with CUDA. 390 391 The second test requires -dm_vec_type cuda and -vec_pinned_memory_min 0 because Kokkos 392 assumes all memory it is given is allocated with cudaMallocHost() and can be used with 393 cudaMemcpy() without indicating it the memory is GPU or CPU. Note this is NOT unified memory. 394 395 The third test requires -dm_mat_type aijcusparse to create the correct vectors for block Jacobi 396 */ 397 398 /*TEST 399 400 build: 401 requires: kokkos 402 403 test: 404 requires: kokkos double !complex !single !cuda 405 args: -view_initial -view_kokkos_configuration false -snes_monitor 406 filter: grep -v type: 407 408 test: 409 suffix: 2 410 requires: kokkos double !complex !single cuda 411 args: -dm_vec_type cuda -vec_pinned_memory_min 0 -view_initial -view_kokkos_configuration false -snes_monitor 412 output_file: output/ex3k_1.out 413 filter: grep -v type: 414 415 test: 416 suffix: 3 417 requires: kokkos double !complex !single define(PETSC_HAVE_cusparseCreateSolveAnalysisInfo) cuda 418 nsize: 2 419 args: -dm_vec_type cuda -dm_mat_type aijcusparse -vec_pinned_memory_min 0 -view_initial -view_kokkos_configuration false -snes_monitor 420 output_file: output/ex3k_1.out 421 filter: grep -v type: 422 423 TEST*/ 424