1C Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2C the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3C reserved. See files LICENSE and NOTICE for details. 4C 5C This file is part of CEED, a collection of benchmarks, miniapps, software 6C libraries and APIs for efficient high-order finite element and spectral 7C element discretizations for exascale applications. For more information and 8C source code availability see http://github.com/ceed. 9C 10C The CEED research is supported by the Exascale Computing Project (17-SC-20-SC) 11C a collaborative effort of two U.S. Department of Energy organizations (Office 12C of Science and the National Nuclear Security Administration) responsible for 13C the planning and preparation of a capable exascale ecosystem, including 14C software, applications, hardware, advanced system engineering and early 15C testbed platforms, in support of the nation's exascale computing imperative. 16 17C> @file 18C> Mass and diffusion operators examples using Nek5000 19C_TESTARGS -c {ceed_resource} -e bp1 -n 1 -b 4 -test 20C_TESTARGS -c {ceed_resource} -e bp3 -n 1 -b 4 -test 21 22C----------------------------------------------------------------------- 23 subroutine masssetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 24 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 25 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 26C Set up mass operator 27C Input: u1,u2,u3,q Output: v1,v2,ierr 28 integer q,ierr 29 real*8 ctx(1) 30 real*8 u1(3*q) 31 real*8 u2(9*q) 32 real*8 u3(q) 33 real*8 v1(q) 34 real*8 v2(q) 35 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 36 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 37 real*8 jacmq 38 39C Quadrature Point Loop 40 do i=1,q 41 a11=u2(i+q*0) 42 a12=u2(i+q*3) 43 a13=u2(i+q*6) 44 45 a21=u2(i+q*1) 46 a22=u2(i+q*4) 47 a23=u2(i+q*7) 48 49 a31=u2(i+q*2) 50 a32=u2(i+q*5) 51 a33=u2(i+q*8) 52 53 g11 = (a22*a33-a23*a32) 54 g12 = (a13*a32-a33*a12) 55 g13 = (a12*a23-a22*a13) 56 57 g21 = (a23*a31-a21*a33) 58 g22 = (a11*a33-a31*a13) 59 g23 = (a13*a21-a23*a11) 60 61 g31 = (a21*a32-a22*a31) 62 g32 = (a12*a31-a32*a11) 63 g33 = (a11*a22-a21*a12) 64 65 jacmq = a11*g11+a21*g12+a31*g13 66 67C Rho 68 v1(i)=u3(i)*jacmq 69 70C RHS 71 v2(i)=u3(i)*jacmq 72 $ *dsqrt(u1(i+q*0)*u1(i+q*0) 73 $ +u1(i+q*1)*u1(i+q*1) 74 $ +u1(i+q*2)*u1(i+q*2)) 75 enddo 76 77 ierr=0 78 end 79C----------------------------------------------------------------------- 80 subroutine massf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 81 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 82 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 83C Apply mass operator 84C Input: u1,u2,q Output: v1,ierr 85 integer q,ierr 86 real*8 ctx(1) 87 real*8 u1(q) 88 real*8 u2(q) 89 real*8 v1(q) 90 91C Quadrature Point Loop 92 do i=1,q 93 v1(i)=u2(i)*u1(i) 94 enddo 95 96 ierr=0 97 end 98C----------------------------------------------------------------------- 99 subroutine diffsetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 100 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 101 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 102C Set up diffusion operator 103C Input: u1,u2,u3,q Output: v1,v2,ierr 104 integer q,ierr 105 real*8 ctx(1) 106 real*8 u1(3*q) 107 real*8 u2(9*q) 108 real*8 u3(q) 109 real*8 v1(6*q) 110 real*8 v2(q) 111 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 112 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 113 real*8 jacmq,scl 114 real*8 c(3),k(3) 115 116C Quadrature Point Loop 117 do i=1,q 118 pi = 3.14159265358979323846 119 120 c(1)=0. 121 c(2)=1. 122 c(3)=2. 123 k(1)=1. 124 k(2)=2. 125 k(3)=3. 126 127 a11=u2(i+q*0) 128 a12=u2(i+q*3) 129 a13=u2(i+q*6) 130 131 a21=u2(i+q*1) 132 a22=u2(i+q*4) 133 a23=u2(i+q*7) 134 135 a31=u2(i+q*2) 136 a32=u2(i+q*5) 137 a33=u2(i+q*8) 138 139 g11 = (a22*a33-a23*a32) 140 g12 = (a13*a32-a33*a12) 141 g13 = (a12*a23-a22*a13) 142 143 g21 = (a23*a31-a21*a33) 144 g22 = (a11*a33-a31*a13) 145 g23 = (a13*a21-a23*a11) 146 147 g31 = (a21*a32-a22*a31) 148 g32 = (a12*a31-a32*a11) 149 g33 = (a11*a22-a21*a12) 150 151 jacmq = a11*g11+a21*g12+a31*g13 152 153 scl = u3(i)/jacmq 154 155C Geometric factors 156C Stored in Voigt convention 157C 0 5 4 158C 5 1 3 159C 4 3 2 160 v1(i+0*q) = scl*(g11*g11+g12*g12+g13*g13) ! Grr 161 v1(i+1*q) = scl*(g21*g21+g22*g22+g23*g23) ! Gss 162 v1(i+2*q) = scl*(g31*g31+g32*g32+g33*g33) ! Gtt 163 v1(i+3*q) = scl*(g21*g31+g22*g32+g23*g33) ! Gst 164 v1(i+4*q) = scl*(g11*g31+g12*g32+g13*g33) ! Grt 165 v1(i+5*q) = scl*(g11*g21+g12*g22+g13*g23) ! Grs 166 167C RHS 168 v2(i) = u3(i)*jacmq*pi*pi 169 $ *dsin(pi*(c(1)+k(1)*u1(i+0*q))) 170 $ *dsin(pi*(c(2)+k(2)*u1(i+1*q))) 171 $ *dsin(pi*(c(3)+k(3)*u1(i+2*q))) 172 $ *(k(1)*k(1)+k(2)*k(2)+k(3)*k(3)) 173 174 enddo 175 176 ierr=0 177 end 178C----------------------------------------------------------------------- 179 subroutine diffusionf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 180 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 181 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 182C Apply diffusion operator 183C Input: u1,u2,q Output: v1,ierr 184 integer q,ierr 185 real*8 ctx(1) 186 real*8 u1(3*q) 187 real*8 u2(6*q) 188 real*8 v1(3*q) 189 190C Quadrature Point Loop 191 do i=1,q 192 v1(i+0*q)= 193 $ u2(i+0*q)*u1(i)+u2(i+5*q)*u1(i+q)+u2(i+4*q)*u1(i+2*q) 194 v1(i+1*q)= 195 $ u2(i+5*q)*u1(i)+u2(i+1*q)*u1(i+q)+u2(i+3*q)*u1(i+2*q) 196 v1(i+2*q)= 197 $ u2(i+4*q)*u1(i)+u2(i+3*q)*u1(i+q)+u2(i+2*q)*u1(i+2*q) 198 enddo 199 200 ierr=0 201 end 202C----------------------------------------------------------------------- 203 subroutine set_h2_as_rhoJac_GL(h2,bmq,nxq) 204C Set h2 as rhoJac 205C Input: bmq,nxq Output: h2 206 include 'SIZE' 207 real*8 h2(1),bmq(1) 208 209 common /ctmp77/ wd(lxd),zd(lxd) 210 integer e,i,L 211 212 call zwgl(zd,wd,nxq) ! nxq = number of points 213 214 q = 1.0 ! Later, this can be a function of position... 215 216 L = 0 217 do e=1,lelt 218 do i=1,nxq**ldim 219 L=L+1 220 h2(L) = q*bmq(L) 221 enddo 222 enddo 223 224 return 225 end 226C----------------------------------------------------------------------- 227 subroutine dist_fld_h1(e) 228C Set distance initial condition for BP1 229C Input: Output: e 230 include 'SIZE' 231 include 'TOTAL' 232 real*8 x,y,z 233 real*8 e(1) 234 235 n=lx1*ly1*lz1*nelt 236 237 do i=1,n 238 x=xm1(i,1,1,1) 239 y=ym1(i,1,1,1) 240 z=zm1(i,1,1,1) 241 242 e(i) = dsqrt(x*x+y*y+z*z) 243 enddo 244 245 call dsavg(e) ! This is requisite for random fields 246 247 return 248 end 249C----------------------------------------------------------------------- 250 subroutine sin_fld_h1(e) 251C Set sine initial condition for BP3 252C Input: Output: e 253 include 'SIZE' 254 include 'TOTAL' 255 real*8 x,y,z 256 real*8 e(1) 257 real*8 c(3),k(3) 258 259 n=lx1*ly1*lz1*nelt 260 pi = 3.14159265358979323846 261 262 c(1)=0. 263 c(2)=1. 264 c(3)=2. 265 k(1)=1. 266 k(2)=2. 267 k(3)=3. 268 269 do i=1,n 270 x=xm1(i,1,1,1) 271 y=ym1(i,1,1,1) 272 z=zm1(i,1,1,1) 273 274 e(i) = dsin(pi*(c(1)+k(1)*x)) 275 & *dsin(pi*(c(2)+k(2)*y)) 276 & *dsin(pi*(c(3)+k(3)*z)) 277 278 enddo 279 280 call dsavg(e) ! This is requisite for random fields 281 282 return 283 end 284C----------------------------------------------------------------------- 285 subroutine uservp(ix,iy,iz,eg) ! set variable properties 286 include 'SIZE' 287 include 'TOTAL' 288 include 'NEKUSE' 289 integer e,f,eg 290C e = gllel(eg) 291 292 udiff = 0.0 293 utrans = 0.0 294 295 return 296 end 297C----------------------------------------------------------------------- 298 subroutine userf(ix,iy,iz,eg) ! set acceleration term 299C 300C Note: this is an acceleration term, NOT a force! 301C Thus, ffx will subsequently be multiplied by rho(x,t). 302C 303 include 'SIZE' 304 include 'TOTAL' 305 include 'NEKUSE' 306 integer e,f,eg 307C e = gllel(eg) 308 309 ffx = 0.0 310 ffy = 0.0 311 ffz = 0.0 312 313 return 314 end 315C----------------------------------------------------------------------- 316 subroutine userq(i,j,k,eg) ! set source term 317 include 'SIZE' 318 include 'TOTAL' 319 include 'NEKUSE' 320 integer e,f,eg 321 e = gllel(eg) 322 323 qvol = 0 324 325 return 326 end 327C----------------------------------------------------------------------- 328 subroutine userbc(ix,iy,iz,f,eg) ! set up boundary conditions 329C NOTE ::: This subroutine MAY NOT be called by every process 330 include 'SIZE' 331 include 'TOTAL' 332 include 'NEKUSE' 333 integer e,f,eg 334 335 ux = 0.0 336 uy = 0.0 337 uz = 0.0 338 temp = 0.0 339 340 return 341 end 342C----------------------------------------------------------------------- 343 subroutine useric(ix,iy,iz,eg) ! set up initial conditions 344 include 'SIZE' 345 include 'TOTAL' 346 include 'NEKUSE' 347 integer e,f,eg 348 349 ux = 0.0 350 uy = 0.0 351 uz = 0.0 352 temp = 0.0 353 354 return 355 end 356C----------------------------------------------------------------------- 357 subroutine usrdat ! This routine to modify element vertices 358 include 'SIZE' 359 include 'TOTAL' 360 361 return 362 end 363C----------------------------------------------------------------------- 364 subroutine usrdat2 ! This routine to modify mesh coordinates 365 include 'SIZE' 366 include 'TOTAL' 367 368 x0 = 0 369 x1 = 1 370 call rescale_x(xm1,x0,x1) 371 call rescale_x(ym1,x0,x1) 372 call rescale_x(zm1,x0,x1) 373 374 param(59)=1 ! Force Nek to use the "deformed element" formulation 375 376 return 377 end 378C----------------------------------------------------------------------- 379 subroutine usrdat3 380 include 'SIZE' 381 include 'TOTAL' 382 383 return 384 end 385C----------------------------------------------------------------------- 386 subroutine xmask1 (r1,h2,nel) 387C Apply zero Dirichlet boundary conditions 388C Input: h2,nel Output: r1 389 include 'SIZE' 390 include 'TOTAL' 391 real*8 r1(1),h2(1) 392 393 n=nx1*ny1*nz1*nel 394 do i=1,n 395 r1(i)=r1(i)*h2(i) 396 enddo 397 398 return 399 end 400C----------------------------------------------------------------------- 401 function glrdif(x,y,n) 402C Compute Linfty norm of (x-y) 403C Input: x,y Output: n 404 real*8 x(n),y(n) 405 406 dmx=0 407 xmx=0 408 ymx=0 409 410 do i=1,n 411 diff=abs(x(i)-y(i)) 412 dmx =max(dmx,diff) 413 xmx =max(xmx,x(i)) 414 ymx =max(ymx,y(i)) 415 enddo 416 417 xmx = max(xmx,ymx) 418 dmx = glmax(dmx,1) ! max across processors 419 xmx = glmax(xmx,1) 420 421 if (xmx.gt.0) then 422 glrdif = dmx/xmx 423 else 424 glrdif = -dmx ! Negative indicates something strange 425 endif 426 427 return 428 end 429C----------------------------------------------------------------------- 430 subroutine loc_grad3(ur,us,ut,u,N,D,Dt) 431C 3D transpose of local gradient 432C Input: u,N,D,Dt Output: ur,us,ut 433 real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N) 434 real*8 u (0:N,0:N,0:N) 435 real*8 D (0:N,0:N),Dt(0:N,0:N) 436 437 m1 = N+1 438 m2 = m1*m1 439 440 call mxm(D ,m1,u(0,0,0),m1,ur,m2) 441 do k=0,N 442 call mxm(u(0,0,k),m1,Dt,m1,us(0,0,k),m1) 443 enddo 444 call mxm(u(0,0,0),m2,Dt,m1,ut,m1) 445 446 return 447 end 448c----------------------------------------------------------------------- 449 subroutine loc_grad3t(u,ur,us,ut,N,D,Dt,w) 450C 3D transpose of local gradient 451C Input: ur,us,ut,N,D,Dt Output: u 452 real*8 u (0:N,0:N,0:N) 453 real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N) 454 real*8 D (0:N,0:N),Dt(0:N,0:N) 455 real*8 w (0:N,0:N,0:N) 456 457 m1 = N+1 458 m2 = m1*m1 459 m3 = m1*m1*m1 460 461 call mxm(Dt,m1,ur,m1,u(0,0,0),m2) 462 do k=0,N 463 call mxm(us(0,0,k),m1,D ,m1,w(0,0,k),m1) 464 enddo 465 call add2(u(0,0,0),w,m3) 466 call mxm(ut,m2,D ,m1,w,m1) 467 call add2(u(0,0,0),w,m3) 468 469 return 470 end 471C----------------------------------------------------------------------- 472 subroutine geodatq(gf,bmq,w3mq,nxq) 473C Routine to generate elemental geometric matrices on mesh 1 474C (Gauss-Legendre Lobatto mesh). 475 include 'SIZE' 476 include 'TOTAL' 477 478 parameter (lg=3+3*(ldim-2),lzq=lx1+1,lxyd=lzq**ldim) 479 480 real*8 gf(lg,nxq**ldim,lelt),bmq(nxq**ldim,lelt),w3mq(nxq,nxq,nxq) 481 482 common /ctmp1/ xr(lxyd),xs(lxyd),xt(lxyd) 483 common /sxrns/ yr(lxyd),ys(lxyd),yt(lxyd) 484 $ , zr(lxyd),zs(lxyd),zt(lxyd) 485 486 common /ctmp77/ wd(lxd),zd(lxd) 487 common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq) 488 489 integer e 490 real*8 tmp(lxyd) 491 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 492 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 493 real*8 jacmq 494 495 if (nxq.gt.lzq) call exitti('ABORT: recompile with lzq=$',nxq) 496 497 call zwgl (zd,wd,lzq) ! nxq = number of points 498 call gen_dgl (dxmq,dxtmq,lzq,lzq,tmp) 499 500 do k=1,nxq 501 do j=1,nxq 502 do i=1,nxq 503 w3mq(i,j,k) = wd(i)*wd(j)*wd(k) 504 enddo 505 enddo 506 enddo 507 508 nxyzq = nxq**ldim 509 nxqm1 = lzq-1 510 511 do e=1,nelt 512 call intp_rstd (tmp,xm1(1,1,1,e),lx1,lzq,if3d,0) ! 0-->Fwd interpolation 513 call loc_grad3 (xr,xs,xt,tmp,nxqm1,dxmq,dxtmq) 514 515 call intp_rstd (tmp,ym1(1,1,1,e),lx1,lzq,if3d,0) 516 call loc_grad3 (yr,ys,yt,tmp,nxqm1,dxmq,dxtmq) 517 518 call intp_rstd (tmp,zm1(1,1,1,e),lx1,lzq,if3d,0) 519 call loc_grad3 (zr,zs,zt,tmp,nxqm1,dxmq,dxtmq) 520 521 do i=1,nxyzq 522 a11 = xr(i) 523 a12 = xs(i) 524 a13 = xt(i) 525 526 a21 = yr(i) 527 a22 = ys(i) 528 a23 = yt(i) 529 530 a31 = zr(i) 531 a32 = zs(i) 532 a33 = zt(i) 533 534 g11 = (a22*a33-a23*a32) 535 g12 = (a13*a32-a33*a12) 536 g13 = (a12*a23-a22*a13) 537 538 g21 = (a23*a31-a21*a33) 539 g22 = (a11*a33-a31*a13) 540 g23 = (a13*a21-a23*a11) 541 542 g31 = (a21*a32-a22*a31) 543 g32 = (a12*a31-a32*a11) 544 g33 = (a11*a22-a21*a12) 545 546 jacmq = a11*g11+a21*g12+a31*g13 547 548 bmq(i,e) = w3mq(i,1,1)*jacmq 549 scale = w3mq(i,1,1)/jacmq 550 551 gf(1,i,e) = scale*(g11*g11+g12*g12+g13*g13) ! Grr 552 gf(2,i,e) = scale*(g11*g21+g12*g22+g13*g23) ! Grs 553 gf(3,i,e) = scale*(g11*g31+g12*g32+g13*g33) ! Grt 554 gf(4,i,e) = scale*(g21*g21+g22*g22+g23*g23) ! Gss 555 gf(5,i,e) = scale*(g21*g31+g22*g32+g23*g33) ! Gst 556 gf(6,i,e) = scale*(g31*g31+g32*g32+g33*g33) ! Gtt 557 558 enddo 559 enddo 560 561 return 562 end 563C----------------------------------------------------------------------- 564 subroutine setprecn_bp1 (d,h1,h2) 565C Generate diagonal preconditioner for Helmholtz operator 566C Input: h1,h2 Output: d 567 include 'SIZE' 568 include 'TOTAL' 569 570 parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2)) 571 572 real*8 d(lx1,ly1,lz1,lelt),h1(lxyz,lelt),h2(lxyz,lelt) 573 integer e 574 575 real*8 gf(lg,lx1,ly1,lz1,lelt) ! Equivalence new gf() data 576 equivalence (gf,g1m1) ! layout to g1m1...g6m1 577 578 real*8 ysm1(ly1) 579 580 nel = nelfld(ifield) 581 n = nel*lx1*ly1*lz1 582 nxyz = lx1*ly1*lz1 583 584 call copy (d,bm1,n) ! Mass matrix preconditioning full mass matrix 585 call dssum (d,nx1,ny1,nz1) 586 call invcol1 (d,n) 587 return 588 589 call dsset(lx1,ly1,lz1) 590 591 do 1000 e=1,nel 592 593 call rzero(d(1,1,1,e),nxyz) 594 595 do 320 iz=1,lz1 596 do 320 iy=1,ly1 597 do 320 ix=1,lx1 598 do 320 iq=1,lx1 599 d(ix,iy,iz,e) = d(ix,iy,iz,e) 600 $ + gf(1,iq,iy,iz,e) * dxm1(iq,ix)**2 601 $ + gf(2,ix,iq,iz,e) * dxm1(iq,iy)**2 602 $ + gf(3,ix,iy,iq,e) * dxm1(iq,iz)**2 603 320 continue 604C 605C Add cross terms if element is deformed. 606C 607 if (lxyz.gt.0) then 608 609 do i2=1,ly1,ly1-1 610 do i1=1,lx1,lx1-1 611 d(1,i1,i2,e) = d(1,i1,i2,e) 612 $ + gf(4,1,i1,i2,e) * dxtm1(1,1)*dytm1(i1,i1) 613 $ + gf(5,1,i1,i2,e) * dxtm1(1,1)*dztm1(i2,i2) 614 d(lx1,i1,i2,e) = d(lx1,i1,i2,e) 615 $ + gf(4,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dytm1(i1,i1) 616 $ + gf(5,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dztm1(i2,i2) 617 d(i1,1,i2,e) = d(i1,1,i2,e) 618 $ + gf(4,i1,1,i2,e) * dytm1(1,1)*dxtm1(i1,i1) 619 $ + gf(6,i1,1,i2,e) * dytm1(1,1)*dztm1(i2,i2) 620 d(i1,ly1,i2,e) = d(i1,ly1,i2,e) 621 $ + gf(4,i1,ly1,i2,e) * dytm1(ly1,ly1)*dxtm1(i1,i1) 622 $ + gf(6,i1,ly1,i2,e) * dytm1(ly1,ly1)*dztm1(i2,i2) 623 d(i1,i2,1,e) = d(i1,i2,1,e) 624 $ + gf(5,i1,i2,1,e) * dztm1(1,1)*dxtm1(i1,i1) 625 $ + gf(6,i1,i2,1,e) * dztm1(1,1)*dytm1(i2,i2) 626 d(i1,i2,lz1,e) = d(i1,i2,lz1,e) 627 $ + gf(5,i1,i2,lz1,e) * dztm1(lz1,lz1)*dxtm1(i1,i1) 628 $ + gf(6,i1,i2,lz1,e) * dztm1(lz1,lz1)*dytm1(i2,i2) 629 630 enddo 631 enddo 632 endif 633 634 do i=1,lxyz 635 d(i,1,1,e)=d(i,1,1,e)*h1(i,e)+h2(i,e)*bm1(i,1,1,e) 636 enddo 637 638 1000 continue ! element loop 639 640C If axisymmetric, add a diagonal term in the radial direction (ISD=2) 641 642 if (ifaxis.and.(isd.eq.2)) then 643 do 1200 e=1,nel 644 if (ifrzer(e)) call mxm(ym1(1,1,1,e),lx1,datm1,ly1,ysm1,1) 645 k=0 646 do 1190 j=1,ly1 647 do 1190 i=1,lx1 648 k=k+1 649 if (ym1(i,j,1,e).ne.0.) then 650 term1 = bm1(i,j,1,e)/ym1(i,j,1,e)**2 651 if (ifrzer(e)) then 652 term2 = wxm1(i)*wam1(1)*dam1(1,j) 653 $ *jacm1(i,1,1,e)/ysm1(i) 654 else 655 term2 = 0. 656 endif 657 d(i,j,1,e) = d(i,j,1,e) + h1(k,e)*(term1+term2) 658 endif 659 1190 continue 660 1200 continue 661 662 endif 663 call dssum (d,nx1,ny1,nz1) 664 call invcol1 (d,n) 665 666 if (nio.eq.0) write(6,1) n,d(1,1,1,1),h1(1,1),h2(1,1),bm1(1,1,1,1) 667 1 format(i9,1p4e12.4,' diag prec') 668 669 return 670 end 671C----------------------------------------------------------------------- 672 subroutine setprecn_bp3 (d,h1,h2) 673C Generate dummy diagonal preconditioner for Helmholtz operator 674C Input: h1,h2 Output: d 675 include 'SIZE' 676 include 'TOTAL' 677 678 parameter (n=lx1*ly1*lz1*lelt) 679 real*8 d(n),h1(n),h2(n) 680 681 call rone (d,n) 682 683 return 684 end 685C----------------------------------------------------------------------- 686 subroutine userchk 687 include 'SIZE' 688 include 'TOTAL' 689 690 integer bp 691 692 call get_bp(bp) 693 694 if (bp==1) then 695 if (istep.gt.0) call bp1 696 elseif (bp==3) then 697 if (istep.gt.0) call bp3 698 else 699 write(6,*) "INVALID BP SPECIFICED" 700 endif 701 702 return 703 end 704C----------------------------------------------------------------------- 705 subroutine bp1 706C Solution to BP1 using libCEED 707 include 'SIZE' 708 include 'TOTAL' 709 include 'CTIMER' ! ifsync 710 include 'FDMH1' 711 include 'ceedf.h' 712 713 parameter (lzq=lx1+1) 714 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 715 common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq) 716 717 parameter (lt=lx1*ly1*lz1*lelt) 718 parameter (ld=lxd*lyd*lzd*lelt) 719 common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt) 720 common /vcrny/ e1(lt) 721 common /vcrvh/ h1(lt),h2(lx*lelt),pap(3) 722 real*8 coords(ldim*lx*lelt) 723 724 logical ifh3 725 integer*8 nnode 726 integer ceed,err,test 727 character*64 spec 728 729 integer p,q,ncompx,ncompu,enode,lnode 730 integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs 731 integer stridesu(3),stridesx(3),stridesw(3) 732 integer erstrctu,erstrctx,erstrctw 733 integer basisu,basisx 734 integer qf_mass,qf_setup 735 integer op_mass,op_setup 736 real*8 x,y,z 737 integer*8 offset 738 739 external massf,masssetupf 740 741 ifield = 1 742 nxq = nx1+1 743 n = nx1*ny1*nz1*nelt 744 745 ifsync = .false. 746 747C Set up coordinates 748 ii=0 749 do j=0,nelt-1 750 do i=1,lx 751 ii=ii+1 752 x = xm1(ii,1,1,1) 753 y = ym1(ii,1,1,1) 754 z = zm1(ii,1,1,1) 755 coords(i+0*lx+3*j*lx)=x 756 coords(i+1*lx+3*j*lx)=y 757 coords(i+2*lx+3*j*lx)=z 758 enddo 759 enddo 760 761C Init ceed library 762 call get_spec(spec) 763 call ceedinit(trim(spec)//char(0),ceed,err) 764 765 call get_test(test) 766 767C Set up Nek geometry data 768 call geodatq (gf,bmq,w3mq,nxq) ! Set up gf() arrays 769 call set_h2_as_rhoJac_GL(h2,bmq,nxq) 770 771C Set up true soln 772 call dist_fld_h1 (e1) 773 call copy (h1,e1,n) ! Save exact soln in h1 774 775C Set up solver parameters 776 tol = 1e-10 777 param(22) = tol 778 maxit = 100 779 780 call nekgsync() 781 782C Create ceed basis for mesh and computation 783 p=nx1 784 q=p+1 785 ncompu=1 786 ncompx=ldim 787 call ceedbasiscreatetensorh1lagrange(ceed,ndim,ndim,p,q, 788 $ ceed_gauss,basisx,err) 789 call ceedbasiscreatetensorh1lagrange(ceed,ndim,ncompu,p,q, 790 $ ceed_gauss,basisu,err) 791 792C Create ceed element restrictions for mesh and computation 793 enode=p**ldim 794 lnode=enode*nelt*ncompu 795 stridesx=[1,enode,enode*ldim] 796 call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ldim, 797 $ ldim*lnode,stridesx,erstrctx,err) 798 stridesu=[1,enode,enode*ncompu] 799 call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ncompu, 800 $ ncompu*lnode,stridesu,erstrctu,err) 801 call ceedelemrestrictioncreatestrided(ceed,nelt,q**ldim, 802 $ 1,nelt*q**ldim,ceed_strides_backend,erstrctw,err) 803 804C Create ceed vectors 805 call ceedvectorcreate(ceed,lnode,vec_p1,err) 806 call ceedvectorcreate(ceed,lnode,vec_ap1,err) 807 call ceedvectorcreate(ceed,lnode,vec_rhs,err) 808 call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err) 809 call ceedvectorcreate(ceed,nelt*q**ldim,vec_qdata,err) 810 811 offset=0 812 call ceedvectorsetarray(vec_coords,ceed_mem_host, 813 $ ceed_use_pointer,coords,offset,err) 814 815C Create ceed qfunctions for masssetupf and massf 816 call ceedqfunctioncreateinterior(ceed,1,masssetupf, 817 $ EXAMPLE_DIR 818 $ //'bps/bps.h:masssetupf',qf_setup,err) 819 call ceedqfunctionaddinput(qf_setup,'x',ncompx, 820 $ ceed_eval_interp,err) 821 call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim, 822 $ ceed_eval_grad,err) 823 call ceedqfunctionaddinput(qf_setup,'weight',ncompu, 824 $ ceed_eval_weight,err) 825 call ceedqfunctionaddoutput(qf_setup,'qdata',ncompu, 826 $ ceed_eval_none,err) 827 call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu, 828 $ ceed_eval_interp,err) 829 830 call ceedqfunctioncreateinterior(ceed,1,massf, 831 $ EXAMPLE_DIR 832 $ //'bps/bps.h:massf',qf_mass,err) 833 call ceedqfunctionaddinput(qf_mass,'u',ncompu, 834 $ ceed_eval_interp,err) 835 call ceedqfunctionaddinput(qf_mass,'qdata',ncompu, 836 $ ceed_eval_none,err) 837 call ceedqfunctionaddoutput(qf_mass,'v',ncompu, 838 $ ceed_eval_interp,err) 839 840C Create ceed operators 841 call ceedoperatorcreate(ceed,qf_setup, 842 $ ceed_qfunction_none,ceed_qfunction_none,op_setup,err) 843 call ceedoperatorsetfield(op_setup,'x',erstrctx, 844 $ basisx,ceed_vector_active,err) 845 call ceedoperatorsetfield(op_setup,'dx',erstrctx, 846 $ basisx,ceed_vector_active,err) 847 call ceedoperatorsetfield(op_setup,'weight', 848 $ ceed_elemrestriction_none,basisx,ceed_vector_none,err) 849 call ceedoperatorsetfield(op_setup,'qdata',erstrctw, 850 $ ceed_basis_collocated,ceed_vector_active,err) 851 call ceedoperatorsetfield(op_setup,'rhs',erstrctu, 852 $ basisu,vec_rhs,err) 853 854 call ceedoperatorcreate(ceed,qf_mass, 855 $ ceed_qfunction_none,ceed_qfunction_none,op_mass,err) 856 call ceedoperatorsetfield(op_mass,'u',erstrctu, 857 $ basisu,ceed_vector_active,err) 858 call ceedoperatorsetfield(op_mass,'qdata',erstrctw, 859 $ ceed_basis_collocated,vec_qdata,err) 860 call ceedoperatorsetfield(op_mass,'v',erstrctu, 861 $ basisu,ceed_vector_active,err) 862 863C Compute setup data 864 call ceedvectorsetarray(vec_rhs,ceed_mem_host, 865 $ ceed_use_pointer,r1,offset,err) 866 call ceedoperatorapply(op_setup,vec_coords,vec_qdata, 867 $ ceed_request_immediate,err) 868 869C Set up true RHS 870 call dssum (r1,nx1,ny1,nz1) ! r1 871 872C Set up algebraic RHS with libCEED 873 call ceedvectorsetarray(vec_p1,ceed_mem_host, 874 $ ceed_use_pointer,h1,offset,err) 875 call ceedvectorsetarray(vec_ap1,ceed_mem_host, 876 $ ceed_use_pointer,r2,offset,err) 877 call ceedoperatorapply(op_mass,vec_p1,vec_ap1, 878 $ ceed_request_immediate,err) ! r2 = A_ceed*h1 879 call dssum (r2,nx1,ny1,nz1) 880 881C Set up algebraic RHS with Nek5000 882 call axhm1 (pap,r3,h1,h1,h2,'bp1') ! r3 = A_nek5k*h1 883 call dssum (r3,nx1,ny1,nz1) 884 885 call nekgsync() 886 887C Solve true RHS 888 if (nid.eq.0) write (6,*) "libCEED true RHS" 889 tstart = dnekclock() 890 call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_mass, 891 $ vec_p1,vec_ap1,maxit,'bp1') 892 tstop = dnekclock() 893 894C Output 895 telaps = (tstop-tstart) 896 maxits = maxit 897 er1 = glrdif(u1,e1,n) 898 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 899 900 if (test.eq.1.and.nid.eq.0) then 901 if (maxit>=100) then 902 write(6,*) "UNCONVERGED CG" 903 endif 904 if (dabs(er1)>5e-3) then 905 write(6,*) "ERROR IS TOO LARGE" 906 endif 907 endif 908 909 nx = nx1-1 910 nnode = nelgt ! nnodes 911 nnode = nnode*(nx**ldim) ! nnodes 912 nppp = nnode/np ! nnodes/proc 913 914 dofps = nnode/telaps ! DOF/sec - scalar form 915 titers = telaps/maxits ! time per iteration 916 tppp_s = titers/nppp ! time per iteraton per local point 917 918 if (nid.eq.0) write(6,1) 'case scalar:' 919 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 920 921C Solve libCEED algebraic RHS 922 if (nid.eq.0) write (6,*) "libCEED algebraic RHS" 923 maxit = 100 924 tstart = dnekclock() 925 call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_mass, 926 $ vec_p1,vec_ap1,maxit,'bp1') 927 tstop = dnekclock() 928 929C Output 930 telaps = (tstop-tstart) 931 maxits = maxit 932 er1 = glrdif(u1,e1,n) 933 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 934 935 if (test.eq.1.and.nid.eq.0) then 936 if (maxit>=100) then 937 write(6,*) "UNCONVERGED CG" 938 endif 939 if (dabs(er1)>1e-5) then 940 write(6,*) "ERROR IS TOO LARGE" 941 endif 942 endif 943 944 nx = nx1-1 945 nnode = nelgt ! nnodes 946 nnode = nnode*(nx**ldim) ! nnodes 947 nppp = nnode/np ! nnodes/proc 948 949 dofps = nnode/telaps ! DOF/sec - scalar form 950 titers = telaps/maxits ! time per iteration 951 tppp_s = titers/nppp ! time per iteraton per local point 952 953 if (nid.eq.0) write(6,1) 'case scalar:' 954 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 955 956C Solve Nek5000 algebraic RHS 957 if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS" 958 maxit = 100 959 tstart = dnekclock() 960 call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_mass, 961 $ vec_p1,vec_ap1,maxit,'bp1') 962 tstop = dnekclock() 963 964C Output 965 telaps = (tstop-tstart) 966 maxits = maxit 967 er1 = glrdif(u1,e1,n) 968 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 969 970 if (test.eq.1.and.nid.eq.0) then 971 if (maxit>=100) then 972 write(6,*) "UNCONVERGED CG" 973 endif 974 if (dabs(er1)>1e-5) then 975 write(6,*) "ERROR IS TOO LARGE" 976 endif 977 endif 978 979 nx = nx1-1 980 nnode = nelgt ! nnodes 981 nnode = nnode*(nx**ldim) ! nnodes 982 nppp = nnode/np ! nnodes/proc 983 984 dofps = nnode/telaps ! DOF/sec - scalar form 985 titers = telaps/maxits ! time per iteration 986 tppp_s = titers/nppp ! time per iteraton per local point 987 988 if (nid.eq.0) write(6,1) 'case scalar:' 989 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 990 991 1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5) 992 3 format(i3,i9,e12.4,1x,a8,i9) 993 994C Destroy ceed handles 995 call ceedvectordestroy(vec_p1,err) 996 call ceedvectordestroy(vec_ap1,err) 997 call ceedvectordestroy(vec_rhs,err) 998 call ceedvectordestroy(vec_qdata,err) 999 call ceedvectordestroy(vec_coords,err) 1000 call ceedelemrestrictiondestroy(erstrctu,err) 1001 call ceedelemrestrictiondestroy(erstrctx,err) 1002 call ceedelemrestrictiondestroy(erstrctw,err) 1003 call ceedbasisdestroy(basisu,err) 1004 call ceedbasisdestroy(basisx,err) 1005 call ceedqfunctiondestroy(qf_setup,err) 1006 call ceedqfunctiondestroy(qf_mass,err) 1007 call ceedoperatordestroy(op_setup,err) 1008 call ceedoperatordestroy(op_mass,err) 1009 call ceeddestroy(ceed,err) 1010 1011 return 1012 end 1013C----------------------------------------------------------------------- 1014 subroutine bp3 1015C Solution to BP3 using libCEED 1016 include 'SIZE' 1017 include 'TOTAL' 1018 include 'CTIMER' ! ifsync 1019 include 'FDMH1' 1020 include 'ceedf.h' 1021 1022 parameter (lzq=lx1+1) 1023 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 1024 common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq) 1025 1026 parameter (lt=lx1*ly1*lz1*lelt) 1027 parameter (ld=lxd*lyd*lzd*lelt) 1028 common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt) 1029 common /vcrny/ e1(lt) 1030 common /vcrvh/ h1(lt),h2(ld),pap(3) 1031 real*8 coords(ldim*lx*lelt) 1032 1033 logical ifh3 1034 integer*8 nnode 1035 integer ceed,err,test 1036 character*64 spec 1037 1038 integer p,q,ncompx,ncompu,enode,lnode 1039 integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs 1040 integer stridesu(3),stridesx(3),stridesw(3) 1041 integer erstrctu,erstrctx,erstrctw 1042 integer basisu,basisx 1043 integer qf_diffusion,qf_setup 1044 integer op_diffusion,op_setup 1045 integer ii,i,ngeo 1046 real*8 x,y,z 1047 integer*8 offset 1048 1049 external diffusionf,diffsetupf 1050 1051 ifield = 1 1052 nxq = nx1+1 1053 n = nx1*ny1*nz1*nelt 1054 1055 ifsync = .false. 1056 1057C Set up coordinates and mask 1058 ii=0 1059 do j=0,nelt-1 1060 do i=1,lx 1061 ii=ii+1 1062 x = xm1(ii,1,1,1) 1063 y = ym1(ii,1,1,1) 1064 z = zm1(ii,1,1,1) 1065 coords(i+0*lx+3*j*lx)=x 1066 coords(i+1*lx+3*j*lx)=y 1067 coords(i+2*lx+3*j*lx)=z 1068 if ( x.eq.0.or.x.eq.1 1069 $ .or.y.eq.0.or.y.eq.1 1070 $ .or.z.eq.0.or.z.eq.1 ) then 1071 h2(ii)=0. 1072 else 1073 h2(ii)=1. 1074 endif 1075 enddo 1076 enddo 1077 1078C Init ceed library 1079 call get_spec(spec) 1080 call ceedinit(trim(spec)//char(0),ceed,err) 1081 1082 call get_test(test) 1083 1084C Set up Nek geometry data 1085 call geodatq (gf,bmq,w3mq,nxq) ! Set up gf() arrays 1086 1087C Set up true soln 1088 call sin_fld_h1 (e1) 1089 call xmask1 (e1,h2,nelt) 1090 call copy (h1,e1,n) ! Save exact soln in h1 1091 1092C Set up solver parameters 1093 tol = 1e-10 1094 param(22) = tol 1095 maxit = 100 1096 1097 call nekgsync() 1098 1099C Create ceed basis for mesh and computation 1100 p=nx1 1101 q=p+1 1102 ncompu=1 1103 ncompx=ldim 1104 call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompx,p,q, 1105 $ ceed_gauss,basisx,err) 1106 call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompu,p,q, 1107 $ ceed_gauss,basisu,err) 1108 1109C Create ceed element restrictions for mesh and computation 1110 enode=p**ldim 1111 lnode=enode*nelt*ncompu 1112 ngeo=(ldim*(ldim+1))/2 1113 stridesx=[1,enode,enode*ldim] 1114 call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ldim, 1115 $ ldim*lnode,stridesx,erstrctx,err) 1116 stridesu=[1,enode,enode*ncompu] 1117 call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ncompu, 1118 $ ncompu*lnode,stridesu,erstrctu,err) 1119 stridesw=[1,q**ldim,ngeo*q**ldim] 1120 call ceedelemrestrictioncreatestrided(ceed,nelt,q**ldim, 1121 $ ngeo,nelt*q**ldim*ngeo,stridesw,erstrctw,err) 1122 1123C Create ceed vectors 1124 call ceedvectorcreate(ceed,lnode,vec_p1,err) 1125 call ceedvectorcreate(ceed,lnode,vec_ap1,err) 1126 call ceedvectorcreate(ceed,lnode,vec_rhs,err) 1127 call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err) 1128 call ceedvectorcreate(ceed,nelt*ngeo*q**ldim,vec_qdata,err) 1129 1130 offset=0 1131 call ceedvectorsetarray(vec_coords,ceed_mem_host, 1132 $ ceed_use_pointer,coords,offset,err) 1133 1134C Create ceed qfunctions for diffsetupf and diffusionf 1135 call ceedqfunctioncreateinterior(ceed,1,diffsetupf, 1136 $ EXAMPLE_DIR 1137 $ //'bps/bps.h:diffsetupf'//char(0),qf_setup,err) 1138 call ceedqfunctionaddinput(qf_setup,'x',ncompx, 1139 $ ceed_eval_interp,err) 1140 call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim, 1141 $ ceed_eval_grad,err) 1142 call ceedqfunctionaddinput(qf_setup,'weight',ncompu, 1143 $ ceed_eval_weight,err) 1144 call ceedqfunctionaddoutput(qf_setup,'qdata',ngeo, 1145 $ ceed_eval_none,err) 1146 call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu, 1147 $ ceed_eval_interp,err) 1148 1149 call ceedqfunctioncreateinterior(ceed,1,diffusionf, 1150 $ EXAMPLE_DIR 1151 $ //'bps/bps.h:diffusionf'//char(0),qf_diffusion,err) 1152 call ceedqfunctionaddinput(qf_diffusion,'u',ncompu*ldim, 1153 $ ceed_eval_grad,err) 1154 call ceedqfunctionaddinput(qf_diffusion,'qdata',ngeo, 1155 $ ceed_eval_none,err) 1156 call ceedqfunctionaddoutput(qf_diffusion,'v',ncompu*ldim, 1157 $ ceed_eval_grad,err) 1158 1159C Create ceed operators 1160 call ceedoperatorcreate(ceed,qf_setup, 1161 $ ceed_qfunction_none,ceed_qfunction_none,op_setup,err) 1162 call ceedoperatorsetfield(op_setup,'x',erstrctx, 1163 $ basisx,ceed_vector_active,err) 1164 call ceedoperatorsetfield(op_setup,'dx',erstrctx, 1165 $ basisx,ceed_vector_active,err) 1166 call ceedoperatorsetfield(op_setup,'weight', 1167 $ ceed_elemrestriction_none,basisx,ceed_vector_none,err) 1168 call ceedoperatorsetfield(op_setup,'qdata',erstrctw, 1169 $ ceed_basis_collocated,ceed_vector_active,err) 1170 call ceedoperatorsetfield(op_setup,'rhs',erstrctu, 1171 $ basisu,vec_rhs,err) 1172 1173 call ceedoperatorcreate(ceed,qf_diffusion, 1174 $ ceed_qfunction_none,ceed_qfunction_none,op_diffusion,err) 1175 call ceedoperatorsetfield(op_diffusion,'u',erstrctu, 1176 $ basisu,ceed_vector_active,err) 1177 call ceedoperatorsetfield(op_diffusion,'qdata',erstrctw, 1178 $ ceed_basis_collocated,vec_qdata,err) 1179 call ceedoperatorsetfield(op_diffusion,'v',erstrctu, 1180 $ basisu,ceed_vector_active,err) 1181 1182C Compute setup data 1183 call ceedvectorsetarray(vec_rhs,ceed_mem_host, 1184 $ ceed_use_pointer,r1,offset,err) 1185 call ceedoperatorapply(op_setup,vec_coords,vec_qdata, 1186 $ ceed_request_immediate,err) 1187 1188C Set up true RHS 1189 call dssum (r1,nx1,ny1,nz1) ! r1 1190 call xmask1 (r1,h2,nelt) 1191 1192C Set up algebraic RHS with libCEED 1193 call ceedvectorsetarray(vec_p1,ceed_mem_host, 1194 $ ceed_use_pointer,h1,offset,err) 1195 call ceedvectorsetarray(vec_ap1,ceed_mem_host, 1196 $ ceed_use_pointer,r2,offset,err) 1197 call ceedoperatorapply(op_diffusion,vec_p1,vec_ap1, 1198 $ ceed_request_immediate,err) ! r2 = A_ceed*h1 1199 call dssum (r2,nx1,ny1,nz1) 1200 call xmask1 (r2,h2,nelt) 1201 1202C Set up algebraic RHS with Nek5000 1203 call axhm1 (pap,r3,h1,h1,h2,'bp3') ! r3 = A_nek5k*h1 1204 call dssum (r3,nx1,ny1,nz1) 1205 call xmask1 (r3,h2,nelt) 1206 1207 call nekgsync() 1208 1209C Solve true RHS 1210 if (nid.eq.0) write (6,*) "libCEED true RHS" 1211 tstart = dnekclock() 1212 call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1213 $ vec_p1,vec_ap1,maxit,'bp3') 1214 tstop = dnekclock() 1215 1216C Output 1217 telaps = (tstop-tstart) 1218 maxits = maxit 1219 er1 = glrdif(u1,e1,n) 1220 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1221 1222 if (test.eq.1.and.nid.eq.0) then 1223 if (maxit>=100) then 1224 write(6,*) "UNCONVERGED CG" 1225 endif 1226 if (dabs(er1)>1e-3) then 1227 write(6,*) "ERROR IS TOO LARGE" 1228 endif 1229 endif 1230 1231 nx = nx1-1 1232 nnode = nelgt ! nnodes 1233 nnode = nnode*(nx**ldim) ! nnodes 1234 nppp = nnode/np ! nnodes/proc 1235 1236 dofps = nnode/telaps ! DOF/sec - scalar form 1237 titers = telaps/maxits ! time per iteration 1238 tppp_s = titers/nppp ! time per iteraton per local point 1239 1240 if (nid.eq.0) write(6,1) 'case scalar:' 1241 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 1242 1243C Solve libCEED algebraic RHS 1244 if (nid.eq.0) write (6,*) "libCEED algebraic RHS" 1245 maxit = 100 1246 tstart = dnekclock() 1247 call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1248 $ vec_p1,vec_ap1,maxit,'bp3') 1249 tstop = dnekclock() 1250 1251C Output 1252 telaps = (tstop-tstart) 1253 maxits = maxit 1254 er1 = glrdif(u1,e1,n) 1255 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1256 1257 if (test.eq.1.and.nid.eq.0) then 1258 if (maxit>=100) then 1259 write(6,*) "UNCONVERGED CG" 1260 endif 1261 if (dabs(er1)>1e-9) then 1262 write(6,*) "ERROR IS TOO LARGE" 1263 endif 1264 endif 1265 1266 nx = nx1-1 1267 nnode = nelgt ! nnodes 1268 nnode = nnode*(nx**ldim) ! nnodes 1269 nppp = nnode/np ! nnodes/proc 1270 1271 dofps = nnode/telaps ! DOF/sec - scalar form 1272 titers = telaps/maxits ! time per iteration 1273 tppp_s = titers/nppp ! time per iteraton per local point 1274 1275 if (nid.eq.0) write(6,1) 'case scalar:' 1276 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 1277 1278C Solve Nek5000 algebraic RHS 1279 if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS" 1280 maxit = 100 1281 tstart = dnekclock() 1282 call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1283 $ vec_p1,vec_ap1,maxit,'bp3') 1284 tstop = dnekclock() 1285 1286C Output 1287 telaps = (tstop-tstart) 1288 maxits = maxit 1289 er1 = glrdif(u1,e1,n) 1290 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1291 1292 if (test.eq.1.and.nid.eq.0) then 1293 if (maxit>=100) then 1294 write(6,*) "UNCONVERGED CG" 1295 endif 1296 if (dabs(er1)>1e-9) then 1297 write(6,*) "ERROR IS TOO LARGE" 1298 endif 1299 endif 1300 1301 nx = nx1-1 1302 nnode = nelgt ! nnodes 1303 nnode = nnode*(nx**ldim) ! nnodes 1304 nppp = nnode/np ! nnodes/proc 1305 1306 dofps = nnode/telaps ! DOF/sec - scalar form 1307 titers = telaps/maxits ! time per iteration 1308 tppp_s = titers/nppp ! time per iteraton per local point 1309 1310 if (nid.eq.0) write(6,1) 'case scalar:' 1311 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 1312 1313 1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5) 1314 3 format(i3,i9,e12.4,1x,a8,i9) 1315 1316C Destroy ceed handles 1317 call ceedvectordestroy(vec_p1,err) 1318 call ceedvectordestroy(vec_ap1,err) 1319 call ceedvectordestroy(vec_rhs,err) 1320 call ceedvectordestroy(vec_qdata,err) 1321 call ceedvectordestroy(vec_coords,err) 1322 call ceedelemrestrictiondestroy(erstrctu,err) 1323 call ceedelemrestrictiondestroy(erstrctx,err) 1324 call ceedelemrestrictiondestroy(erstrctw,err) 1325 call ceedbasisdestroy(basisu,err) 1326 call ceedbasisdestroy(basisx,err) 1327 call ceedqfunctiondestroy(qf_setup,err) 1328 call ceedqfunctiondestroy(qf_diffusion,err) 1329 call ceedoperatordestroy(op_setup,err) 1330 call ceedoperatordestroy(op_diffusion,err) 1331 call ceeddestroy(ceed,err) 1332 1333 return 1334 end 1335C----------------------------------------------------------------------- 1336 subroutine cggos(u1,r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1, 1337 $ vec_ap1,maxit,bpname) 1338C Scalar conjugate gradient iteration for solution of uncoupled 1339C Helmholtz equations 1340C Input: r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,vec_ap1,bpname 1341C Output: u1,maxit 1342 include 'SIZE' 1343 include 'TOTAL' 1344 include 'DOMAIN' 1345 include 'FDMH1' 1346 character*3 bpname 1347 1348C INPUT: rhs1 - rhs 1349C h1 - exact solution 1350 1351 parameter (lt=lx1*ly1*lz1*lelt) 1352 parameter (ld=lxd*lyd*lzd*lelt) 1353 real*8 u1(lt),r1(lt),h1(lt),h2(lt) 1354 real*8 rmult(1),binv(1) 1355 integer ceed,ceed_op,vec_ap1,vec_p1 1356 common /scrcg/ dpc(lt),p1(lt),z1(lt) 1357 common /scrca/ wv(4),wk(4),rpp1(4),rpp2(4),alph(4),beta(4),pap(4) 1358 1359 real*8 ap1(lt) 1360 equivalence (ap1,z1) 1361 1362 vol = volfld(ifield) 1363 nel = nelfld(ifield) 1364 nxyz = lx1*ly1*lz1 1365 n = nxyz*nel 1366 nx = nx1-1 ! Polynomial order (just for i/o) 1367 1368 tol=tin 1369 1370 if(bpname.ne.'bp1') then 1371 call setprecn_bp3(dpc,h1,h2) ! Set up diagional pre-conidtioner 1372 else 1373 call setprecn_bp1(dpc,h1,h2) ! Set up diagional pre-conidtioner 1374 endif 1375 1376 call rzero (u1,n) ! Initialize solution 1377 1378 wv(1)=0 1379 do i=1,n 1380 s=rmult(i) ! -1 1381 p1(i)=dpc(i)*r1(i) ! p = M r T 1382 wv(1)=wv(1)+s*p1(i)*r1(i) ! r p 1383 enddo 1384 call gop(wv(1),wk,'+ ',1) 1385 rpp1(1) = wv (1) 1386 1387 do 1000 iter=1,maxit 1388 call axhm1_ceed (pap,ap1,p1,h1,h2,ceed,ceed_op, 1389 $ vec_ap1,vec_p1) 1390 call dssum (ap1,nx1,ny1,nz1) 1391 if (bpname.ne.'bp1') call xmask1(ap1,h2,nel) 1392 1393 call gop (pap,wk,'+ ',1) 1394 alph(1) = rpp1(1)/pap(1) 1395 1396 do i=1,n 1397 u1(i)=u1(i)+alph(1)* p1(i) 1398 r1(i)=r1(i)-alph(1)*ap1(i) 1399 enddo 1400 1401C tolerance check here 1402 call rzero(wv,2) 1403 do i=1,n 1404 wv(1)=wv(1)+r1(i)*r1(i) ! L2 error estimate 1405 z1(i)=dpc(i)*r1(i) ! z = M r 1406 wv(2)=wv(2)+rmult(i)*z1(i)*r1(i) ! r z 1407 enddo 1408 call gop(wv,wk,'+ ',2) 1409 1410C if (nio.eq.0) write(6,1) ifield,istep,iter,nx,(wv(k),k=1,1) 1411 1 format(i2,i9,i5,i4,1p1e12.4,' cggos') 1412 1413 enorm=sqrt(wv(1)) 1414 if (enorm.lt.tol) then 1415 ifin = iter 1416 if (nio.eq.0) write(6,3000) istep,ifin,enorm,tol 1417 goto 9999 1418 endif 1419C if (nio.eq.0) write(6,2) iter,enorm,alph(1),pap(1),'alpha' 1420 2 format(i5,1p3e12.4,2x,a5) 1421 1422 rpp2(1)=rpp1(1) 1423 rpp1(1)=wv (2) 1424 beta1 =rpp1(1)/rpp2(1) 1425 do i=1,n 1426 p1(i)=z1(i) + beta1*p1(i) 1427 enddo 1428 1429 1000 continue 1430 1431 rbnorm=sqrt(wv(1)) 1432 if (nio.eq.0) write (6,3001) istep,iter,rbnorm,tol 1433 iter = iter-1 1434 1435 9999 continue 1436 1437 maxit=iter 1438 1439 3000 format(i12,1x,'cggo scalar:',i6,1p5e13.4) 1440 3001 format(2i6,' Unconverged cggo scalar: rbnorm =',1p2e13.6) 1441 1442 return 1443 end 1444C----------------------------------------------------------------------- 1445 subroutine axhm1_ceed(pap,ap1,p1,h1,h2,ceed,ceed_op, 1446 $ vec_ap1,vec_p1) 1447C Vector conjugate gradient matvec for solution of uncoupled 1448C Helmholtz equations 1449C Input: pap,p1,h1,h2,bpname,ceed,ceed_op,vec_ap1,vec_p1 1450C Output: ap1 1451 include 'SIZE' 1452 include 'TOTAL' 1453 include 'ceedf.h' 1454 1455 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1456 real*8 gf(lg,lx,lelt) ! Equivalence new gf() data 1457 equivalence (gf,g1m1) ! layout to g1m1...g6m1 1458 1459 real*8 pap(3) 1460 real*8 ap1(lx,lelt) 1461 real*8 p1(lx,lelt) 1462 real*8 h1(lx,lelt),h2(lx,lelt) 1463 integer ceed,ceed_op,vec_ap1,vec_p1,err 1464 integer i,e 1465 integer*8 offset 1466 1467 offset=0 1468 call ceedvectorsetarray(vec_p1,ceed_mem_host,ceed_use_pointer, 1469 $ p1,offset,err) 1470 call ceedvectorsetarray(vec_ap1,ceed_mem_host,ceed_use_pointer, 1471 $ ap1,offset,err) 1472 1473 call ceedoperatorapply(ceed_op,vec_p1,vec_ap1, 1474 $ ceed_request_immediate,err) 1475 1476 call ceedvectortakearray(vec_p1,ceed_mem_host,0,offset,err) 1477 call ceedvectortakearray(vec_ap1,ceed_mem_host,0,offset,err) 1478 1479 pap(1)=0. 1480 1481 do e=1,nelt 1482 do i=1,lx 1483 pap(1)=pap(1)+ap1(i,e)*p1(i,e) 1484 enddo 1485 enddo 1486 1487 return 1488 end 1489C----------------------------------------------------------------------- 1490 subroutine ax_e_bp1(w,u,g,h1,h2,b,ju,us,ut) 1491C Local matrix-vector for solution of BP3 (stiffness matrix) 1492C Input: u,g,h1,h2,b,ju,us,ut Output: w 1493 include 'SIZE' 1494 include 'TOTAL' 1495 1496 parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1497 real*8 w(lxyz),u(lxyz),g(lg,lxyz),h1(lxyz),h2(lxyz),b(lxyz) 1498 real*8 ju(lxyz),us(lxyz),ut(lxyz) 1499 1500 nxq = nx1+1 ! Number of quadrature points 1501 1502 lxyzq = nxq**ldim 1503 1504 call intp_rstd (ju,u,lx1,nxq,if3d,0) ! 0 --> Fwd interpolation 1505 do i=1,lxyzq 1506 ju(i)=ju(i)*h2(i) !! h2 must be on the fine grid, w/ quad wts 1507 enddo 1508 call intp_rstd (w,ju,lx1,nxq,if3d,1) ! 1 --> ju-->u 1509 1510 return 1511 end 1512C----------------------------------------------------------------------- 1513 subroutine axhm1_bp1(pap,ap1,p1,h1,h2) 1514C Vector conjugate gradient matvec for solution of BP1 (mass matrix) 1515C Input: pap,p1,h1,h2 Output: ap1 1516 include 'SIZE' 1517 include 'TOTAL' 1518 1519 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1520 real*8 gf(lg,lx,lelt) ! Equivalence new gf() data 1521 equivalence (gf,g1m1) ! layout to g1m1...g6m1 1522 1523 real*8 pap(3) 1524 real*8 ap1(lx,lelt) 1525 real*8 p1(lx,lelt) 1526 real*8 h1(lx,lelt),h2(lx,lelt) 1527 1528 real*8 ur(lx),us(lx),ut(lx) 1529 common /ctmp1/ ur,us,ut 1530 1531 integer e 1532 1533 pap(1)=0. 1534 1535 k=1 1536 nxq = nx1+1 1537 1538 do e=1,nelt 1539 1540 call ax_e_bp1(ap1(1,e),p1(1,e),gf(1,1,e),h1(1,e),h2(k,1) 1541 $ ,bm1(1,1,1,e),ur,us,ut) 1542 do i=1,lx 1543 pap(1)=pap(1)+ap1(i,e)*p1(i,e) 1544 enddo 1545 k=k+nxq*nxq*nxq 1546 1547 enddo 1548 1549 return 1550 end 1551C----------------------------------------------------------------------- 1552 subroutine ax_e_bp3(w,u,g,ur,us,ut,wk) 1553C Local matrix-vector for solution of BP3 (stiffness matrix) 1554C Input: u,g,ur,us,ut,wk Output: w 1555 include 'SIZE' 1556 include 'TOTAL' 1557 1558 parameter (lzq=lx1+1,lxyz=lx1*lx1*lx1,lxyzq=lzq*lzq*lzq) 1559 1560 common /ctmp0/ tmp(lxyzq) 1561 common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq) 1562 1563 real*8 ur(lxyzq),us(lxyzq),ut(lxyzq),wk(lxyzq) 1564 real*8 w(lxyz),u(lxyz),g(2*ldim,lxyzq) 1565 1566 n = lzq-1 1567 1568 call intp_rstd (wk,u,lx1,lzq,if3d,0) ! 0 --> Fwd interpolation 1569 call loc_grad3 (ur,us,ut,wk,n,dxmq,dxtmq) 1570 1571 do i=1,lxyzq 1572 wr = g(1,i)*ur(i) + g(2,i)*us(i) + g(3,i)*ut(i) 1573 ws = g(2,i)*ur(i) + g(4,i)*us(i) + g(5,i)*ut(i) 1574 wt = g(3,i)*ur(i) + g(5,i)*us(i) + g(6,i)*ut(i) 1575 ur(i) = wr 1576 us(i) = ws 1577 ut(i) = wt 1578 enddo 1579 1580 call loc_grad3t (wk,ur,us,ut,n,dxmq,dxtmq,tmp) 1581 call intp_rstd (w,wk,lx1,lzq,if3d,1) ! 1 --> ju-->u 1582 1583 return 1584 end 1585C----------------------------------------------------------------------- 1586 subroutine axhm1_bp3(pap,ap1,p1,h1,h2) 1587C Vector conjugate gradient matvec for solution of BP3 (stiffness matrix) 1588C Input: pap,p1,h1,h2 Output: ap1 1589 include 'SIZE' 1590 include 'TOTAL' 1591 1592 parameter (lzq=lx1+1) 1593 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 1594 common /bpgfactors/ gf(lg,lq,lelt),bmq(lq,lelt),w3mq(lq) 1595 1596 real*8 pap(3) 1597 real*8 ap1(lx,lelt) 1598 real*8 p1(lx,lelt) 1599 real*8 h1(lx,lelt),h2(lx,lelt) 1600 1601 common /ctmp1/ ur,us,ut,wk 1602 real*8 ur(lq),us(lq),ut(lq),wk(lq) 1603 1604 integer e 1605 1606 pap(1)=0. 1607 1608 do e=1,nelt 1609 1610 call ax_e_bp3(ap1(1,e),p1(1,e),gf(1,1,e),ur,us,ut,wk) 1611 do i=1,lx 1612 pap(1)=pap(1)+p1(i,e)*ap1(i,e) 1613 enddo 1614 1615 enddo 1616 1617 return 1618 end 1619C----------------------------------------------------------------------- 1620 subroutine axhm1(pap,ap1,p1,h1,h2,bpname) 1621C Vector conjugate gradient matvec for solution of uncoupled 1622C Helmholtz equations 1623C Input: pap,p1,h1,h2,bpname Output: ap1 1624 include 'SIZE' 1625 include 'TOTAL' 1626 1627 parameter (lx=lx1*ly1*lz1) 1628 1629 real*8 pap(3),ap1(lx,lelt),p1(lx,lelt) 1630 real*8 h1(lx,lelt),h2(lx,lelt) 1631 1632 character*3 bpname 1633 1634 if (bpname.eq.'bp1') then 1635 call axhm1_bp1(pap,ap1,p1,h1,h2) 1636 1637 elseif (bpname.eq.'bp3') then 1638 call axhm1_bp3(pap,ap1,p1,h1,h2) 1639 1640 else 1641 write(6,*) bpname,' axhm1 bpname error' 1642 stop 1643 1644 endif 1645 1646 return 1647 end 1648C----------------------------------------------------------------------- 1649 subroutine get_bp(bp) 1650C Get BP to run 1651C Input: Output: bp 1652 integer i,bp 1653 character*64 bpval 1654 1655 bp=0 1656 if(iargc().ge.1) then 1657 call getarg(1,bpval) 1658 endif 1659 if(bpval.eq."bp1") then 1660 bp=1 1661 elseif(bpval.eq."bp3") then 1662 bp=3 1663 endif 1664 1665 return 1666 end 1667C----------------------------------------------------------------------- 1668 subroutine get_spec(spec) 1669C Get CEED backend specification 1670C Input: Output: spec 1671 integer i 1672 character*64 spec 1673 1674 spec = '/cpu/self' 1675 if(iargc().ge.2) then 1676 call getarg(2,spec) 1677 endif 1678 1679 return 1680 end 1681C----------------------------------------------------------------------- 1682 subroutine get_test(test) 1683C Get test mode flag 1684C Input: Output: test 1685 integer i,test 1686 character*64 testval 1687 1688 test=0 1689 if(iargc().ge.3) then 1690 call getarg(3,testval) 1691 endif 1692 if(testval.eq."test") then 1693 test=1 1694 endif 1695 1696 return 1697 end 1698C----------------------------------------------------------------------- 1699 1700