1C Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2C the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3C reserved. See files LICENSE and NOTICE for details. 4C 5C This file is part of CEED, a collection of benchmarks, miniapps, software 6C libraries and APIs for efficient high-order finite element and spectral 7C element discretizations for exascale applications. For more information and 8C source code availability see http://github.com/ceed. 9C 10C The CEED research is supported by the Exascale Computing Project (17-SC-20-SC) 11C a collaborative effort of two U.S. Department of Energy organizations (Office 12C of Science and the National Nuclear Security Administration) responsible for 13C the planning and preparation of a capable exascale ecosystem, including 14C software, applications, hardware, advanced system engineering and early 15C testbed platforms, in support of the nation's exascale computing imperative. 16 17C> @file 18C> Mass and diffusion operators examples using Nek5000 19C TESTARGS -c {ceed_resource} -e bp1 -n 1 -b 4 -test 20 21C----------------------------------------------------------------------- 22 subroutine masssetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 23 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 24 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 25C Set up mass operator 26C Input: u1,u2,u3,q Output: v1,v2,ierr 27 integer q,ierr 28 real*8 ctx(1) 29 real*8 u1(3*q) 30 real*8 u2(9*q) 31 real*8 u3(q) 32 real*8 v1(q) 33 real*8 v2(q) 34 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 35 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 36 real*8 jacmq 37 38C Quadrature Point Loop 39 do i=1,q 40 a11=u2(i+q*0) 41 a12=u2(i+q*3) 42 a13=u2(i+q*6) 43 44 a21=u2(i+q*1) 45 a22=u2(i+q*4) 46 a23=u2(i+q*7) 47 48 a31=u2(i+q*2) 49 a32=u2(i+q*5) 50 a33=u2(i+q*8) 51 52 g11 = (a22*a33-a23*a32) 53 g12 = (a13*a32-a33*a12) 54 g13 = (a12*a23-a22*a13) 55 56 g21 = (a23*a31-a21*a33) 57 g22 = (a11*a33-a31*a13) 58 g23 = (a13*a21-a23*a11) 59 60 g31 = (a21*a32-a22*a31) 61 g32 = (a12*a31-a32*a11) 62 g33 = (a11*a22-a21*a12) 63 64 jacmq = a11*g11+a21*g12+a31*g13 65 66C Rho 67 v1(i)=u3(i)*jacmq 68 69C RHS 70 v2(i)=u3(i)*jacmq 71 $ *dsqrt(u1(i+q*0)*u1(i+q*0) 72 $ +u1(i+q*1)*u1(i+q*1) 73 $ +u1(i+q*2)*u1(i+q*2)) 74 enddo 75 76 ierr=0 77 end 78C----------------------------------------------------------------------- 79 subroutine massf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 80 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 81 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 82C Apply mass operator 83C Input: u1,u2,q Output: v1,ierr 84 integer q,ierr 85 real*8 ctx(1) 86 real*8 u1(q) 87 real*8 u2(q) 88 real*8 v1(q) 89 90C Quadrature Point Loop 91 do i=1,q 92 v1(i)=u2(i)*u1(i) 93 enddo 94 95 ierr=0 96 end 97C----------------------------------------------------------------------- 98 subroutine diffsetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 99 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 100 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 101C Set up diffusion operator 102C Input: u1,u2,u3,q Output: v1,v2,ierr 103 integer q,ierr 104 real*8 ctx(1) 105 real*8 u1(3*q) 106 real*8 u2(9*q) 107 real*8 u3(q) 108 real*8 v1(6*q) 109 real*8 v2(q) 110 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 111 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 112 real*8 jacmq,scl 113 real*8 c(3),k(3) 114 115C Quadrature Point Loop 116 do i=1,q 117 pi = 3.14159265358979323846 118 119 c(1)=0. 120 c(2)=1. 121 c(3)=2. 122 k(1)=1. 123 k(2)=2. 124 k(3)=3. 125 126 a11=u2(i+q*0) 127 a12=u2(i+q*3) 128 a13=u2(i+q*6) 129 130 a21=u2(i+q*1) 131 a22=u2(i+q*4) 132 a23=u2(i+q*7) 133 134 a31=u2(i+q*2) 135 a32=u2(i+q*5) 136 a33=u2(i+q*8) 137 138 g11 = (a22*a33-a23*a32) 139 g12 = (a13*a32-a33*a12) 140 g13 = (a12*a23-a22*a13) 141 142 g21 = (a23*a31-a21*a33) 143 g22 = (a11*a33-a31*a13) 144 g23 = (a13*a21-a23*a11) 145 146 g31 = (a21*a32-a22*a31) 147 g32 = (a12*a31-a32*a11) 148 g33 = (a11*a22-a21*a12) 149 150 jacmq = a11*g11+a21*g12+a31*g13 151 152 scl = u3(i)/jacmq 153 154C Geometric factors 155C Stored in Voigt convention 156C 0 5 4 157C 5 1 3 158C 4 3 2 159 v1(i+0*q) = scl*(g11*g11+g12*g12+g13*g13) ! Grr 160 v1(i+1*q) = scl*(g21*g21+g22*g22+g23*g23) ! Gss 161 v1(i+2*q) = scl*(g31*g31+g32*g32+g33*g33) ! Gtt 162 v1(i+3*q) = scl*(g21*g31+g22*g32+g23*g33) ! Gst 163 v1(i+4*q) = scl*(g11*g31+g12*g32+g13*g33) ! Grt 164 v1(i+5*q) = scl*(g11*g21+g12*g22+g13*g23) ! Grs 165 166C RHS 167 v2(i) = u3(i)*jacmq*pi*pi 168 $ *dsin(pi*(c(1)+k(1)*u1(i+0*q))) 169 $ *dsin(pi*(c(2)+k(2)*u1(i+1*q))) 170 $ *dsin(pi*(c(3)+k(3)*u1(i+2*q))) 171 $ *(k(1)*k(1)+k(2)*k(2)+k(3)*k(3)) 172 173 enddo 174 175 ierr=0 176 end 177C----------------------------------------------------------------------- 178 subroutine diffusionf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 179 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 180 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 181C Apply diffusion operator 182C Input: u1,u2,q Output: v1,ierr 183 integer q,ierr 184 real*8 ctx(1) 185 real*8 u1(3*q) 186 real*8 u2(6*q) 187 real*8 v1(3*q) 188 189C Quadrature Point Loop 190 do i=1,q 191 v1(i+0*q)= 192 $ u2(i+0*q)*u1(i)+u2(i+5*q)*u1(i+q)+u2(i+4*q)*u1(i+2*q) 193 v1(i+1*q)= 194 $ u2(i+5*q)*u1(i)+u2(i+1*q)*u1(i+q)+u2(i+3*q)*u1(i+2*q) 195 v1(i+2*q)= 196 $ u2(i+4*q)*u1(i)+u2(i+3*q)*u1(i+q)+u2(i+2*q)*u1(i+2*q) 197 enddo 198 199 ierr=0 200 end 201C----------------------------------------------------------------------- 202 subroutine set_h2_as_rhoJac_GL(h2,bmq,nxq) 203C Set h2 as rhoJac 204C Input: bmq,nxq Output: h2 205 include 'SIZE' 206 real*8 h2(1),bmq(1) 207 208 common /ctmp77/ wd(lxd),zd(lxd) 209 integer e,i,L 210 211 call zwgl(zd,wd,nxq) ! nxq = number of points 212 213 q = 1.0 ! Later, this can be a function of position... 214 215 L = 0 216 do e=1,lelt 217 do i=1,nxq**ldim 218 L=L+1 219 h2(L) = q*bmq(L) 220 enddo 221 enddo 222 223 return 224 end 225C----------------------------------------------------------------------- 226 subroutine dist_fld_h1(e) 227C Set distance initial condition for BP1 228C Input: Output: e 229 include 'SIZE' 230 include 'TOTAL' 231 real*8 x,y,z 232 real*8 e(1) 233 234 n=lx1*ly1*lz1*nelt 235 236 do i=1,n 237 x=xm1(i,1,1,1) 238 y=ym1(i,1,1,1) 239 z=zm1(i,1,1,1) 240 241 e(i) = dsqrt(x*x+y*y+z*z) 242 enddo 243 244 call dsavg(e) ! This is requisite for random fields 245 246 return 247 end 248C----------------------------------------------------------------------- 249 subroutine sin_fld_h1(e) 250C Set sine initial condition for BP3 251C Input: Output: e 252 include 'SIZE' 253 include 'TOTAL' 254 real*8 x,y,z 255 real*8 e(1) 256 real*8 c(3),k(3) 257 258 n=lx1*ly1*lz1*nelt 259 pi = 3.14159265358979323846 260 261 c(1)=0. 262 c(2)=1. 263 c(3)=2. 264 k(1)=1. 265 k(2)=2. 266 k(3)=3. 267 268 do i=1,n 269 x=xm1(i,1,1,1) 270 y=ym1(i,1,1,1) 271 z=zm1(i,1,1,1) 272 273 e(i) = dsin(pi*(c(1)+k(1)*x)) 274 & *dsin(pi*(c(2)+k(2)*y)) 275 & *dsin(pi*(c(3)+k(3)*z)) 276 277 enddo 278 279 call dsavg(e) ! This is requisite for random fields 280 281 return 282 end 283C----------------------------------------------------------------------- 284 subroutine uservp(ix,iy,iz,eg) ! set variable properties 285 include 'SIZE' 286 include 'TOTAL' 287 include 'NEKUSE' 288 integer e,f,eg 289C e = gllel(eg) 290 291 udiff = 0.0 292 utrans = 0.0 293 294 return 295 end 296C----------------------------------------------------------------------- 297 subroutine userf(ix,iy,iz,eg) ! set acceleration term 298C 299C Note: this is an acceleration term, NOT a force! 300C Thus, ffx will subsequently be multiplied by rho(x,t). 301C 302 include 'SIZE' 303 include 'TOTAL' 304 include 'NEKUSE' 305 integer e,f,eg 306C e = gllel(eg) 307 308 ffx = 0.0 309 ffy = 0.0 310 ffz = 0.0 311 312 return 313 end 314C----------------------------------------------------------------------- 315 subroutine userq(i,j,k,eg) ! set source term 316 include 'SIZE' 317 include 'TOTAL' 318 include 'NEKUSE' 319 integer e,f,eg 320 e = gllel(eg) 321 322 qvol = 0 323 324 return 325 end 326C----------------------------------------------------------------------- 327 subroutine userbc(ix,iy,iz,f,eg) ! set up boundary conditions 328C NOTE ::: This subroutine MAY NOT be called by every process 329 include 'SIZE' 330 include 'TOTAL' 331 include 'NEKUSE' 332 integer e,f,eg 333 334 ux = 0.0 335 uy = 0.0 336 uz = 0.0 337 temp = 0.0 338 339 return 340 end 341C----------------------------------------------------------------------- 342 subroutine useric(ix,iy,iz,eg) ! set up initial conditions 343 include 'SIZE' 344 include 'TOTAL' 345 include 'NEKUSE' 346 integer e,f,eg 347 348 ux = 0.0 349 uy = 0.0 350 uz = 0.0 351 temp = 0.0 352 353 return 354 end 355C----------------------------------------------------------------------- 356 subroutine usrdat ! This routine to modify element vertices 357 include 'SIZE' 358 include 'TOTAL' 359 360 return 361 end 362C----------------------------------------------------------------------- 363 subroutine usrdat2 ! This routine to modify mesh coordinates 364 include 'SIZE' 365 include 'TOTAL' 366 367 x0 = 0 368 x1 = 1 369 call rescale_x(xm1,x0,x1) 370 call rescale_x(ym1,x0,x1) 371 call rescale_x(zm1,x0,x1) 372 373 param(59)=1 ! Force Nek to use the "deformed element" formulation 374 375 return 376 end 377C----------------------------------------------------------------------- 378 subroutine usrdat3 379 include 'SIZE' 380 include 'TOTAL' 381 382 return 383 end 384C----------------------------------------------------------------------- 385 subroutine xmask1 (r1,h2,nel) 386C Apply zero Dirichlet boundary conditions 387C Input: h2,nel Output: r1 388 include 'SIZE' 389 include 'TOTAL' 390 real*8 r1(1),h2(1) 391 392 n=nx1*ny1*nz1*nel 393 do i=1,n 394 r1(i)=r1(i)*h2(i) 395 enddo 396 397 return 398 end 399C----------------------------------------------------------------------- 400 function glrdif(x,y,n) 401C Compute Linfty norm of (x-y) 402C Input: x,y Output: n 403 real*8 x(n),y(n) 404 405 dmx=0 406 xmx=0 407 ymx=0 408 409 do i=1,n 410 diff=abs(x(i)-y(i)) 411 dmx =max(dmx,diff) 412 xmx =max(xmx,x(i)) 413 ymx =max(ymx,y(i)) 414 enddo 415 416 xmx = max(xmx,ymx) 417 dmx = glmax(dmx,1) ! max across processors 418 xmx = glmax(xmx,1) 419 420 if (xmx.gt.0) then 421 glrdif = dmx/xmx 422 else 423 glrdif = -dmx ! Negative indicates something strange 424 endif 425 426 return 427 end 428C----------------------------------------------------------------------- 429 subroutine loc_grad3(ur,us,ut,u,N,D,Dt) 430C 3D transpose of local gradient 431C Input: u,N,D,Dt Output: ur,us,ut 432 real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N) 433 real*8 u (0:N,0:N,0:N) 434 real*8 D (0:N,0:N),Dt(0:N,0:N) 435 436 m1 = N+1 437 m2 = m1*m1 438 439 call mxm(D ,m1,u(0,0,0),m1,ur,m2) 440 do k=0,N 441 call mxm(u(0,0,k),m1,Dt,m1,us(0,0,k),m1) 442 enddo 443 call mxm(u(0,0,0),m2,Dt,m1,ut,m1) 444 445 return 446 end 447c----------------------------------------------------------------------- 448 subroutine loc_grad3t(u,ur,us,ut,N,D,Dt,w) 449C 3D transpose of local gradient 450C Input: ur,us,ut,N,D,Dt Output: u 451 real*8 u (0:N,0:N,0:N) 452 real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N) 453 real*8 D (0:N,0:N),Dt(0:N,0:N) 454 real*8 w (0:N,0:N,0:N) 455 456 m1 = N+1 457 m2 = m1*m1 458 m3 = m1*m1*m1 459 460 call mxm(Dt,m1,ur,m1,u(0,0,0),m2) 461 do k=0,N 462 call mxm(us(0,0,k),m1,D ,m1,w(0,0,k),m1) 463 enddo 464 call add2(u(0,0,0),w,m3) 465 call mxm(ut,m2,D ,m1,w,m1) 466 call add2(u(0,0,0),w,m3) 467 468 return 469 end 470C----------------------------------------------------------------------- 471 subroutine geodatq(gf,bmq,w3mq,nxq) 472C Routine to generate elemental geometric matrices on mesh 1 473C (Gauss-Legendre Lobatto mesh). 474 include 'SIZE' 475 include 'TOTAL' 476 477 parameter (lg=3+3*(ldim-2),lzq=lx1+1,lxyd=lzq**ldim) 478 479 real*8 gf(lg,nxq**ldim,lelt),bmq(nxq**ldim,lelt),w3mq(nxq,nxq,nxq) 480 481 common /ctmp1/ xr(lxyd),xs(lxyd),xt(lxyd) 482 common /sxrns/ yr(lxyd),ys(lxyd),yt(lxyd) 483 $ , zr(lxyd),zs(lxyd),zt(lxyd) 484 485 common /ctmp77/ wd(lxd),zd(lxd) 486 common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq) 487 488 integer e 489 real*8 tmp(lxyd) 490 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 491 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 492 real*8 jacmq 493 494 if (nxq.gt.lzq) call exitti('ABORT: recompile with lzq=$',nxq) 495 496 call zwgl (zd,wd,lzq) ! nxq = number of points 497 call gen_dgl (dxmq,dxtmq,lzq,lzq,tmp) 498 499 do k=1,nxq 500 do j=1,nxq 501 do i=1,nxq 502 w3mq(i,j,k) = wd(i)*wd(j)*wd(k) 503 enddo 504 enddo 505 enddo 506 507 nxyzq = nxq**ldim 508 nxqm1 = lzq-1 509 510 do e=1,nelt 511 call intp_rstd (tmp,xm1(1,1,1,e),lx1,lzq,if3d,0) ! 0-->Fwd interpolation 512 call loc_grad3 (xr,xs,xt,tmp,nxqm1,dxmq,dxtmq) 513 514 call intp_rstd (tmp,ym1(1,1,1,e),lx1,lzq,if3d,0) 515 call loc_grad3 (yr,ys,yt,tmp,nxqm1,dxmq,dxtmq) 516 517 call intp_rstd (tmp,zm1(1,1,1,e),lx1,lzq,if3d,0) 518 call loc_grad3 (zr,zs,zt,tmp,nxqm1,dxmq,dxtmq) 519 520 do i=1,nxyzq 521 a11 = xr(i) 522 a12 = xs(i) 523 a13 = xt(i) 524 525 a21 = yr(i) 526 a22 = ys(i) 527 a23 = yt(i) 528 529 a31 = zr(i) 530 a32 = zs(i) 531 a33 = zt(i) 532 533 g11 = (a22*a33-a23*a32) 534 g12 = (a13*a32-a33*a12) 535 g13 = (a12*a23-a22*a13) 536 537 g21 = (a23*a31-a21*a33) 538 g22 = (a11*a33-a31*a13) 539 g23 = (a13*a21-a23*a11) 540 541 g31 = (a21*a32-a22*a31) 542 g32 = (a12*a31-a32*a11) 543 g33 = (a11*a22-a21*a12) 544 545 jacmq = a11*g11+a21*g12+a31*g13 546 547 bmq(i,e) = w3mq(i,1,1)*jacmq 548 scale = w3mq(i,1,1)/jacmq 549 550 gf(1,i,e) = scale*(g11*g11+g12*g12+g13*g13) ! Grr 551 gf(2,i,e) = scale*(g11*g21+g12*g22+g13*g23) ! Grs 552 gf(3,i,e) = scale*(g11*g31+g12*g32+g13*g33) ! Grt 553 gf(4,i,e) = scale*(g21*g21+g22*g22+g23*g23) ! Gss 554 gf(5,i,e) = scale*(g21*g31+g22*g32+g23*g33) ! Gst 555 gf(6,i,e) = scale*(g31*g31+g32*g32+g33*g33) ! Gtt 556 557 enddo 558 enddo 559 560 return 561 end 562C----------------------------------------------------------------------- 563 subroutine setprecn_bp1 (d,h1,h2) 564C Generate diagonal preconditioner for Helmholtz operator 565C Input: h1,h2 Output: d 566 include 'SIZE' 567 include 'TOTAL' 568 569 parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2)) 570 571 real*8 d(lx1,ly1,lz1,lelt),h1(lxyz,lelt),h2(lxyz,lelt) 572 integer e 573 574 real*8 gf(lg,lx1,ly1,lz1,lelt) ! Equivalence new gf() data 575 equivalence (gf,g1m1) ! layout to g1m1...g6m1 576 577 real*8 ysm1(ly1) 578 579 nel = nelfld(ifield) 580 n = nel*lx1*ly1*lz1 581 nxyz = lx1*ly1*lz1 582 583 call copy (d,bm1,n) ! Mass matrix preconditioning full mass matrix 584 call dssum (d,nx1,ny1,nz1) 585 call invcol1 (d,n) 586 return 587 588 call dsset(lx1,ly1,lz1) 589 590 do 1000 e=1,nel 591 592 call rzero(d(1,1,1,e),nxyz) 593 594 do 320 iz=1,lz1 595 do 320 iy=1,ly1 596 do 320 ix=1,lx1 597 do 320 iq=1,lx1 598 d(ix,iy,iz,e) = d(ix,iy,iz,e) 599 $ + gf(1,iq,iy,iz,e) * dxm1(iq,ix)**2 600 $ + gf(2,ix,iq,iz,e) * dxm1(iq,iy)**2 601 $ + gf(3,ix,iy,iq,e) * dxm1(iq,iz)**2 602 320 continue 603C 604C Add cross terms if element is deformed. 605C 606 if (lxyz.gt.0) then 607 608 do i2=1,ly1,ly1-1 609 do i1=1,lx1,lx1-1 610 d(1,i1,i2,e) = d(1,i1,i2,e) 611 $ + gf(4,1,i1,i2,e) * dxtm1(1,1)*dytm1(i1,i1) 612 $ + gf(5,1,i1,i2,e) * dxtm1(1,1)*dztm1(i2,i2) 613 d(lx1,i1,i2,e) = d(lx1,i1,i2,e) 614 $ + gf(4,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dytm1(i1,i1) 615 $ + gf(5,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dztm1(i2,i2) 616 d(i1,1,i2,e) = d(i1,1,i2,e) 617 $ + gf(4,i1,1,i2,e) * dytm1(1,1)*dxtm1(i1,i1) 618 $ + gf(6,i1,1,i2,e) * dytm1(1,1)*dztm1(i2,i2) 619 d(i1,ly1,i2,e) = d(i1,ly1,i2,e) 620 $ + gf(4,i1,ly1,i2,e) * dytm1(ly1,ly1)*dxtm1(i1,i1) 621 $ + gf(6,i1,ly1,i2,e) * dytm1(ly1,ly1)*dztm1(i2,i2) 622 d(i1,i2,1,e) = d(i1,i2,1,e) 623 $ + gf(5,i1,i2,1,e) * dztm1(1,1)*dxtm1(i1,i1) 624 $ + gf(6,i1,i2,1,e) * dztm1(1,1)*dytm1(i2,i2) 625 d(i1,i2,lz1,e) = d(i1,i2,lz1,e) 626 $ + gf(5,i1,i2,lz1,e) * dztm1(lz1,lz1)*dxtm1(i1,i1) 627 $ + gf(6,i1,i2,lz1,e) * dztm1(lz1,lz1)*dytm1(i2,i2) 628 629 enddo 630 enddo 631 endif 632 633 do i=1,lxyz 634 d(i,1,1,e)=d(i,1,1,e)*h1(i,e)+h2(i,e)*bm1(i,1,1,e) 635 enddo 636 637 1000 continue ! element loop 638 639C If axisymmetric, add a diagonal term in the radial direction (ISD=2) 640 641 if (ifaxis.and.(isd.eq.2)) then 642 do 1200 e=1,nel 643 if (ifrzer(e)) call mxm(ym1(1,1,1,e),lx1,datm1,ly1,ysm1,1) 644 k=0 645 do 1190 j=1,ly1 646 do 1190 i=1,lx1 647 k=k+1 648 if (ym1(i,j,1,e).ne.0.) then 649 term1 = bm1(i,j,1,e)/ym1(i,j,1,e)**2 650 if (ifrzer(e)) then 651 term2 = wxm1(i)*wam1(1)*dam1(1,j) 652 $ *jacm1(i,1,1,e)/ysm1(i) 653 else 654 term2 = 0. 655 endif 656 d(i,j,1,e) = d(i,j,1,e) + h1(k,e)*(term1+term2) 657 endif 658 1190 continue 659 1200 continue 660 661 endif 662 call dssum (d,nx1,ny1,nz1) 663 call invcol1 (d,n) 664 665 if (nio.eq.0) write(6,1) n,d(1,1,1,1),h1(1,1),h2(1,1),bm1(1,1,1,1) 666 1 format(i9,1p4e12.4,' diag prec') 667 668 return 669 end 670C----------------------------------------------------------------------- 671 subroutine setprecn_bp3 (d,h1,h2) 672C Generate dummy diagonal preconditioner for Helmholtz operator 673C Input: h1,h2 Output: d 674 include 'SIZE' 675 include 'TOTAL' 676 677 parameter (n=lx1*ly1*lz1*lelt) 678 real*8 d(n),h1(n),h2(n) 679 680 call rone (d,n) 681 682 return 683 end 684C----------------------------------------------------------------------- 685 subroutine userchk 686 include 'SIZE' 687 include 'TOTAL' 688 689 integer bp 690 691 call get_bp(bp) 692 693 if (bp==1) then 694 if (istep.gt.0) call bp1 695 elseif (bp==3) then 696 if (istep.gt.0) call bp3 697 else 698 write(6,*) "INVALID BP SPECIFICED" 699 endif 700 701 return 702 end 703C----------------------------------------------------------------------- 704 subroutine bp1 705C Solution to BP1 using libCEED 706 include 'SIZE' 707 include 'TOTAL' 708 include 'CTIMER' ! ifsync 709 include 'FDMH1' 710 include 'ceedf.h' 711 712 parameter (lzq=lx1+1) 713 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 714 common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq) 715 716 parameter (lt=lx1*ly1*lz1*lelt) 717 parameter (ld=lxd*lyd*lzd*lelt) 718 common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt) 719 common /vcrny/ e1(lt) 720 common /vcrvh/ h1(lt),h2(lx*lelt),pap(3) 721 real*8 coords(ldim*lx*lelt) 722 723 logical ifh3 724 integer*8 nnode 725 integer ceed,err,test 726 character*64 spec 727 728 integer p,q,ncompx,ncompu,enode,lnode 729 integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs 730 integer stridesu(3),stridesx(3),stridesw(3) 731 integer erstrctu,erstrctx,erstrctw 732 integer basisu,basisx 733 integer qf_mass,qf_setup 734 integer op_mass,op_setup 735 real*8 x,y,z 736 integer*8 offset 737 738 external massf,masssetupf 739 740 ifield = 1 741 nxq = nx1+1 742 n = nx1*ny1*nz1*nelt 743 744 ifsync = .false. 745 746C Set up coordinates 747 ii=0 748 do j=0,nelt-1 749 do i=1,lx 750 ii=ii+1 751 x = xm1(ii,1,1,1) 752 y = ym1(ii,1,1,1) 753 z = zm1(ii,1,1,1) 754 coords(i+0*lx+3*j*lx)=x 755 coords(i+1*lx+3*j*lx)=y 756 coords(i+2*lx+3*j*lx)=z 757 enddo 758 enddo 759 760C Init ceed library 761 call get_spec(spec) 762 call ceedinit(trim(spec)//char(0),ceed,err) 763 764 call get_test(test) 765 766C Set up Nek geometry data 767 call geodatq (gf,bmq,w3mq,nxq) ! Set up gf() arrays 768 call set_h2_as_rhoJac_GL(h2,bmq,nxq) 769 770C Set up true soln 771 call dist_fld_h1 (e1) 772 call copy (h1,e1,n) ! Save exact soln in h1 773 774C Set up solver parameters 775 tol = 1e-10 776 param(22) = tol 777 maxit = 100 778 779 call nekgsync() 780 781C Create ceed basis for mesh and computation 782 p=nx1 783 q=p+1 784 ncompu=1 785 ncompx=ldim 786 call ceedbasiscreatetensorh1lagrange(ceed,ndim,ndim,p,q, 787 $ ceed_gauss,basisx,err) 788 call ceedbasiscreatetensorh1lagrange(ceed,ndim,ncompu,p,q, 789 $ ceed_gauss,basisu,err) 790 791C Create ceed element restrictions for mesh and computation 792 enode=p**ldim 793 lnode=enode*nelt*ncompu 794 stridesx=[1,enode,enode*ldim] 795 call ceedelemrestrictioncreatestrided(ceed,nelt,enode,lnode, 796 $ ldim,stridesx,erstrctx,err) 797 stridesu=[1,enode,enode*ncompu] 798 call ceedelemrestrictioncreatestrided(ceed,nelt,enode,lnode, 799 $ ncompu,stridesu,erstrctu,err) 800 call ceedelemrestrictioncreatestrided(ceed,nelt,q**ldim, 801 $ nelt*q**ldim,1,ceed_strides_backend,erstrctw,err) 802 803C Create ceed vectors 804 call ceedvectorcreate(ceed,lnode,vec_p1,err) 805 call ceedvectorcreate(ceed,lnode,vec_ap1,err) 806 call ceedvectorcreate(ceed,lnode,vec_rhs,err) 807 call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err) 808 call ceedvectorcreate(ceed,nelt*q**ldim,vec_qdata,err) 809 810 offset=0 811 call ceedvectorsetarray(vec_coords,ceed_mem_host, 812 $ ceed_use_pointer,coords,offset,err) 813 814C Create ceed qfunctions for masssetupf and massf 815 call ceedqfunctioncreateinterior(ceed,1,masssetupf, 816 $ EXAMPLE_DIR 817 $ //'bps/bps.h:masssetupf',qf_setup,err) 818 call ceedqfunctionaddinput(qf_setup,'x',ncompx, 819 $ ceed_eval_interp,err) 820 call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim, 821 $ ceed_eval_grad,err) 822 call ceedqfunctionaddinput(qf_setup,'weight',ncompu, 823 $ ceed_eval_weight,err) 824 call ceedqfunctionaddoutput(qf_setup,'qdata',ncompu, 825 $ ceed_eval_none,err) 826 call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu, 827 $ ceed_eval_interp,err) 828 829 call ceedqfunctioncreateinterior(ceed,1,massf, 830 $ EXAMPLE_DIR 831 $ //'bps/bps.h:massf',qf_mass,err) 832 call ceedqfunctionaddinput(qf_mass,'u',ncompu, 833 $ ceed_eval_interp,err) 834 call ceedqfunctionaddinput(qf_mass,'qdata',ncompu, 835 $ ceed_eval_none,err) 836 call ceedqfunctionaddoutput(qf_mass,'v',ncompu, 837 $ ceed_eval_interp,err) 838 839C Create ceed operators 840 call ceedoperatorcreate(ceed,qf_setup, 841 $ ceed_qfunction_none,ceed_qfunction_none,op_setup,err) 842 call ceedoperatorsetfield(op_setup,'x',erstrctx, 843 $ basisx,ceed_vector_active,err) 844 call ceedoperatorsetfield(op_setup,'dx',erstrctx, 845 $ basisx,ceed_vector_active,err) 846 call ceedoperatorsetfield(op_setup,'weight', 847 $ ceed_elemrestriction_none,basisx,ceed_vector_none,err) 848 call ceedoperatorsetfield(op_setup,'qdata',erstrctw, 849 $ ceed_basis_collocated,ceed_vector_active,err) 850 call ceedoperatorsetfield(op_setup,'rhs',erstrctu, 851 $ basisu,vec_rhs,err) 852 853 call ceedoperatorcreate(ceed,qf_mass, 854 $ ceed_qfunction_none,ceed_qfunction_none,op_mass,err) 855 call ceedoperatorsetfield(op_mass,'u',erstrctu, 856 $ basisu,ceed_vector_active,err) 857 call ceedoperatorsetfield(op_mass,'qdata',erstrctw, 858 $ ceed_basis_collocated,vec_qdata,err) 859 call ceedoperatorsetfield(op_mass,'v',erstrctu, 860 $ basisu,ceed_vector_active,err) 861 862C Compute setup data 863 call ceedvectorsetarray(vec_rhs,ceed_mem_host, 864 $ ceed_use_pointer,r1,offset,err) 865 call ceedoperatorapply(op_setup,vec_coords,vec_qdata, 866 $ ceed_request_immediate,err) 867 868C Set up true RHS 869 call dssum (r1,nx1,ny1,nz1) ! r1 870 871C Set up algebraic RHS with libCEED 872 call ceedvectorsetarray(vec_p1,ceed_mem_host, 873 $ ceed_use_pointer,h1,offset,err) 874 call ceedvectorsetarray(vec_ap1,ceed_mem_host, 875 $ ceed_use_pointer,r2,offset,err) 876 call ceedoperatorapply(op_mass,vec_p1,vec_ap1, 877 $ ceed_request_immediate,err) ! r2 = A_ceed*h1 878 call dssum (r2,nx1,ny1,nz1) 879 880C Set up algebraic RHS with Nek5000 881 call axhm1 (pap,r3,h1,h1,h2,'bp1') ! r3 = A_nek5k*h1 882 call dssum (r3,nx1,ny1,nz1) 883 884 call nekgsync() 885 886C Solve true RHS 887 if (nid.eq.0) write (6,*) "libCEED true RHS" 888 tstart = dnekclock() 889 call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_mass, 890 $ vec_p1,vec_ap1,maxit,'bp1') 891 tstop = dnekclock() 892 893C Output 894 telaps = (tstop-tstart) 895 maxits = maxit 896 er1 = glrdif(u1,e1,n) 897 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 898 899 if (test.eq.1.and.nid.eq.0) then 900 if (maxit>=100) then 901 write(6,*) "UNCONVERGED CG" 902 endif 903 if (dabs(er1)>5e-3) then 904 write(6,*) "ERROR IS TOO LARGE" 905 endif 906 endif 907 908 nx = nx1-1 909 nnode = nelgt ! nnodes 910 nnode = nnode*(nx**ldim) ! nnodes 911 nppp = nnode/np ! nnodes/proc 912 913 dofps = nnode/telaps ! DOF/sec - scalar form 914 titers = telaps/maxits ! time per iteration 915 tppp_s = titers/nppp ! time per iteraton per local point 916 917 if (nid.eq.0) write(6,1) 'case scalar:' 918 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 919 920C Solve libCEED algebraic RHS 921 if (nid.eq.0) write (6,*) "libCEED algebraic RHS" 922 maxit = 100 923 tstart = dnekclock() 924 call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_mass, 925 $ vec_p1,vec_ap1,maxit,'bp1') 926 tstop = dnekclock() 927 928C Output 929 telaps = (tstop-tstart) 930 maxits = maxit 931 er1 = glrdif(u1,e1,n) 932 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 933 934 if (test.eq.1.and.nid.eq.0) then 935 if (maxit>=100) then 936 write(6,*) "UNCONVERGED CG" 937 endif 938 if (dabs(er1)>1e-5) then 939 write(6,*) "ERROR IS TOO LARGE" 940 endif 941 endif 942 943 nx = nx1-1 944 nnode = nelgt ! nnodes 945 nnode = nnode*(nx**ldim) ! nnodes 946 nppp = nnode/np ! nnodes/proc 947 948 dofps = nnode/telaps ! DOF/sec - scalar form 949 titers = telaps/maxits ! time per iteration 950 tppp_s = titers/nppp ! time per iteraton per local point 951 952 if (nid.eq.0) write(6,1) 'case scalar:' 953 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 954 955C Solve Nek5000 algebraic RHS 956 if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS" 957 maxit = 100 958 tstart = dnekclock() 959 call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_mass, 960 $ vec_p1,vec_ap1,maxit,'bp1') 961 tstop = dnekclock() 962 963C Output 964 telaps = (tstop-tstart) 965 maxits = maxit 966 er1 = glrdif(u1,e1,n) 967 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 968 969 if (test.eq.1.and.nid.eq.0) then 970 if (maxit>=100) then 971 write(6,*) "UNCONVERGED CG" 972 endif 973 if (dabs(er1)>1e-5) then 974 write(6,*) "ERROR IS TOO LARGE" 975 endif 976 endif 977 978 nx = nx1-1 979 nnode = nelgt ! nnodes 980 nnode = nnode*(nx**ldim) ! nnodes 981 nppp = nnode/np ! nnodes/proc 982 983 dofps = nnode/telaps ! DOF/sec - scalar form 984 titers = telaps/maxits ! time per iteration 985 tppp_s = titers/nppp ! time per iteraton per local point 986 987 if (nid.eq.0) write(6,1) 'case scalar:' 988 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 989 990 1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5) 991 3 format(i3,i9,e12.4,1x,a8,i9) 992 993C Destroy ceed handles 994 call ceedvectordestroy(vec_p1,err) 995 call ceedvectordestroy(vec_ap1,err) 996 call ceedvectordestroy(vec_rhs,err) 997 call ceedvectordestroy(vec_qdata,err) 998 call ceedvectordestroy(vec_coords,err) 999 call ceedelemrestrictiondestroy(erstrctu,err) 1000 call ceedelemrestrictiondestroy(erstrctx,err) 1001 call ceedelemrestrictiondestroy(erstrctw,err) 1002 call ceedbasisdestroy(basisu,err) 1003 call ceedbasisdestroy(basisx,err) 1004 call ceedqfunctiondestroy(qf_setup,err) 1005 call ceedqfunctiondestroy(qf_mass,err) 1006 call ceedoperatordestroy(op_setup,err) 1007 call ceedoperatordestroy(op_mass,err) 1008 call ceeddestroy(ceed,err) 1009 1010 return 1011 end 1012C----------------------------------------------------------------------- 1013 subroutine bp3 1014C Solution to BP3 using libCEED 1015 include 'SIZE' 1016 include 'TOTAL' 1017 include 'CTIMER' ! ifsync 1018 include 'FDMH1' 1019 include 'ceedf.h' 1020 1021 parameter (lzq=lx1+1) 1022 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 1023 common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq) 1024 1025 parameter (lt=lx1*ly1*lz1*lelt) 1026 parameter (ld=lxd*lyd*lzd*lelt) 1027 common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt) 1028 common /vcrny/ e1(lt) 1029 common /vcrvh/ h1(lt),h2(ld),pap(3) 1030 real*8 coords(ldim*lx*lelt) 1031 1032 logical ifh3 1033 integer*8 nnode 1034 integer ceed,err,test 1035 character*64 spec 1036 1037 integer p,q,ncompx,ncompu,enode,lnode 1038 integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs 1039 integer stridesu(3),stridesx(3),stridesw(3) 1040 integer erstrctu,erstrctx,erstrctw 1041 integer basisu,basisx 1042 integer qf_diffusion,qf_setup 1043 integer op_diffusion,op_setup 1044 integer ii,i,ngeo 1045 real*8 x,y,z 1046 integer*8 offset 1047 1048 external diffusionf,diffsetupf 1049 1050 ifield = 1 1051 nxq = nx1+1 1052 n = nx1*ny1*nz1*nelt 1053 1054 ifsync = .false. 1055 1056C Set up coordinates and mask 1057 ii=0 1058 do j=0,nelt-1 1059 do i=1,lx 1060 ii=ii+1 1061 x = xm1(ii,1,1,1) 1062 y = ym1(ii,1,1,1) 1063 z = zm1(ii,1,1,1) 1064 coords(i+0*lx+3*j*lx)=x 1065 coords(i+1*lx+3*j*lx)=y 1066 coords(i+2*lx+3*j*lx)=z 1067 if ( x.eq.0.or.x.eq.1 1068 $ .or.y.eq.0.or.y.eq.1 1069 $ .or.z.eq.0.or.z.eq.1 ) then 1070 h2(ii)=0. 1071 else 1072 h2(ii)=1. 1073 endif 1074 enddo 1075 enddo 1076 1077C Init ceed library 1078 call get_spec(spec) 1079 call ceedinit(trim(spec)//char(0),ceed,err) 1080 1081 call get_test(test) 1082 1083C Set up Nek geometry data 1084 call geodatq (gf,bmq,w3mq,nxq) ! Set up gf() arrays 1085 1086C Set up true soln 1087 call sin_fld_h1 (e1) 1088 call xmask1 (e1,h2,nelt) 1089 call copy (h1,e1,n) ! Save exact soln in h1 1090 1091C Set up solver parameters 1092 tol = 1e-10 1093 param(22) = tol 1094 maxit = 100 1095 1096 call nekgsync() 1097 1098C Create ceed basis for mesh and computation 1099 p=nx1 1100 q=p+1 1101 ncompu=1 1102 ncompx=ldim 1103 call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompx,p,q, 1104 $ ceed_gauss,basisx,err) 1105 call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompu,p,q, 1106 $ ceed_gauss,basisu,err) 1107 1108C Create ceed element restrictions for mesh and computation 1109 enode=p**ldim 1110 lnode=enode*nelt*ncompu 1111 ngeo=(ldim*(ldim+1))/2 1112 stridesx=[1,enode,enode*ldim] 1113 call ceedelemrestrictioncreatestrided(ceed,nelt,enode,lnode, 1114 $ ldim,stridesx,erstrctx,err) 1115 stridesu=[1,enode,enode*ncompu] 1116 call ceedelemrestrictioncreatestrided(ceed,nelt,enode,lnode, 1117 $ ncompu,stridesu,erstrctu,err) 1118 stridesw=[1,q**ldim,ngeo*q**ldim] 1119 call ceedelemrestrictioncreatestrided(ceed,nelt,q**ldim, 1120 $ nelt*q**ldim,ngeo,stridesw,erstrctw,err) 1121 1122C Create ceed vectors 1123 call ceedvectorcreate(ceed,lnode,vec_p1,err) 1124 call ceedvectorcreate(ceed,lnode,vec_ap1,err) 1125 call ceedvectorcreate(ceed,lnode,vec_rhs,err) 1126 call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err) 1127 call ceedvectorcreate(ceed,nelt*ngeo*q**ldim,vec_qdata,err) 1128 1129 offset=0 1130 call ceedvectorsetarray(vec_coords,ceed_mem_host, 1131 $ ceed_use_pointer,coords,offset,err) 1132 1133C Create ceed qfunctions for diffsetupf and diffusionf 1134 call ceedqfunctioncreateinterior(ceed,1,diffsetupf, 1135 $ EXAMPLE_DIR 1136 $ //'bps/bps.h:diffsetupf'//char(0),qf_setup,err) 1137 call ceedqfunctionaddinput(qf_setup,'x',ncompx, 1138 $ ceed_eval_interp,err) 1139 call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim, 1140 $ ceed_eval_grad,err) 1141 call ceedqfunctionaddinput(qf_setup,'weight',ncompu, 1142 $ ceed_eval_weight,err) 1143 call ceedqfunctionaddoutput(qf_setup,'qdata',ngeo, 1144 $ ceed_eval_none,err) 1145 call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu, 1146 $ ceed_eval_interp,err) 1147 1148 call ceedqfunctioncreateinterior(ceed,1,diffusionf, 1149 $ EXAMPLE_DIR 1150 $ //'bps/bps.h:diffusionf'//char(0),qf_diffusion,err) 1151 call ceedqfunctionaddinput(qf_diffusion,'u',ncompu*ldim, 1152 $ ceed_eval_grad,err) 1153 call ceedqfunctionaddinput(qf_diffusion,'qdata',ngeo, 1154 $ ceed_eval_none,err) 1155 call ceedqfunctionaddoutput(qf_diffusion,'v',ncompu*ldim, 1156 $ ceed_eval_grad,err) 1157 1158C Create ceed operators 1159 call ceedoperatorcreate(ceed,qf_setup, 1160 $ ceed_qfunction_none,ceed_qfunction_none,op_setup,err) 1161 call ceedoperatorsetfield(op_setup,'x',erstrctx, 1162 $ basisx,ceed_vector_active,err) 1163 call ceedoperatorsetfield(op_setup,'dx',erstrctx, 1164 $ basisx,ceed_vector_active,err) 1165 call ceedoperatorsetfield(op_setup,'weight', 1166 $ ceed_elemrestriction_none,basisx,ceed_vector_none,err) 1167 call ceedoperatorsetfield(op_setup,'qdata',erstrctw, 1168 $ ceed_basis_collocated,ceed_vector_active,err) 1169 call ceedoperatorsetfield(op_setup,'rhs',erstrctu, 1170 $ basisu,vec_rhs,err) 1171 1172 call ceedoperatorcreate(ceed,qf_diffusion, 1173 $ ceed_qfunction_none,ceed_qfunction_none,op_diffusion,err) 1174 call ceedoperatorsetfield(op_diffusion,'u',erstrctu, 1175 $ basisu,ceed_vector_active,err) 1176 call ceedoperatorsetfield(op_diffusion,'qdata',erstrctw, 1177 $ ceed_basis_collocated,vec_qdata,err) 1178 call ceedoperatorsetfield(op_diffusion,'v',erstrctu, 1179 $ basisu,ceed_vector_active,err) 1180 1181C Compute setup data 1182 call ceedvectorsetarray(vec_rhs,ceed_mem_host, 1183 $ ceed_use_pointer,r1,offset,err) 1184 call ceedoperatorapply(op_setup,vec_coords,vec_qdata, 1185 $ ceed_request_immediate,err) 1186 1187C Set up true RHS 1188 call dssum (r1,nx1,ny1,nz1) ! r1 1189 call xmask1 (r1,h2,nelt) 1190 1191C Set up algebraic RHS with libCEED 1192 call ceedvectorsetarray(vec_p1,ceed_mem_host, 1193 $ ceed_use_pointer,h1,offset,err) 1194 call ceedvectorsetarray(vec_ap1,ceed_mem_host, 1195 $ ceed_use_pointer,r2,offset,err) 1196 call ceedoperatorapply(op_diffusion,vec_p1,vec_ap1, 1197 $ ceed_request_immediate,err) ! r2 = A_ceed*h1 1198 call dssum (r2,nx1,ny1,nz1) 1199 call xmask1 (r2,h2,nelt) 1200 1201C Set up algebraic RHS with Nek5000 1202 call axhm1 (pap,r3,h1,h1,h2,'bp3') ! r3 = A_nek5k*h1 1203 call dssum (r3,nx1,ny1,nz1) 1204 call xmask1 (r3,h2,nelt) 1205 1206 call nekgsync() 1207 1208C Solve true RHS 1209 if (nid.eq.0) write (6,*) "libCEED true RHS" 1210 tstart = dnekclock() 1211 call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1212 $ vec_p1,vec_ap1,maxit,'bp3') 1213 tstop = dnekclock() 1214 1215C Output 1216 telaps = (tstop-tstart) 1217 maxits = maxit 1218 er1 = glrdif(u1,e1,n) 1219 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1220 1221 if (test.eq.1.and.nid.eq.0) then 1222 if (maxit>=100) then 1223 write(6,*) "UNCONVERGED CG" 1224 endif 1225 if (dabs(er1)>1e-3) then 1226 write(6,*) "ERROR IS TOO LARGE" 1227 endif 1228 endif 1229 1230 nx = nx1-1 1231 nnode = nelgt ! nnodes 1232 nnode = nnode*(nx**ldim) ! nnodes 1233 nppp = nnode/np ! nnodes/proc 1234 1235 dofps = nnode/telaps ! DOF/sec - scalar form 1236 titers = telaps/maxits ! time per iteration 1237 tppp_s = titers/nppp ! time per iteraton per local point 1238 1239 if (nid.eq.0) write(6,1) 'case scalar:' 1240 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 1241 1242C Solve libCEED algebraic RHS 1243 if (nid.eq.0) write (6,*) "libCEED algebraic RHS" 1244 maxit = 100 1245 tstart = dnekclock() 1246 call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1247 $ vec_p1,vec_ap1,maxit,'bp3') 1248 tstop = dnekclock() 1249 1250C Output 1251 telaps = (tstop-tstart) 1252 maxits = maxit 1253 er1 = glrdif(u1,e1,n) 1254 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1255 1256 if (test.eq.1.and.nid.eq.0) then 1257 if (maxit>=100) then 1258 write(6,*) "UNCONVERGED CG" 1259 endif 1260 if (dabs(er1)>1e-9) then 1261 write(6,*) "ERROR IS TOO LARGE" 1262 endif 1263 endif 1264 1265 nx = nx1-1 1266 nnode = nelgt ! nnodes 1267 nnode = nnode*(nx**ldim) ! nnodes 1268 nppp = nnode/np ! nnodes/proc 1269 1270 dofps = nnode/telaps ! DOF/sec - scalar form 1271 titers = telaps/maxits ! time per iteration 1272 tppp_s = titers/nppp ! time per iteraton per local point 1273 1274 if (nid.eq.0) write(6,1) 'case scalar:' 1275 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 1276 1277C Solve Nek5000 algebraic RHS 1278 if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS" 1279 maxit = 100 1280 tstart = dnekclock() 1281 call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1282 $ vec_p1,vec_ap1,maxit,'bp3') 1283 tstop = dnekclock() 1284 1285C Output 1286 telaps = (tstop-tstart) 1287 maxits = maxit 1288 er1 = glrdif(u1,e1,n) 1289 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1290 1291 if (test.eq.1.and.nid.eq.0) then 1292 if (maxit>=100) then 1293 write(6,*) "UNCONVERGED CG" 1294 endif 1295 if (dabs(er1)>1e-9) then 1296 write(6,*) "ERROR IS TOO LARGE" 1297 endif 1298 endif 1299 1300 nx = nx1-1 1301 nnode = nelgt ! nnodes 1302 nnode = nnode*(nx**ldim) ! nnodes 1303 nppp = nnode/np ! nnodes/proc 1304 1305 dofps = nnode/telaps ! DOF/sec - scalar form 1306 titers = telaps/maxits ! time per iteration 1307 tppp_s = titers/nppp ! time per iteraton per local point 1308 1309 if (nid.eq.0) write(6,1) 'case scalar:' 1310 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 1311 1312 1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5) 1313 3 format(i3,i9,e12.4,1x,a8,i9) 1314 1315C Destroy ceed handles 1316 call ceedvectordestroy(vec_p1,err) 1317 call ceedvectordestroy(vec_ap1,err) 1318 call ceedvectordestroy(vec_rhs,err) 1319 call ceedvectordestroy(vec_qdata,err) 1320 call ceedvectordestroy(vec_coords,err) 1321 call ceedelemrestrictiondestroy(erstrctu,err) 1322 call ceedelemrestrictiondestroy(erstrctx,err) 1323 call ceedelemrestrictiondestroy(erstrctw,err) 1324 call ceedbasisdestroy(basisu,err) 1325 call ceedbasisdestroy(basisx,err) 1326 call ceedqfunctiondestroy(qf_setup,err) 1327 call ceedqfunctiondestroy(qf_diffusion,err) 1328 call ceedoperatordestroy(op_setup,err) 1329 call ceedoperatordestroy(op_diffusion,err) 1330 call ceeddestroy(ceed,err) 1331 1332 return 1333 end 1334C----------------------------------------------------------------------- 1335 subroutine cggos(u1,r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1, 1336 $ vec_ap1,maxit,bpname) 1337C Scalar conjugate gradient iteration for solution of uncoupled 1338C Helmholtz equations 1339C Input: r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,vec_ap1,bpname 1340C Output: u1,maxit 1341 include 'SIZE' 1342 include 'TOTAL' 1343 include 'DOMAIN' 1344 include 'FDMH1' 1345 character*3 bpname 1346 1347C INPUT: rhs1 - rhs 1348C h1 - exact solution 1349 1350 parameter (lt=lx1*ly1*lz1*lelt) 1351 parameter (ld=lxd*lyd*lzd*lelt) 1352 real*8 u1(lt),r1(lt),h1(lt),h2(lt) 1353 real*8 rmult(1),binv(1) 1354 integer ceed,ceed_op,vec_ap1,vec_p1 1355 common /scrcg/ dpc(lt),p1(lt),z1(lt) 1356 common /scrca/ wv(4),wk(4),rpp1(4),rpp2(4),alph(4),beta(4),pap(4) 1357 1358 real*8 ap1(lt) 1359 equivalence (ap1,z1) 1360 1361 vol = volfld(ifield) 1362 nel = nelfld(ifield) 1363 nxyz = lx1*ly1*lz1 1364 n = nxyz*nel 1365 nx = nx1-1 ! Polynomial order (just for i/o) 1366 1367 tol=tin 1368 1369 if(bpname.ne.'bp1') then 1370 call setprecn_bp3(dpc,h1,h2) ! Set up diagional pre-conidtioner 1371 else 1372 call setprecn_bp1(dpc,h1,h2) ! Set up diagional pre-conidtioner 1373 endif 1374 1375 call rzero (u1,n) ! Initialize solution 1376 1377 wv(1)=0 1378 do i=1,n 1379 s=rmult(i) ! -1 1380 p1(i)=dpc(i)*r1(i) ! p = M r T 1381 wv(1)=wv(1)+s*p1(i)*r1(i) ! r p 1382 enddo 1383 call gop(wv(1),wk,'+ ',1) 1384 rpp1(1) = wv (1) 1385 1386 do 1000 iter=1,maxit 1387 call axhm1_ceed (pap,ap1,p1,h1,h2,ceed,ceed_op, 1388 $ vec_ap1,vec_p1) 1389 call dssum (ap1,nx1,ny1,nz1) 1390 if (bpname.ne.'bp1') call xmask1(ap1,h2,nel) 1391 1392 call gop (pap,wk,'+ ',1) 1393 alph(1) = rpp1(1)/pap(1) 1394 1395 do i=1,n 1396 u1(i)=u1(i)+alph(1)* p1(i) 1397 r1(i)=r1(i)-alph(1)*ap1(i) 1398 enddo 1399 1400C tolerance check here 1401 call rzero(wv,2) 1402 do i=1,n 1403 wv(1)=wv(1)+r1(i)*r1(i) ! L2 error estimate 1404 z1(i)=dpc(i)*r1(i) ! z = M r 1405 wv(2)=wv(2)+rmult(i)*z1(i)*r1(i) ! r z 1406 enddo 1407 call gop(wv,wk,'+ ',2) 1408 1409C if (nio.eq.0) write(6,1) ifield,istep,iter,nx,(wv(k),k=1,1) 1410 1 format(i2,i9,i5,i4,1p1e12.4,' cggos') 1411 1412 enorm=sqrt(wv(1)) 1413 if (enorm.lt.tol) then 1414 ifin = iter 1415 if (nio.eq.0) write(6,3000) istep,ifin,enorm,tol 1416 goto 9999 1417 endif 1418C if (nio.eq.0) write(6,2) iter,enorm,alph(1),pap(1),'alpha' 1419 2 format(i5,1p3e12.4,2x,a5) 1420 1421 rpp2(1)=rpp1(1) 1422 rpp1(1)=wv (2) 1423 beta1 =rpp1(1)/rpp2(1) 1424 do i=1,n 1425 p1(i)=z1(i) + beta1*p1(i) 1426 enddo 1427 1428 1000 continue 1429 1430 rbnorm=sqrt(wv(1)) 1431 if (nio.eq.0) write (6,3001) istep,iter,rbnorm,tol 1432 iter = iter-1 1433 1434 9999 continue 1435 1436 maxit=iter 1437 1438 3000 format(i12,1x,'cggo scalar:',i6,1p5e13.4) 1439 3001 format(2i6,' Unconverged cggo scalar: rbnorm =',1p2e13.6) 1440 1441 return 1442 end 1443C----------------------------------------------------------------------- 1444 subroutine axhm1_ceed(pap,ap1,p1,h1,h2,ceed,ceed_op, 1445 $ vec_ap1,vec_p1) 1446C Vector conjugate gradient matvec for solution of uncoupled 1447C Helmholtz equations 1448C Input: pap,p1,h1,h2,bpname,ceed,ceed_op,vec_ap1,vec_p1 1449C Output: ap1 1450 include 'SIZE' 1451 include 'TOTAL' 1452 include 'ceedf.h' 1453 1454 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1455 real*8 gf(lg,lx,lelt) ! Equivalence new gf() data 1456 equivalence (gf,g1m1) ! layout to g1m1...g6m1 1457 1458 real*8 pap(3) 1459 real*8 ap1(lx,lelt) 1460 real*8 p1(lx,lelt) 1461 real*8 h1(lx,lelt),h2(lx,lelt) 1462 integer ceed,ceed_op,vec_ap1,vec_p1,err 1463 integer i,e 1464 integer*8 offset 1465 1466 offset=0 1467 call ceedvectorsetarray(vec_p1,ceed_mem_host,ceed_use_pointer, 1468 $ p1,offset,err) 1469 call ceedvectorsetarray(vec_ap1,ceed_mem_host,ceed_use_pointer, 1470 $ ap1,offset,err) 1471 1472 call ceedoperatorapply(ceed_op,vec_p1,vec_ap1, 1473 $ ceed_request_immediate,err) 1474 1475 call ceedvectorsyncarray(vec_ap1,ceed_mem_host,err) 1476 1477 pap(1)=0. 1478 1479 do e=1,nelt 1480 do i=1,lx 1481 pap(1)=pap(1)+ap1(i,e)*p1(i,e) 1482 enddo 1483 enddo 1484 1485 return 1486 end 1487C----------------------------------------------------------------------- 1488 subroutine ax_e_bp1(w,u,g,h1,h2,b,ju,us,ut) 1489C Local matrix-vector for solution of BP3 (stiffness matrix) 1490C Input: u,g,h1,h2,b,ju,us,ut Output: w 1491 include 'SIZE' 1492 include 'TOTAL' 1493 1494 parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1495 real*8 w(lxyz),u(lxyz),g(lg,lxyz),h1(lxyz),h2(lxyz),b(lxyz) 1496 real*8 ju(lxyz),us(lxyz),ut(lxyz) 1497 1498 nxq = nx1+1 ! Number of quadrature points 1499 1500 lxyzq = nxq**ldim 1501 1502 call intp_rstd (ju,u,lx1,nxq,if3d,0) ! 0 --> Fwd interpolation 1503 do i=1,lxyzq 1504 ju(i)=ju(i)*h2(i) !! h2 must be on the fine grid, w/ quad wts 1505 enddo 1506 call intp_rstd (w,ju,lx1,nxq,if3d,1) ! 1 --> ju-->u 1507 1508 return 1509 end 1510C----------------------------------------------------------------------- 1511 subroutine axhm1_bp1(pap,ap1,p1,h1,h2) 1512C Vector conjugate gradient matvec for solution of BP1 (mass matrix) 1513C Input: pap,p1,h1,h2 Output: ap1 1514 include 'SIZE' 1515 include 'TOTAL' 1516 1517 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1518 real*8 gf(lg,lx,lelt) ! Equivalence new gf() data 1519 equivalence (gf,g1m1) ! layout to g1m1...g6m1 1520 1521 real*8 pap(3) 1522 real*8 ap1(lx,lelt) 1523 real*8 p1(lx,lelt) 1524 real*8 h1(lx,lelt),h2(lx,lelt) 1525 1526 real*8 ur(lx),us(lx),ut(lx) 1527 common /ctmp1/ ur,us,ut 1528 1529 integer e 1530 1531 pap(1)=0. 1532 1533 k=1 1534 nxq = nx1+1 1535 1536 do e=1,nelt 1537 1538 call ax_e_bp1(ap1(1,e),p1(1,e),gf(1,1,e),h1(1,e),h2(k,1) 1539 $ ,bm1(1,1,1,e),ur,us,ut) 1540 do i=1,lx 1541 pap(1)=pap(1)+ap1(i,e)*p1(i,e) 1542 enddo 1543 k=k+nxq*nxq*nxq 1544 1545 enddo 1546 1547 return 1548 end 1549C----------------------------------------------------------------------- 1550 subroutine ax_e_bp3(w,u,g,ur,us,ut,wk) 1551C Local matrix-vector for solution of BP3 (stiffness matrix) 1552C Input: u,g,ur,us,ut,wk Output: w 1553 include 'SIZE' 1554 include 'TOTAL' 1555 1556 parameter (lzq=lx1+1,lxyz=lx1*lx1*lx1,lxyzq=lzq*lzq*lzq) 1557 1558 common /ctmp0/ tmp(lxyzq) 1559 common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq) 1560 1561 real*8 ur(lxyzq),us(lxyzq),ut(lxyzq),wk(lxyzq) 1562 real*8 w(lxyz),u(lxyz),g(2*ldim,lxyzq) 1563 1564 n = lzq-1 1565 1566 call intp_rstd (wk,u,lx1,lzq,if3d,0) ! 0 --> Fwd interpolation 1567 call loc_grad3 (ur,us,ut,wk,n,dxmq,dxtmq) 1568 1569 do i=1,lxyzq 1570 wr = g(1,i)*ur(i) + g(2,i)*us(i) + g(3,i)*ut(i) 1571 ws = g(2,i)*ur(i) + g(4,i)*us(i) + g(5,i)*ut(i) 1572 wt = g(3,i)*ur(i) + g(5,i)*us(i) + g(6,i)*ut(i) 1573 ur(i) = wr 1574 us(i) = ws 1575 ut(i) = wt 1576 enddo 1577 1578 call loc_grad3t (wk,ur,us,ut,n,dxmq,dxtmq,tmp) 1579 call intp_rstd (w,wk,lx1,lzq,if3d,1) ! 1 --> ju-->u 1580 1581 return 1582 end 1583C----------------------------------------------------------------------- 1584 subroutine axhm1_bp3(pap,ap1,p1,h1,h2) 1585C Vector conjugate gradient matvec for solution of BP3 (stiffness matrix) 1586C Input: pap,p1,h1,h2 Output: ap1 1587 include 'SIZE' 1588 include 'TOTAL' 1589 1590 parameter (lzq=lx1+1) 1591 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 1592 common /bpgfactors/ gf(lg,lq,lelt),bmq(lq,lelt),w3mq(lq) 1593 1594 real*8 pap(3) 1595 real*8 ap1(lx,lelt) 1596 real*8 p1(lx,lelt) 1597 real*8 h1(lx,lelt),h2(lx,lelt) 1598 1599 common /ctmp1/ ur,us,ut,wk 1600 real*8 ur(lq),us(lq),ut(lq),wk(lq) 1601 1602 integer e 1603 1604 pap(1)=0. 1605 1606 do e=1,nelt 1607 1608 call ax_e_bp3(ap1(1,e),p1(1,e),gf(1,1,e),ur,us,ut,wk) 1609 do i=1,lx 1610 pap(1)=pap(1)+p1(i,e)*ap1(i,e) 1611 enddo 1612 1613 enddo 1614 1615 return 1616 end 1617C----------------------------------------------------------------------- 1618 subroutine axhm1(pap,ap1,p1,h1,h2,bpname) 1619C Vector conjugate gradient matvec for solution of uncoupled 1620C Helmholtz equations 1621C Input: pap,p1,h1,h2,bpname Output: ap1 1622 include 'SIZE' 1623 include 'TOTAL' 1624 1625 parameter (lx=lx1*ly1*lz1) 1626 1627 real*8 pap(3),ap1(lx,lelt),p1(lx,lelt) 1628 real*8 h1(lx,lelt),h2(lx,lelt) 1629 1630 character*3 bpname 1631 1632 if (bpname.eq.'bp1') then 1633 call axhm1_bp1(pap,ap1,p1,h1,h2) 1634 1635 elseif (bpname.eq.'bp3') then 1636 call axhm1_bp3(pap,ap1,p1,h1,h2) 1637 1638 else 1639 write(6,*) bpname,' axhm1 bpname error' 1640 stop 1641 1642 endif 1643 1644 return 1645 end 1646C----------------------------------------------------------------------- 1647 subroutine get_bp(bp) 1648C Get BP to run 1649C Input: Output: bp 1650 integer i,bp 1651 character*64 bpval 1652 1653 bp=0 1654 if(iargc().ge.1) then 1655 call getarg(1,bpval) 1656 endif 1657 if(bpval.eq."bp1") then 1658 bp=1 1659 elseif(bpval.eq."bp3") then 1660 bp=3 1661 endif 1662 1663 return 1664 end 1665C----------------------------------------------------------------------- 1666 subroutine get_spec(spec) 1667C Get CEED backend specification 1668C Input: Output: spec 1669 integer i 1670 character*64 spec 1671 1672 spec = '/cpu/self' 1673 if(iargc().ge.2) then 1674 call getarg(2,spec) 1675 endif 1676 1677 return 1678 end 1679C----------------------------------------------------------------------- 1680 subroutine get_test(test) 1681C Get test mode flag 1682C Input: Output: test 1683 integer i,test 1684 character*64 testval 1685 1686 test=0 1687 if(iargc().ge.3) then 1688 call getarg(3,testval) 1689 endif 1690 if(testval.eq."test") then 1691 test=1 1692 endif 1693 1694 return 1695 end 1696C----------------------------------------------------------------------- 1697 1698