1C Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2C the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3C reserved. See files LICENSE and NOTICE for details. 4C 5C This file is part of CEED, a collection of benchmarks, miniapps, software 6C libraries and APIs for efficient high-order finite element and spectral 7C element discretizations for exascale applications. For more information and 8C source code availability see http://github.com/ceed. 9C 10C The CEED research is supported by the Exascale Computing Project (17-SC-20-SC) 11C a collaborative effort of two U.S. Department of Energy organizations (Office 12C of Science and the National Nuclear Security Administration) responsible for 13C the planning and preparation of a capable exascale ecosystem, including 14C software, applications, hardware, advanced system engineering and early 15C testbed platforms, in support of the nation's exascale computing imperative. 16 17C> @file 18C> Mass and diffusion operators examples using Nek5000 19C TESTARGS -c {ceed_resource} -e bp1 -n 1 -b 4 -test 20 21C----------------------------------------------------------------------- 22 subroutine masssetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 23 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 24 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 25C Set up mass operator 26C Input: u1,u2,u3,q Output: v1,v2,ierr 27 integer q,ierr 28 real*8 ctx(1) 29 real*8 u1(3*q) 30 real*8 u2(9*q) 31 real*8 u3(q) 32 real*8 v1(q) 33 real*8 v2(q) 34 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 35 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 36 real*8 jacmq 37 38C Quadrature Point Loop 39 do i=1,q 40 a11=u2(i+q*0) 41 a12=u2(i+q*3) 42 a13=u2(i+q*6) 43 44 a21=u2(i+q*1) 45 a22=u2(i+q*4) 46 a23=u2(i+q*7) 47 48 a31=u2(i+q*2) 49 a32=u2(i+q*5) 50 a33=u2(i+q*8) 51 52 g11 = (a22*a33-a23*a32) 53 g12 = (a13*a32-a33*a12) 54 g13 = (a12*a23-a22*a13) 55 56 g21 = (a23*a31-a21*a33) 57 g22 = (a11*a33-a31*a13) 58 g23 = (a13*a21-a23*a11) 59 60 g31 = (a21*a32-a22*a31) 61 g32 = (a12*a31-a32*a11) 62 g33 = (a11*a22-a21*a12) 63 64 jacmq = a11*g11+a21*g12+a31*g13 65 66C Rho 67 v1(i)=u3(i)*jacmq 68 69C RHS 70 v2(i)=u3(i)*jacmq 71 $ *dsqrt(u1(i+q*0)*u1(i+q*0) 72 $ +u1(i+q*1)*u1(i+q*1) 73 $ +u1(i+q*2)*u1(i+q*2)) 74 enddo 75 76 ierr=0 77 end 78C----------------------------------------------------------------------- 79 subroutine massf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 80 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 81 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 82C Apply mass operator 83C Input: u1,u2,q Output: v1,ierr 84 integer q,ierr 85 real*8 ctx(1) 86 real*8 u1(q) 87 real*8 u2(q) 88 real*8 v1(q) 89 90C Quadrature Point Loop 91 do i=1,q 92 v1(i)=u2(i)*u1(i) 93 enddo 94 95 ierr=0 96 end 97C----------------------------------------------------------------------- 98 subroutine diffsetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 99 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 100 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 101C Set up diffusion operator 102C Input: u1,u2,u3,q Output: v1,v2,ierr 103 integer q,ierr 104 real*8 ctx(1) 105 real*8 u1(3*q) 106 real*8 u2(9*q) 107 real*8 u3(q) 108 real*8 v1(6*q) 109 real*8 v2(q) 110 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 111 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 112 real*8 jacmq,scl 113 real*8 c(3),k(3) 114 115C Quadrature Point Loop 116 do i=1,q 117 pi = 3.14159265358979323846 118 119 c(1)=0. 120 c(2)=1. 121 c(3)=2. 122 k(1)=1. 123 k(2)=2. 124 k(3)=3. 125 126 a11=u2(i+q*0) 127 a12=u2(i+q*3) 128 a13=u2(i+q*6) 129 130 a21=u2(i+q*1) 131 a22=u2(i+q*4) 132 a23=u2(i+q*7) 133 134 a31=u2(i+q*2) 135 a32=u2(i+q*5) 136 a33=u2(i+q*8) 137 138 g11 = (a22*a33-a23*a32) 139 g12 = (a13*a32-a33*a12) 140 g13 = (a12*a23-a22*a13) 141 142 g21 = (a23*a31-a21*a33) 143 g22 = (a11*a33-a31*a13) 144 g23 = (a13*a21-a23*a11) 145 146 g31 = (a21*a32-a22*a31) 147 g32 = (a12*a31-a32*a11) 148 g33 = (a11*a22-a21*a12) 149 150 jacmq = a11*g11+a21*g12+a31*g13 151 152 scl = u3(i)/jacmq 153 154C Geometric factors 155 v1(i+0*q) = scl*(g11*g11+g12*g12+g13*g13) ! Grr 156 v1(i+1*q) = scl*(g11*g21+g12*g22+g13*g23) ! Grs 157 v1(i+2*q) = scl*(g11*g31+g12*g32+g13*g33) ! Grt 158 v1(i+3*q) = scl*(g21*g21+g22*g22+g23*g23) ! Gss 159 v1(i+4*q) = scl*(g21*g31+g22*g32+g23*g33) ! Gst 160 v1(i+5*q) = scl*(g31*g31+g32*g32+g33*g33) ! Gtt 161 162C RHS 163 v2(i) = u3(i)*jacmq*pi*pi 164 $ *dsin(pi*(c(1)+k(1)*u1(i+0*q))) 165 $ *dsin(pi*(c(2)+k(2)*u1(i+1*q))) 166 $ *dsin(pi*(c(3)+k(3)*u1(i+2*q))) 167 $ *(k(1)*k(1)+k(2)*k(2)+k(3)*k(3)) 168 169 enddo 170 171 ierr=0 172 end 173C----------------------------------------------------------------------- 174 subroutine diffusionf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 175 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 176 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 177C Apply diffusion operator 178C Input: u1,u2,q Output: v1,ierr 179 integer q,ierr 180 real*8 ctx(1) 181 real*8 u1(3*q) 182 real*8 u2(6*q) 183 real*8 v1(3*q) 184 185C Quadrature Point Loop 186 do i=1,q 187 v1(i+0*q)= 188 $ u2(i+0*q)*u1(i)+u2(i+1*q)*u1(i+q)+u2(i+2*q)*u1(i+2*q) 189 v1(i+1*q)= 190 $ u2(i+1*q)*u1(i)+u2(i+3*q)*u1(i+q)+u2(i+4*q)*u1(i+2*q) 191 v1(i+2*q)= 192 $ u2(i+2*q)*u1(i)+u2(i+4*q)*u1(i+q)+u2(i+5*q)*u1(i+2*q) 193 enddo 194 195 ierr=0 196 end 197C----------------------------------------------------------------------- 198 subroutine set_h2_as_rhoJac_GL(h2,bmq,nxq) 199C Set h2 as rhoJac 200C Input: bmq,nxq Output: h2 201 include 'SIZE' 202 real*8 h2(1),bmq(1) 203 204 common /ctmp77/ wd(lxd),zd(lxd) 205 integer e,i,L 206 207 call zwgl(zd,wd,nxq) ! nxq = number of points 208 209 q = 1.0 ! Later, this can be a function of position... 210 211 L = 0 212 do e=1,lelt 213 do i=1,nxq**ldim 214 L=L+1 215 h2(L) = q*bmq(L) 216 enddo 217 enddo 218 219 return 220 end 221C----------------------------------------------------------------------- 222 subroutine dist_fld_h1(e) 223C Set distance initial condition for BP1 224C Input: Output: e 225 include 'SIZE' 226 include 'TOTAL' 227 real*8 x,y,z 228 real*8 e(1) 229 230 n=lx1*ly1*lz1*nelt 231 232 do i=1,n 233 x=xm1(i,1,1,1) 234 y=ym1(i,1,1,1) 235 z=zm1(i,1,1,1) 236 237 e(i) = dsqrt(x*x+y*y+z*z) 238 enddo 239 240 call dsavg(e) ! This is requisite for random fields 241 242 return 243 end 244C----------------------------------------------------------------------- 245 subroutine sin_fld_h1(e) 246C Set sine initial condition for BP3 247C Input: Output: e 248 include 'SIZE' 249 include 'TOTAL' 250 real*8 x,y,z 251 real*8 e(1) 252 real*8 c(3),k(3) 253 254 n=lx1*ly1*lz1*nelt 255 pi = 3.14159265358979323846 256 257 c(1)=0. 258 c(2)=1. 259 c(3)=2. 260 k(1)=1. 261 k(2)=2. 262 k(3)=3. 263 264 do i=1,n 265 x=xm1(i,1,1,1) 266 y=ym1(i,1,1,1) 267 z=zm1(i,1,1,1) 268 269 e(i) = dsin(pi*(c(1)+k(1)*x)) 270 & *dsin(pi*(c(2)+k(2)*y)) 271 & *dsin(pi*(c(3)+k(3)*z)) 272 273 enddo 274 275 call dsavg(e) ! This is requisite for random fields 276 277 return 278 end 279C----------------------------------------------------------------------- 280 subroutine uservp(ix,iy,iz,eg) ! set variable properties 281 include 'SIZE' 282 include 'TOTAL' 283 include 'NEKUSE' 284 integer e,f,eg 285C e = gllel(eg) 286 287 udiff = 0.0 288 utrans = 0.0 289 290 return 291 end 292C----------------------------------------------------------------------- 293 subroutine userf(ix,iy,iz,eg) ! set acceleration term 294C 295C Note: this is an acceleration term, NOT a force! 296C Thus, ffx will subsequently be multiplied by rho(x,t). 297C 298 include 'SIZE' 299 include 'TOTAL' 300 include 'NEKUSE' 301 integer e,f,eg 302C e = gllel(eg) 303 304 ffx = 0.0 305 ffy = 0.0 306 ffz = 0.0 307 308 return 309 end 310C----------------------------------------------------------------------- 311 subroutine userq(i,j,k,eg) ! set source term 312 include 'SIZE' 313 include 'TOTAL' 314 include 'NEKUSE' 315 integer e,f,eg 316 e = gllel(eg) 317 318 qvol = 0 319 320 return 321 end 322C----------------------------------------------------------------------- 323 subroutine userbc(ix,iy,iz,f,eg) ! set up boundary conditions 324C NOTE ::: This subroutine MAY NOT be called by every process 325 include 'SIZE' 326 include 'TOTAL' 327 include 'NEKUSE' 328 integer e,f,eg 329 330 ux = 0.0 331 uy = 0.0 332 uz = 0.0 333 temp = 0.0 334 335 return 336 end 337C----------------------------------------------------------------------- 338 subroutine useric(ix,iy,iz,eg) ! set up initial conditions 339 include 'SIZE' 340 include 'TOTAL' 341 include 'NEKUSE' 342 integer e,f,eg 343 344 ux = 0.0 345 uy = 0.0 346 uz = 0.0 347 temp = 0.0 348 349 return 350 end 351C----------------------------------------------------------------------- 352 subroutine usrdat ! This routine to modify element vertices 353 include 'SIZE' 354 include 'TOTAL' 355 356 return 357 end 358C----------------------------------------------------------------------- 359 subroutine usrdat2 ! This routine to modify mesh coordinates 360 include 'SIZE' 361 include 'TOTAL' 362 363 x0 = 0 364 x1 = 1 365 call rescale_x(xm1,x0,x1) 366 call rescale_x(ym1,x0,x1) 367 call rescale_x(zm1,x0,x1) 368 369 param(59)=1 ! Force Nek to use the "deformed element" formulation 370 371 return 372 end 373C----------------------------------------------------------------------- 374 subroutine usrdat3 375 include 'SIZE' 376 include 'TOTAL' 377 378 return 379 end 380C----------------------------------------------------------------------- 381 subroutine xmask1 (r1,h2,nel) 382C Apply zero Dirichlet boundary conditions 383C Input: h2,nel Output: r1 384 include 'SIZE' 385 include 'TOTAL' 386 real*8 r1(1),h2(1) 387 388 n=nx1*ny1*nz1*nel 389 do i=1,n 390 r1(i)=r1(i)*h2(i) 391 enddo 392 393 return 394 end 395C----------------------------------------------------------------------- 396 function glrdif(x,y,n) 397C Compute Linfty norm of (x-y) 398C Input: x,y Output: n 399 real*8 x(n),y(n) 400 401 dmx=0 402 xmx=0 403 ymx=0 404 405 do i=1,n 406 diff=abs(x(i)-y(i)) 407 dmx =max(dmx,diff) 408 xmx =max(xmx,x(i)) 409 ymx =max(ymx,y(i)) 410 enddo 411 412 xmx = max(xmx,ymx) 413 dmx = glmax(dmx,1) ! max across processors 414 xmx = glmax(xmx,1) 415 416 if (xmx.gt.0) then 417 glrdif = dmx/xmx 418 else 419 glrdif = -dmx ! Negative indicates something strange 420 endif 421 422 return 423 end 424C----------------------------------------------------------------------- 425 subroutine loc_grad3(ur,us,ut,u,N,D,Dt) 426C 3D transpose of local gradient 427C Input: u,N,D,Dt Output: ur,us,ut 428 real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N) 429 real*8 u (0:N,0:N,0:N) 430 real*8 D (0:N,0:N),Dt(0:N,0:N) 431 432 m1 = N+1 433 m2 = m1*m1 434 435 call mxm(D ,m1,u(0,0,0),m1,ur,m2) 436 do k=0,N 437 call mxm(u(0,0,k),m1,Dt,m1,us(0,0,k),m1) 438 enddo 439 call mxm(u(0,0,0),m2,Dt,m1,ut,m1) 440 441 return 442 end 443c----------------------------------------------------------------------- 444 subroutine loc_grad3t(u,ur,us,ut,N,D,Dt,w) 445C 3D transpose of local gradient 446C Input: ur,us,ut,N,D,Dt Output: u 447 real*8 u (0:N,0:N,0:N) 448 real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N) 449 real*8 D (0:N,0:N),Dt(0:N,0:N) 450 real*8 w (0:N,0:N,0:N) 451 452 m1 = N+1 453 m2 = m1*m1 454 m3 = m1*m1*m1 455 456 call mxm(Dt,m1,ur,m1,u(0,0,0),m2) 457 do k=0,N 458 call mxm(us(0,0,k),m1,D ,m1,w(0,0,k),m1) 459 enddo 460 call add2(u(0,0,0),w,m3) 461 call mxm(ut,m2,D ,m1,w,m1) 462 call add2(u(0,0,0),w,m3) 463 464 return 465 end 466C----------------------------------------------------------------------- 467 subroutine geodatq(gf,bmq,w3mq,nxq) 468C Routine to generate elemental geometric matrices on mesh 1 469C (Gauss-Legendre Lobatto mesh). 470 include 'SIZE' 471 include 'TOTAL' 472 473 parameter (lg=3+3*(ldim-2),lzq=lx1+1,lxyd=lzq**ldim) 474 475 real*8 gf(lg,nxq**ldim,lelt),bmq(nxq**ldim,lelt),w3mq(nxq,nxq,nxq) 476 477 common /ctmp1/ xr(lxyd),xs(lxyd),xt(lxyd) 478 common /sxrns/ yr(lxyd),ys(lxyd),yt(lxyd) 479 $ , zr(lxyd),zs(lxyd),zt(lxyd) 480 481 common /ctmp77/ wd(lxd),zd(lxd) 482 common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq) 483 484 integer e 485 real*8 tmp(lxyd) 486 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 487 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 488 real*8 jacmq 489 490 if (nxq.gt.lzq) call exitti('ABORT: recompile with lzq=$',nxq) 491 492 call zwgl (zd,wd,lzq) ! nxq = number of points 493 call gen_dgl (dxmq,dxtmq,lzq,lzq,tmp) 494 495 do k=1,nxq 496 do j=1,nxq 497 do i=1,nxq 498 w3mq(i,j,k) = wd(i)*wd(j)*wd(k) 499 enddo 500 enddo 501 enddo 502 503 nxyzq = nxq**ldim 504 nxqm1 = lzq-1 505 506 do e=1,nelt 507 call intp_rstd (tmp,xm1(1,1,1,e),lx1,lzq,if3d,0) ! 0-->Fwd interpolation 508 call loc_grad3 (xr,xs,xt,tmp,nxqm1,dxmq,dxtmq) 509 510 call intp_rstd (tmp,ym1(1,1,1,e),lx1,lzq,if3d,0) 511 call loc_grad3 (yr,ys,yt,tmp,nxqm1,dxmq,dxtmq) 512 513 call intp_rstd (tmp,zm1(1,1,1,e),lx1,lzq,if3d,0) 514 call loc_grad3 (zr,zs,zt,tmp,nxqm1,dxmq,dxtmq) 515 516 do i=1,nxyzq 517 a11 = xr(i) 518 a12 = xs(i) 519 a13 = xt(i) 520 521 a21 = yr(i) 522 a22 = ys(i) 523 a23 = yt(i) 524 525 a31 = zr(i) 526 a32 = zs(i) 527 a33 = zt(i) 528 529 g11 = (a22*a33-a23*a32) 530 g12 = (a13*a32-a33*a12) 531 g13 = (a12*a23-a22*a13) 532 533 g21 = (a23*a31-a21*a33) 534 g22 = (a11*a33-a31*a13) 535 g23 = (a13*a21-a23*a11) 536 537 g31 = (a21*a32-a22*a31) 538 g32 = (a12*a31-a32*a11) 539 g33 = (a11*a22-a21*a12) 540 541 jacmq = a11*g11+a21*g12+a31*g13 542 543 bmq(i,e) = w3mq(i,1,1)*jacmq 544 scale = w3mq(i,1,1)/jacmq 545 546 gf(1,i,e) = scale*(g11*g11+g12*g12+g13*g13) ! Grr 547 gf(2,i,e) = scale*(g11*g21+g12*g22+g13*g23) ! Grs 548 gf(3,i,e) = scale*(g11*g31+g12*g32+g13*g33) ! Grt 549 gf(4,i,e) = scale*(g21*g21+g22*g22+g23*g23) ! Gss 550 gf(5,i,e) = scale*(g21*g31+g22*g32+g23*g33) ! Gst 551 gf(6,i,e) = scale*(g31*g31+g32*g32+g33*g33) ! Gtt 552 553 enddo 554 enddo 555 556 return 557 end 558C----------------------------------------------------------------------- 559 subroutine setprecn_bp1 (d,h1,h2) 560C Generate diagonal preconditioner for Helmholtz operator 561C Input: h1,h2 Output: d 562 include 'SIZE' 563 include 'TOTAL' 564 565 parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2)) 566 567 real*8 d(lx1,ly1,lz1,lelt),h1(lxyz,lelt),h2(lxyz,lelt) 568 integer e 569 570 real*8 gf(lg,lx1,ly1,lz1,lelt) ! Equivalence new gf() data 571 equivalence (gf,g1m1) ! layout to g1m1...g6m1 572 573 real*8 ysm1(ly1) 574 575 nel = nelfld(ifield) 576 n = nel*lx1*ly1*lz1 577 nxyz = lx1*ly1*lz1 578 579 call copy (d,bm1,n) ! Mass matrix preconditioning full mass matrix 580 call dssum (d,nx1,ny1,nz1) 581 call invcol1 (d,n) 582 return 583 584 call dsset(lx1,ly1,lz1) 585 586 do 1000 e=1,nel 587 588 call rzero(d(1,1,1,e),nxyz) 589 590 do 320 iz=1,lz1 591 do 320 iy=1,ly1 592 do 320 ix=1,lx1 593 do 320 iq=1,lx1 594 d(ix,iy,iz,e) = d(ix,iy,iz,e) 595 $ + gf(1,iq,iy,iz,e) * dxm1(iq,ix)**2 596 $ + gf(2,ix,iq,iz,e) * dxm1(iq,iy)**2 597 $ + gf(3,ix,iy,iq,e) * dxm1(iq,iz)**2 598 320 continue 599C 600C Add cross terms if element is deformed. 601C 602 if (lxyz.gt.0) then 603 604 do i2=1,ly1,ly1-1 605 do i1=1,lx1,lx1-1 606 d(1,i1,i2,e) = d(1,i1,i2,e) 607 $ + gf(4,1,i1,i2,e) * dxtm1(1,1)*dytm1(i1,i1) 608 $ + gf(5,1,i1,i2,e) * dxtm1(1,1)*dztm1(i2,i2) 609 d(lx1,i1,i2,e) = d(lx1,i1,i2,e) 610 $ + gf(4,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dytm1(i1,i1) 611 $ + gf(5,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dztm1(i2,i2) 612 d(i1,1,i2,e) = d(i1,1,i2,e) 613 $ + gf(4,i1,1,i2,e) * dytm1(1,1)*dxtm1(i1,i1) 614 $ + gf(6,i1,1,i2,e) * dytm1(1,1)*dztm1(i2,i2) 615 d(i1,ly1,i2,e) = d(i1,ly1,i2,e) 616 $ + gf(4,i1,ly1,i2,e) * dytm1(ly1,ly1)*dxtm1(i1,i1) 617 $ + gf(6,i1,ly1,i2,e) * dytm1(ly1,ly1)*dztm1(i2,i2) 618 d(i1,i2,1,e) = d(i1,i2,1,e) 619 $ + gf(5,i1,i2,1,e) * dztm1(1,1)*dxtm1(i1,i1) 620 $ + gf(6,i1,i2,1,e) * dztm1(1,1)*dytm1(i2,i2) 621 d(i1,i2,lz1,e) = d(i1,i2,lz1,e) 622 $ + gf(5,i1,i2,lz1,e) * dztm1(lz1,lz1)*dxtm1(i1,i1) 623 $ + gf(6,i1,i2,lz1,e) * dztm1(lz1,lz1)*dytm1(i2,i2) 624 625 enddo 626 enddo 627 endif 628 629 do i=1,lxyz 630 d(i,1,1,e)=d(i,1,1,e)*h1(i,e)+h2(i,e)*bm1(i,1,1,e) 631 enddo 632 633 1000 continue ! element loop 634 635C If axisymmetric, add a diagonal term in the radial direction (ISD=2) 636 637 if (ifaxis.and.(isd.eq.2)) then 638 do 1200 e=1,nel 639 if (ifrzer(e)) call mxm(ym1(1,1,1,e),lx1,datm1,ly1,ysm1,1) 640 k=0 641 do 1190 j=1,ly1 642 do 1190 i=1,lx1 643 k=k+1 644 if (ym1(i,j,1,e).ne.0.) then 645 term1 = bm1(i,j,1,e)/ym1(i,j,1,e)**2 646 if (ifrzer(e)) then 647 term2 = wxm1(i)*wam1(1)*dam1(1,j) 648 $ *jacm1(i,1,1,e)/ysm1(i) 649 else 650 term2 = 0. 651 endif 652 d(i,j,1,e) = d(i,j,1,e) + h1(k,e)*(term1+term2) 653 endif 654 1190 continue 655 1200 continue 656 657 endif 658 call dssum (d,nx1,ny1,nz1) 659 call invcol1 (d,n) 660 661 if (nio.eq.0) write(6,1) n,d(1,1,1,1),h1(1,1),h2(1,1),bm1(1,1,1,1) 662 1 format(i9,1p4e12.4,' diag prec') 663 664 return 665 end 666C----------------------------------------------------------------------- 667 subroutine setprecn_bp3 (d,h1,h2) 668C Generate dummy diagonal preconditioner for Helmholtz operator 669C Input: h1,h2 Output: d 670 include 'SIZE' 671 include 'TOTAL' 672 673 parameter (n=lx1*ly1*lz1*lelt) 674 real*8 d(n),h1(n),h2(n) 675 676 call rone (d,n) 677 678 return 679 end 680C----------------------------------------------------------------------- 681 subroutine userchk 682 include 'SIZE' 683 include 'TOTAL' 684 685 integer bp 686 687 call get_bp(bp) 688 689 if (bp==1) then 690 if (istep.gt.0) call bp1 691 elseif (bp==3) then 692 if (istep.gt.0) call bp3 693 else 694 write(6,*) "INVALID BP SPECIFICED" 695 endif 696 697 return 698 end 699C----------------------------------------------------------------------- 700 subroutine bp1 701C Solution to BP1 using libCEED 702 include 'SIZE' 703 include 'TOTAL' 704 include 'CTIMER' ! ifsync 705 include 'FDMH1' 706 include 'ceedf.h' 707 708 parameter (lzq=lx1+1) 709 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 710 common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq) 711 712 parameter (lt=lx1*ly1*lz1*lelt) 713 parameter (ld=lxd*lyd*lzd*lelt) 714 common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt) 715 common /vcrny/ e1(lt) 716 common /vcrvh/ h1(lt),h2(lx*lelt),pap(3) 717 real*8 coords(ldim*lx*lelt) 718 719 logical ifh3 720 integer*8 nnode 721 integer ceed,err,test 722 character*64 spec 723 724 integer p,q,ncompx,ncompu,enode,lnode 725 integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs 726 integer erstrctu,erstrctx,erstrctw 727 integer basisu,basisx 728 integer qf_mass,qf_setup 729 integer op_mass,op_setup 730 real*8 x,y,z 731 integer*8 offset 732 733 external massf,masssetupf 734 735 ifield = 1 736 nxq = nx1+1 737 n = nx1*ny1*nz1*nelt 738 739 ifsync = .false. 740 741C Set up coordinates 742 ii=0 743 do j=0,nelt-1 744 do i=1,lx 745 ii=ii+1 746 x = xm1(ii,1,1,1) 747 y = ym1(ii,1,1,1) 748 z = zm1(ii,1,1,1) 749 coords(i+0*lx+3*j*lx)=x 750 coords(i+1*lx+3*j*lx)=y 751 coords(i+2*lx+3*j*lx)=z 752 enddo 753 enddo 754 755C Init ceed library 756 call get_spec(spec) 757 call ceedinit(trim(spec)//char(0),ceed,err) 758 759 call get_test(test) 760 761C Set up Nek geometry data 762 call geodatq (gf,bmq,w3mq,nxq) ! Set up gf() arrays 763 call set_h2_as_rhoJac_GL(h2,bmq,nxq) 764 765C Set up true soln 766 call dist_fld_h1 (e1) 767 call copy (h1,e1,n) ! Save exact soln in h1 768 769C Set up solver parameters 770 tol = 1e-10 771 param(22) = tol 772 maxit = 100 773 774 call nekgsync() 775 776C Create ceed basis for mesh and computation 777 p=nx1 778 q=p+1 779 ncompu=1 780 ncompx=ldim 781 call ceedbasiscreatetensorh1lagrange(ceed,ndim,ndim,p,q, 782 $ ceed_gauss,basisx,err) 783 call ceedbasiscreatetensorh1lagrange(ceed,ndim,ncompu,p,q, 784 $ ceed_gauss,basisu,err) 785 786C Create ceed element restrictions for mesh and computation 787 enode=p**ldim 788 lnode=enode*nelt*ncompu 789 call ceedelemrestrictioncreateidentity(ceed,nelt,enode,lnode, 790 $ ldim,erstrctx,err) 791 call ceedelemrestrictioncreateidentity(ceed,nelt,enode,lnode, 792 $ ncompu,erstrctu,err) 793 call ceedelemrestrictioncreateidentity(ceed,nelt,q**ldim, 794 $ nelt*q**ldim,1,erstrctw,err) 795 796C Create ceed vectors 797 call ceedvectorcreate(ceed,lnode,vec_p1,err) 798 call ceedvectorcreate(ceed,lnode,vec_ap1,err) 799 call ceedvectorcreate(ceed,lnode,vec_rhs,err) 800 call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err) 801 call ceedvectorcreate(ceed,nelt*q**ldim,vec_qdata,err) 802 803 offset=0 804 call ceedvectorsetarray(vec_coords,ceed_mem_host, 805 $ ceed_use_pointer,coords,offset,err) 806 807C Create ceed qfunctions for masssetupf and massf 808 call ceedqfunctioncreateinterior(ceed,1,masssetupf, 809 $ __FILE__ 810 $ //':masssetupf',qf_setup,err) 811 call ceedqfunctionaddinput(qf_setup,'x',ncompx, 812 $ ceed_eval_interp,err) 813 call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim, 814 $ ceed_eval_grad,err) 815 call ceedqfunctionaddinput(qf_setup,'weight',ncompu, 816 $ ceed_eval_weight,err) 817 call ceedqfunctionaddoutput(qf_setup,'rho',ncompu, 818 $ ceed_eval_none,err) 819 call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu, 820 $ ceed_eval_interp,err) 821 822 call ceedqfunctioncreateinterior(ceed,1,massf, 823 $ __FILE__ 824 $ //':massf',qf_mass,err) 825 call ceedqfunctionaddinput(qf_mass,'u',ncompu, 826 $ ceed_eval_interp,err) 827 call ceedqfunctionaddinput(qf_mass,'rho',ncompu, 828 $ ceed_eval_none,err) 829 call ceedqfunctionaddoutput(qf_mass,'v',ncompu, 830 $ ceed_eval_interp,err) 831 832C Create ceed operators 833 call ceedoperatorcreate(ceed,qf_setup, 834 $ ceed_null,ceed_null,op_setup,err) 835 call ceedoperatorsetfield(op_setup,'x',erstrctx, 836 $ ceed_notranspose,basisx,ceed_vector_active,err) 837 call ceedoperatorsetfield(op_setup,'dx',erstrctx, 838 $ ceed_notranspose,basisx,ceed_vector_active,err) 839 call ceedoperatorsetfield(op_setup,'weight',erstrctx, 840 $ ceed_notranspose,basisx,ceed_vector_none,err) 841 call ceedoperatorsetfield(op_setup,'rho',erstrctw, 842 $ ceed_notranspose,ceed_basis_collocated, 843 $ ceed_vector_active,err) 844 call ceedoperatorsetfield(op_setup,'rhs',erstrctu, 845 $ ceed_notranspose,basisu,vec_rhs,err) 846 847 call ceedoperatorcreate(ceed,qf_mass, 848 $ ceed_null,ceed_null,op_mass,err) 849 call ceedoperatorsetfield(op_mass,'u',erstrctu, 850 $ ceed_notranspose,basisu,ceed_vector_active,err) 851 call ceedoperatorsetfield(op_mass,'rho',erstrctw, 852 $ ceed_notranspose,ceed_basis_collocated, 853 $ vec_qdata,err) 854 call ceedoperatorsetfield(op_mass,'v',erstrctu, 855 $ ceed_notranspose,basisu,ceed_vector_active,err) 856 857C Compute setup data 858 call ceedvectorsetarray(vec_rhs,ceed_mem_host, 859 $ ceed_use_pointer,r1,offset,err) 860 call ceedoperatorapply(op_setup,vec_coords,vec_qdata, 861 $ ceed_request_immediate,err) 862 863C Set up true RHS 864 call dssum (r1,nx1,ny1,nz1) ! r1 865 866C Set up algebraic RHS with libCEED 867 call ceedvectorsetarray(vec_p1,ceed_mem_host, 868 $ ceed_use_pointer,h1,offset,err) 869 call ceedvectorsetarray(vec_ap1,ceed_mem_host, 870 $ ceed_use_pointer,r2,offset,err) 871 call ceedoperatorapply(op_mass,vec_p1,vec_ap1, 872 $ ceed_request_immediate,err) ! r2 = A_ceed*h1 873 call dssum (r2,nx1,ny1,nz1) 874 875C Set up algebraic RHS with Nek5000 876 call axhm1 (pap,r3,h1,h1,h2,'bp1') ! r3 = A_nek5k*h1 877 call dssum (r3,nx1,ny1,nz1) 878 879 call nekgsync() 880 881C Solve true RHS 882 if (nid.eq.0) write (6,*) "libCEED true RHS" 883 tstart = dnekclock() 884 call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_mass, 885 $ vec_p1,vec_ap1,maxit,'bp1') 886 tstop = dnekclock() 887 888C Output 889 telaps = (tstop-tstart) 890 maxits = maxit 891 er1 = glrdif(u1,e1,n) 892 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 893 894 if (test.eq.1.and.nid.eq.0) then 895 if (maxit>=100) then 896 write(6,*) "UNCONVERGED CG" 897 endif 898 if (dabs(er1)>5e-3) then 899 write(6,*) "ERROR IS TOO LARGE" 900 endif 901 endif 902 903 nx = nx1-1 904 nnode = nelgt ! nnodes 905 nnode = nnode*(nx**ldim) ! nnodes 906 nppp = nnode/np ! nnodes/proc 907 908 dofps = nnode/telaps ! DOF/sec - scalar form 909 titers = telaps/maxits ! time per iteration 910 tppp_s = titers/nppp ! time per iteraton per local point 911 912 if (nid.eq.0) write(6,1) 'case scalar:' 913 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 914 915C Solve libCEED algebraic RHS 916 if (nid.eq.0) write (6,*) "libCEED algebraic RHS" 917 maxit = 100 918 tstart = dnekclock() 919 call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_mass, 920 $ vec_p1,vec_ap1,maxit,'bp1') 921 tstop = dnekclock() 922 923C Output 924 telaps = (tstop-tstart) 925 maxits = maxit 926 er1 = glrdif(u1,e1,n) 927 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 928 929 if (test.eq.1.and.nid.eq.0) then 930 if (maxit>=100) then 931 write(6,*) "UNCONVERGED CG" 932 endif 933 if (dabs(er1)>1e-5) then 934 write(6,*) "ERROR IS TOO LARGE" 935 endif 936 endif 937 938 nx = nx1-1 939 nnode = nelgt ! nnodes 940 nnode = nnode*(nx**ldim) ! nnodes 941 nppp = nnode/np ! nnodes/proc 942 943 dofps = nnode/telaps ! DOF/sec - scalar form 944 titers = telaps/maxits ! time per iteration 945 tppp_s = titers/nppp ! time per iteraton per local point 946 947 if (nid.eq.0) write(6,1) 'case scalar:' 948 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 949 950C Solve Nek5000 algebraic RHS 951 if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS" 952 maxit = 100 953 tstart = dnekclock() 954 call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_mass, 955 $ vec_p1,vec_ap1,maxit,'bp1') 956 tstop = dnekclock() 957 958C Output 959 telaps = (tstop-tstart) 960 maxits = maxit 961 er1 = glrdif(u1,e1,n) 962 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 963 964 if (test.eq.1.and.nid.eq.0) then 965 if (maxit>=100) then 966 write(6,*) "UNCONVERGED CG" 967 endif 968 if (dabs(er1)>1e-5) then 969 write(6,*) "ERROR IS TOO LARGE" 970 endif 971 endif 972 973 nx = nx1-1 974 nnode = nelgt ! nnodes 975 nnode = nnode*(nx**ldim) ! nnodes 976 nppp = nnode/np ! nnodes/proc 977 978 dofps = nnode/telaps ! DOF/sec - scalar form 979 titers = telaps/maxits ! time per iteration 980 tppp_s = titers/nppp ! time per iteraton per local point 981 982 if (nid.eq.0) write(6,1) 'case scalar:' 983 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 984 985 1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5) 986 3 format(i3,i9,e12.4,1x,a8,i9) 987 988C Destroy ceed handles 989 call ceedvectordestroy(vec_p1,err) 990 call ceedvectordestroy(vec_ap1,err) 991 call ceedvectordestroy(vec_rhs,err) 992 call ceedvectordestroy(vec_qdata,err) 993 call ceedvectordestroy(vec_coords,err) 994 call ceedelemrestrictiondestroy(erstrctu,err) 995 call ceedelemrestrictiondestroy(erstrctx,err) 996 call ceedelemrestrictiondestroy(erstrctw,err) 997 call ceedbasisdestroy(basisu,err) 998 call ceedbasisdestroy(basisx,err) 999 call ceedqfunctiondestroy(qf_setup,err) 1000 call ceedqfunctiondestroy(qf_mass,err) 1001 call ceedoperatordestroy(op_setup,err) 1002 call ceedoperatordestroy(op_mass,err) 1003 call ceeddestroy(ceed,err) 1004 1005 return 1006 end 1007C----------------------------------------------------------------------- 1008 subroutine bp3 1009C Solution to BP3 using libCEED 1010 include 'SIZE' 1011 include 'TOTAL' 1012 include 'CTIMER' ! ifsync 1013 include 'FDMH1' 1014 include 'ceedf.h' 1015 1016 parameter (lzq=lx1+1) 1017 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 1018 common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq) 1019 1020 parameter (lt=lx1*ly1*lz1*lelt) 1021 parameter (ld=lxd*lyd*lzd*lelt) 1022 common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt) 1023 common /vcrny/ e1(lt) 1024 common /vcrvh/ h1(lt),h2(ld),pap(3) 1025 real*8 coords(ldim*lx*lelt) 1026 1027 logical ifh3 1028 integer*8 nnode 1029 integer ceed,err,test 1030 character*64 spec 1031 1032 integer p,q,ncompx,ncompu,enode,lnode 1033 integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs 1034 integer erstrctu,erstrctx,erstrctw 1035 integer basisu,basisx 1036 integer qf_diffusion,qf_setup 1037 integer op_diffusion,op_setup 1038 integer ii,i,ngeo 1039 real*8 x,y,z 1040 integer*8 offset 1041 1042 external diffusionf,diffsetupf 1043 1044 ifield = 1 1045 nxq = nx1+1 1046 n = nx1*ny1*nz1*nelt 1047 1048 ifsync = .false. 1049 1050C Set up coordinates and mask 1051 ii=0 1052 do j=0,nelt-1 1053 do i=1,lx 1054 ii=ii+1 1055 x = xm1(ii,1,1,1) 1056 y = ym1(ii,1,1,1) 1057 z = zm1(ii,1,1,1) 1058 coords(i+0*lx+3*j*lx)=x 1059 coords(i+1*lx+3*j*lx)=y 1060 coords(i+2*lx+3*j*lx)=z 1061 if ( x.eq.0.or.x.eq.1 1062 $ .or.y.eq.0.or.y.eq.1 1063 $ .or.z.eq.0.or.z.eq.1 ) then 1064 h2(ii)=0. 1065 else 1066 h2(ii)=1. 1067 endif 1068 enddo 1069 enddo 1070 1071C Init ceed library 1072 call get_spec(spec) 1073 call ceedinit(trim(spec)//char(0),ceed,err) 1074 1075 call get_test(test) 1076 1077C Set up Nek geometry data 1078 call geodatq (gf,bmq,w3mq,nxq) ! Set up gf() arrays 1079 1080C Set up true soln 1081 call sin_fld_h1 (e1) 1082 call xmask1 (e1,h2,nelt) 1083 call copy (h1,e1,n) ! Save exact soln in h1 1084 1085C Set up solver parameters 1086 tol = 1e-10 1087 param(22) = tol 1088 maxit = 100 1089 1090 call nekgsync() 1091 1092C Create ceed basis for mesh and computation 1093 p=nx1 1094 q=p+1 1095 ncompu=1 1096 ncompx=ldim 1097 call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompx,p,q, 1098 $ ceed_gauss,basisx,err) 1099 call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompu,p,q, 1100 $ ceed_gauss,basisu,err) 1101 1102C Create ceed element restrictions for mesh and computation 1103 enode=p**ldim 1104 lnode=enode*nelt*ncompu 1105 ngeo=(ldim*(ldim+1))/2 1106 call ceedelemrestrictioncreateidentity(ceed,nelt,enode,lnode, 1107 $ ldim,erstrctx,err) 1108 call ceedelemrestrictioncreateidentity(ceed,nelt,enode,lnode, 1109 $ ncompu,erstrctu,err) 1110 call ceedelemrestrictioncreateidentity(ceed,nelt,q**ldim, 1111 $ nelt*q**ldim,ngeo,erstrctw,err) 1112 1113C Create ceed vectors 1114 call ceedvectorcreate(ceed,lnode,vec_p1,err) 1115 call ceedvectorcreate(ceed,lnode,vec_ap1,err) 1116 call ceedvectorcreate(ceed,lnode,vec_rhs,err) 1117 call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err) 1118 call ceedvectorcreate(ceed,nelt*ngeo*q**ldim,vec_qdata,err) 1119 1120 offset=0 1121 call ceedvectorsetarray(vec_coords,ceed_mem_host, 1122 $ ceed_use_pointer,coords,offset,err) 1123 1124C Create ceed qfunctions for diffsetupf and diffusionf 1125 call ceedqfunctioncreateinterior(ceed,1,diffsetupf, 1126 $ __FILE__ 1127 $ //':diffsetupf'//char(0),qf_setup,err) 1128 call ceedqfunctionaddinput(qf_setup,'x',ncompx, 1129 $ ceed_eval_interp,err) 1130 call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim, 1131 $ ceed_eval_grad,err) 1132 call ceedqfunctionaddinput(qf_setup,'weight',ncompu, 1133 $ ceed_eval_weight,err) 1134 call ceedqfunctionaddoutput(qf_setup,'rho',ngeo, 1135 $ ceed_eval_none,err) 1136 call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu, 1137 $ ceed_eval_interp,err) 1138 1139 call ceedqfunctioncreateinterior(ceed,1,diffusionf, 1140 $ __FILE__ 1141 $ //':diffusionf'//char(0),qf_diffusion,err) 1142 call ceedqfunctionaddinput(qf_diffusion,'u',ncompu*ldim, 1143 $ ceed_eval_grad,err) 1144 call ceedqfunctionaddinput(qf_diffusion,'rho',ngeo, 1145 $ ceed_eval_none,err) 1146 call ceedqfunctionaddoutput(qf_diffusion,'v',ncompu*ldim, 1147 $ ceed_eval_grad,err) 1148 1149C Create ceed operators 1150 call ceedoperatorcreate(ceed,qf_setup, 1151 $ ceed_null,ceed_null,op_setup,err) 1152 call ceedoperatorsetfield(op_setup,'x',erstrctx, 1153 $ ceed_notranspose,basisx,ceed_vector_active,err) 1154 call ceedoperatorsetfield(op_setup,'dx',erstrctx, 1155 $ ceed_notranspose,basisx,ceed_vector_active,err) 1156 call ceedoperatorsetfield(op_setup,'weight',erstrctx, 1157 $ ceed_notranspose,basisx,ceed_vector_none,err) 1158 call ceedoperatorsetfield(op_setup,'rho',erstrctw, 1159 $ ceed_notranspose,ceed_basis_collocated, 1160 $ ceed_vector_active,err) 1161 call ceedoperatorsetfield(op_setup,'rhs',erstrctu, 1162 $ ceed_notranspose,basisu,vec_rhs,err) 1163 1164 call ceedoperatorcreate(ceed,qf_diffusion, 1165 $ ceed_null,ceed_null,op_diffusion,err) 1166 call ceedoperatorsetfield(op_diffusion,'u',erstrctu, 1167 $ ceed_notranspose,basisu,ceed_vector_active,err) 1168 call ceedoperatorsetfield(op_diffusion,'rho',erstrctw, 1169 $ ceed_notranspose,ceed_basis_collocated, 1170 $ vec_qdata,err) 1171 call ceedoperatorsetfield(op_diffusion,'v',erstrctu, 1172 $ ceed_notranspose,basisu,ceed_vector_active,err) 1173 1174C Compute setup data 1175 call ceedvectorsetarray(vec_rhs,ceed_mem_host, 1176 $ ceed_use_pointer,r1,offset,err) 1177 call ceedoperatorapply(op_setup,vec_coords,vec_qdata, 1178 $ ceed_request_immediate,err) 1179 1180C Set up true RHS 1181 call dssum (r1,nx1,ny1,nz1) ! r1 1182 call xmask1 (r1,h2,nelt) 1183 1184C Set up algebraic RHS with libCEED 1185 call ceedvectorsetarray(vec_p1,ceed_mem_host, 1186 $ ceed_use_pointer,h1,offset,err) 1187 call ceedvectorsetarray(vec_ap1,ceed_mem_host, 1188 $ ceed_use_pointer,r2,offset,err) 1189 call ceedoperatorapply(op_diffusion,vec_p1,vec_ap1, 1190 $ ceed_request_immediate,err) ! r2 = A_ceed*h1 1191 call dssum (r2,nx1,ny1,nz1) 1192 call xmask1 (r2,h2,nelt) 1193 1194C Set up algebraic RHS with Nek5000 1195 call axhm1 (pap,r3,h1,h1,h2,'bp3') ! r3 = A_nek5k*h1 1196 call dssum (r3,nx1,ny1,nz1) 1197 call xmask1 (r3,h2,nelt) 1198 1199 call nekgsync() 1200 1201C Solve true RHS 1202 if (nid.eq.0) write (6,*) "libCEED true RHS" 1203 tstart = dnekclock() 1204 call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1205 $ vec_p1,vec_ap1,maxit,'bp3') 1206 tstop = dnekclock() 1207 1208C Output 1209 telaps = (tstop-tstart) 1210 maxits = maxit 1211 er1 = glrdif(u1,e1,n) 1212 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1213 1214 if (test.eq.1.and.nid.eq.0) then 1215 if (maxit>=100) then 1216 write(6,*) "UNCONVERGED CG" 1217 endif 1218 if (dabs(er1)>1e-3) then 1219 write(6,*) "ERROR IS TOO LARGE" 1220 endif 1221 endif 1222 1223 nx = nx1-1 1224 nnode = nelgt ! nnodes 1225 nnode = nnode*(nx**ldim) ! nnodes 1226 nppp = nnode/np ! nnodes/proc 1227 1228 dofps = nnode/telaps ! DOF/sec - scalar form 1229 titers = telaps/maxits ! time per iteration 1230 tppp_s = titers/nppp ! time per iteraton per local point 1231 1232 if (nid.eq.0) write(6,1) 'case scalar:' 1233 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 1234 1235C Solve libCEED algebraic RHS 1236 if (nid.eq.0) write (6,*) "libCEED algebraic RHS" 1237 maxit = 100 1238 tstart = dnekclock() 1239 call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1240 $ vec_p1,vec_ap1,maxit,'bp3') 1241 tstop = dnekclock() 1242 1243C Output 1244 telaps = (tstop-tstart) 1245 maxits = maxit 1246 er1 = glrdif(u1,e1,n) 1247 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1248 1249 if (test.eq.1.and.nid.eq.0) then 1250 if (maxit>=100) then 1251 write(6,*) "UNCONVERGED CG" 1252 endif 1253 if (dabs(er1)>1e-9) then 1254 write(6,*) "ERROR IS TOO LARGE" 1255 endif 1256 endif 1257 1258 nx = nx1-1 1259 nnode = nelgt ! nnodes 1260 nnode = nnode*(nx**ldim) ! nnodes 1261 nppp = nnode/np ! nnodes/proc 1262 1263 dofps = nnode/telaps ! DOF/sec - scalar form 1264 titers = telaps/maxits ! time per iteration 1265 tppp_s = titers/nppp ! time per iteraton per local point 1266 1267 if (nid.eq.0) write(6,1) 'case scalar:' 1268 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 1269 1270C Solve Nek5000 algebraic RHS 1271 if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS" 1272 maxit = 100 1273 tstart = dnekclock() 1274 call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1275 $ vec_p1,vec_ap1,maxit,'bp3') 1276 tstop = dnekclock() 1277 1278C Output 1279 telaps = (tstop-tstart) 1280 maxits = maxit 1281 er1 = glrdif(u1,e1,n) 1282 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1283 1284 if (test.eq.1.and.nid.eq.0) then 1285 if (maxit>=100) then 1286 write(6,*) "UNCONVERGED CG" 1287 endif 1288 if (dabs(er1)>1e-9) then 1289 write(6,*) "ERROR IS TOO LARGE" 1290 endif 1291 endif 1292 1293 nx = nx1-1 1294 nnode = nelgt ! nnodes 1295 nnode = nnode*(nx**ldim) ! nnodes 1296 nppp = nnode/np ! nnodes/proc 1297 1298 nodepss = nnode/telaps ! DOF/sec - scalar form 1299 titers = telaps/maxits ! time per iteration 1300 tppp_s = titers/nppp ! time per iteraton per local point 1301 1302 if (nid.eq.0) write(6,1) 'case scalar:' 1303 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,nodepss,titers,tppp_s 1304 1305 1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5) 1306 3 format(i3,i9,e12.4,1x,a8,i9) 1307 1308C Destroy ceed handles 1309 call ceedvectordestroy(vec_p1,err) 1310 call ceedvectordestroy(vec_ap1,err) 1311 call ceedvectordestroy(vec_rhs,err) 1312 call ceedvectordestroy(vec_qdata,err) 1313 call ceedvectordestroy(vec_coords,err) 1314 call ceedelemrestrictiondestroy(erstrctu,err) 1315 call ceedelemrestrictiondestroy(erstrctx,err) 1316 call ceedelemrestrictiondestroy(erstrctw,err) 1317 call ceedbasisdestroy(basisu,err) 1318 call ceedbasisdestroy(basisx,err) 1319 call ceedqfunctiondestroy(qf_setup,err) 1320 call ceedqfunctiondestroy(qf_diffusion,err) 1321 call ceedoperatordestroy(op_setup,err) 1322 call ceedoperatordestroy(op_diffusion,err) 1323 call ceeddestroy(ceed,err) 1324 1325 return 1326 end 1327C----------------------------------------------------------------------- 1328 subroutine cggos(u1,r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1, 1329 $ vec_ap1,maxit,bpname) 1330C Scalar conjugate gradient iteration for solution of uncoupled 1331C Helmholtz equations 1332C Input: r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,vec_ap1,bpname 1333C Output: u1,maxit 1334 include 'SIZE' 1335 include 'TOTAL' 1336 include 'DOMAIN' 1337 include 'FDMH1' 1338 character*3 bpname 1339 1340C INPUT: rhs1 - rhs 1341C h1 - exact solution 1342 1343 parameter (lt=lx1*ly1*lz1*lelt) 1344 parameter (ld=lxd*lyd*lzd*lelt) 1345 real*8 u1(lt),r1(lt),h1(lt),h2(lt) 1346 real*8 rmult(1),binv(1) 1347 integer ceed,ceed_op,vec_ap1,vec_p1 1348 common /scrcg/ dpc(lt),p1(lt),z1(lt) 1349 common /scrca/ wv(4),wk(4),rpp1(4),rpp2(4),alph(4),beta(4),pap(4) 1350 1351 real*8 ap1(lt) 1352 equivalence (ap1,z1) 1353 1354 vol = volfld(ifield) 1355 nel = nelfld(ifield) 1356 nxyz = lx1*ly1*lz1 1357 n = nxyz*nel 1358 nx = nx1-1 ! Polynomial order (just for i/o) 1359 1360 tol=tin 1361 1362 if(bpname.ne.'bp1') then 1363 call setprecn_bp3(dpc,h1,h2) ! Set up diagional pre-conidtioner 1364 else 1365 call setprecn_bp1(dpc,h1,h2) ! Set up diagional pre-conidtioner 1366 endif 1367 1368 call rzero (u1,n) ! Initialize solution 1369 1370 wv(1)=0 1371 do i=1,n 1372 s=rmult(i) ! -1 1373 p1(i)=dpc(i)*r1(i) ! p = M r T 1374 wv(1)=wv(1)+s*p1(i)*r1(i) ! r p 1375 enddo 1376 call gop(wv(1),wk,'+ ',1) 1377 rpp1(1) = wv (1) 1378 1379 do 1000 iter=1,maxit 1380 call axhm1_ceed (pap,ap1,p1,h1,h2,ceed,ceed_op, 1381 $ vec_ap1,vec_p1) 1382 call dssum (ap1,nx1,ny1,nz1) 1383 if (bpname.ne.'bp1') call xmask1(ap1,h2,nel) 1384 1385 call gop (pap,wk,'+ ',1) 1386 alph(1) = rpp1(1)/pap(1) 1387 1388 do i=1,n 1389 u1(i)=u1(i)+alph(1)* p1(i) 1390 r1(i)=r1(i)-alph(1)*ap1(i) 1391 enddo 1392 1393C tolerance check here 1394 call rzero(wv,2) 1395 do i=1,n 1396 wv(1)=wv(1)+r1(i)*r1(i) ! L2 error estimate 1397 z1(i)=dpc(i)*r1(i) ! z = M r 1398 wv(2)=wv(2)+rmult(i)*z1(i)*r1(i) ! r z 1399 enddo 1400 call gop(wv,wk,'+ ',2) 1401 1402C if (nio.eq.0) write(6,1) ifield,istep,iter,nx,(wv(k),k=1,1) 1403 1 format(i2,i9,i5,i4,1p1e12.4,' cggos') 1404 1405 enorm=sqrt(wv(1)) 1406 if (enorm.lt.tol) then 1407 ifin = iter 1408 if (nio.eq.0) write(6,3000) istep,ifin,enorm,tol 1409 goto 9999 1410 endif 1411C if (nio.eq.0) write(6,2) iter,enorm,alph(1),pap(1),'alpha' 1412 2 format(i5,1p3e12.4,2x,a5) 1413 1414 rpp2(1)=rpp1(1) 1415 rpp1(1)=wv (2) 1416 beta1 =rpp1(1)/rpp2(1) 1417 do i=1,n 1418 p1(i)=z1(i) + beta1*p1(i) 1419 enddo 1420 1421 1000 continue 1422 1423 rbnorm=sqrt(wv(1)) 1424 if (nio.eq.0) write (6,3001) istep,iter,rbnorm,tol 1425 iter = iter-1 1426 1427 9999 continue 1428 1429 maxit=iter 1430 1431 3000 format(i12,1x,'cggo scalar:',i6,1p5e13.4) 1432 3001 format(2i6,' Unconverged cggo scalar: rbnorm =',1p2e13.6) 1433 1434 return 1435 end 1436C----------------------------------------------------------------------- 1437 subroutine axhm1_ceed(pap,ap1,p1,h1,h2,ceed,ceed_op, 1438 $ vec_ap1,vec_p1) 1439C Vector conjugate gradient matvec for solution of uncoupled 1440C Helmholtz equations 1441C Input: pap,p1,h1,h2,bpname,ceed,ceed_op,vec_ap1,vec_p1 1442C Output: ap1 1443 include 'SIZE' 1444 include 'TOTAL' 1445 include 'ceedf.h' 1446 1447 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1448 real*8 gf(lg,lx,lelt) ! Equivalence new gf() data 1449 equivalence (gf,g1m1) ! layout to g1m1...g6m1 1450 1451 real*8 pap(3) 1452 real*8 ap1(lx,lelt) 1453 real*8 p1(lx,lelt) 1454 real*8 h1(lx,lelt),h2(lx,lelt) 1455 integer ceed,ceed_op,vec_ap1,vec_p1,err 1456 integer i,e 1457 integer*8 offset 1458 1459 offset=0 1460 call ceedvectorsetarray(vec_p1,ceed_mem_host,ceed_use_pointer, 1461 $ p1,offset,err) 1462 call ceedvectorsetarray(vec_ap1,ceed_mem_host,ceed_use_pointer, 1463 $ ap1,offset,err) 1464 1465 call ceedoperatorapply(ceed_op,vec_p1,vec_ap1, 1466 $ ceed_request_immediate,err) 1467 1468 call ceedvectorsyncarray(vec_ap1,ceed_mem_host,err) 1469 1470 pap(1)=0. 1471 1472 do e=1,nelt 1473 do i=1,lx 1474 pap(1)=pap(1)+ap1(i,e)*p1(i,e) 1475 enddo 1476 enddo 1477 1478 return 1479 end 1480C----------------------------------------------------------------------- 1481 subroutine ax_e_bp1(w,u,g,h1,h2,b,ju,us,ut) 1482C Local matrix-vector for solution of BP3 (stiffness matrix) 1483C Input: u,g,h1,h2,b,ju,us,ut Output: w 1484 include 'SIZE' 1485 include 'TOTAL' 1486 1487 parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1488 real*8 w(lxyz),u(lxyz),g(lg,lxyz),h1(lxyz),h2(lxyz),b(lxyz) 1489 real*8 ju(lxyz),us(lxyz),ut(lxyz) 1490 1491 nxq = nx1+1 ! Number of quadrature points 1492 1493 lxyzq = nxq**ldim 1494 1495 call intp_rstd (ju,u,lx1,nxq,if3d,0) ! 0 --> Fwd interpolation 1496 do i=1,lxyzq 1497 ju(i)=ju(i)*h2(i) !! h2 must be on the fine grid, w/ quad wts 1498 enddo 1499 call intp_rstd (w,ju,lx1,nxq,if3d,1) ! 1 --> ju-->u 1500 1501 return 1502 end 1503C----------------------------------------------------------------------- 1504 subroutine axhm1_bp1(pap,ap1,p1,h1,h2) 1505C Vector conjugate gradient matvec for solution of BP1 (mass matrix) 1506C Input: pap,p1,h1,h2 Output: ap1 1507 include 'SIZE' 1508 include 'TOTAL' 1509 1510 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1511 real*8 gf(lg,lx,lelt) ! Equivalence new gf() data 1512 equivalence (gf,g1m1) ! layout to g1m1...g6m1 1513 1514 real*8 pap(3) 1515 real*8 ap1(lx,lelt) 1516 real*8 p1(lx,lelt) 1517 real*8 h1(lx,lelt),h2(lx,lelt) 1518 1519 real*8 ur(lx),us(lx),ut(lx) 1520 common /ctmp1/ ur,us,ut 1521 1522 integer e 1523 1524 pap(1)=0. 1525 1526 k=1 1527 nxq = nx1+1 1528 1529 do e=1,nelt 1530 1531 call ax_e_bp1(ap1(1,e),p1(1,e),gf(1,1,e),h1(1,e),h2(k,1) 1532 $ ,bm1(1,1,1,e),ur,us,ut) 1533 do i=1,lx 1534 pap(1)=pap(1)+ap1(i,e)*p1(i,e) 1535 enddo 1536 k=k+nxq*nxq*nxq 1537 1538 enddo 1539 1540 return 1541 end 1542C----------------------------------------------------------------------- 1543 subroutine ax_e_bp3(w,u,g,ur,us,ut,wk) 1544C Local matrix-vector for solution of BP3 (stiffness matrix) 1545C Input: u,g,ur,us,ut,wk Output: w 1546 include 'SIZE' 1547 include 'TOTAL' 1548 1549 parameter (lzq=lx1+1,lxyz=lx1*lx1*lx1,lxyzq=lzq*lzq*lzq) 1550 1551 common /ctmp0/ tmp(lxyzq) 1552 common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq) 1553 1554 real*8 ur(lxyzq),us(lxyzq),ut(lxyzq),wk(lxyzq) 1555 real*8 w(lxyz),u(lxyz),g(2*ldim,lxyzq) 1556 1557 n = lzq-1 1558 1559 call intp_rstd (wk,u,lx1,lzq,if3d,0) ! 0 --> Fwd interpolation 1560 call loc_grad3 (ur,us,ut,wk,n,dxmq,dxtmq) 1561 1562 do i=1,lxyzq 1563 wr = g(1,i)*ur(i) + g(2,i)*us(i) + g(3,i)*ut(i) 1564 ws = g(2,i)*ur(i) + g(4,i)*us(i) + g(5,i)*ut(i) 1565 wt = g(3,i)*ur(i) + g(5,i)*us(i) + g(6,i)*ut(i) 1566 ur(i) = wr 1567 us(i) = ws 1568 ut(i) = wt 1569 enddo 1570 1571 call loc_grad3t (wk,ur,us,ut,n,dxmq,dxtmq,tmp) 1572 call intp_rstd (w,wk,lx1,lzq,if3d,1) ! 1 --> ju-->u 1573 1574 return 1575 end 1576C----------------------------------------------------------------------- 1577 subroutine axhm1_bp3(pap,ap1,p1,h1,h2) 1578C Vector conjugate gradient matvec for solution of BP3 (stiffness matrix) 1579C Input: pap,p1,h1,h2 Output: ap1 1580 include 'SIZE' 1581 include 'TOTAL' 1582 1583 parameter (lzq=lx1+1) 1584 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 1585 common /bpgfactors/ gf(lg,lq,lelt),bmq(lq,lelt),w3mq(lq) 1586 1587 real*8 pap(3) 1588 real*8 ap1(lx,lelt) 1589 real*8 p1(lx,lelt) 1590 real*8 h1(lx,lelt),h2(lx,lelt) 1591 1592 common /ctmp1/ ur,us,ut,wk 1593 real*8 ur(lq),us(lq),ut(lq),wk(lq) 1594 1595 integer e 1596 1597 pap(1)=0. 1598 1599 do e=1,nelt 1600 1601 call ax_e_bp3(ap1(1,e),p1(1,e),gf(1,1,e),ur,us,ut,wk) 1602 do i=1,lx 1603 pap(1)=pap(1)+p1(i,e)*ap1(i,e) 1604 enddo 1605 1606 enddo 1607 1608 return 1609 end 1610C----------------------------------------------------------------------- 1611 subroutine axhm1(pap,ap1,p1,h1,h2,bpname) 1612C Vector conjugate gradient matvec for solution of uncoupled 1613C Helmholtz equations 1614C Input: pap,p1,h1,h2,bpname Output: ap1 1615 include 'SIZE' 1616 include 'TOTAL' 1617 1618 parameter (lx=lx1*ly1*lz1) 1619 1620 real*8 pap(3),ap1(lx,lelt),p1(lx,lelt) 1621 real*8 h1(lx,lelt),h2(lx,lelt) 1622 1623 character*3 bpname 1624 1625 if (bpname.eq.'bp1') then 1626 call axhm1_bp1(pap,ap1,p1,h1,h2) 1627 1628 elseif (bpname.eq.'bp3') then 1629 call axhm1_bp3(pap,ap1,p1,h1,h2) 1630 1631 else 1632 write(6,*) bpname,' axhm1 bpname error' 1633 stop 1634 1635 endif 1636 1637 return 1638 end 1639C----------------------------------------------------------------------- 1640 subroutine get_bp(bp) 1641C Get BP to run 1642C Input: Output: bp 1643 integer i,bp 1644 character*64 bpval 1645 1646 bp=0 1647 if(iargc().ge.1) then 1648 call getarg(1,bpval) 1649 endif 1650 if(bpval.eq."bp1") then 1651 bp=1 1652 elseif(bpval.eq."bp3") then 1653 bp=3 1654 endif 1655 1656 return 1657 end 1658C----------------------------------------------------------------------- 1659 subroutine get_spec(spec) 1660C Get CEED backend specification 1661C Input: Output: spec 1662 integer i 1663 character*64 spec 1664 1665 spec = '/cpu/self' 1666 if(iargc().ge.2) then 1667 call getarg(2,spec) 1668 endif 1669 1670 return 1671 end 1672C----------------------------------------------------------------------- 1673 subroutine get_test(test) 1674C Get test mode flag 1675C Input: Output: test 1676 integer i,test 1677 character*64 testval 1678 1679 test=0 1680 if(iargc().ge.3) then 1681 call getarg(3,testval) 1682 endif 1683 if(testval.eq."test") then 1684 test=1 1685 endif 1686 1687 return 1688 end 1689C----------------------------------------------------------------------- 1690