1C Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2C the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3C reserved. See files LICENSE and NOTICE for details. 4C 5C This file is part of CEED, a collection of benchmarks, miniapps, software 6C libraries and APIs for efficient high-order finite element and spectral 7C element discretizations for exascale applications. For more information and 8C source code availability see http://github.com/ceed. 9C 10C The CEED research is supported by the Exascale Computing Project (17-SC-20-SC) 11C a collaborative effort of two U.S. Department of Energy organizations (Office 12C of Science and the National Nuclear Security Administration) responsible for 13C the planning and preparation of a capable exascale ecosystem, including 14C software, applications, hardware, advanced system engineering and early 15C testbed platforms, in support of the nation's exascale computing imperative. 16 17C> @file 18C> Mass and diffusion operators examples using Nek5000 19C TESTARGS -c {ceed_resource} -e bp1 -n 1 -b 4 -test 20 21C----------------------------------------------------------------------- 22 subroutine masssetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 23 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 24 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 25C Set up mass operator 26C Input: u1,u2,u3,q Output: v1,v2,ierr 27 integer q,ierr 28 real*8 ctx(1) 29 real*8 u1(3*q) 30 real*8 u2(9*q) 31 real*8 u3(q) 32 real*8 v1(q) 33 real*8 v2(q) 34 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 35 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 36 real*8 jacmq 37 38 do i=1,q 39 a11=u2(i+q*0) 40 a12=u2(i+q*3) 41 a13=u2(i+q*6) 42 43 a21=u2(i+q*1) 44 a22=u2(i+q*4) 45 a23=u2(i+q*7) 46 47 a31=u2(i+q*2) 48 a32=u2(i+q*5) 49 a33=u2(i+q*8) 50 51 g11 = (a22*a33-a23*a32) 52 g12 = (a13*a32-a33*a12) 53 g13 = (a12*a23-a22*a13) 54 55 g21 = (a23*a31-a21*a33) 56 g22 = (a11*a33-a31*a13) 57 g23 = (a13*a21-a23*a11) 58 59 g31 = (a21*a32-a22*a31) 60 g32 = (a12*a31-a32*a11) 61 g33 = (a11*a22-a21*a12) 62 63 jacmq = a11*g11+a21*g12+a31*g13 64 65C Rho 66 v1(i)=u3(i)*jacmq 67 68C RHS 69 v2(i)=u3(i)*jacmq 70 $ *dsqrt(u1(i+q*0)*u1(i+q*0) 71 $ +u1(i+q*1)*u1(i+q*1) 72 $ +u1(i+q*2)*u1(i+q*2)) 73 enddo 74 75 ierr=0 76 end 77C----------------------------------------------------------------------- 78 subroutine massf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 79 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 80 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 81C Apply mass operator 82C Input: u1,u2,q Output: v1,ierr 83 integer q,ierr 84 real*8 ctx(1) 85 real*8 u1(q) 86 real*8 u2(q) 87 real*8 v1(q) 88 89 do i=1,q 90 v1(i)=u2(i)*u1(i) 91 enddo 92 93 ierr=0 94 end 95C----------------------------------------------------------------------- 96 subroutine diffsetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 97 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 98 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 99C Set up diffusion operator 100C Input: u1,u2,u3,q Output: v1,v2,ierr 101 integer q,ierr 102 real*8 ctx(1) 103 real*8 u1(3*q) 104 real*8 u2(9*q) 105 real*8 u3(q) 106 real*8 v1(6*q) 107 real*8 v2(q) 108 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 109 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 110 real*8 jacmq,scl 111 real*8 c(3),k(3) 112 113 do i=1,q 114 pi = 3.14159265358979323846 115 116 c(1)=0. 117 c(2)=1. 118 c(3)=2. 119 k(1)=1. 120 k(2)=2. 121 k(3)=3. 122 123 a11=u2(i+q*0) 124 a12=u2(i+q*3) 125 a13=u2(i+q*6) 126 127 a21=u2(i+q*1) 128 a22=u2(i+q*4) 129 a23=u2(i+q*7) 130 131 a31=u2(i+q*2) 132 a32=u2(i+q*5) 133 a33=u2(i+q*8) 134 135 g11 = (a22*a33-a23*a32) 136 g12 = (a13*a32-a33*a12) 137 g13 = (a12*a23-a22*a13) 138 139 g21 = (a23*a31-a21*a33) 140 g22 = (a11*a33-a31*a13) 141 g23 = (a13*a21-a23*a11) 142 143 g31 = (a21*a32-a22*a31) 144 g32 = (a12*a31-a32*a11) 145 g33 = (a11*a22-a21*a12) 146 147 jacmq = a11*g11+a21*g12+a31*g13 148 149 scl = u3(i)/jacmq 150 151C Geometric factors 152 v1(i+0*q) = scl*(g11*g11+g12*g12+g13*g13) ! Grr 153 v1(i+1*q) = scl*(g11*g21+g12*g22+g13*g23) ! Grs 154 v1(i+2*q) = scl*(g11*g31+g12*g32+g13*g33) ! Grt 155 v1(i+3*q) = scl*(g21*g21+g22*g22+g23*g23) ! Gss 156 v1(i+4*q) = scl*(g21*g31+g22*g32+g23*g33) ! Gst 157 v1(i+5*q) = scl*(g31*g31+g32*g32+g33*g33) ! Gtt 158 159C RHS 160 v2(i) = u3(i)*jacmq*pi*pi 161 $ *dsin(pi*(c(1)+k(1)*u1(i+0*q))) 162 $ *dsin(pi*(c(2)+k(2)*u1(i+1*q))) 163 $ *dsin(pi*(c(3)+k(3)*u1(i+2*q))) 164 $ *(k(1)*k(1)+k(2)*k(2)+k(3)*k(3)) 165 166 enddo 167 168 ierr=0 169 end 170C----------------------------------------------------------------------- 171 subroutine diffusionf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 172 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 173 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 174C Apply diffusion operator 175C Input: u1,u2,q Output: v1,ierr 176 integer q,ierr 177 real*8 ctx(1) 178 real*8 u1(3*q) 179 real*8 u2(6*q) 180 real*8 v1(3*q) 181 182 do i=1,q 183 v1(i+0*q)= 184 $ u2(i+0*q)*u1(i)+u2(i+1*q)*u1(i+q)+u2(i+2*q)*u1(i+2*q) 185 v1(i+1*q)= 186 $ u2(i+1*q)*u1(i)+u2(i+3*q)*u1(i+q)+u2(i+4*q)*u1(i+2*q) 187 v1(i+2*q)= 188 $ u2(i+2*q)*u1(i)+u2(i+4*q)*u1(i+q)+u2(i+5*q)*u1(i+2*q) 189 enddo 190 191 ierr=0 192 end 193C----------------------------------------------------------------------- 194 subroutine set_h2_as_rhoJac_GL(h2,bmq,nxq) 195C Set h2 as rhoJac 196C Input: bmq,nxq Output: h2 197 include 'SIZE' 198 real*8 h2(1),bmq(1) 199 200 common /ctmp77/ wd(lxd),zd(lxd) 201 integer e,i,L 202 203 call zwgl(zd,wd,nxq) ! nxq = number of points 204 205 q = 1.0 ! Later, this can be a function of position... 206 207 L = 0 208 do e=1,lelt 209 do i=1,nxq**ldim 210 L=L+1 211 h2(L) = q*bmq(L) 212 enddo 213 enddo 214 215 return 216 end 217C----------------------------------------------------------------------- 218 subroutine dist_fld_h1(e) 219C Set distance initial condition for BP1 220C Input: Output: e 221 include 'SIZE' 222 include 'TOTAL' 223 real*8 x,y,z 224 real*8 e(1) 225 226 n=lx1*ly1*lz1*nelt 227 228 do i=1,n 229 x=xm1(i,1,1,1) 230 y=ym1(i,1,1,1) 231 z=zm1(i,1,1,1) 232 233 e(i) = dsqrt(x*x+y*y+z*z) 234 enddo 235 236 call dsavg(e) ! This is requisite for random fields 237 238 return 239 end 240C----------------------------------------------------------------------- 241 subroutine sin_fld_h1(e) 242C Set sine initial condition for BP3 243C Input: Output: e 244 include 'SIZE' 245 include 'TOTAL' 246 real*8 x,y,z 247 real*8 e(1) 248 real*8 c(3),k(3) 249 250 n=lx1*ly1*lz1*nelt 251 pi = 3.14159265358979323846 252 253 c(1)=0. 254 c(2)=1. 255 c(3)=2. 256 k(1)=1. 257 k(2)=2. 258 k(3)=3. 259 260 do i=1,n 261 x=xm1(i,1,1,1) 262 y=ym1(i,1,1,1) 263 z=zm1(i,1,1,1) 264 265 e(i) = dsin(pi*(c(1)+k(1)*x)) 266 & *dsin(pi*(c(2)+k(2)*y)) 267 & *dsin(pi*(c(3)+k(3)*z)) 268 269 enddo 270 271 call dsavg(e) ! This is requisite for random fields 272 273 return 274 end 275C----------------------------------------------------------------------- 276 subroutine uservp(ix,iy,iz,eg) ! set variable properties 277 include 'SIZE' 278 include 'TOTAL' 279 include 'NEKUSE' 280 integer e,f,eg 281C e = gllel(eg) 282 283 udiff = 0.0 284 utrans = 0.0 285 286 return 287 end 288C----------------------------------------------------------------------- 289 subroutine userf(ix,iy,iz,eg) ! set acceleration term 290C 291C Note: this is an acceleration term, NOT a force! 292C Thus, ffx will subsequently be multiplied by rho(x,t). 293C 294 include 'SIZE' 295 include 'TOTAL' 296 include 'NEKUSE' 297 integer e,f,eg 298C e = gllel(eg) 299 300 ffx = 0.0 301 ffy = 0.0 302 ffz = 0.0 303 304 return 305 end 306C----------------------------------------------------------------------- 307 subroutine userq(i,j,k,eg) ! set source term 308 include 'SIZE' 309 include 'TOTAL' 310 include 'NEKUSE' 311 integer e,f,eg 312 e = gllel(eg) 313 314 qvol = 0 315 316 return 317 end 318C----------------------------------------------------------------------- 319 subroutine userbc(ix,iy,iz,f,eg) ! set up boundary conditions 320C NOTE ::: This subroutine MAY NOT be called by every process 321 include 'SIZE' 322 include 'TOTAL' 323 include 'NEKUSE' 324 integer e,f,eg 325 326 ux = 0.0 327 uy = 0.0 328 uz = 0.0 329 temp = 0.0 330 331 return 332 end 333C----------------------------------------------------------------------- 334 subroutine useric(ix,iy,iz,eg) ! set up initial conditions 335 include 'SIZE' 336 include 'TOTAL' 337 include 'NEKUSE' 338 integer e,f,eg 339 340 ux = 0.0 341 uy = 0.0 342 uz = 0.0 343 temp = 0.0 344 345 return 346 end 347C----------------------------------------------------------------------- 348 subroutine usrdat ! This routine to modify element vertices 349 include 'SIZE' 350 include 'TOTAL' 351 352 return 353 end 354C----------------------------------------------------------------------- 355 subroutine usrdat2 ! This routine to modify mesh coordinates 356 include 'SIZE' 357 include 'TOTAL' 358 359 x0 = 0 360 x1 = 1 361 call rescale_x(xm1,x0,x1) 362 call rescale_x(ym1,x0,x1) 363 call rescale_x(zm1,x0,x1) 364 365 param(59)=1 ! Force Nek to use the "deformed element" formulation 366 367 return 368 end 369C----------------------------------------------------------------------- 370 subroutine usrdat3 371 include 'SIZE' 372 include 'TOTAL' 373 374 return 375 end 376C----------------------------------------------------------------------- 377 subroutine xmask1 (r1,h2,nel) 378C Apply zero Dirichlet boundary conditions 379C Input: h2,nel Output: r1 380 include 'SIZE' 381 include 'TOTAL' 382 real*8 r1(1),h2(1) 383 384 n=nx1*ny1*nz1*nel 385 do i=1,n 386 r1(i)=r1(i)*h2(i) 387 enddo 388 389 return 390 end 391C----------------------------------------------------------------------- 392 function glrdif(x,y,n) 393C Compute Linfty norm of (x-y) 394C Input: x,y Output: n 395 real*8 x(n),y(n) 396 397 dmx=0 398 xmx=0 399 ymx=0 400 401 do i=1,n 402 diff=abs(x(i)-y(i)) 403 dmx =max(dmx,diff) 404 xmx =max(xmx,x(i)) 405 ymx =max(ymx,y(i)) 406 enddo 407 408 xmx = max(xmx,ymx) 409 dmx = glmax(dmx,1) ! max across processors 410 xmx = glmax(xmx,1) 411 412 if (xmx.gt.0) then 413 glrdif = dmx/xmx 414 else 415 glrdif = -dmx ! Negative indicates something strange 416 endif 417 418 return 419 end 420C----------------------------------------------------------------------- 421 subroutine loc_grad3(ur,us,ut,u,N,D,Dt) 422C 3D transpose of local gradient 423C Input: u,N,D,Dt Output: ur,us,ut 424 real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N) 425 real*8 u (0:N,0:N,0:N) 426 real*8 D (0:N,0:N),Dt(0:N,0:N) 427 428 m1 = N+1 429 m2 = m1*m1 430 431 call mxm(D ,m1,u(0,0,0),m1,ur,m2) 432 do k=0,N 433 call mxm(u(0,0,k),m1,Dt,m1,us(0,0,k),m1) 434 enddo 435 call mxm(u(0,0,0),m2,Dt,m1,ut,m1) 436 437 return 438 end 439c----------------------------------------------------------------------- 440 subroutine loc_grad3t(u,ur,us,ut,N,D,Dt,w) 441C 3D transpose of local gradient 442C Input: ur,us,ut,N,D,Dt Output: u 443 real*8 u (0:N,0:N,0:N) 444 real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N) 445 real*8 D (0:N,0:N),Dt(0:N,0:N) 446 real*8 w (0:N,0:N,0:N) 447 448 m1 = N+1 449 m2 = m1*m1 450 m3 = m1*m1*m1 451 452 call mxm(Dt,m1,ur,m1,u(0,0,0),m2) 453 do k=0,N 454 call mxm(us(0,0,k),m1,D ,m1,w(0,0,k),m1) 455 enddo 456 call add2(u(0,0,0),w,m3) 457 call mxm(ut,m2,D ,m1,w,m1) 458 call add2(u(0,0,0),w,m3) 459 460 return 461 end 462C----------------------------------------------------------------------- 463 subroutine geodatq(gf,bmq,w3mq,nxq) 464C Routine to generate elemental geometric matrices on mesh 1 465C (Gauss-Legendre Lobatto mesh). 466 include 'SIZE' 467 include 'TOTAL' 468 469 parameter (lg=3+3*(ldim-2),lzq=lx1+1,lxyd=lzq**ldim) 470 471 real*8 gf(lg,nxq**ldim,lelt),bmq(nxq**ldim,lelt),w3mq(nxq,nxq,nxq) 472 473 common /ctmp1/ xr(lxyd),xs(lxyd),xt(lxyd) 474 common /sxrns/ yr(lxyd),ys(lxyd),yt(lxyd) 475 $ , zr(lxyd),zs(lxyd),zt(lxyd) 476 477 common /ctmp77/ wd(lxd),zd(lxd) 478 common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq) 479 480 integer e 481 real*8 tmp(lxyd) 482 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 483 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 484 real*8 jacmq 485 486 if (nxq.gt.lzq) call exitti('ABORT: recompile with lzq=$',nxq) 487 488 call zwgl (zd,wd,lzq) ! nxq = number of points 489 call gen_dgl (dxmq,dxtmq,lzq,lzq,tmp) 490 491 do k=1,nxq 492 do j=1,nxq 493 do i=1,nxq 494 w3mq(i,j,k) = wd(i)*wd(j)*wd(k) 495 enddo 496 enddo 497 enddo 498 499 nxyzq = nxq**ldim 500 nxqm1 = lzq-1 501 502 do e=1,nelt 503 call intp_rstd (tmp,xm1(1,1,1,e),lx1,lzq,if3d,0) ! 0-->Fwd interpolation 504 call loc_grad3 (xr,xs,xt,tmp,nxqm1,dxmq,dxtmq) 505 506 call intp_rstd (tmp,ym1(1,1,1,e),lx1,lzq,if3d,0) 507 call loc_grad3 (yr,ys,yt,tmp,nxqm1,dxmq,dxtmq) 508 509 call intp_rstd (tmp,zm1(1,1,1,e),lx1,lzq,if3d,0) 510 call loc_grad3 (zr,zs,zt,tmp,nxqm1,dxmq,dxtmq) 511 512 do i=1,nxyzq 513 a11 = xr(i) 514 a12 = xs(i) 515 a13 = xt(i) 516 517 a21 = yr(i) 518 a22 = ys(i) 519 a23 = yt(i) 520 521 a31 = zr(i) 522 a32 = zs(i) 523 a33 = zt(i) 524 525 g11 = (a22*a33-a23*a32) 526 g12 = (a13*a32-a33*a12) 527 g13 = (a12*a23-a22*a13) 528 529 g21 = (a23*a31-a21*a33) 530 g22 = (a11*a33-a31*a13) 531 g23 = (a13*a21-a23*a11) 532 533 g31 = (a21*a32-a22*a31) 534 g32 = (a12*a31-a32*a11) 535 g33 = (a11*a22-a21*a12) 536 537 jacmq = a11*g11+a21*g12+a31*g13 538 539 bmq(i,e) = w3mq(i,1,1)*jacmq 540 scale = w3mq(i,1,1)/jacmq 541 542 gf(1,i,e) = scale*(g11*g11+g12*g12+g13*g13) ! Grr 543 gf(2,i,e) = scale*(g11*g21+g12*g22+g13*g23) ! Grs 544 gf(3,i,e) = scale*(g11*g31+g12*g32+g13*g33) ! Grt 545 gf(4,i,e) = scale*(g21*g21+g22*g22+g23*g23) ! Gss 546 gf(5,i,e) = scale*(g21*g31+g22*g32+g23*g33) ! Gst 547 gf(6,i,e) = scale*(g31*g31+g32*g32+g33*g33) ! Gtt 548 549 enddo 550 enddo 551 552 return 553 end 554C----------------------------------------------------------------------- 555 subroutine setprecn_bp1 (d,h1,h2) 556C Generate diagonal preconditioner for Helmholtz operator 557C Input: h1,h2 Output: d 558 include 'SIZE' 559 include 'TOTAL' 560 561 parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2)) 562 563 real*8 d(lx1,ly1,lz1,lelt),h1(lxyz,lelt),h2(lxyz,lelt) 564 integer e 565 566 real*8 gf(lg,lx1,ly1,lz1,lelt) ! Equivalence new gf() data 567 equivalence (gf,g1m1) ! layout to g1m1...g6m1 568 569 real*8 ysm1(ly1) 570 571 nel = nelfld(ifield) 572 n = nel*lx1*ly1*lz1 573 nxyz = lx1*ly1*lz1 574 575 call copy (d,bm1,n) ! Mass matrix preconditioning full mass matrix 576 call dssum (d,nx1,ny1,nz1) 577 call invcol1 (d,n) 578 return 579 580 call dsset(lx1,ly1,lz1) 581 582 do 1000 e=1,nel 583 584 call rzero(d(1,1,1,e),nxyz) 585 586 do 320 iz=1,lz1 587 do 320 iy=1,ly1 588 do 320 ix=1,lx1 589 do 320 iq=1,lx1 590 d(ix,iy,iz,e) = d(ix,iy,iz,e) 591 $ + gf(1,iq,iy,iz,e) * dxm1(iq,ix)**2 592 $ + gf(2,ix,iq,iz,e) * dxm1(iq,iy)**2 593 $ + gf(3,ix,iy,iq,e) * dxm1(iq,iz)**2 594 320 continue 595C 596C Add cross terms if element is deformed. 597C 598 if (lxyz.gt.0) then 599 600 do i2=1,ly1,ly1-1 601 do i1=1,lx1,lx1-1 602 d(1,i1,i2,e) = d(1,i1,i2,e) 603 $ + gf(4,1,i1,i2,e) * dxtm1(1,1)*dytm1(i1,i1) 604 $ + gf(5,1,i1,i2,e) * dxtm1(1,1)*dztm1(i2,i2) 605 d(lx1,i1,i2,e) = d(lx1,i1,i2,e) 606 $ + gf(4,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dytm1(i1,i1) 607 $ + gf(5,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dztm1(i2,i2) 608 d(i1,1,i2,e) = d(i1,1,i2,e) 609 $ + gf(4,i1,1,i2,e) * dytm1(1,1)*dxtm1(i1,i1) 610 $ + gf(6,i1,1,i2,e) * dytm1(1,1)*dztm1(i2,i2) 611 d(i1,ly1,i2,e) = d(i1,ly1,i2,e) 612 $ + gf(4,i1,ly1,i2,e) * dytm1(ly1,ly1)*dxtm1(i1,i1) 613 $ + gf(6,i1,ly1,i2,e) * dytm1(ly1,ly1)*dztm1(i2,i2) 614 d(i1,i2,1,e) = d(i1,i2,1,e) 615 $ + gf(5,i1,i2,1,e) * dztm1(1,1)*dxtm1(i1,i1) 616 $ + gf(6,i1,i2,1,e) * dztm1(1,1)*dytm1(i2,i2) 617 d(i1,i2,lz1,e) = d(i1,i2,lz1,e) 618 $ + gf(5,i1,i2,lz1,e) * dztm1(lz1,lz1)*dxtm1(i1,i1) 619 $ + gf(6,i1,i2,lz1,e) * dztm1(lz1,lz1)*dytm1(i2,i2) 620 621 enddo 622 enddo 623 endif 624 625 do i=1,lxyz 626 d(i,1,1,e)=d(i,1,1,e)*h1(i,e)+h2(i,e)*bm1(i,1,1,e) 627 enddo 628 629 1000 continue ! element loop 630 631C If axisymmetric, add a diagonal term in the radial direction (ISD=2) 632 633 if (ifaxis.and.(isd.eq.2)) then 634 do 1200 e=1,nel 635 if (ifrzer(e)) call mxm(ym1(1,1,1,e),lx1,datm1,ly1,ysm1,1) 636 k=0 637 do 1190 j=1,ly1 638 do 1190 i=1,lx1 639 k=k+1 640 if (ym1(i,j,1,e).ne.0.) then 641 term1 = bm1(i,j,1,e)/ym1(i,j,1,e)**2 642 if (ifrzer(e)) then 643 term2 = wxm1(i)*wam1(1)*dam1(1,j) 644 $ *jacm1(i,1,1,e)/ysm1(i) 645 else 646 term2 = 0. 647 endif 648 d(i,j,1,e) = d(i,j,1,e) + h1(k,e)*(term1+term2) 649 endif 650 1190 continue 651 1200 continue 652 653 endif 654 call dssum (d,nx1,ny1,nz1) 655 call invcol1 (d,n) 656 657 if (nio.eq.0) write(6,1) n,d(1,1,1,1),h1(1,1),h2(1,1),bm1(1,1,1,1) 658 1 format(i9,1p4e12.4,' diag prec') 659 660 return 661 end 662C----------------------------------------------------------------------- 663 subroutine setprecn_bp3 (d,h1,h2) 664C Generate dummy diagonal preconditioner for Helmholtz operator 665C Input: h1,h2 Output: d 666 include 'SIZE' 667 include 'TOTAL' 668 669 parameter (n=lx1*ly1*lz1*lelt) 670 real*8 d(n),h1(n),h2(n) 671 672 call rone (d,n) 673 674 return 675 end 676C----------------------------------------------------------------------- 677 subroutine userchk 678 include 'SIZE' 679 include 'TOTAL' 680 681 integer bp 682 683 call get_bp(bp) 684 685 if (bp==1) then 686 if (istep.gt.0) call bp1 687 elseif (bp==3) then 688 if (istep.gt.0) call bp3 689 else 690 write(6,*) "INVALID BP SPECIFICED" 691 endif 692 693 return 694 end 695C----------------------------------------------------------------------- 696 subroutine bp1 697C Solution to BP1 using libCEED 698 include 'SIZE' 699 include 'TOTAL' 700 include 'CTIMER' ! ifsync 701 include 'FDMH1' 702 include 'ceedf.h' 703 704 parameter (lzq=lx1+1) 705 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 706 common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq) 707 708 parameter (lt=lx1*ly1*lz1*lelt) 709 parameter (ld=lxd*lyd*lzd*lelt) 710 common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt) 711 common /vcrny/ e1(lt) 712 common /vcrvh/ h1(lt),h2(lx*lelt),pap(3) 713 real*8 coords(ldim*lx*lelt) 714 715 logical ifh3 716 integer*8 nnode 717 integer ceed,err,test 718 character*64 spec 719 720 integer p,q,ncompx,ncompu,enode,lnode 721 integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs 722 integer erstrctu,erstrctx,erstrctw 723 integer basisu,basisx 724 integer qf_mass,qf_setup 725 integer op_mass,op_setup 726 real*8 x,y,z 727 integer*8 offset 728 729 external massf,masssetupf 730 731 ifield = 1 732 nxq = nx1+1 733 n = nx1*ny1*nz1*nelt 734 735 ifsync = .false. 736 737C Set up coordinates 738 ii=0 739 do j=0,nelt-1 740 do i=1,lx 741 ii=ii+1 742 x = xm1(ii,1,1,1) 743 y = ym1(ii,1,1,1) 744 z = zm1(ii,1,1,1) 745 coords(i+0*lx+3*j*lx)=x 746 coords(i+1*lx+3*j*lx)=y 747 coords(i+2*lx+3*j*lx)=z 748 enddo 749 enddo 750 751C Init ceed library 752 call get_spec(spec) 753 call ceedinit(trim(spec)//char(0),ceed,err) 754 755 call get_test(test) 756 757C Set up Nek geometry data 758 call geodatq (gf,bmq,w3mq,nxq) ! Set up gf() arrays 759 call set_h2_as_rhoJac_GL(h2,bmq,nxq) 760 761C Set up true soln 762 call dist_fld_h1 (e1) 763 call copy (h1,e1,n) ! Save exact soln in h1 764 765C Set up solver parameters 766 tol = 1e-10 767 param(22) = tol 768 maxit = 100 769 770 call nekgsync() 771 772C Create ceed basis for mesh and computation 773 p=nx1 774 q=p+1 775 ncompu=1 776 ncompx=ldim 777 call ceedbasiscreatetensorh1lagrange(ceed,ndim,ndim,p,q, 778 $ ceed_gauss,basisx,err) 779 call ceedbasiscreatetensorh1lagrange(ceed,ndim,ncompu,p,q, 780 $ ceed_gauss,basisu,err) 781 782C Create ceed element restrictions for mesh and computation 783 enode=p**ldim 784 lnode=enode*nelt*ncompu 785 call ceedelemrestrictioncreateidentity(ceed,nelt,enode,lnode, 786 $ ldim,erstrctx,err) 787 call ceedelemrestrictioncreateidentity(ceed,nelt,enode,lnode, 788 $ ncompu,erstrctu,err) 789 call ceedelemrestrictioncreateidentity(ceed,nelt,q**ldim, 790 $ nelt*q**ldim,1,erstrctw,err) 791 792C Create ceed vectors 793 call ceedvectorcreate(ceed,lnode,vec_p1,err) 794 call ceedvectorcreate(ceed,lnode,vec_ap1,err) 795 call ceedvectorcreate(ceed,lnode,vec_rhs,err) 796 call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err) 797 call ceedvectorcreate(ceed,nelt*q**ldim,vec_qdata,err) 798 799 offset=0 800 call ceedvectorsetarray(vec_coords,ceed_mem_host, 801 $ ceed_use_pointer,coords,offset,err) 802 803C Create ceed qfunctions for masssetupf and massf 804 call ceedqfunctioncreateinterior(ceed,1,masssetupf, 805 $ __FILE__ 806 $ //':masssetupf',qf_setup,err) 807 call ceedqfunctionaddinput(qf_setup,'x',ncompx, 808 $ ceed_eval_interp,err) 809 call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim, 810 $ ceed_eval_grad,err) 811 call ceedqfunctionaddinput(qf_setup,'weight',ncompu, 812 $ ceed_eval_weight,err) 813 call ceedqfunctionaddoutput(qf_setup,'rho',ncompu, 814 $ ceed_eval_none,err) 815 call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu, 816 $ ceed_eval_interp,err) 817 818 call ceedqfunctioncreateinterior(ceed,1,massf, 819 $ __FILE__ 820 $ //':massf',qf_mass,err) 821 call ceedqfunctionaddinput(qf_mass,'u',ncompu, 822 $ ceed_eval_interp,err) 823 call ceedqfunctionaddinput(qf_mass,'rho',ncompu, 824 $ ceed_eval_none,err) 825 call ceedqfunctionaddoutput(qf_mass,'v',ncompu, 826 $ ceed_eval_interp,err) 827 828C Create ceed operators 829 call ceedoperatorcreate(ceed,qf_setup, 830 $ ceed_null,ceed_null,op_setup,err) 831 call ceedoperatorsetfield(op_setup,'x',erstrctx, 832 $ ceed_notranspose,basisx,ceed_vector_active,err) 833 call ceedoperatorsetfield(op_setup,'dx',erstrctx, 834 $ ceed_notranspose,basisx,ceed_vector_active,err) 835 call ceedoperatorsetfield(op_setup,'weight',erstrctx, 836 $ ceed_notranspose,basisx,ceed_vector_none,err) 837 call ceedoperatorsetfield(op_setup,'rho',erstrctw, 838 $ ceed_notranspose,ceed_basis_collocated, 839 $ ceed_vector_active,err) 840 call ceedoperatorsetfield(op_setup,'rhs',erstrctu, 841 $ ceed_notranspose,basisu,vec_rhs,err) 842 843 call ceedoperatorcreate(ceed,qf_mass, 844 $ ceed_null,ceed_null,op_mass,err) 845 call ceedoperatorsetfield(op_mass,'u',erstrctu, 846 $ ceed_notranspose,basisu,ceed_vector_active,err) 847 call ceedoperatorsetfield(op_mass,'rho',erstrctw, 848 $ ceed_notranspose,ceed_basis_collocated, 849 $ vec_qdata,err) 850 call ceedoperatorsetfield(op_mass,'v',erstrctu, 851 $ ceed_notranspose,basisu,ceed_vector_active,err) 852 853C Compute setup data 854 call ceedvectorsetarray(vec_rhs,ceed_mem_host, 855 $ ceed_use_pointer,r1,offset,err) 856 call ceedoperatorapply(op_setup,vec_coords,vec_qdata, 857 $ ceed_request_immediate,err) 858 859C Set up true RHS 860 call dssum (r1,nx1,ny1,nz1) ! r1 861 862C Set up algebraic RHS with libCEED 863 call ceedvectorsetarray(vec_p1,ceed_mem_host, 864 $ ceed_use_pointer,h1,offset,err) 865 call ceedvectorsetarray(vec_ap1,ceed_mem_host, 866 $ ceed_use_pointer,r2,offset,err) 867 call ceedoperatorapply(op_mass,vec_p1,vec_ap1, 868 $ ceed_request_immediate,err) ! r2 = A_ceed*h1 869 call dssum (r2,nx1,ny1,nz1) 870 871C Set up algebraic RHS with Nek5000 872 call axhm1 (pap,r3,h1,h1,h2,'bp1') ! r3 = A_nek5k*h1 873 call dssum (r3,nx1,ny1,nz1) 874 875 call nekgsync() 876 877C Solve true RHS 878 if (nid.eq.0) write (6,*) "libCEED true RHS" 879 tstart = dnekclock() 880 call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_mass, 881 $ vec_p1,vec_ap1,maxit,'bp1') 882 tstop = dnekclock() 883 884C Output 885 telaps = (tstop-tstart) 886 maxits = maxit 887 er1 = glrdif(u1,e1,n) 888 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 889 890 if (test.eq.1.and.nid.eq.0) then 891 if (maxit>=100) then 892 write(6,*) "UNCONVERGED CG" 893 endif 894 if (dabs(er1)>5e-3) then 895 write(6,*) "ERROR IS TOO LARGE" 896 endif 897 endif 898 899 nx = nx1-1 900 nnode = nelgt ! nnodes 901 nnode = nnode*(nx**ldim) ! nnodes 902 nppp = nnode/np ! nnodes/proc 903 904 dofps = nnode/telaps ! DOF/sec - scalar form 905 titers = telaps/maxits ! time per iteration 906 tppp_s = titers/nppp ! time per iteraton per local point 907 908 if (nid.eq.0) write(6,1) 'case scalar:' 909 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 910 911C Solve libCEED algebraic RHS 912 if (nid.eq.0) write (6,*) "libCEED algebraic RHS" 913 maxit = 100 914 tstart = dnekclock() 915 call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_mass, 916 $ vec_p1,vec_ap1,maxit,'bp1') 917 tstop = dnekclock() 918 919C Output 920 telaps = (tstop-tstart) 921 maxits = maxit 922 er1 = glrdif(u1,e1,n) 923 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 924 925 if (test.eq.1.and.nid.eq.0) then 926 if (maxit>=100) then 927 write(6,*) "UNCONVERGED CG" 928 endif 929 if (dabs(er1)>1e-5) then 930 write(6,*) "ERROR IS TOO LARGE" 931 endif 932 endif 933 934 nx = nx1-1 935 nnode = nelgt ! nnodes 936 nnode = nnode*(nx**ldim) ! nnodes 937 nppp = nnode/np ! nnodes/proc 938 939 dofps = nnode/telaps ! DOF/sec - scalar form 940 titers = telaps/maxits ! time per iteration 941 tppp_s = titers/nppp ! time per iteraton per local point 942 943 if (nid.eq.0) write(6,1) 'case scalar:' 944 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 945 946C Solve Nek5000 algebraic RHS 947 if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS" 948 maxit = 100 949 tstart = dnekclock() 950 call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_mass, 951 $ vec_p1,vec_ap1,maxit,'bp1') 952 tstop = dnekclock() 953 954C Output 955 telaps = (tstop-tstart) 956 maxits = maxit 957 er1 = glrdif(u1,e1,n) 958 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 959 960 if (test.eq.1.and.nid.eq.0) then 961 if (maxit>=100) then 962 write(6,*) "UNCONVERGED CG" 963 endif 964 if (dabs(er1)>1e-5) then 965 write(6,*) "ERROR IS TOO LARGE" 966 endif 967 endif 968 969 nx = nx1-1 970 nnode = nelgt ! nnodes 971 nnode = nnode*(nx**ldim) ! nnodes 972 nppp = nnode/np ! nnodes/proc 973 974 dofps = nnode/telaps ! DOF/sec - scalar form 975 titers = telaps/maxits ! time per iteration 976 tppp_s = titers/nppp ! time per iteraton per local point 977 978 if (nid.eq.0) write(6,1) 'case scalar:' 979 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 980 981 1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5) 982 3 format(i3,i9,e12.4,1x,a8,i9) 983 984C Destroy ceed handles 985 call ceedvectordestroy(vec_p1,err) 986 call ceedvectordestroy(vec_ap1,err) 987 call ceedvectordestroy(vec_rhs,err) 988 call ceedvectordestroy(vec_qdata,err) 989 call ceedvectordestroy(vec_coords,err) 990 call ceedelemrestrictiondestroy(erstrctu,err) 991 call ceedelemrestrictiondestroy(erstrctx,err) 992 call ceedelemrestrictiondestroy(erstrctw,err) 993 call ceedbasisdestroy(basisu,err) 994 call ceedbasisdestroy(basisx,err) 995 call ceedqfunctiondestroy(qf_setup,err) 996 call ceedqfunctiondestroy(qf_mass,err) 997 call ceedoperatordestroy(op_setup,err) 998 call ceedoperatordestroy(op_mass,err) 999 call ceeddestroy(ceed,err) 1000 1001 return 1002 end 1003C----------------------------------------------------------------------- 1004 subroutine bp3 1005C Solution to BP3 using libCEED 1006 include 'SIZE' 1007 include 'TOTAL' 1008 include 'CTIMER' ! ifsync 1009 include 'FDMH1' 1010 include 'ceedf.h' 1011 1012 parameter (lzq=lx1+1) 1013 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 1014 common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq) 1015 1016 parameter (lt=lx1*ly1*lz1*lelt) 1017 parameter (ld=lxd*lyd*lzd*lelt) 1018 common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt) 1019 common /vcrny/ e1(lt) 1020 common /vcrvh/ h1(lt),h2(ld),pap(3) 1021 real*8 coords(ldim*lx*lelt) 1022 1023 logical ifh3 1024 integer*8 nnode 1025 integer ceed,err,test 1026 character*64 spec 1027 1028 integer p,q,ncompx,ncompu,enode,lnode 1029 integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs 1030 integer erstrctu,erstrctx,erstrctw 1031 integer basisu,basisx 1032 integer qf_diffusion,qf_setup 1033 integer op_diffusion,op_setup 1034 integer ii,i,ngeo 1035 real*8 x,y,z 1036 integer*8 offset 1037 1038 external diffusionf,diffsetupf 1039 1040 ifield = 1 1041 nxq = nx1+1 1042 n = nx1*ny1*nz1*nelt 1043 1044 ifsync = .false. 1045 1046C Set up coordinates and mask 1047 ii=0 1048 do j=0,nelt-1 1049 do i=1,lx 1050 ii=ii+1 1051 x = xm1(ii,1,1,1) 1052 y = ym1(ii,1,1,1) 1053 z = zm1(ii,1,1,1) 1054 coords(i+0*lx+3*j*lx)=x 1055 coords(i+1*lx+3*j*lx)=y 1056 coords(i+2*lx+3*j*lx)=z 1057 if ( x.eq.0.or.x.eq.1 1058 $ .or.y.eq.0.or.y.eq.1 1059 $ .or.z.eq.0.or.z.eq.1 ) then 1060 h2(ii)=0. 1061 else 1062 h2(ii)=1. 1063 endif 1064 enddo 1065 enddo 1066 1067C Init ceed library 1068 call get_spec(spec) 1069 call ceedinit(trim(spec)//char(0),ceed,err) 1070 1071 call get_test(test) 1072 1073C Set up Nek geometry data 1074 call geodatq (gf,bmq,w3mq,nxq) ! Set up gf() arrays 1075 1076C Set up true soln 1077 call sin_fld_h1 (e1) 1078 call xmask1 (e1,h2,nelt) 1079 call copy (h1,e1,n) ! Save exact soln in h1 1080 1081C Set up solver parameters 1082 tol = 1e-10 1083 param(22) = tol 1084 maxit = 100 1085 1086 call nekgsync() 1087 1088C Create ceed basis for mesh and computation 1089 p=nx1 1090 q=p+1 1091 ncompu=1 1092 ncompx=ldim 1093 call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompx,p,q, 1094 $ ceed_gauss,basisx,err) 1095 call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompu,p,q, 1096 $ ceed_gauss,basisu,err) 1097 1098C Create ceed element restrictions for mesh and computation 1099 enode=p**ldim 1100 lnode=enode*nelt*ncompu 1101 ngeo=(ldim*(ldim+1))/2 1102 call ceedelemrestrictioncreateidentity(ceed,nelt,enode,lnode, 1103 $ ldim,erstrctx,err) 1104 call ceedelemrestrictioncreateidentity(ceed,nelt,enode,lnode, 1105 $ ncompu,erstrctu,err) 1106 call ceedelemrestrictioncreateidentity(ceed,nelt,q**ldim, 1107 $ nelt*q**ldim,ngeo,erstrctw,err) 1108 1109C Create ceed vectors 1110 call ceedvectorcreate(ceed,lnode,vec_p1,err) 1111 call ceedvectorcreate(ceed,lnode,vec_ap1,err) 1112 call ceedvectorcreate(ceed,lnode,vec_rhs,err) 1113 call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err) 1114 call ceedvectorcreate(ceed,nelt*ngeo*q**ldim,vec_qdata,err) 1115 1116 offset=0 1117 call ceedvectorsetarray(vec_coords,ceed_mem_host, 1118 $ ceed_use_pointer,coords,offset,err) 1119 1120C Create ceed qfunctions for diffsetupf and diffusionf 1121 call ceedqfunctioncreateinterior(ceed,1,diffsetupf, 1122 $ __FILE__ 1123 $ //':diffsetupf'//char(0),qf_setup,err) 1124 call ceedqfunctionaddinput(qf_setup,'x',ncompx, 1125 $ ceed_eval_interp,err) 1126 call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim, 1127 $ ceed_eval_grad,err) 1128 call ceedqfunctionaddinput(qf_setup,'weight',ncompu, 1129 $ ceed_eval_weight,err) 1130 call ceedqfunctionaddoutput(qf_setup,'rho',ngeo, 1131 $ ceed_eval_none,err) 1132 call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu, 1133 $ ceed_eval_interp,err) 1134 1135 call ceedqfunctioncreateinterior(ceed,1,diffusionf, 1136 $ __FILE__ 1137 $ //':diffusionf'//char(0),qf_diffusion,err) 1138 call ceedqfunctionaddinput(qf_diffusion,'u',ncompu*ldim, 1139 $ ceed_eval_grad,err) 1140 call ceedqfunctionaddinput(qf_diffusion,'rho',ngeo, 1141 $ ceed_eval_none,err) 1142 call ceedqfunctionaddoutput(qf_diffusion,'v',ncompu*ldim, 1143 $ ceed_eval_grad,err) 1144 1145C Create ceed operators 1146 call ceedoperatorcreate(ceed,qf_setup, 1147 $ ceed_null,ceed_null,op_setup,err) 1148 call ceedoperatorsetfield(op_setup,'x',erstrctx, 1149 $ ceed_notranspose,basisx,ceed_vector_active,err) 1150 call ceedoperatorsetfield(op_setup,'dx',erstrctx, 1151 $ ceed_notranspose,basisx,ceed_vector_active,err) 1152 call ceedoperatorsetfield(op_setup,'weight',erstrctx, 1153 $ ceed_notranspose,basisx,ceed_vector_none,err) 1154 call ceedoperatorsetfield(op_setup,'rho',erstrctw, 1155 $ ceed_notranspose,ceed_basis_collocated, 1156 $ ceed_vector_active,err) 1157 call ceedoperatorsetfield(op_setup,'rhs',erstrctu, 1158 $ ceed_notranspose,basisu,vec_rhs,err) 1159 1160 call ceedoperatorcreate(ceed,qf_diffusion, 1161 $ ceed_null,ceed_null,op_diffusion,err) 1162 call ceedoperatorsetfield(op_diffusion,'u',erstrctu, 1163 $ ceed_notranspose,basisu,ceed_vector_active,err) 1164 call ceedoperatorsetfield(op_diffusion,'rho',erstrctw, 1165 $ ceed_notranspose,ceed_basis_collocated, 1166 $ vec_qdata,err) 1167 call ceedoperatorsetfield(op_diffusion,'v',erstrctu, 1168 $ ceed_notranspose,basisu,ceed_vector_active,err) 1169 1170C Compute setup data 1171 call ceedvectorsetarray(vec_rhs,ceed_mem_host, 1172 $ ceed_use_pointer,r1,offset,err) 1173 call ceedoperatorapply(op_setup,vec_coords,vec_qdata, 1174 $ ceed_request_immediate,err) 1175 1176C Set up true RHS 1177 call dssum (r1,nx1,ny1,nz1) ! r1 1178 call xmask1 (r1,h2,nelt) 1179 1180C Set up algebraic RHS with libCEED 1181 call ceedvectorsetarray(vec_p1,ceed_mem_host, 1182 $ ceed_use_pointer,h1,offset,err) 1183 call ceedvectorsetarray(vec_ap1,ceed_mem_host, 1184 $ ceed_use_pointer,r2,offset,err) 1185 call ceedoperatorapply(op_diffusion,vec_p1,vec_ap1, 1186 $ ceed_request_immediate,err) ! r2 = A_ceed*h1 1187 call dssum (r2,nx1,ny1,nz1) 1188 call xmask1 (r2,h2,nelt) 1189 1190C Set up algebraic RHS with Nek5000 1191 call axhm1 (pap,r3,h1,h1,h2,'bp3') ! r3 = A_nek5k*h1 1192 call dssum (r3,nx1,ny1,nz1) 1193 call xmask1 (r3,h2,nelt) 1194 1195 call nekgsync() 1196 1197C Solve true RHS 1198 if (nid.eq.0) write (6,*) "libCEED true RHS" 1199 tstart = dnekclock() 1200 call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1201 $ vec_p1,vec_ap1,maxit,'bp3') 1202 tstop = dnekclock() 1203 1204C Output 1205 telaps = (tstop-tstart) 1206 maxits = maxit 1207 er1 = glrdif(u1,e1,n) 1208 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1209 1210 if (test.eq.1.and.nid.eq.0) then 1211 if (maxit>=100) then 1212 write(6,*) "UNCONVERGED CG" 1213 endif 1214 if (dabs(er1)>1e-3) then 1215 write(6,*) "ERROR IS TOO LARGE" 1216 endif 1217 endif 1218 1219 nx = nx1-1 1220 nnode = nelgt ! nnodes 1221 nnode = nnode*(nx**ldim) ! nnodes 1222 nppp = nnode/np ! nnodes/proc 1223 1224 dofps = nnode/telaps ! DOF/sec - scalar form 1225 titers = telaps/maxits ! time per iteration 1226 tppp_s = titers/nppp ! time per iteraton per local point 1227 1228 if (nid.eq.0) write(6,1) 'case scalar:' 1229 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 1230 1231C Solve libCEED algebraic RHS 1232 if (nid.eq.0) write (6,*) "libCEED algebraic RHS" 1233 maxit = 100 1234 tstart = dnekclock() 1235 call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1236 $ vec_p1,vec_ap1,maxit,'bp3') 1237 tstop = dnekclock() 1238 1239C Output 1240 telaps = (tstop-tstart) 1241 maxits = maxit 1242 er1 = glrdif(u1,e1,n) 1243 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1244 1245 if (test.eq.1.and.nid.eq.0) then 1246 if (maxit>=100) then 1247 write(6,*) "UNCONVERGED CG" 1248 endif 1249 if (dabs(er1)>1e-9) then 1250 write(6,*) "ERROR IS TOO LARGE" 1251 endif 1252 endif 1253 1254 nx = nx1-1 1255 nnode = nelgt ! nnodes 1256 nnode = nnode*(nx**ldim) ! nnodes 1257 nppp = nnode/np ! nnodes/proc 1258 1259 dofps = nnode/telaps ! DOF/sec - scalar form 1260 titers = telaps/maxits ! time per iteration 1261 tppp_s = titers/nppp ! time per iteraton per local point 1262 1263 if (nid.eq.0) write(6,1) 'case scalar:' 1264 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 1265 1266C Solve Nek5000 algebraic RHS 1267 if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS" 1268 maxit = 100 1269 tstart = dnekclock() 1270 call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1271 $ vec_p1,vec_ap1,maxit,'bp3') 1272 tstop = dnekclock() 1273 1274C Output 1275 telaps = (tstop-tstart) 1276 maxits = maxit 1277 er1 = glrdif(u1,e1,n) 1278 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1279 1280 if (test.eq.1.and.nid.eq.0) then 1281 if (maxit>=100) then 1282 write(6,*) "UNCONVERGED CG" 1283 endif 1284 if (dabs(er1)>1e-9) then 1285 write(6,*) "ERROR IS TOO LARGE" 1286 endif 1287 endif 1288 1289 nx = nx1-1 1290 nnode = nelgt ! nnodes 1291 nnode = nnode*(nx**ldim) ! nnodes 1292 nppp = nnode/np ! nnodes/proc 1293 1294 nodepss = nnode/telaps ! DOF/sec - scalar form 1295 titers = telaps/maxits ! time per iteration 1296 tppp_s = titers/nppp ! time per iteraton per local point 1297 1298 if (nid.eq.0) write(6,1) 'case scalar:' 1299 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,nodepss,titers,tppp_s 1300 1301 1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5) 1302 3 format(i3,i9,e12.4,1x,a8,i9) 1303 1304C Destroy ceed handles 1305 call ceedvectordestroy(vec_p1,err) 1306 call ceedvectordestroy(vec_ap1,err) 1307 call ceedvectordestroy(vec_rhs,err) 1308 call ceedvectordestroy(vec_qdata,err) 1309 call ceedvectordestroy(vec_coords,err) 1310 call ceedelemrestrictiondestroy(erstrctu,err) 1311 call ceedelemrestrictiondestroy(erstrctx,err) 1312 call ceedelemrestrictiondestroy(erstrctw,err) 1313 call ceedbasisdestroy(basisu,err) 1314 call ceedbasisdestroy(basisx,err) 1315 call ceedqfunctiondestroy(qf_setup,err) 1316 call ceedqfunctiondestroy(qf_diffusion,err) 1317 call ceedoperatordestroy(op_setup,err) 1318 call ceedoperatordestroy(op_diffusion,err) 1319 call ceeddestroy(ceed,err) 1320 1321 return 1322 end 1323C----------------------------------------------------------------------- 1324 subroutine cggos(u1,r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1, 1325 $ vec_ap1,maxit,bpname) 1326C Scalar conjugate gradient iteration for solution of uncoupled 1327C Helmholtz equations 1328C Input: r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,vec_ap1,bpname 1329C Output: u1,maxit 1330 include 'SIZE' 1331 include 'TOTAL' 1332 include 'DOMAIN' 1333 include 'FDMH1' 1334 character*3 bpname 1335 1336C INPUT: rhs1 - rhs 1337C h1 - exact solution 1338 1339 parameter (lt=lx1*ly1*lz1*lelt) 1340 parameter (ld=lxd*lyd*lzd*lelt) 1341 real*8 u1(lt),r1(lt),h1(lt),h2(lt) 1342 real*8 rmult(1),binv(1) 1343 integer ceed,ceed_op,vec_ap1,vec_p1 1344 common /scrcg/ dpc(lt),p1(lt),z1(lt) 1345 common /scrca/ wv(4),wk(4),rpp1(4),rpp2(4),alph(4),beta(4),pap(4) 1346 1347 real*8 ap1(lt) 1348 equivalence (ap1,z1) 1349 1350 vol = volfld(ifield) 1351 nel = nelfld(ifield) 1352 nxyz = lx1*ly1*lz1 1353 n = nxyz*nel 1354 nx = nx1-1 ! Polynomial order (just for i/o) 1355 1356 tol=tin 1357 1358 if(bpname.ne.'bp1') then 1359 call setprecn_bp3(dpc,h1,h2) ! Set up diagional pre-conidtioner 1360 else 1361 call setprecn_bp1(dpc,h1,h2) ! Set up diagional pre-conidtioner 1362 endif 1363 1364 call rzero (u1,n) ! Initialize solution 1365 1366 wv(1)=0 1367 do i=1,n 1368 s=rmult(i) ! -1 1369 p1(i)=dpc(i)*r1(i) ! p = M r T 1370 wv(1)=wv(1)+s*p1(i)*r1(i) ! r p 1371 enddo 1372 call gop(wv(1),wk,'+ ',1) 1373 rpp1(1) = wv (1) 1374 1375 do 1000 iter=1,maxit 1376 call axhm1_ceed (pap,ap1,p1,h1,h2,ceed,ceed_op, 1377 $ vec_ap1,vec_p1) 1378 call dssum (ap1,nx1,ny1,nz1) 1379 if (bpname.ne.'bp1') call xmask1(ap1,h2,nel) 1380 1381 call gop (pap,wk,'+ ',1) 1382 alph(1) = rpp1(1)/pap(1) 1383 1384 do i=1,n 1385 u1(i)=u1(i)+alph(1)* p1(i) 1386 r1(i)=r1(i)-alph(1)*ap1(i) 1387 enddo 1388 1389C tolerance check here 1390 call rzero(wv,2) 1391 do i=1,n 1392 wv(1)=wv(1)+r1(i)*r1(i) ! L2 error estimate 1393 z1(i)=dpc(i)*r1(i) ! z = M r 1394 wv(2)=wv(2)+rmult(i)*z1(i)*r1(i) ! r z 1395 enddo 1396 call gop(wv,wk,'+ ',2) 1397 1398C if (nio.eq.0) write(6,1) ifield,istep,iter,nx,(wv(k),k=1,1) 1399 1 format(i2,i9,i5,i4,1p1e12.4,' cggos') 1400 1401 enorm=sqrt(wv(1)) 1402 if (enorm.lt.tol) then 1403 ifin = iter 1404 if (nio.eq.0) write(6,3000) istep,ifin,enorm,tol 1405 goto 9999 1406 endif 1407C if (nio.eq.0) write(6,2) iter,enorm,alph(1),pap(1),'alpha' 1408 2 format(i5,1p3e12.4,2x,a5) 1409 1410 rpp2(1)=rpp1(1) 1411 rpp1(1)=wv (2) 1412 beta1 =rpp1(1)/rpp2(1) 1413 do i=1,n 1414 p1(i)=z1(i) + beta1*p1(i) 1415 enddo 1416 1417 1000 continue 1418 1419 rbnorm=sqrt(wv(1)) 1420 if (nio.eq.0) write (6,3001) istep,iter,rbnorm,tol 1421 iter = iter-1 1422 1423 9999 continue 1424 1425 maxit=iter 1426 1427 3000 format(i12,1x,'cggo scalar:',i6,1p5e13.4) 1428 3001 format(2i6,' Unconverged cggo scalar: rbnorm =',1p2e13.6) 1429 1430 return 1431 end 1432C----------------------------------------------------------------------- 1433 subroutine axhm1_ceed(pap,ap1,p1,h1,h2,ceed,ceed_op, 1434 $ vec_ap1,vec_p1) 1435C Vector conjugate gradient matvec for solution of uncoupled 1436C Helmholtz equations 1437C Input: pap,p1,h1,h2,bpname,ceed,ceed_op,vec_ap1,vec_p1 1438C Output: ap1 1439 include 'SIZE' 1440 include 'TOTAL' 1441 include 'ceedf.h' 1442 1443 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1444 real*8 gf(lg,lx,lelt) ! Equivalence new gf() data 1445 equivalence (gf,g1m1) ! layout to g1m1...g6m1 1446 1447 real*8 pap(3) 1448 real*8 ap1(lx,lelt) 1449 real*8 p1(lx,lelt) 1450 real*8 h1(lx,lelt),h2(lx,lelt) 1451 integer ceed,ceed_op,vec_ap1,vec_p1,err 1452 integer i,e 1453 integer*8 offset 1454 1455 offset=0 1456 call ceedvectorsetarray(vec_p1,ceed_mem_host,ceed_use_pointer, 1457 $ p1,offset,err) 1458 call ceedvectorsetarray(vec_ap1,ceed_mem_host,ceed_use_pointer, 1459 $ ap1,offset,err) 1460 1461 call ceedoperatorapply(ceed_op,vec_p1,vec_ap1, 1462 $ ceed_request_immediate,err) 1463 1464 call ceedvectorsyncarray(vec_ap1,ceed_mem_host,err) 1465 1466 pap(1)=0. 1467 1468 do e=1,nelt 1469 do i=1,lx 1470 pap(1)=pap(1)+ap1(i,e)*p1(i,e) 1471 enddo 1472 enddo 1473 1474 return 1475 end 1476C----------------------------------------------------------------------- 1477 subroutine ax_e_bp1(w,u,g,h1,h2,b,ju,us,ut) 1478C Local matrix-vector for solution of BP3 (stiffness matrix) 1479C Input: u,g,h1,h2,b,ju,us,ut Output: w 1480 include 'SIZE' 1481 include 'TOTAL' 1482 1483 parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1484 real*8 w(lxyz),u(lxyz),g(lg,lxyz),h1(lxyz),h2(lxyz),b(lxyz) 1485 real*8 ju(lxyz),us(lxyz),ut(lxyz) 1486 1487 nxq = nx1+1 ! Number of quadrature points 1488 1489 lxyzq = nxq**ldim 1490 1491 call intp_rstd (ju,u,lx1,nxq,if3d,0) ! 0 --> Fwd interpolation 1492 do i=1,lxyzq 1493 ju(i)=ju(i)*h2(i) !! h2 must be on the fine grid, w/ quad wts 1494 enddo 1495 call intp_rstd (w,ju,lx1,nxq,if3d,1) ! 1 --> ju-->u 1496 1497 return 1498 end 1499C----------------------------------------------------------------------- 1500 subroutine axhm1_bp1(pap,ap1,p1,h1,h2) 1501C Vector conjugate gradient matvec for solution of BP1 (mass matrix) 1502C Input: pap,p1,h1,h2 Output: ap1 1503 include 'SIZE' 1504 include 'TOTAL' 1505 1506 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1507 real*8 gf(lg,lx,lelt) ! Equivalence new gf() data 1508 equivalence (gf,g1m1) ! layout to g1m1...g6m1 1509 1510 real*8 pap(3) 1511 real*8 ap1(lx,lelt) 1512 real*8 p1(lx,lelt) 1513 real*8 h1(lx,lelt),h2(lx,lelt) 1514 1515 real*8 ur(lx),us(lx),ut(lx) 1516 common /ctmp1/ ur,us,ut 1517 1518 integer e 1519 1520 pap(1)=0. 1521 1522 k=1 1523 nxq = nx1+1 1524 1525 do e=1,nelt 1526 1527 call ax_e_bp1(ap1(1,e),p1(1,e),gf(1,1,e),h1(1,e),h2(k,1) 1528 $ ,bm1(1,1,1,e),ur,us,ut) 1529 do i=1,lx 1530 pap(1)=pap(1)+ap1(i,e)*p1(i,e) 1531 enddo 1532 k=k+nxq*nxq*nxq 1533 1534 enddo 1535 1536 return 1537 end 1538C----------------------------------------------------------------------- 1539 subroutine ax_e_bp3(w,u,g,ur,us,ut,wk) 1540C Local matrix-vector for solution of BP3 (stiffness matrix) 1541C Input: u,g,ur,us,ut,wk Output: w 1542 include 'SIZE' 1543 include 'TOTAL' 1544 1545 parameter (lzq=lx1+1,lxyz=lx1*lx1*lx1,lxyzq=lzq*lzq*lzq) 1546 1547 common /ctmp0/ tmp(lxyzq) 1548 common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq) 1549 1550 real*8 ur(lxyzq),us(lxyzq),ut(lxyzq),wk(lxyzq) 1551 real*8 w(lxyz),u(lxyz),g(2*ldim,lxyzq) 1552 1553 n = lzq-1 1554 1555 call intp_rstd (wk,u,lx1,lzq,if3d,0) ! 0 --> Fwd interpolation 1556 call loc_grad3 (ur,us,ut,wk,n,dxmq,dxtmq) 1557 1558 do i=1,lxyzq 1559 wr = g(1,i)*ur(i) + g(2,i)*us(i) + g(3,i)*ut(i) 1560 ws = g(2,i)*ur(i) + g(4,i)*us(i) + g(5,i)*ut(i) 1561 wt = g(3,i)*ur(i) + g(5,i)*us(i) + g(6,i)*ut(i) 1562 ur(i) = wr 1563 us(i) = ws 1564 ut(i) = wt 1565 enddo 1566 1567 call loc_grad3t (wk,ur,us,ut,n,dxmq,dxtmq,tmp) 1568 call intp_rstd (w,wk,lx1,lzq,if3d,1) ! 1 --> ju-->u 1569 1570 return 1571 end 1572C----------------------------------------------------------------------- 1573 subroutine axhm1_bp3(pap,ap1,p1,h1,h2) 1574C Vector conjugate gradient matvec for solution of BP3 (stiffness matrix) 1575C Input: pap,p1,h1,h2 Output: ap1 1576 include 'SIZE' 1577 include 'TOTAL' 1578 1579 parameter (lzq=lx1+1) 1580 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 1581 common /bpgfactors/ gf(lg,lq,lelt),bmq(lq,lelt),w3mq(lq) 1582 1583 real*8 pap(3) 1584 real*8 ap1(lx,lelt) 1585 real*8 p1(lx,lelt) 1586 real*8 h1(lx,lelt),h2(lx,lelt) 1587 1588 common /ctmp1/ ur,us,ut,wk 1589 real*8 ur(lq),us(lq),ut(lq),wk(lq) 1590 1591 integer e 1592 1593 pap(1)=0. 1594 1595 do e=1,nelt 1596 1597 call ax_e_bp3(ap1(1,e),p1(1,e),gf(1,1,e),ur,us,ut,wk) 1598 do i=1,lx 1599 pap(1)=pap(1)+p1(i,e)*ap1(i,e) 1600 enddo 1601 1602 enddo 1603 1604 return 1605 end 1606C----------------------------------------------------------------------- 1607 subroutine axhm1(pap,ap1,p1,h1,h2,bpname) 1608C Vector conjugate gradient matvec for solution of uncoupled 1609C Helmholtz equations 1610C Input: pap,p1,h1,h2,bpname Output: ap1 1611 include 'SIZE' 1612 include 'TOTAL' 1613 1614 parameter (lx=lx1*ly1*lz1) 1615 1616 real*8 pap(3),ap1(lx,lelt),p1(lx,lelt) 1617 real*8 h1(lx,lelt),h2(lx,lelt) 1618 1619 character*3 bpname 1620 1621 if (bpname.eq.'bp1') then 1622 call axhm1_bp1(pap,ap1,p1,h1,h2) 1623 1624 elseif (bpname.eq.'bp3') then 1625 call axhm1_bp3(pap,ap1,p1,h1,h2) 1626 1627 else 1628 write(6,*) bpname,' axhm1 bpname error' 1629 stop 1630 1631 endif 1632 1633 return 1634 end 1635C----------------------------------------------------------------------- 1636 subroutine get_bp(bp) 1637C Get BP to run 1638C Input: Output: bp 1639 integer i,bp 1640 character*64 bpval 1641 1642 bp=0 1643 if(iargc().ge.1) then 1644 call getarg(1,bpval) 1645 endif 1646 if(bpval.eq."bp1") then 1647 bp=1 1648 elseif(bpval.eq."bp3") then 1649 bp=3 1650 endif 1651 1652 return 1653 end 1654C----------------------------------------------------------------------- 1655 subroutine get_spec(spec) 1656C Get CEED backend specification 1657C Input: Output: spec 1658 integer i 1659 character*64 spec 1660 1661 spec = '/cpu/self' 1662 if(iargc().ge.2) then 1663 call getarg(2,spec) 1664 endif 1665 1666 return 1667 end 1668C----------------------------------------------------------------------- 1669 subroutine get_test(test) 1670C Get test mode flag 1671C Input: Output: test 1672 integer i,test 1673 character*64 testval 1674 1675 test=0 1676 if(iargc().ge.3) then 1677 call getarg(3,testval) 1678 endif 1679 if(testval.eq."test") then 1680 test=1 1681 endif 1682 1683 return 1684 end 1685C----------------------------------------------------------------------- 1686