1C Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at 2C the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights 3C reserved. See files LICENSE and NOTICE for details. 4C 5C This file is part of CEED, a collection of benchmarks, miniapps, software 6C libraries and APIs for efficient high-order finite element and spectral 7C element discretizations for exascale applications. For more information and 8C source code availability see http://github.com/ceed. 9C 10C The CEED research is supported by the Exascale Computing Project (17-SC-20-SC) 11C a collaborative effort of two U.S. Department of Energy organizations (Office 12C of Science and the National Nuclear Security Administration) responsible for 13C the planning and preparation of a capable exascale ecosystem, including 14C software, applications, hardware, advanced system engineering and early 15C testbed platforms, in support of the nation's exascale computing imperative. 16 17C> @file 18C> Mass and diffusion operators examples using Nek5000 19C TESTARGS -c {ceed_resource} -e bp1 -n 1 -b 4 -test 20 21C----------------------------------------------------------------------- 22 subroutine masssetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 23 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 24 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 25C Set up mass operator 26C Input: u1,u2,u3,q Output: v1,v2,ierr 27 integer q,ierr 28 real*8 ctx(1) 29 real*8 u1(3*q) 30 real*8 u2(9*q) 31 real*8 u3(q) 32 real*8 v1(q) 33 real*8 v2(q) 34 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 35 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 36 real*8 jacmq 37 38 do i=1,q 39 a11=u2(i+q*0) 40 a12=u2(i+q*3) 41 a13=u2(i+q*6) 42 43 a21=u2(i+q*1) 44 a22=u2(i+q*4) 45 a23=u2(i+q*7) 46 47 a31=u2(i+q*2) 48 a32=u2(i+q*5) 49 a33=u2(i+q*8) 50 51 g11 = (a22*a33-a23*a32) 52 g12 = (a13*a32-a33*a12) 53 g13 = (a12*a23-a22*a13) 54 55 g21 = (a23*a31-a21*a33) 56 g22 = (a11*a33-a31*a13) 57 g23 = (a13*a21-a23*a11) 58 59 g31 = (a21*a32-a22*a31) 60 g32 = (a12*a31-a32*a11) 61 g33 = (a11*a22-a21*a12) 62 63 jacmq = a11*g11+a21*g12+a31*g13 64 65C Rho 66 v1(i)=u3(i)*jacmq 67 68C RHS 69 v2(i)=u3(i)*jacmq 70 $ *dsqrt(u1(i+q*0)*u1(i+q*0) 71 $ +u1(i+q*1)*u1(i+q*1) 72 $ +u1(i+q*2)*u1(i+q*2)) 73 enddo 74 75 ierr=0 76 end 77C----------------------------------------------------------------------- 78 subroutine massf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 79 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 80 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 81C Apply mass operator 82C Input: u1,u2,q Output: v1,ierr 83 integer q,ierr 84 real*8 ctx(1) 85 real*8 u1(q) 86 real*8 u2(q) 87 real*8 v1(q) 88 89 do i=1,q 90 v1(i)=u2(i)*u1(i) 91 enddo 92 93 ierr=0 94 end 95C----------------------------------------------------------------------- 96 subroutine diffsetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 97 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 98 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 99C Set up diffusion operator 100C Input: u1,u2,u3,q Output: v1,v2,ierr 101 integer q,ierr 102 real*8 ctx(1) 103 real*8 u1(3*q) 104 real*8 u2(9*q) 105 real*8 u3(q) 106 real*8 v1(6*q) 107 real*8 v2(q) 108 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 109 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 110 real*8 jacmq,scl 111 real*8 c(3),k(3) 112 113 do i=1,q 114 pi = 3.14159265358979323846 115 116 c(1)=0. 117 c(2)=1. 118 c(3)=2. 119 k(1)=1. 120 k(2)=2. 121 k(3)=3. 122 123 a11=u2(i+q*0) 124 a12=u2(i+q*3) 125 a13=u2(i+q*6) 126 127 a21=u2(i+q*1) 128 a22=u2(i+q*4) 129 a23=u2(i+q*7) 130 131 a31=u2(i+q*2) 132 a32=u2(i+q*5) 133 a33=u2(i+q*8) 134 135 g11 = (a22*a33-a23*a32) 136 g12 = (a13*a32-a33*a12) 137 g13 = (a12*a23-a22*a13) 138 139 g21 = (a23*a31-a21*a33) 140 g22 = (a11*a33-a31*a13) 141 g23 = (a13*a21-a23*a11) 142 143 g31 = (a21*a32-a22*a31) 144 g32 = (a12*a31-a32*a11) 145 g33 = (a11*a22-a21*a12) 146 147 jacmq = a11*g11+a21*g12+a31*g13 148 149 scl = u3(i)/jacmq 150 151C Geometric factors 152 v1(i+0*q) = scl*(g11*g11+g12*g12+g13*g13) ! Grr 153 v1(i+1*q) = scl*(g11*g21+g12*g22+g13*g23) ! Grs 154 v1(i+2*q) = scl*(g11*g31+g12*g32+g13*g33) ! Grt 155 v1(i+3*q) = scl*(g21*g21+g22*g22+g23*g23) ! Gss 156 v1(i+4*q) = scl*(g21*g31+g22*g32+g23*g33) ! Gst 157 v1(i+5*q) = scl*(g31*g31+g32*g32+g33*g33) ! Gtt 158 159C RHS 160 v2(i) = u3(i)*jacmq*pi*pi 161 $ *dsin(pi*(c(1)+k(1)*u1(i+0*q))) 162 $ *dsin(pi*(c(2)+k(2)*u1(i+1*q))) 163 $ *dsin(pi*(c(3)+k(3)*u1(i+2*q))) 164 $ *(k(1)*k(1)+k(2)*k(2)+k(3)*k(3)) 165 166 enddo 167 168 ierr=0 169 end 170C----------------------------------------------------------------------- 171 subroutine diffusionf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 172 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 173 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 174C Apply diffusion operator 175C Input: u1,u2,q Output: v1,ierr 176 integer q,ierr 177 real*8 ctx(1) 178 real*8 u1(3*q) 179 real*8 u2(6*q) 180 real*8 v1(3*q) 181 182 do i=1,q 183 v1(i+0*q)= 184 $ u2(i+0*q)*u1(i)+u2(i+1*q)*u1(i+q)+u2(i+2*q)*u1(i+2*q) 185 v1(i+1*q)= 186 $ u2(i+1*q)*u1(i)+u2(i+3*q)*u1(i+q)+u2(i+4*q)*u1(i+2*q) 187 v1(i+2*q)= 188 $ u2(i+2*q)*u1(i)+u2(i+4*q)*u1(i+q)+u2(i+5*q)*u1(i+2*q) 189 enddo 190 191 ierr=0 192 end 193C----------------------------------------------------------------------- 194 subroutine set_h2_as_rhoJac_GL(h2,bmq,nxq) 195C Set h2 as rhoJac 196C Input: bmq,nxq Output: h2 197 include 'SIZE' 198 real*8 h2(1),bmq(1) 199 200 common /ctmp77/ wd(lxd),zd(lxd) 201 integer e,i,L 202 203 call zwgl(zd,wd,nxq) ! nxq = number of points 204 205 q = 1.0 ! Later, this can be a function of position... 206 207 L = 0 208 do e=1,lelt 209 do i=1,nxq**ldim 210 L=L+1 211 h2(L) = q*bmq(L) 212 enddo 213 enddo 214 215 return 216 end 217C----------------------------------------------------------------------- 218 subroutine dist_fld_h1(e) 219C Set distance initial condition for BP1 220C Input: Output: e 221 include 'SIZE' 222 include 'TOTAL' 223 real*8 x,y,z 224 real*8 e(1) 225 226 n=lx1*ly1*lz1*nelt 227 228 do i=1,n 229 x=xm1(i,1,1,1) 230 y=ym1(i,1,1,1) 231 z=zm1(i,1,1,1) 232 233 e(i) = dsqrt(x*x+y*y+z*z) 234 enddo 235 236 call dsavg(e) ! This is requisite for random fields 237 238 return 239 end 240C----------------------------------------------------------------------- 241 subroutine sin_fld_h1(e) 242C Set sine initial condition for BP3 243C Input: Output: e 244 include 'SIZE' 245 include 'TOTAL' 246 real*8 x,y,z 247 real*8 e(1) 248 real*8 c(3),k(3) 249 250 n=lx1*ly1*lz1*nelt 251 pi = 3.14159265358979323846 252 253 c(1)=0. 254 c(2)=1. 255 c(3)=2. 256 k(1)=1. 257 k(2)=2. 258 k(3)=3. 259 260 do i=1,n 261 x=xm1(i,1,1,1) 262 y=ym1(i,1,1,1) 263 z=zm1(i,1,1,1) 264 265 e(i) = dsin(pi*(c(1)+k(1)*x)) 266 & *dsin(pi*(c(2)+k(2)*y)) 267 & *dsin(pi*(c(3)+k(3)*z)) 268 269 enddo 270 271 call dsavg(e) ! This is requisite for random fields 272 273 return 274 end 275C----------------------------------------------------------------------- 276 subroutine uservp(ix,iy,iz,eg) ! set variable properties 277 include 'SIZE' 278 include 'TOTAL' 279 include 'NEKUSE' 280 integer e,f,eg 281C e = gllel(eg) 282 283 udiff = 0.0 284 utrans = 0.0 285 286 return 287 end 288C----------------------------------------------------------------------- 289 subroutine userf(ix,iy,iz,eg) ! set acceleration term 290C 291C Note: this is an acceleration term, NOT a force! 292C Thus, ffx will subsequently be multiplied by rho(x,t). 293C 294 include 'SIZE' 295 include 'TOTAL' 296 include 'NEKUSE' 297 integer e,f,eg 298C e = gllel(eg) 299 300 ffx = 0.0 301 ffy = 0.0 302 ffz = 0.0 303 304 return 305 end 306C----------------------------------------------------------------------- 307 subroutine userq(i,j,k,eg) ! set source term 308 include 'SIZE' 309 include 'TOTAL' 310 include 'NEKUSE' 311 integer e,f,eg 312 e = gllel(eg) 313 314 qvol = 0 315 316 return 317 end 318C----------------------------------------------------------------------- 319 subroutine userbc(ix,iy,iz,f,eg) ! set up boundary conditions 320C NOTE ::: This subroutine MAY NOT be called by every process 321 include 'SIZE' 322 include 'TOTAL' 323 include 'NEKUSE' 324 integer e,f,eg 325 326 ux = 0.0 327 uy = 0.0 328 uz = 0.0 329 temp = 0.0 330 331 return 332 end 333C----------------------------------------------------------------------- 334 subroutine useric(ix,iy,iz,eg) ! set up initial conditions 335 include 'SIZE' 336 include 'TOTAL' 337 include 'NEKUSE' 338 integer e,f,eg 339 340 ux = 0.0 341 uy = 0.0 342 uz = 0.0 343 temp = 0.0 344 345 return 346 end 347C----------------------------------------------------------------------- 348 subroutine usrdat ! This routine to modify element vertices 349 include 'SIZE' 350 include 'TOTAL' 351 352 return 353 end 354C----------------------------------------------------------------------- 355 subroutine usrdat2 ! This routine to modify mesh coordinates 356 include 'SIZE' 357 include 'TOTAL' 358 359 x0 = 0 360 x1 = 1 361 call rescale_x(xm1,x0,x1) 362 call rescale_x(ym1,x0,x1) 363 call rescale_x(zm1,x0,x1) 364 365 param(59)=1 ! Force Nek to use the "deformed element" formulation 366 367 return 368 end 369C----------------------------------------------------------------------- 370 subroutine usrdat3 371 include 'SIZE' 372 include 'TOTAL' 373 374 return 375 end 376C----------------------------------------------------------------------- 377 subroutine xmask1 (r1,h2,nel) 378C Apply zero Dirichlet boundary conditions 379C Input: h2,nel Output: r1 380 include 'SIZE' 381 include 'TOTAL' 382 real*8 r1(1),h2(1) 383 384 n=nx1*ny1*nz1*nel 385 do i=1,n 386 r1(i)=r1(i)*h2(i) 387 enddo 388 389 return 390 end 391C----------------------------------------------------------------------- 392 function glrdif(x,y,n) 393C Compute Linfty norm of (x-y) 394C Input: x,y Output: n 395 real*8 x(n),y(n) 396 397 dmx=0 398 xmx=0 399 ymx=0 400 401 do i=1,n 402 diff=abs(x(i)-y(i)) 403 dmx =max(dmx,diff) 404 xmx =max(xmx,x(i)) 405 ymx =max(ymx,y(i)) 406 enddo 407 408 xmx = max(xmx,ymx) 409 dmx = glmax(dmx,1) ! max across processors 410 xmx = glmax(xmx,1) 411 412 if (xmx.gt.0) then 413 glrdif = dmx/xmx 414 else 415 glrdif = -dmx ! Negative indicates something strange 416 endif 417 418 return 419 end 420C----------------------------------------------------------------------- 421 subroutine loc_grad3(ur,us,ut,u,N,D,Dt) 422C 3D transpose of local gradient 423C Input: u,N,D,Dt Output: ur,us,ut 424 real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N) 425 real*8 u (0:N,0:N,0:N) 426 real*8 D (0:N,0:N),Dt(0:N,0:N) 427 428 m1 = N+1 429 m2 = m1*m1 430 431 call mxm(D ,m1,u(0,0,0),m1,ur,m2) 432 do k=0,N 433 call mxm(u(0,0,k),m1,Dt,m1,us(0,0,k),m1) 434 enddo 435 call mxm(u(0,0,0),m2,Dt,m1,ut,m1) 436 437 return 438 end 439c----------------------------------------------------------------------- 440 subroutine loc_grad3t(u,ur,us,ut,N,D,Dt,w) 441C 3D transpose of local gradient 442C Input: ur,us,ut,N,D,Dt Output: u 443 real*8 u (0:N,0:N,0:N) 444 real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N) 445 real*8 D (0:N,0:N),Dt(0:N,0:N) 446 real*8 w (0:N,0:N,0:N) 447 448 m1 = N+1 449 m2 = m1*m1 450 m3 = m1*m1*m1 451 452 call mxm(Dt,m1,ur,m1,u(0,0,0),m2) 453 do k=0,N 454 call mxm(us(0,0,k),m1,D ,m1,w(0,0,k),m1) 455 enddo 456 call add2(u(0,0,0),w,m3) 457 call mxm(ut,m2,D ,m1,w,m1) 458 call add2(u(0,0,0),w,m3) 459 460 return 461 end 462C----------------------------------------------------------------------- 463 subroutine geodatq(gf,bmq,w3mq,nxq) 464C Routine to generate elemental geometric matrices on mesh 1 465C (Gauss-Legendre Lobatto mesh). 466 include 'SIZE' 467 include 'TOTAL' 468 469 parameter (lg=3+3*(ldim-2),lzq=lx1+1,lxyd=lzq**ldim) 470 471 real*8 gf(lg,nxq**ldim,lelt),bmq(nxq**ldim,lelt),w3mq(nxq,nxq,nxq) 472 473 common /ctmp1/ xr(lxyd),xs(lxyd),xt(lxyd) 474 common /sxrns/ yr(lxyd),ys(lxyd),yt(lxyd) 475 $ , zr(lxyd),zs(lxyd),zt(lxyd) 476 477 common /ctmp77/ wd(lxd),zd(lxd) 478 common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq) 479 480 integer e 481 real*8 tmp(lxyd) 482 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 483 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 484 real*8 jacmq 485 486 if (nxq.gt.lzq) call exitti('ABORT: recompile with lzq=$',nxq) 487 488 call zwgl (zd,wd,lzq) ! nxq = number of points 489 call gen_dgl (dxmq,dxtmq,lzq,lzq,tmp) 490 491 do k=1,nxq 492 do j=1,nxq 493 do i=1,nxq 494 w3mq(i,j,k) = wd(i)*wd(j)*wd(k) 495 enddo 496 enddo 497 enddo 498 499 nxyzq = nxq**ldim 500 nxqm1 = lzq-1 501 502 do e=1,nelt 503 call intp_rstd (tmp,xm1(1,1,1,e),lx1,lzq,if3d,0) ! 0-->Fwd interpolation 504 call loc_grad3 (xr,xs,xt,tmp,nxqm1,dxmq,dxtmq) 505 506 call intp_rstd (tmp,ym1(1,1,1,e),lx1,lzq,if3d,0) 507 call loc_grad3 (yr,ys,yt,tmp,nxqm1,dxmq,dxtmq) 508 509 call intp_rstd (tmp,zm1(1,1,1,e),lx1,lzq,if3d,0) 510 call loc_grad3 (zr,zs,zt,tmp,nxqm1,dxmq,dxtmq) 511 512 do i=1,nxyzq 513 a11 = xr(i) 514 a12 = xs(i) 515 a13 = xt(i) 516 517 a21 = yr(i) 518 a22 = ys(i) 519 a23 = yt(i) 520 521 a31 = zr(i) 522 a32 = zs(i) 523 a33 = zt(i) 524 525 g11 = (a22*a33-a23*a32) 526 g12 = (a13*a32-a33*a12) 527 g13 = (a12*a23-a22*a13) 528 529 g21 = (a23*a31-a21*a33) 530 g22 = (a11*a33-a31*a13) 531 g23 = (a13*a21-a23*a11) 532 533 g31 = (a21*a32-a22*a31) 534 g32 = (a12*a31-a32*a11) 535 g33 = (a11*a22-a21*a12) 536 537 jacmq = a11*g11+a21*g12+a31*g13 538 539 bmq(i,e) = w3mq(i,1,1)*jacmq 540 scale = w3mq(i,1,1)/jacmq 541 542 gf(1,i,e) = scale*(g11*g11+g12*g12+g13*g13) ! Grr 543 gf(2,i,e) = scale*(g11*g21+g12*g22+g13*g23) ! Grs 544 gf(3,i,e) = scale*(g11*g31+g12*g32+g13*g33) ! Grt 545 gf(4,i,e) = scale*(g21*g21+g22*g22+g23*g23) ! Gss 546 gf(5,i,e) = scale*(g21*g31+g22*g32+g23*g33) ! Gst 547 gf(6,i,e) = scale*(g31*g31+g32*g32+g33*g33) ! Gtt 548 549 enddo 550 enddo 551 552 return 553 end 554C----------------------------------------------------------------------- 555 subroutine setprecn_bp1 (d,h1,h2) 556C Generate diagonal preconditioner for Helmholtz operator 557C Input: h1,h2 Output: d 558 include 'SIZE' 559 include 'TOTAL' 560 561 parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2)) 562 563 real*8 d(lx1,ly1,lz1,lelt),h1(lxyz,lelt),h2(lxyz,lelt) 564 integer e 565 566 real*8 gf(lg,lx1,ly1,lz1,lelt) ! Equivalence new gf() data 567 equivalence (gf,g1m1) ! layout to g1m1...g6m1 568 569 real*8 ysm1(ly1) 570 571 nel = nelfld(ifield) 572 n = nel*lx1*ly1*lz1 573 nxyz = lx1*ly1*lz1 574 575 call copy (d,bm1,n) ! Mass matrix preconditioning full mass matrix 576 call dssum (d,nx1,ny1,nz1) 577 call invcol1 (d,n) 578 return 579 580 call dsset(lx1,ly1,lz1) 581 582 do 1000 e=1,nel 583 584 call rzero(d(1,1,1,e),nxyz) 585 586 do 320 iz=1,lz1 587 do 320 iy=1,ly1 588 do 320 ix=1,lx1 589 do 320 iq=1,lx1 590 d(ix,iy,iz,e) = d(ix,iy,iz,e) 591 $ + gf(1,iq,iy,iz,e) * dxm1(iq,ix)**2 592 $ + gf(2,ix,iq,iz,e) * dxm1(iq,iy)**2 593 $ + gf(3,ix,iy,iq,e) * dxm1(iq,iz)**2 594 320 continue 595C 596C Add cross terms if element is deformed. 597C 598 if (lxyz.gt.0) then 599 600 do i2=1,ly1,ly1-1 601 do i1=1,lx1,lx1-1 602 d(1,i1,i2,e) = d(1,i1,i2,e) 603 $ + gf(4,1,i1,i2,e) * dxtm1(1,1)*dytm1(i1,i1) 604 $ + gf(5,1,i1,i2,e) * dxtm1(1,1)*dztm1(i2,i2) 605 d(lx1,i1,i2,e) = d(lx1,i1,i2,e) 606 $ + gf(4,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dytm1(i1,i1) 607 $ + gf(5,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dztm1(i2,i2) 608 d(i1,1,i2,e) = d(i1,1,i2,e) 609 $ + gf(4,i1,1,i2,e) * dytm1(1,1)*dxtm1(i1,i1) 610 $ + gf(6,i1,1,i2,e) * dytm1(1,1)*dztm1(i2,i2) 611 d(i1,ly1,i2,e) = d(i1,ly1,i2,e) 612 $ + gf(4,i1,ly1,i2,e) * dytm1(ly1,ly1)*dxtm1(i1,i1) 613 $ + gf(6,i1,ly1,i2,e) * dytm1(ly1,ly1)*dztm1(i2,i2) 614 d(i1,i2,1,e) = d(i1,i2,1,e) 615 $ + gf(5,i1,i2,1,e) * dztm1(1,1)*dxtm1(i1,i1) 616 $ + gf(6,i1,i2,1,e) * dztm1(1,1)*dytm1(i2,i2) 617 d(i1,i2,lz1,e) = d(i1,i2,lz1,e) 618 $ + gf(5,i1,i2,lz1,e) * dztm1(lz1,lz1)*dxtm1(i1,i1) 619 $ + gf(6,i1,i2,lz1,e) * dztm1(lz1,lz1)*dytm1(i2,i2) 620 621 enddo 622 enddo 623 endif 624 625 do i=1,lxyz 626 d(i,1,1,e)=d(i,1,1,e)*h1(i,e)+h2(i,e)*bm1(i,1,1,e) 627 enddo 628 629 1000 continue ! element loop 630 631C If axisymmetric, add a diagonal term in the radial direction (ISD=2) 632 633 if (ifaxis.and.(isd.eq.2)) then 634 do 1200 e=1,nel 635 if (ifrzer(e)) call mxm(ym1(1,1,1,e),lx1,datm1,ly1,ysm1,1) 636 k=0 637 do 1190 j=1,ly1 638 do 1190 i=1,lx1 639 k=k+1 640 if (ym1(i,j,1,e).ne.0.) then 641 term1 = bm1(i,j,1,e)/ym1(i,j,1,e)**2 642 if (ifrzer(e)) then 643 term2 = wxm1(i)*wam1(1)*dam1(1,j) 644 $ *jacm1(i,1,1,e)/ysm1(i) 645 else 646 term2 = 0. 647 endif 648 d(i,j,1,e) = d(i,j,1,e) + h1(k,e)*(term1+term2) 649 endif 650 1190 continue 651 1200 continue 652 653 endif 654 call dssum (d,nx1,ny1,nz1) 655 call invcol1 (d,n) 656 657 if (nio.eq.0) write(6,1) n,d(1,1,1,1),h1(1,1),h2(1,1),bm1(1,1,1,1) 658 1 format(i9,1p4e12.4,' diag prec') 659 660 return 661 end 662C----------------------------------------------------------------------- 663 subroutine setprecn_bp3 (d,h1,h2) 664C Generate dummy diagonal preconditioner for Helmholtz operator 665C Input: h1,h2 Output: d 666 include 'SIZE' 667 include 'TOTAL' 668 669 parameter (n=lx1*ly1*lz1*lelt) 670 real*8 d(n),h1(n),h2(n) 671 672 call rone (d,n) 673 674 return 675 end 676C----------------------------------------------------------------------- 677 subroutine userchk 678 include 'SIZE' 679 include 'TOTAL' 680 681 integer bp 682 683 call get_bp(bp) 684 685 if (bp==1) then 686 if (istep.gt.0) call bp1 687 elseif (bp==3) then 688 if (istep.gt.0) call bp3 689 else 690 write(6,*) "INVALID BP SPECIFICED" 691 endif 692 693 return 694 end 695C----------------------------------------------------------------------- 696 subroutine bp1 697C Solution to BP1 using libCEED 698 include 'SIZE' 699 include 'TOTAL' 700 include 'CTIMER' ! ifsync 701 include 'FDMH1' 702 include 'ceedf.h' 703 704 parameter (lzq=lx1+1) 705 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 706 common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq) 707 708 parameter (lt=lx1*ly1*lz1*lelt) 709 parameter (ld=lxd*lyd*lzd*lelt) 710 common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt) 711 common /vcrny/ e1(lt) 712 common /vcrvh/ h1(lt),h2(lx*lelt),pap(3) 713 real*8 coords(ldim*lx*lelt) 714 715 logical ifh3 716 integer*8 ndof 717 integer ceed,err,test 718 character*64 spec 719 720 integer p,q,ncomp,edof,ldof 721 integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs 722 integer erstrctu,erstrctx,erstrctw 723 integer basisu,basisx 724 integer qf_mass,qf_setup 725 integer op_mass,op_setup 726 real*8 x,y,z 727 integer*8 offset 728 729 external massf,masssetupf 730 731 ifield = 1 732 nxq = nx1+1 733 n = nx1*ny1*nz1*nelt 734 735 ifsync = .false. 736 737C Set up coordinates 738 ii=0 739 do j=0,nelt-1 740 do i=1,lx 741 ii=ii+1 742 x = xm1(ii,1,1,1) 743 y = ym1(ii,1,1,1) 744 z = zm1(ii,1,1,1) 745 coords(i+0*lx+3*j*lx)=x 746 coords(i+1*lx+3*j*lx)=y 747 coords(i+2*lx+3*j*lx)=z 748 enddo 749 enddo 750 751C Init ceed library 752 call get_spec(spec) 753 call ceedinit(trim(spec)//char(0),ceed,err) 754 755 call get_test(test) 756 757C Set up Nek geometry data 758 call geodatq (gf,bmq,w3mq,nxq) ! Set up gf() arrays 759 call set_h2_as_rhoJac_GL(h2,bmq,nxq) 760 761C Set up true soln 762 call dist_fld_h1 (e1) 763 call copy (h1,e1,n) ! Save exact soln in h1 764 765C Set up solver parameters 766 tol = 1e-10 767 param(22) = tol 768 maxit = 100 769 770 call nekgsync() 771 772C Create ceed basis for mesh and computation 773 p=nx1 774 q=p+1 775 ncomp=1 776 call ceedbasiscreatetensorh1lagrange(ceed,ndim,ndim,p,q, 777 $ ceed_gauss,basisx,err) 778 call ceedbasiscreatetensorh1lagrange(ceed,ndim,ncomp,p,q, 779 $ ceed_gauss,basisu,err) 780 781C Create ceed element restrictions for mesh and computation 782 edof=p**ldim 783 ldof=edof*nelt*ncomp 784 call ceedelemrestrictioncreateidentity(ceed,nelt,edof,ldof, 785 $ ldim,erstrctx,err) 786 call ceedelemrestrictioncreateidentity(ceed,nelt,edof,ldof, 787 $ ncomp,erstrctu,err) 788 call ceedelemrestrictioncreateidentity(ceed,nelt,q**ldim, 789 $ nelt*q**ldim,1,erstrctw,err) 790 791C Create ceed vectors 792 call ceedvectorcreate(ceed,ldof,vec_p1,err) 793 call ceedvectorcreate(ceed,ldof,vec_ap1,err) 794 call ceedvectorcreate(ceed,ldof,vec_rhs,err) 795 call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err) 796 call ceedvectorcreate(ceed,nelt*q**ldim,vec_qdata,err) 797 798 offset=0 799 call ceedvectorsetarray(vec_coords,ceed_mem_host, 800 $ ceed_use_pointer,coords,offset,err) 801 802C Create ceed qfunctions for masssetupf and massf 803 call ceedqfunctioncreateinterior(ceed,1,masssetupf, 804 $ __FILE__ 805 $ //':masssetupf',qf_setup,err) 806 call ceedqfunctionaddinput(qf_setup,'x',ldim, 807 $ ceed_eval_interp,err) 808 call ceedqfunctionaddinput(qf_setup,'dx',ldim, 809 $ ceed_eval_grad,err) 810 call ceedqfunctionaddinput(qf_setup,'weight',1, 811 $ ceed_eval_weight,err) 812 call ceedqfunctionaddoutput(qf_setup,'rho',1, 813 $ ceed_eval_none,err) 814 call ceedqfunctionaddoutput(qf_setup,'rhs',1, 815 $ ceed_eval_interp,err) 816 817 call ceedqfunctioncreateinterior(ceed,1,massf, 818 $ __FILE__ 819 $ //':massf',qf_mass,err) 820 call ceedqfunctionaddinput(qf_mass,'u',1, 821 $ ceed_eval_interp,err) 822 call ceedqfunctionaddinput(qf_mass,'rho',1, 823 $ ceed_eval_none,err) 824 call ceedqfunctionaddoutput(qf_mass,'v',1, 825 $ ceed_eval_interp,err) 826 827C Create ceed operators 828 call ceedoperatorcreate(ceed,qf_setup, 829 $ ceed_null,ceed_null,op_setup,err) 830 call ceedoperatorsetfield(op_setup,'x',erstrctx, 831 $ ceed_notranspose,basisx,ceed_vector_active,err) 832 call ceedoperatorsetfield(op_setup,'dx',erstrctx, 833 $ ceed_notranspose,basisx,ceed_vector_active,err) 834 call ceedoperatorsetfield(op_setup,'weight',erstrctx, 835 $ ceed_notranspose,basisx,ceed_vector_none,err) 836 call ceedoperatorsetfield(op_setup,'rho',erstrctw, 837 $ ceed_notranspose,ceed_basis_collocated, 838 $ ceed_vector_active,err) 839 call ceedoperatorsetfield(op_setup,'rhs',erstrctu, 840 $ ceed_notranspose,basisu,vec_rhs,err) 841 842 call ceedoperatorcreate(ceed,qf_mass, 843 $ ceed_null,ceed_null,op_mass,err) 844 call ceedoperatorsetfield(op_mass,'u',erstrctu, 845 $ ceed_notranspose,basisu,ceed_vector_active,err) 846 call ceedoperatorsetfield(op_mass,'rho',erstrctw, 847 $ ceed_notranspose,ceed_basis_collocated, 848 $ vec_qdata,err) 849 call ceedoperatorsetfield(op_mass,'v',erstrctu, 850 $ ceed_notranspose,basisu,ceed_vector_active,err) 851 852C Compute setup data 853 call ceedvectorsetarray(vec_rhs,ceed_mem_host, 854 $ ceed_use_pointer,r1,offset,err) 855 call ceedoperatorapply(op_setup,vec_coords,vec_qdata, 856 $ ceed_request_immediate,err) 857 858C Set up true RHS 859 call dssum (r1,nx1,ny1,nz1) ! r1 860 861C Set up algebraic RHS with libCEED 862 call ceedvectorsetarray(vec_p1,ceed_mem_host, 863 $ ceed_use_pointer,h1,offset,err) 864 call ceedvectorsetarray(vec_ap1,ceed_mem_host, 865 $ ceed_use_pointer,r2,offset,err) 866 call ceedoperatorapply(op_mass,vec_p1,vec_ap1, 867 $ ceed_request_immediate,err) ! r2 = A_ceed*h1 868 call dssum (r2,nx1,ny1,nz1) 869 870C Set up algebraic RHS with Nek5000 871 call axhm1 (pap,r3,h1,h1,h2,'bp1') ! r3 = A_nek5k*h1 872 call dssum (r3,nx1,ny1,nz1) 873 874 call nekgsync() 875 876C Solve true RHS 877 if (nid.eq.0) write (6,*) "libCEED true RHS" 878 tstart = dnekclock() 879 call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_mass, 880 $ vec_p1,vec_ap1,maxit,'bp1') 881 tstop = dnekclock() 882 883C Output 884 telaps = (tstop-tstart) 885 maxits = maxit 886 er1 = glrdif(u1,e1,n) 887 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 888 889 if (test.eq.1.and.nid.eq.0) then 890 if (maxit>=100) then 891 write(6,*) "UNCONVERGED CG" 892 endif 893 if (dabs(er1)>5e-3) then 894 write(6,*) "ERROR IS TOO LARGE" 895 endif 896 endif 897 898 nx = nx1-1 899 ndof = nelgt ! ndofs 900 ndof = ndof*(nx**ldim) ! ndofs 901 nppp = ndof/np ! ndofs/proc 902 903 dofpss = ndof/telaps ! DOF/sec - scalar form 904 titers = telaps/maxits ! time per iteration 905 tppp_s = titers/nppp ! time per iteraton per local point 906 907 if (nid.eq.0) write(6,1) 'case scalar:' 908 $ ,np,nx,nelt,nelgt,ndof,nppp,maxits,telaps,dofpss,titers,tppp_s 909 910C Solve libCEED algebraic RHS 911 if (nid.eq.0) write (6,*) "libCEED algebraic RHS" 912 maxit = 100 913 tstart = dnekclock() 914 call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_mass, 915 $ vec_p1,vec_ap1,maxit,'bp1') 916 tstop = dnekclock() 917 918C Output 919 telaps = (tstop-tstart) 920 maxits = maxit 921 er1 = glrdif(u1,e1,n) 922 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 923 924 if (test.eq.1.and.nid.eq.0) then 925 if (maxit>=100) then 926 write(6,*) "UNCONVERGED CG" 927 endif 928 if (dabs(er1)>1e-5) then 929 write(6,*) "ERROR IS TOO LARGE" 930 endif 931 endif 932 933 nx = nx1-1 934 ndof = nelgt ! ndofs 935 ndof = ndof*(nx**ldim) ! ndofs 936 nppp = ndof/np ! ndofs/proc 937 938 dofpss = ndof/telaps ! DOF/sec - scalar form 939 titers = telaps/maxits ! time per iteration 940 tppp_s = titers/nppp ! time per iteraton per local point 941 942 if (nid.eq.0) write(6,1) 'case scalar:' 943 $ ,np,nx,nelt,nelgt,ndof,nppp,maxits,telaps,dofpss,titers,tppp_s 944 945C Solve Nek5000 algebraic RHS 946 if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS" 947 maxit = 100 948 tstart = dnekclock() 949 call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_mass, 950 $ vec_p1,vec_ap1,maxit,'bp1') 951 tstop = dnekclock() 952 953C Output 954 telaps = (tstop-tstart) 955 maxits = maxit 956 er1 = glrdif(u1,e1,n) 957 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 958 959 if (test.eq.1.and.nid.eq.0) then 960 if (maxit>=100) then 961 write(6,*) "UNCONVERGED CG" 962 endif 963 if (dabs(er1)>1e-5) then 964 write(6,*) "ERROR IS TOO LARGE" 965 endif 966 endif 967 968 nx = nx1-1 969 ndof = nelgt ! ndofs 970 ndof = ndof*(nx**ldim) ! ndofs 971 nppp = ndof/np ! ndofs/proc 972 973 dofpss = ndof/telaps ! DOF/sec - scalar form 974 titers = telaps/maxits ! time per iteration 975 tppp_s = titers/nppp ! time per iteraton per local point 976 977 if (nid.eq.0) write(6,1) 'case scalar:' 978 $ ,np,nx,nelt,nelgt,ndof,nppp,maxits,telaps,dofpss,titers,tppp_s 979 980 1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5) 981 3 format(i3,i9,e12.4,1x,a8,i9) 982 983C Destroy ceed handles 984 call ceedvectordestroy(vec_p1,err) 985 call ceedvectordestroy(vec_ap1,err) 986 call ceedvectordestroy(vec_rhs,err) 987 call ceedvectordestroy(vec_qdata,err) 988 call ceedvectordestroy(vec_coords,err) 989 call ceedelemrestrictiondestroy(erstrctu,err) 990 call ceedelemrestrictiondestroy(erstrctx,err) 991 call ceedelemrestrictiondestroy(erstrctw,err) 992 call ceedbasisdestroy(basisu,err) 993 call ceedbasisdestroy(basisx,err) 994 call ceedqfunctiondestroy(qf_setup,err) 995 call ceedqfunctiondestroy(qf_mass,err) 996 call ceedoperatordestroy(op_setup,err) 997 call ceedoperatordestroy(op_mass,err) 998 call ceeddestroy(ceed,err) 999 1000 return 1001 end 1002C----------------------------------------------------------------------- 1003 subroutine bp3 1004C Solution to BP3 using libCEED 1005 include 'SIZE' 1006 include 'TOTAL' 1007 include 'CTIMER' ! ifsync 1008 include 'FDMH1' 1009 include 'ceedf.h' 1010 1011 parameter (lzq=lx1+1) 1012 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 1013 common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq) 1014 1015 parameter (lt=lx1*ly1*lz1*lelt) 1016 parameter (ld=lxd*lyd*lzd*lelt) 1017 common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt) 1018 common /vcrny/ e1(lt) 1019 common /vcrvh/ h1(lt),h2(ld),pap(3) 1020 real*8 coords(ldim*lx*lelt) 1021 1022 logical ifh3 1023 integer*8 ndof 1024 integer ceed,err,test 1025 character*64 spec 1026 1027 integer p,q,ncomp,edof,ldof 1028 integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs 1029 integer erstrctu,erstrctx,erstrctw 1030 integer basisu,basisx 1031 integer qf_diffusion,qf_setup 1032 integer op_diffusion,op_setup 1033 integer ii,i,ngeo 1034 real*8 x,y,z 1035 integer*8 offset 1036 1037 external diffusionf,diffsetupf 1038 1039 ifield = 1 1040 nxq = nx1+1 1041 n = nx1*ny1*nz1*nelt 1042 1043 ifsync = .false. 1044 1045C Set up coordinates and mask 1046 ii=0 1047 do j=0,nelt-1 1048 do i=1,lx 1049 ii=ii+1 1050 x = xm1(ii,1,1,1) 1051 y = ym1(ii,1,1,1) 1052 z = zm1(ii,1,1,1) 1053 coords(i+0*lx+3*j*lx)=x 1054 coords(i+1*lx+3*j*lx)=y 1055 coords(i+2*lx+3*j*lx)=z 1056 if ( x.eq.0.or.x.eq.1 1057 $ .or.y.eq.0.or.y.eq.1 1058 $ .or.z.eq.0.or.z.eq.1 ) then 1059 h2(ii)=0. 1060 else 1061 h2(ii)=1. 1062 endif 1063 enddo 1064 enddo 1065 1066C Init ceed library 1067 call get_spec(spec) 1068 call ceedinit(trim(spec)//char(0),ceed,err) 1069 1070 call get_test(test) 1071 1072C Set up Nek geometry data 1073 call geodatq (gf,bmq,w3mq,nxq) ! Set up gf() arrays 1074 1075C Set up true soln 1076 call sin_fld_h1 (e1) 1077 call xmask1 (e1,h2,nelt) 1078 call copy (h1,e1,n) ! Save exact soln in h1 1079 1080C Set up solver parameters 1081 tol = 1e-10 1082 param(22) = tol 1083 maxit = 100 1084 1085 call nekgsync() 1086 1087C Create ceed basis for mesh and computation 1088 p=nx1 1089 q=p+1 1090 ncomp=1 1091 call ceedbasiscreatetensorh1lagrange(ceed,ldim,3*ncomp,p,q, 1092 $ ceed_gauss,basisx,err) 1093 call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncomp,p,q, 1094 $ ceed_gauss,basisu,err) 1095 1096C Create ceed element restrictions for mesh and computation 1097 edof=p**ldim 1098 ldof=edof*nelt*ncomp 1099 ngeo=(ldim*(ldim+1))/2 1100 call ceedelemrestrictioncreateidentity(ceed,nelt,edof,ldof, 1101 $ ldim,erstrctx,err) 1102 call ceedelemrestrictioncreateidentity(ceed,nelt,edof,ldof, 1103 $ ncomp,erstrctu,err) 1104 call ceedelemrestrictioncreateidentity(ceed,nelt,q**ldim, 1105 $ nelt*q**ldim,ngeo,erstrctw,err) 1106 1107C Create ceed vectors 1108 call ceedvectorcreate(ceed,ldof,vec_p1,err) 1109 call ceedvectorcreate(ceed,ldof,vec_ap1,err) 1110 call ceedvectorcreate(ceed,ldof,vec_rhs,err) 1111 call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err) 1112 call ceedvectorcreate(ceed,nelt*ngeo*q**ldim,vec_qdata,err) 1113 1114 offset=0 1115 call ceedvectorsetarray(vec_coords,ceed_mem_host, 1116 $ ceed_use_pointer,coords,offset,err) 1117 1118C Create ceed qfunctions for diffsetupf and diffusionf 1119 call ceedqfunctioncreateinterior(ceed,1,diffsetupf, 1120 $ __FILE__ 1121 $ //':diffsetupf'//char(0),qf_setup,err) 1122 call ceedqfunctionaddinput(qf_setup,'x',ldim, 1123 $ ceed_eval_interp,err) 1124 call ceedqfunctionaddinput(qf_setup,'dx',ldim, 1125 $ ceed_eval_grad,err) 1126 call ceedqfunctionaddinput(qf_setup,'weight',1, 1127 $ ceed_eval_weight,err) 1128 call ceedqfunctionaddoutput(qf_setup,'rho',ngeo, 1129 $ ceed_eval_none,err) 1130 call ceedqfunctionaddoutput(qf_setup,'rhs',1, 1131 $ ceed_eval_interp,err) 1132 1133 call ceedqfunctioncreateinterior(ceed,1,diffusionf, 1134 $ __FILE__ 1135 $ //':diffusionf'//char(0),qf_diffusion,err) 1136 call ceedqfunctionaddinput(qf_diffusion,'u',1, 1137 $ ceed_eval_grad,err) 1138 call ceedqfunctionaddinput(qf_diffusion,'rho',ngeo, 1139 $ ceed_eval_none,err) 1140 call ceedqfunctionaddoutput(qf_diffusion,'v',1, 1141 $ ceed_eval_grad,err) 1142 1143C Create ceed operators 1144 call ceedoperatorcreate(ceed,qf_setup, 1145 $ ceed_null,ceed_null,op_setup,err) 1146 call ceedoperatorsetfield(op_setup,'x',erstrctx, 1147 $ ceed_notranspose,basisx,ceed_vector_active,err) 1148 call ceedoperatorsetfield(op_setup,'dx',erstrctx, 1149 $ ceed_notranspose,basisx,ceed_vector_active,err) 1150 call ceedoperatorsetfield(op_setup,'weight',erstrctx, 1151 $ ceed_notranspose,basisx,ceed_vector_none,err) 1152 call ceedoperatorsetfield(op_setup,'rho',erstrctw, 1153 $ ceed_notranspose,ceed_basis_collocated, 1154 $ ceed_vector_active,err) 1155 call ceedoperatorsetfield(op_setup,'rhs',erstrctu, 1156 $ ceed_notranspose,basisu,vec_rhs,err) 1157 1158 call ceedoperatorcreate(ceed,qf_diffusion, 1159 $ ceed_null,ceed_null,op_diffusion,err) 1160 call ceedoperatorsetfield(op_diffusion,'u',erstrctu, 1161 $ ceed_notranspose,basisu,ceed_vector_active,err) 1162 call ceedoperatorsetfield(op_diffusion,'rho',erstrctw, 1163 $ ceed_notranspose,ceed_basis_collocated, 1164 $ vec_qdata,err) 1165 call ceedoperatorsetfield(op_diffusion,'v',erstrctu, 1166 $ ceed_notranspose,basisu,ceed_vector_active,err) 1167 1168C Compute setup data 1169 call ceedvectorsetarray(vec_rhs,ceed_mem_host, 1170 $ ceed_use_pointer,r1,offset,err) 1171 call ceedoperatorapply(op_setup,vec_coords,vec_qdata, 1172 $ ceed_request_immediate,err) 1173 1174C Set up true RHS 1175 call dssum (r1,nx1,ny1,nz1) ! r1 1176 call xmask1 (r1,h2,nelt) 1177 1178C Set up algebraic RHS with libCEED 1179 call ceedvectorsetarray(vec_p1,ceed_mem_host, 1180 $ ceed_use_pointer,h1,offset,err) 1181 call ceedvectorsetarray(vec_ap1,ceed_mem_host, 1182 $ ceed_use_pointer,r2,offset,err) 1183 call ceedoperatorapply(op_diffusion,vec_p1,vec_ap1, 1184 $ ceed_request_immediate,err) ! r2 = A_ceed*h1 1185 call dssum (r2,nx1,ny1,nz1) 1186 call xmask1 (r2,h2,nelt) 1187 1188C Set up algebraic RHS with Nek5000 1189 call axhm1 (pap,r3,h1,h1,h2,'bp3') ! r3 = A_nek5k*h1 1190 call dssum (r3,nx1,ny1,nz1) 1191 call xmask1 (r3,h2,nelt) 1192 1193 call nekgsync() 1194 1195C Solve true RHS 1196 if (nid.eq.0) write (6,*) "libCEED true RHS" 1197 tstart = dnekclock() 1198 call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1199 $ vec_p1,vec_ap1,maxit,'bp3') 1200 tstop = dnekclock() 1201 1202C Output 1203 telaps = (tstop-tstart) 1204 maxits = maxit 1205 er1 = glrdif(u1,e1,n) 1206 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1207 1208 if (test.eq.1.and.nid.eq.0) then 1209 if (maxit>=100) then 1210 write(6,*) "UNCONVERGED CG" 1211 endif 1212 if (dabs(er1)>1e-3) then 1213 write(6,*) "ERROR IS TOO LARGE" 1214 endif 1215 endif 1216 1217 nx = nx1-1 1218 ndof = nelgt ! ndofs 1219 ndof = ndof*(nx**ldim) ! ndofs 1220 nppp = ndof/np ! ndofs/proc 1221 1222 dofpss = ndof/telaps ! DOF/sec - scalar form 1223 titers = telaps/maxits ! time per iteration 1224 tppp_s = titers/nppp ! time per iteraton per local point 1225 1226 if (nid.eq.0) write(6,1) 'case scalar:' 1227 $ ,np,nx,nelt,nelgt,ndof,nppp,maxits,telaps,dofpss,titers,tppp_s 1228 1229C Solve libCEED algebraic RHS 1230 if (nid.eq.0) write (6,*) "libCEED algebraic RHS" 1231 maxit = 100 1232 tstart = dnekclock() 1233 call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1234 $ vec_p1,vec_ap1,maxit,'bp3') 1235 tstop = dnekclock() 1236 1237C Output 1238 telaps = (tstop-tstart) 1239 maxits = maxit 1240 er1 = glrdif(u1,e1,n) 1241 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1242 1243 if (test.eq.1.and.nid.eq.0) then 1244 if (maxit>=100) then 1245 write(6,*) "UNCONVERGED CG" 1246 endif 1247 if (dabs(er1)>1e-9) then 1248 write(6,*) "ERROR IS TOO LARGE" 1249 endif 1250 endif 1251 1252 nx = nx1-1 1253 ndof = nelgt ! ndofs 1254 ndof = ndof*(nx**ldim) ! ndofs 1255 nppp = ndof/np ! ndofs/proc 1256 1257 dofpss = ndof/telaps ! DOF/sec - scalar form 1258 titers = telaps/maxits ! time per iteration 1259 tppp_s = titers/nppp ! time per iteraton per local point 1260 1261 if (nid.eq.0) write(6,1) 'case scalar:' 1262 $ ,np,nx,nelt,nelgt,ndof,nppp,maxits,telaps,dofpss,titers,tppp_s 1263 1264C Solve Nek5000 algebraic RHS 1265 if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS" 1266 maxit = 100 1267 tstart = dnekclock() 1268 call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1269 $ vec_p1,vec_ap1,maxit,'bp3') 1270 tstop = dnekclock() 1271 1272C Output 1273 telaps = (tstop-tstart) 1274 maxits = maxit 1275 er1 = glrdif(u1,e1,n) 1276 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1277 1278 if (test.eq.1.and.nid.eq.0) then 1279 if (maxit>=100) then 1280 write(6,*) "UNCONVERGED CG" 1281 endif 1282 if (dabs(er1)>1e-9) then 1283 write(6,*) "ERROR IS TOO LARGE" 1284 endif 1285 endif 1286 1287 nx = nx1-1 1288 ndof = nelgt ! ndofs 1289 ndof = ndof*(nx**ldim) ! ndofs 1290 nppp = ndof/np ! ndofs/proc 1291 1292 dofpss = ndof/telaps ! DOF/sec - scalar form 1293 titers = telaps/maxits ! time per iteration 1294 tppp_s = titers/nppp ! time per iteraton per local point 1295 1296 if (nid.eq.0) write(6,1) 'case scalar:' 1297 $ ,np,nx,nelt,nelgt,ndof,nppp,maxits,telaps,dofpss,titers,tppp_s 1298 1299 1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5) 1300 3 format(i3,i9,e12.4,1x,a8,i9) 1301 1302C Destroy ceed handles 1303 call ceedvectordestroy(vec_p1,err) 1304 call ceedvectordestroy(vec_ap1,err) 1305 call ceedvectordestroy(vec_rhs,err) 1306 call ceedvectordestroy(vec_qdata,err) 1307 call ceedvectordestroy(vec_coords,err) 1308 call ceedelemrestrictiondestroy(erstrctu,err) 1309 call ceedelemrestrictiondestroy(erstrctx,err) 1310 call ceedelemrestrictiondestroy(erstrctw,err) 1311 call ceedbasisdestroy(basisu,err) 1312 call ceedbasisdestroy(basisx,err) 1313 call ceedqfunctiondestroy(qf_setup,err) 1314 call ceedqfunctiondestroy(qf_diffusion,err) 1315 call ceedoperatordestroy(op_setup,err) 1316 call ceedoperatordestroy(op_diffusion,err) 1317 call ceeddestroy(ceed,err) 1318 1319 return 1320 end 1321C----------------------------------------------------------------------- 1322 subroutine cggos(u1,r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1, 1323 $ vec_ap1,maxit,bpname) 1324C Scalar conjugate gradient iteration for solution of uncoupled 1325C Helmholtz equations 1326C Input: r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,vec_ap1,bpname 1327C Output: u1,maxit 1328 include 'SIZE' 1329 include 'TOTAL' 1330 include 'DOMAIN' 1331 include 'FDMH1' 1332 character*3 bpname 1333 1334C INPUT: rhs1 - rhs 1335C h1 - exact solution 1336 1337 parameter (lt=lx1*ly1*lz1*lelt) 1338 parameter (ld=lxd*lyd*lzd*lelt) 1339 real*8 u1(lt),r1(lt),h1(lt),h2(lt) 1340 real*8 rmult(1),binv(1) 1341 integer ceed,ceed_op,vec_ap1,vec_p1 1342 common /scrcg/ dpc(lt),p1(lt),z1(lt) 1343 common /scrca/ wv(4),wk(4),rpp1(4),rpp2(4),alph(4),beta(4),pap(4) 1344 1345 real*8 ap1(lt) 1346 equivalence (ap1,z1) 1347 1348 vol = volfld(ifield) 1349 nel = nelfld(ifield) 1350 nxyz = lx1*ly1*lz1 1351 n = nxyz*nel 1352 nx = nx1-1 ! Polynomial order (just for i/o) 1353 1354 tol=tin 1355 1356 if(bpname.ne.'bp1') then 1357 call setprecn_bp3(dpc,h1,h2) ! Set up diagional pre-conidtioner 1358 else 1359 call setprecn_bp1(dpc,h1,h2) ! Set up diagional pre-conidtioner 1360 endif 1361 1362 call rzero (u1,n) ! Initialize solution 1363 1364 wv(1)=0 1365 do i=1,n 1366 s=rmult(i) ! -1 1367 p1(i)=dpc(i)*r1(i) ! p = M r T 1368 wv(1)=wv(1)+s*p1(i)*r1(i) ! r p 1369 enddo 1370 call gop(wv(1),wk,'+ ',1) 1371 rpp1(1) = wv (1) 1372 1373 do 1000 iter=1,maxit 1374 call axhm1_ceed (pap,ap1,p1,h1,h2,ceed,ceed_op, 1375 $ vec_ap1,vec_p1) 1376 call dssum (ap1,nx1,ny1,nz1) 1377 if (bpname.ne.'bp1') call xmask1(ap1,h2,nel) 1378 1379 call gop (pap,wk,'+ ',1) 1380 alph(1) = rpp1(1)/pap(1) 1381 1382 do i=1,n 1383 u1(i)=u1(i)+alph(1)* p1(i) 1384 r1(i)=r1(i)-alph(1)*ap1(i) 1385 enddo 1386 1387C tolerance check here 1388 call rzero(wv,2) 1389 do i=1,n 1390 wv(1)=wv(1)+r1(i)*r1(i) ! L2 error estimate 1391 z1(i)=dpc(i)*r1(i) ! z = M r 1392 wv(2)=wv(2)+rmult(i)*z1(i)*r1(i) ! r z 1393 enddo 1394 call gop(wv,wk,'+ ',2) 1395 1396C if (nio.eq.0) write(6,1) ifield,istep,iter,nx,(wv(k),k=1,1) 1397 1 format(i2,i9,i5,i4,1p1e12.4,' cggos') 1398 1399 enorm=sqrt(wv(1)) 1400 if (enorm.lt.tol) then 1401 ifin = iter 1402 if (nio.eq.0) write(6,3000) istep,ifin,enorm,tol 1403 goto 9999 1404 endif 1405C if (nio.eq.0) write(6,2) iter,enorm,alph(1),pap(1),'alpha' 1406 2 format(i5,1p3e12.4,2x,a5) 1407 1408 rpp2(1)=rpp1(1) 1409 rpp1(1)=wv (2) 1410 beta1 =rpp1(1)/rpp2(1) 1411 do i=1,n 1412 p1(i)=z1(i) + beta1*p1(i) 1413 enddo 1414 1415 1000 continue 1416 1417 rbnorm=sqrt(wv(1)) 1418 if (nio.eq.0) write (6,3001) istep,iter,rbnorm,tol 1419 iter = iter-1 1420 1421 9999 continue 1422 1423 maxit=iter 1424 1425 3000 format(i12,1x,'cggo scalar:',i6,1p5e13.4) 1426 3001 format(2i6,' Unconverged cggo scalar: rbnorm =',1p2e13.6) 1427 1428 return 1429 end 1430C----------------------------------------------------------------------- 1431 subroutine axhm1_ceed(pap,ap1,p1,h1,h2,ceed,ceed_op, 1432 $ vec_ap1,vec_p1) 1433C Vector conjugate gradient matvec for solution of uncoupled 1434C Helmholtz equations 1435C Input: pap,p1,h1,h2,bpname,ceed,ceed_op,vec_ap1,vec_p1 1436C Output: ap1 1437 include 'SIZE' 1438 include 'TOTAL' 1439 include 'ceedf.h' 1440 1441 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1442 real*8 gf(lg,lx,lelt) ! Equivalence new gf() data 1443 equivalence (gf,g1m1) ! layout to g1m1...g6m1 1444 1445 real*8 pap(3) 1446 real*8 ap1(lx,lelt) 1447 real*8 p1(lx,lelt) 1448 real*8 h1(lx,lelt),h2(lx,lelt) 1449 integer ceed,ceed_op,vec_ap1,vec_p1,err 1450 integer i,e 1451 integer*8 offset 1452 1453 offset=0 1454 call ceedvectorsetarray(vec_p1,ceed_mem_host,ceed_use_pointer, 1455 $ p1,offset,err) 1456 call ceedvectorsetarray(vec_ap1,ceed_mem_host,ceed_use_pointer, 1457 $ ap1,offset,err) 1458 1459 call ceedoperatorapply(ceed_op,vec_p1,vec_ap1, 1460 $ ceed_request_immediate,err) 1461 1462 call ceedvectorsyncarray(vec_ap1,ceed_mem_host,err) 1463 1464 pap(1)=0. 1465 1466 do e=1,nelt 1467 do i=1,lx 1468 pap(1)=pap(1)+ap1(i,e)*p1(i,e) 1469 enddo 1470 enddo 1471 1472 return 1473 end 1474C----------------------------------------------------------------------- 1475 subroutine ax_e_bp1(w,u,g,h1,h2,b,ju,us,ut) 1476C Local matrix-vector for solution of BP3 (stiffness matrix) 1477C Input: u,g,h1,h2,b,ju,us,ut Output: w 1478 include 'SIZE' 1479 include 'TOTAL' 1480 1481 parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1482 real*8 w(lxyz),u(lxyz),g(lg,lxyz),h1(lxyz),h2(lxyz),b(lxyz) 1483 real*8 ju(lxyz),us(lxyz),ut(lxyz) 1484 1485 nxq = nx1+1 ! Number of quadrature points 1486 1487 lxyzq = nxq**ldim 1488 1489 call intp_rstd (ju,u,lx1,nxq,if3d,0) ! 0 --> Fwd interpolation 1490 do i=1,lxyzq 1491 ju(i)=ju(i)*h2(i) !! h2 must be on the fine grid, w/ quad wts 1492 enddo 1493 call intp_rstd (w,ju,lx1,nxq,if3d,1) ! 1 --> ju-->u 1494 1495 return 1496 end 1497C----------------------------------------------------------------------- 1498 subroutine axhm1_bp1(pap,ap1,p1,h1,h2) 1499C Vector conjugate gradient matvec for solution of BP1 (mass matrix) 1500C Input: pap,p1,h1,h2 Output: ap1 1501 include 'SIZE' 1502 include 'TOTAL' 1503 1504 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1505 real*8 gf(lg,lx,lelt) ! Equivalence new gf() data 1506 equivalence (gf,g1m1) ! layout to g1m1...g6m1 1507 1508 real*8 pap(3) 1509 real*8 ap1(lx,lelt) 1510 real*8 p1(lx,lelt) 1511 real*8 h1(lx,lelt),h2(lx,lelt) 1512 1513 real*8 ur(lx),us(lx),ut(lx) 1514 common /ctmp1/ ur,us,ut 1515 1516 integer e 1517 1518 pap(1)=0. 1519 1520 k=1 1521 nxq = nx1+1 1522 1523 do e=1,nelt 1524 1525 call ax_e_bp1(ap1(1,e),p1(1,e),gf(1,1,e),h1(1,e),h2(k,1) 1526 $ ,bm1(1,1,1,e),ur,us,ut) 1527 do i=1,lx 1528 pap(1)=pap(1)+ap1(i,e)*p1(i,e) 1529 enddo 1530 k=k+nxq*nxq*nxq 1531 1532 enddo 1533 1534 return 1535 end 1536C----------------------------------------------------------------------- 1537 subroutine ax_e_bp3(w,u,g,ur,us,ut,wk) 1538C Local matrix-vector for solution of BP3 (stiffness matrix) 1539C Input: u,g,ur,us,ut,wk Output: w 1540 include 'SIZE' 1541 include 'TOTAL' 1542 1543 parameter (lzq=lx1+1,lxyz=lx1*lx1*lx1,lxyzq=lzq*lzq*lzq) 1544 1545 common /ctmp0/ tmp(lxyzq) 1546 common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq) 1547 1548 real*8 ur(lxyzq),us(lxyzq),ut(lxyzq),wk(lxyzq) 1549 real*8 w(lxyz),u(lxyz),g(2*ldim,lxyzq) 1550 1551 n = lzq-1 1552 1553 call intp_rstd (wk,u,lx1,lzq,if3d,0) ! 0 --> Fwd interpolation 1554 call loc_grad3 (ur,us,ut,wk,n,dxmq,dxtmq) 1555 1556 do i=1,lxyzq 1557 wr = g(1,i)*ur(i) + g(2,i)*us(i) + g(3,i)*ut(i) 1558 ws = g(2,i)*ur(i) + g(4,i)*us(i) + g(5,i)*ut(i) 1559 wt = g(3,i)*ur(i) + g(5,i)*us(i) + g(6,i)*ut(i) 1560 ur(i) = wr 1561 us(i) = ws 1562 ut(i) = wt 1563 enddo 1564 1565 call loc_grad3t (wk,ur,us,ut,n,dxmq,dxtmq,tmp) 1566 call intp_rstd (w,wk,lx1,lzq,if3d,1) ! 1 --> ju-->u 1567 1568 return 1569 end 1570C----------------------------------------------------------------------- 1571 subroutine axhm1_bp3(pap,ap1,p1,h1,h2) 1572C Vector conjugate gradient matvec for solution of BP3 (stiffness matrix) 1573C Input: pap,p1,h1,h2 Output: ap1 1574 include 'SIZE' 1575 include 'TOTAL' 1576 1577 parameter (lzq=lx1+1) 1578 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 1579 common /bpgfactors/ gf(lg,lq,lelt),bmq(lq,lelt),w3mq(lq) 1580 1581 real*8 pap(3) 1582 real*8 ap1(lx,lelt) 1583 real*8 p1(lx,lelt) 1584 real*8 h1(lx,lelt),h2(lx,lelt) 1585 1586 common /ctmp1/ ur,us,ut,wk 1587 real*8 ur(lq),us(lq),ut(lq),wk(lq) 1588 1589 integer e 1590 1591 pap(1)=0. 1592 1593 do e=1,nelt 1594 1595 call ax_e_bp3(ap1(1,e),p1(1,e),gf(1,1,e),ur,us,ut,wk) 1596 do i=1,lx 1597 pap(1)=pap(1)+p1(i,e)*ap1(i,e) 1598 enddo 1599 1600 enddo 1601 1602 return 1603 end 1604C----------------------------------------------------------------------- 1605 subroutine axhm1(pap,ap1,p1,h1,h2,bpname) 1606C Vector conjugate gradient matvec for solution of uncoupled 1607C Helmholtz equations 1608C Input: pap,p1,h1,h2,bpname Output: ap1 1609 include 'SIZE' 1610 include 'TOTAL' 1611 1612 parameter (lx=lx1*ly1*lz1) 1613 1614 real*8 pap(3),ap1(lx,lelt),p1(lx,lelt) 1615 real*8 h1(lx,lelt),h2(lx,lelt) 1616 1617 character*3 bpname 1618 1619 if (bpname.eq.'bp1') then 1620 call axhm1_bp1(pap,ap1,p1,h1,h2) 1621 1622 elseif (bpname.eq.'bp3') then 1623 call axhm1_bp3(pap,ap1,p1,h1,h2) 1624 1625 else 1626 write(6,*) bpname,' axhm1 bpname error' 1627 stop 1628 1629 endif 1630 1631 return 1632 end 1633C----------------------------------------------------------------------- 1634 subroutine get_bp(bp) 1635C Get BP to run 1636C Input: Output: bp 1637 integer i,bp 1638 character*64 bpval 1639 1640 bp=0 1641 if(iargc().ge.1) then 1642 call getarg(1,bpval) 1643 endif 1644 if(bpval.eq."bp1") then 1645 bp=1 1646 elseif(bpval.eq."bp3") then 1647 bp=3 1648 endif 1649 1650 return 1651 end 1652C----------------------------------------------------------------------- 1653 subroutine get_spec(spec) 1654C Get CEED backend specification 1655C Input: Output: spec 1656 integer i 1657 character*64 spec 1658 1659 spec = '/cpu/self' 1660 if(iargc().ge.2) then 1661 call getarg(2,spec) 1662 endif 1663 1664 return 1665 end 1666C----------------------------------------------------------------------- 1667 subroutine get_test(test) 1668C Get test mode flag 1669C Input: Output: test 1670 integer i,test 1671 character*64 testval 1672 1673 test=0 1674 if(iargc().ge.3) then 1675 call getarg(3,testval) 1676 endif 1677 if(testval.eq."test") then 1678 test=1 1679 endif 1680 1681 return 1682 end 1683C----------------------------------------------------------------------- 1684