1C Copyright (c) 2017-2026, Lawrence Livermore National Security, LLC and other CEED contributors 2C All Rights Reserved. See the top-level COPYRIGHT and NOTICE files for details. 3C 4C SPDX-License-Identifier: (BSD-2-Clause) 5C 6C This file is part of CEED: http://github.com/ceed 7 8C> @file 9C> Mass and diffusion operators examples using Nek5000 10C_TESTARGS(name="BP1") -c {ceed_resource} -e bp1 -n 1 -b 4 -test 11C_TESTARGS(name="BP3") -c {ceed_resource} -e bp3 -n 1 -b 4 -test 12 13C----------------------------------------------------------------------- 14 subroutine masssetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 15 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 16 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 17C Set up mass operator 18C Input: u1,u2,u3,q Output: v1,v2,ierr 19 integer q,ierr 20 real*8 ctx(1) 21 real*8 u1(3*q) 22 real*8 u2(9*q) 23 real*8 u3(q) 24 real*8 v1(q) 25 real*8 v2(q) 26 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 27 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 28 real*8 jacmq 29 30C Quadrature Point Loop 31 do i=1,q 32 a11=u2(i+q*0) 33 a12=u2(i+q*3) 34 a13=u2(i+q*6) 35 36 a21=u2(i+q*1) 37 a22=u2(i+q*4) 38 a23=u2(i+q*7) 39 40 a31=u2(i+q*2) 41 a32=u2(i+q*5) 42 a33=u2(i+q*8) 43 44 g11 = (a22*a33-a23*a32) 45 g12 = (a13*a32-a33*a12) 46 g13 = (a12*a23-a22*a13) 47 48 g21 = (a23*a31-a21*a33) 49 g22 = (a11*a33-a31*a13) 50 g23 = (a13*a21-a23*a11) 51 52 g31 = (a21*a32-a22*a31) 53 g32 = (a12*a31-a32*a11) 54 g33 = (a11*a22-a21*a12) 55 56 jacmq = a11*g11+a21*g12+a31*g13 57 58C Rho 59 v1(i)=u3(i)*jacmq 60 61C RHS 62 v2(i)=u3(i)*jacmq 63 $ *dsqrt(u1(i+q*0)*u1(i+q*0) 64 $ +u1(i+q*1)*u1(i+q*1) 65 $ +u1(i+q*2)*u1(i+q*2)) 66 enddo 67 68 ierr=0 69 end 70C----------------------------------------------------------------------- 71 subroutine massf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 72 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 73 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 74C Apply mass operator 75C Input: u1,u2,q Output: v1,ierr 76 integer q,ierr 77 real*8 ctx(1) 78 real*8 u1(q) 79 real*8 u2(q) 80 real*8 v1(q) 81 82C Quadrature Point Loop 83 do i=1,q 84 v1(i)=u2(i)*u1(i) 85 enddo 86 87 ierr=0 88 end 89C----------------------------------------------------------------------- 90 subroutine diffsetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 91 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 92 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 93C Set up diffusion operator 94C Input: u1,u2,u3,q Output: v1,v2,ierr 95 integer q,ierr 96 real*8 ctx(1) 97 real*8 u1(3*q) 98 real*8 u2(9*q) 99 real*8 u3(q) 100 real*8 v1(6*q) 101 real*8 v2(q) 102 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 103 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 104 real*8 jacmq,scl 105 real*8 c(3),k(3) 106 107C Quadrature Point Loop 108 do i=1,q 109 pi = 3.14159265358979323846 110 111 c(1)=0. 112 c(2)=1. 113 c(3)=2. 114 k(1)=1. 115 k(2)=2. 116 k(3)=3. 117 118 a11=u2(i+q*0) 119 a12=u2(i+q*3) 120 a13=u2(i+q*6) 121 122 a21=u2(i+q*1) 123 a22=u2(i+q*4) 124 a23=u2(i+q*7) 125 126 a31=u2(i+q*2) 127 a32=u2(i+q*5) 128 a33=u2(i+q*8) 129 130 g11 = (a22*a33-a23*a32) 131 g12 = (a13*a32-a33*a12) 132 g13 = (a12*a23-a22*a13) 133 134 g21 = (a23*a31-a21*a33) 135 g22 = (a11*a33-a31*a13) 136 g23 = (a13*a21-a23*a11) 137 138 g31 = (a21*a32-a22*a31) 139 g32 = (a12*a31-a32*a11) 140 g33 = (a11*a22-a21*a12) 141 142 jacmq = a11*g11+a21*g12+a31*g13 143 144 scl = u3(i)/jacmq 145 146C Geometric factors 147C Stored in Voigt convention 148C 0 5 4 149C 5 1 3 150C 4 3 2 151 v1(i+0*q) = scl*(g11*g11+g12*g12+g13*g13) ! Grr 152 v1(i+1*q) = scl*(g21*g21+g22*g22+g23*g23) ! Gss 153 v1(i+2*q) = scl*(g31*g31+g32*g32+g33*g33) ! Gtt 154 v1(i+3*q) = scl*(g21*g31+g22*g32+g23*g33) ! Gst 155 v1(i+4*q) = scl*(g11*g31+g12*g32+g13*g33) ! Grt 156 v1(i+5*q) = scl*(g11*g21+g12*g22+g13*g23) ! Grs 157 158C RHS 159 v2(i) = u3(i)*jacmq*pi*pi 160 $ *dsin(pi*(c(1)+k(1)*u1(i+0*q))) 161 $ *dsin(pi*(c(2)+k(2)*u1(i+1*q))) 162 $ *dsin(pi*(c(3)+k(3)*u1(i+2*q))) 163 $ *(k(1)*k(1)+k(2)*k(2)+k(3)*k(3)) 164 165 enddo 166 167 ierr=0 168 end 169C----------------------------------------------------------------------- 170 subroutine diffusionf(ctx,q,u1,u2,u3,u4,u5,u6,u7, 171 $ u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8, 172 $ v9,v10,v11,v12,v13,v14,v15,v16,ierr) 173C Apply diffusion operator 174C Input: u1,u2,q Output: v1,ierr 175 integer q,ierr 176 real*8 ctx(1) 177 real*8 u1(3*q) 178 real*8 u2(6*q) 179 real*8 v1(3*q) 180 181C Quadrature Point Loop 182 do i=1,q 183 v1(i+0*q)= 184 $ u2(i+0*q)*u1(i)+u2(i+5*q)*u1(i+q)+u2(i+4*q)*u1(i+2*q) 185 v1(i+1*q)= 186 $ u2(i+5*q)*u1(i)+u2(i+1*q)*u1(i+q)+u2(i+3*q)*u1(i+2*q) 187 v1(i+2*q)= 188 $ u2(i+4*q)*u1(i)+u2(i+3*q)*u1(i+q)+u2(i+2*q)*u1(i+2*q) 189 enddo 190 191 ierr=0 192 end 193C----------------------------------------------------------------------- 194 subroutine set_h2_as_rhoJac_GL(h2,bmq,nxq) 195C Set h2 as rhoJac 196C Input: bmq,nxq Output: h2 197 include 'SIZE' 198 real*8 h2(1),bmq(1) 199 200 common /ctmp77/ wd(lxd),zd(lxd) 201 integer e,i,L 202 203 call zwgl(zd,wd,nxq) ! nxq = number of points 204 205 q = 1.0 ! Later, this can be a function of position... 206 207 L = 0 208 do e=1,lelt 209 do i=1,nxq**ldim 210 L=L+1 211 h2(L) = q*bmq(L) 212 enddo 213 enddo 214 215 return 216 end 217C----------------------------------------------------------------------- 218 subroutine dist_fld_h1(e) 219C Set distance initial condition for BP1 220C Input: Output: e 221 include 'SIZE' 222 include 'TOTAL' 223 real*8 x,y,z 224 real*8 e(1) 225 226 n=lx1*ly1*lz1*nelt 227 228 do i=1,n 229 x=xm1(i,1,1,1) 230 y=ym1(i,1,1,1) 231 z=zm1(i,1,1,1) 232 233 e(i) = dsqrt(x*x+y*y+z*z) 234 enddo 235 236 call dsavg(e) ! This is requisite for random fields 237 238 return 239 end 240C----------------------------------------------------------------------- 241 subroutine sin_fld_h1(e) 242C Set sine initial condition for BP3 243C Input: Output: e 244 include 'SIZE' 245 include 'TOTAL' 246 real*8 x,y,z 247 real*8 e(1) 248 real*8 c(3),k(3) 249 250 n=lx1*ly1*lz1*nelt 251 pi = 3.14159265358979323846 252 253 c(1)=0. 254 c(2)=1. 255 c(3)=2. 256 k(1)=1. 257 k(2)=2. 258 k(3)=3. 259 260 do i=1,n 261 x=xm1(i,1,1,1) 262 y=ym1(i,1,1,1) 263 z=zm1(i,1,1,1) 264 265 e(i) = dsin(pi*(c(1)+k(1)*x)) 266 & *dsin(pi*(c(2)+k(2)*y)) 267 & *dsin(pi*(c(3)+k(3)*z)) 268 269 enddo 270 271 call dsavg(e) ! This is requisite for random fields 272 273 return 274 end 275C----------------------------------------------------------------------- 276 subroutine uservp(ix,iy,iz,eg) ! set variable properties 277 include 'SIZE' 278 include 'TOTAL' 279 include 'NEKUSE' 280 integer e,f,eg 281C e = gllel(eg) 282 283 udiff = 0.0 284 utrans = 0.0 285 286 return 287 end 288C----------------------------------------------------------------------- 289 subroutine userf(ix,iy,iz,eg) ! set acceleration term 290C 291C Note: this is an acceleration term, NOT a force! 292C Thus, ffx will subsequently be multiplied by rho(x,t). 293C 294 include 'SIZE' 295 include 'TOTAL' 296 include 'NEKUSE' 297 integer e,f,eg 298C e = gllel(eg) 299 300 ffx = 0.0 301 ffy = 0.0 302 ffz = 0.0 303 304 return 305 end 306C----------------------------------------------------------------------- 307 subroutine userq(i,j,k,eg) ! set source term 308 include 'SIZE' 309 include 'TOTAL' 310 include 'NEKUSE' 311 integer e,f,eg 312 e = gllel(eg) 313 314 qvol = 0 315 316 return 317 end 318C----------------------------------------------------------------------- 319 subroutine userbc(ix,iy,iz,f,eg) ! set up boundary conditions 320C NOTE ::: This subroutine MAY NOT be called by every process 321 include 'SIZE' 322 include 'TOTAL' 323 include 'NEKUSE' 324 integer e,f,eg 325 326 ux = 0.0 327 uy = 0.0 328 uz = 0.0 329 temp = 0.0 330 331 return 332 end 333C----------------------------------------------------------------------- 334 subroutine useric(ix,iy,iz,eg) ! set up initial conditions 335 include 'SIZE' 336 include 'TOTAL' 337 include 'NEKUSE' 338 integer e,f,eg 339 340 ux = 0.0 341 uy = 0.0 342 uz = 0.0 343 temp = 0.0 344 345 return 346 end 347C----------------------------------------------------------------------- 348 subroutine usrdat ! This routine to modify element vertices 349 include 'SIZE' 350 include 'TOTAL' 351 352 return 353 end 354C----------------------------------------------------------------------- 355 subroutine usrdat2 ! This routine to modify mesh coordinates 356 include 'SIZE' 357 include 'TOTAL' 358 359 x0 = 0 360 x1 = 1 361 call rescale_x(xm1,x0,x1) 362 call rescale_x(ym1,x0,x1) 363 call rescale_x(zm1,x0,x1) 364 365 param(59)=1 ! Force Nek to use the "deformed element" formulation 366 367 return 368 end 369C----------------------------------------------------------------------- 370 subroutine usrdat3 371 include 'SIZE' 372 include 'TOTAL' 373 374 return 375 end 376C----------------------------------------------------------------------- 377 subroutine xmask1 (r1,h2,nel) 378C Apply zero Dirichlet boundary conditions 379C Input: h2,nel Output: r1 380 include 'SIZE' 381 include 'TOTAL' 382 real*8 r1(1),h2(1) 383 384 n=nx1*ny1*nz1*nel 385 do i=1,n 386 r1(i)=r1(i)*h2(i) 387 enddo 388 389 return 390 end 391C----------------------------------------------------------------------- 392 function glrdif(x,y,n) 393C Compute Linfty norm of (x-y) 394C Input: x,y Output: n 395 real*8 x(n),y(n) 396 397 dmx=0 398 xmx=0 399 ymx=0 400 401 do i=1,n 402 diff=abs(x(i)-y(i)) 403 dmx =max(dmx,diff) 404 xmx =max(xmx,x(i)) 405 ymx =max(ymx,y(i)) 406 enddo 407 408 xmx = max(xmx,ymx) 409 dmx = glmax(dmx,1) ! max across processors 410 xmx = glmax(xmx,1) 411 412 if (xmx.gt.0) then 413 glrdif = dmx/xmx 414 else 415 glrdif = -dmx ! Negative indicates something strange 416 endif 417 418 return 419 end 420C----------------------------------------------------------------------- 421 subroutine loc_grad3(ur,us,ut,u,N,D,Dt) 422C 3D transpose of local gradient 423C Input: u,N,D,Dt Output: ur,us,ut 424 real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N) 425 real*8 u (0:N,0:N,0:N) 426 real*8 D (0:N,0:N),Dt(0:N,0:N) 427 428 m1 = N+1 429 m2 = m1*m1 430 431 call mxm(D ,m1,u(0,0,0),m1,ur,m2) 432 do k=0,N 433 call mxm(u(0,0,k),m1,Dt,m1,us(0,0,k),m1) 434 enddo 435 call mxm(u(0,0,0),m2,Dt,m1,ut,m1) 436 437 return 438 end 439c----------------------------------------------------------------------- 440 subroutine loc_grad3t(u,ur,us,ut,N,D,Dt,w) 441C 3D transpose of local gradient 442C Input: ur,us,ut,N,D,Dt Output: u 443 real*8 u (0:N,0:N,0:N) 444 real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N) 445 real*8 D (0:N,0:N),Dt(0:N,0:N) 446 real*8 w (0:N,0:N,0:N) 447 448 m1 = N+1 449 m2 = m1*m1 450 m3 = m1*m1*m1 451 452 call mxm(Dt,m1,ur,m1,u(0,0,0),m2) 453 do k=0,N 454 call mxm(us(0,0,k),m1,D ,m1,w(0,0,k),m1) 455 enddo 456 call add2(u(0,0,0),w,m3) 457 call mxm(ut,m2,D ,m1,w,m1) 458 call add2(u(0,0,0),w,m3) 459 460 return 461 end 462C----------------------------------------------------------------------- 463 subroutine geodatq(gf,bmq,w3mq,nxq) 464C Routine to generate elemental geometric matrices on mesh 1 465C (Gauss-Legendre Lobatto mesh). 466 include 'SIZE' 467 include 'TOTAL' 468 469 parameter (lg=3+3*(ldim-2),lzq=lx1+1,lxyd=lzq**ldim) 470 471 real*8 gf(lg,nxq**ldim,lelt),bmq(nxq**ldim,lelt),w3mq(nxq,nxq,nxq) 472 473 common /ctmp1/ xr(lxyd),xs(lxyd),xt(lxyd) 474 common /sxrns/ yr(lxyd),ys(lxyd),yt(lxyd) 475 $ , zr(lxyd),zs(lxyd),zt(lxyd) 476 477 common /ctmp77/ wd(lxd),zd(lxd) 478 common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq) 479 480 integer e 481 real*8 tmp(lxyd) 482 real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33 483 real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33 484 real*8 jacmq 485 486 if (nxq.gt.lzq) call exitti('ABORT: recompile with lzq=$',nxq) 487 488 call zwgl (zd,wd,lzq) ! nxq = number of points 489 call gen_dgl (dxmq,dxtmq,lzq,lzq,tmp) 490 491 do k=1,nxq 492 do j=1,nxq 493 do i=1,nxq 494 w3mq(i,j,k) = wd(i)*wd(j)*wd(k) 495 enddo 496 enddo 497 enddo 498 499 nxyzq = nxq**ldim 500 nxqm1 = lzq-1 501 502 do e=1,nelt 503 call intp_rstd (tmp,xm1(1,1,1,e),lx1,lzq,if3d,0) ! 0-->Fwd interpolation 504 call loc_grad3 (xr,xs,xt,tmp,nxqm1,dxmq,dxtmq) 505 506 call intp_rstd (tmp,ym1(1,1,1,e),lx1,lzq,if3d,0) 507 call loc_grad3 (yr,ys,yt,tmp,nxqm1,dxmq,dxtmq) 508 509 call intp_rstd (tmp,zm1(1,1,1,e),lx1,lzq,if3d,0) 510 call loc_grad3 (zr,zs,zt,tmp,nxqm1,dxmq,dxtmq) 511 512 do i=1,nxyzq 513 a11 = xr(i) 514 a12 = xs(i) 515 a13 = xt(i) 516 517 a21 = yr(i) 518 a22 = ys(i) 519 a23 = yt(i) 520 521 a31 = zr(i) 522 a32 = zs(i) 523 a33 = zt(i) 524 525 g11 = (a22*a33-a23*a32) 526 g12 = (a13*a32-a33*a12) 527 g13 = (a12*a23-a22*a13) 528 529 g21 = (a23*a31-a21*a33) 530 g22 = (a11*a33-a31*a13) 531 g23 = (a13*a21-a23*a11) 532 533 g31 = (a21*a32-a22*a31) 534 g32 = (a12*a31-a32*a11) 535 g33 = (a11*a22-a21*a12) 536 537 jacmq = a11*g11+a21*g12+a31*g13 538 539 bmq(i,e) = w3mq(i,1,1)*jacmq 540 scale = w3mq(i,1,1)/jacmq 541 542 gf(1,i,e) = scale*(g11*g11+g12*g12+g13*g13) ! Grr 543 gf(2,i,e) = scale*(g11*g21+g12*g22+g13*g23) ! Grs 544 gf(3,i,e) = scale*(g11*g31+g12*g32+g13*g33) ! Grt 545 gf(4,i,e) = scale*(g21*g21+g22*g22+g23*g23) ! Gss 546 gf(5,i,e) = scale*(g21*g31+g22*g32+g23*g33) ! Gst 547 gf(6,i,e) = scale*(g31*g31+g32*g32+g33*g33) ! Gtt 548 549 enddo 550 enddo 551 552 return 553 end 554C----------------------------------------------------------------------- 555 subroutine setprecn_bp1 (d,h1,h2) 556C Generate diagonal preconditioner for Helmholtz operator 557C Input: h1,h2 Output: d 558 include 'SIZE' 559 include 'TOTAL' 560 561 parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2)) 562 563 real*8 d(lx1,ly1,lz1,lelt),h1(lxyz,lelt),h2(lxyz,lelt) 564 integer e 565 566 real*8 gf(lg,lx1,ly1,lz1,lelt) ! Equivalence new gf() data 567 equivalence (gf,g1m1) ! layout to g1m1...g6m1 568 569 real*8 ysm1(ly1) 570 571 nel = nelfld(ifield) 572 n = nel*lx1*ly1*lz1 573 nxyz = lx1*ly1*lz1 574 575 call copy (d,bm1,n) ! Mass matrix preconditioning full mass matrix 576 call dssum (d,nx1,ny1,nz1) 577 call invcol1 (d,n) 578 return 579 580 call dsset(lx1,ly1,lz1) 581 582 do 1000 e=1,nel 583 584 call rzero(d(1,1,1,e),nxyz) 585 586 do 320 iz=1,lz1 587 do 320 iy=1,ly1 588 do 320 ix=1,lx1 589 do 320 iq=1,lx1 590 d(ix,iy,iz,e) = d(ix,iy,iz,e) 591 $ + gf(1,iq,iy,iz,e) * dxm1(iq,ix)**2 592 $ + gf(2,ix,iq,iz,e) * dxm1(iq,iy)**2 593 $ + gf(3,ix,iy,iq,e) * dxm1(iq,iz)**2 594 320 continue 595C 596C Add cross terms if element is deformed. 597C 598 if (lxyz.gt.0) then 599 600 do i2=1,ly1,ly1-1 601 do i1=1,lx1,lx1-1 602 d(1,i1,i2,e) = d(1,i1,i2,e) 603 $ + gf(4,1,i1,i2,e) * dxtm1(1,1)*dytm1(i1,i1) 604 $ + gf(5,1,i1,i2,e) * dxtm1(1,1)*dztm1(i2,i2) 605 d(lx1,i1,i2,e) = d(lx1,i1,i2,e) 606 $ + gf(4,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dytm1(i1,i1) 607 $ + gf(5,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dztm1(i2,i2) 608 d(i1,1,i2,e) = d(i1,1,i2,e) 609 $ + gf(4,i1,1,i2,e) * dytm1(1,1)*dxtm1(i1,i1) 610 $ + gf(6,i1,1,i2,e) * dytm1(1,1)*dztm1(i2,i2) 611 d(i1,ly1,i2,e) = d(i1,ly1,i2,e) 612 $ + gf(4,i1,ly1,i2,e) * dytm1(ly1,ly1)*dxtm1(i1,i1) 613 $ + gf(6,i1,ly1,i2,e) * dytm1(ly1,ly1)*dztm1(i2,i2) 614 d(i1,i2,1,e) = d(i1,i2,1,e) 615 $ + gf(5,i1,i2,1,e) * dztm1(1,1)*dxtm1(i1,i1) 616 $ + gf(6,i1,i2,1,e) * dztm1(1,1)*dytm1(i2,i2) 617 d(i1,i2,lz1,e) = d(i1,i2,lz1,e) 618 $ + gf(5,i1,i2,lz1,e) * dztm1(lz1,lz1)*dxtm1(i1,i1) 619 $ + gf(6,i1,i2,lz1,e) * dztm1(lz1,lz1)*dytm1(i2,i2) 620 621 enddo 622 enddo 623 endif 624 625 do i=1,lxyz 626 d(i,1,1,e)=d(i,1,1,e)*h1(i,e)+h2(i,e)*bm1(i,1,1,e) 627 enddo 628 629 1000 continue ! element loop 630 631C If axisymmetric, add a diagonal term in the radial direction (ISD=2) 632 633 if (ifaxis.and.(isd.eq.2)) then 634 do 1200 e=1,nel 635 if (ifrzer(e)) call mxm(ym1(1,1,1,e),lx1,datm1,ly1,ysm1,1) 636 k=0 637 do 1190 j=1,ly1 638 do 1190 i=1,lx1 639 k=k+1 640 if (ym1(i,j,1,e).ne.0.) then 641 term1 = bm1(i,j,1,e)/ym1(i,j,1,e)**2 642 if (ifrzer(e)) then 643 term2 = wxm1(i)*wam1(1)*dam1(1,j) 644 $ *jacm1(i,1,1,e)/ysm1(i) 645 else 646 term2 = 0. 647 endif 648 d(i,j,1,e) = d(i,j,1,e) + h1(k,e)*(term1+term2) 649 endif 650 1190 continue 651 1200 continue 652 653 endif 654 call dssum (d,nx1,ny1,nz1) 655 call invcol1 (d,n) 656 657 if (nio.eq.0) write(6,1) n,d(1,1,1,1),h1(1,1),h2(1,1),bm1(1,1,1,1) 658 1 format(i9,1p4e12.4,' diag prec') 659 660 return 661 end 662C----------------------------------------------------------------------- 663 subroutine setprecn_bp3 (d,h1,h2) 664C Generate dummy diagonal preconditioner for Helmholtz operator 665C Input: h1,h2 Output: d 666 include 'SIZE' 667 include 'TOTAL' 668 669 parameter (n=lx1*ly1*lz1*lelt) 670 real*8 d(n),h1(n),h2(n) 671 672 call rone (d,n) 673 674 return 675 end 676C----------------------------------------------------------------------- 677 subroutine userchk 678 include 'SIZE' 679 include 'TOTAL' 680 681 integer bp 682 683 call get_bp(bp) 684 685 if (bp==1) then 686 if (istep.gt.0) call bp1 687 elseif (bp==3) then 688 if (istep.gt.0) call bp3 689 else 690 write(6,*) "INVALID BP SPECIFICED" 691 endif 692 693 return 694 end 695C----------------------------------------------------------------------- 696 subroutine bp1 697C Solution to BP1 using libCEED 698 include 'SIZE' 699 include 'TOTAL' 700 include 'CTIMER' ! ifsync 701 include 'FDMH1' 702 include 'ceed/fortran.h' 703 704 parameter (lzq=lx1+1) 705 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 706 common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq) 707 708 parameter (lt=lx1*ly1*lz1*lelt) 709 parameter (ld=lxd*lyd*lzd*lelt) 710 common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt) 711 common /vcrny/ e1(lt) 712 common /vcrvh/ h1(lt),h2(lx*lelt),pap(3) 713 real*8 coords(ldim*lx*lelt) 714 715 logical ifh3 716 integer*8 nnode 717 integer ceed,err,test 718 character*64 spec 719 720 integer p,q,ncompx,ncompu,enode,lnode 721 integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs 722 integer stridesu(3),stridesx(3),stridesw(3) 723 integer erstrctu,erstrctx,erstrctw 724 integer basisu,basisx 725 integer qf_mass,qf_setup 726 integer op_mass,op_setup 727 real*8 x,y,z 728 integer*8 offset 729 730 external massf,masssetupf 731 732 ifield = 1 733 nxq = nx1+1 734 n = nx1*ny1*nz1*nelt 735 736 ifsync = .false. 737 738C Set up coordinates 739 ii=0 740 do j=0,nelt-1 741 do i=1,lx 742 ii=ii+1 743 x = xm1(ii,1,1,1) 744 y = ym1(ii,1,1,1) 745 z = zm1(ii,1,1,1) 746 coords(i+0*lx+3*j*lx)=x 747 coords(i+1*lx+3*j*lx)=y 748 coords(i+2*lx+3*j*lx)=z 749 enddo 750 enddo 751 752C Init ceed library 753 call get_spec(spec) 754 call ceedinit(trim(spec)//char(0),ceed,err) 755 756 call get_test(test) 757 758C Set up Nek geometry data 759 call geodatq (gf,bmq,w3mq,nxq) ! Set up gf() arrays 760 call set_h2_as_rhoJac_GL(h2,bmq,nxq) 761 762C Set up true soln 763 call dist_fld_h1 (e1) 764 call copy (h1,e1,n) ! Save exact soln in h1 765 766C Set up solver parameters 767 tol = 1e-10 768 param(22) = tol 769 maxit = 100 770 771 call nekgsync() 772 773C Create ceed basis for mesh and computation 774 p=nx1 775 q=p+1 776 ncompu=1 777 ncompx=ldim 778 call ceedbasiscreatetensorh1lagrange(ceed,ndim,ndim,p,q, 779 $ ceed_gauss,basisx,err) 780 call ceedbasiscreatetensorh1lagrange(ceed,ndim,ncompu,p,q, 781 $ ceed_gauss,basisu,err) 782 783C Create ceed element restrictions for mesh and computation 784 enode=p**ldim 785 lnode=enode*nelt*ncompu 786 stridesx=[1,enode,enode*ldim] 787 call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ldim, 788 $ ldim*lnode,stridesx,erstrctx,err) 789 stridesu=[1,enode,enode*ncompu] 790 call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ncompu, 791 $ ncompu*lnode,stridesu,erstrctu,err) 792 call ceedelemrestrictioncreatestrided(ceed,nelt,q**ldim, 793 $ 1,nelt*q**ldim,ceed_strides_backend,erstrctw,err) 794 795C Create ceed vectors 796 call ceedvectorcreate(ceed,lnode,vec_p1,err) 797 call ceedvectorcreate(ceed,lnode,vec_ap1,err) 798 call ceedvectorcreate(ceed,lnode,vec_rhs,err) 799 call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err) 800 call ceedvectorcreate(ceed,nelt*q**ldim,vec_qdata,err) 801 802 offset=0 803 call ceedvectorsetarray(vec_coords,ceed_mem_host, 804 $ ceed_use_pointer,coords,offset,err) 805 806C Create ceed qfunctions for masssetupf and massf 807 call ceedqfunctioncreateinterior(ceed,1,masssetupf, 808 $ EXAMPLE_DIR 809 $ //'bps/bps.h:masssetupf',qf_setup,err) 810 call ceedqfunctionaddinput(qf_setup,'x',ncompx, 811 $ ceed_eval_interp,err) 812 call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim, 813 $ ceed_eval_grad,err) 814 call ceedqfunctionaddinput(qf_setup,'weight',ncompu, 815 $ ceed_eval_weight,err) 816 call ceedqfunctionaddoutput(qf_setup,'qdata',ncompu, 817 $ ceed_eval_none,err) 818 call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu, 819 $ ceed_eval_interp,err) 820 821 call ceedqfunctioncreateinterior(ceed,1,massf, 822 $ EXAMPLE_DIR 823 $ //'bps/bps.h:massf',qf_mass,err) 824 call ceedqfunctionaddinput(qf_mass,'u',ncompu, 825 $ ceed_eval_interp,err) 826 call ceedqfunctionaddinput(qf_mass,'qdata',ncompu, 827 $ ceed_eval_none,err) 828 call ceedqfunctionaddoutput(qf_mass,'v',ncompu, 829 $ ceed_eval_interp,err) 830 831C Create ceed operators 832 call ceedoperatorcreate(ceed,qf_setup, 833 $ ceed_qfunction_none,ceed_qfunction_none,op_setup,err) 834 call ceedoperatorsetfield(op_setup,'x',erstrctx, 835 $ basisx,ceed_vector_active,err) 836 call ceedoperatorsetfield(op_setup,'dx',erstrctx, 837 $ basisx,ceed_vector_active,err) 838 call ceedoperatorsetfield(op_setup,'weight', 839 $ ceed_elemrestriction_none,basisx,ceed_vector_none,err) 840 call ceedoperatorsetfield(op_setup,'qdata',erstrctw, 841 $ CEED_BASIS_NONE,ceed_vector_active,err) 842 call ceedoperatorsetfield(op_setup,'rhs',erstrctu, 843 $ basisu,vec_rhs,err) 844 845 call ceedoperatorcreate(ceed,qf_mass, 846 $ ceed_qfunction_none,ceed_qfunction_none,op_mass,err) 847 call ceedoperatorsetfield(op_mass,'u',erstrctu, 848 $ basisu,ceed_vector_active,err) 849 call ceedoperatorsetfield(op_mass,'qdata',erstrctw, 850 $ CEED_BASIS_NONE,vec_qdata,err) 851 call ceedoperatorsetfield(op_mass,'v',erstrctu, 852 $ basisu,ceed_vector_active,err) 853 854C Compute setup data 855 call ceedvectorsetarray(vec_rhs,ceed_mem_host, 856 $ ceed_use_pointer,r1,offset,err) 857 call ceedoperatorapply(op_setup,vec_coords,vec_qdata, 858 $ ceed_request_immediate,err) 859 860C Set up true RHS 861 call dssum (r1,nx1,ny1,nz1) ! r1 862 863C Set up algebraic RHS with libCEED 864 call ceedvectorsetarray(vec_p1,ceed_mem_host, 865 $ ceed_use_pointer,h1,offset,err) 866 call ceedvectorsetarray(vec_ap1,ceed_mem_host, 867 $ ceed_use_pointer,r2,offset,err) 868 call ceedoperatorapply(op_mass,vec_p1,vec_ap1, 869 $ ceed_request_immediate,err) ! r2 = A_ceed*h1 870 call dssum (r2,nx1,ny1,nz1) 871 872C Set up algebraic RHS with Nek5000 873 call axhm1 (pap,r3,h1,h1,h2,'bp1') ! r3 = A_nek5k*h1 874 call dssum (r3,nx1,ny1,nz1) 875 876 call nekgsync() 877 878C Solve true RHS 879 if (nid.eq.0) write (6,*) "libCEED true RHS" 880 tstart = dnekclock() 881 call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_mass, 882 $ vec_p1,vec_ap1,maxit,'bp1') 883 tstop = dnekclock() 884 885C Output 886 telaps = (tstop-tstart) 887 maxits = maxit 888 er1 = glrdif(u1,e1,n) 889 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 890 891 if (test.eq.1.and.nid.eq.0) then 892 if (maxit>=100) then 893 write(6,*) "UNCONVERGED CG" 894 endif 895 if (dabs(er1)>5e-3) then 896 write(6,*) "ERROR IS TOO LARGE" 897 endif 898 endif 899 900 nx = nx1-1 901 nnode = nelgt ! nnodes 902 nnode = nnode*(nx**ldim) ! nnodes 903 nppp = nnode/np ! nnodes/proc 904 905 dofps = nnode/telaps ! DOF/sec - scalar form 906 titers = telaps/maxits ! time per iteration 907 tppp_s = titers/nppp ! time per iteraton per local point 908 909 if (nid.eq.0) write(6,1) 'case scalar:' 910 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 911 912C Solve libCEED algebraic RHS 913 if (nid.eq.0) write (6,*) "libCEED algebraic RHS" 914 maxit = 100 915 tstart = dnekclock() 916 call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_mass, 917 $ vec_p1,vec_ap1,maxit,'bp1') 918 tstop = dnekclock() 919 920C Output 921 telaps = (tstop-tstart) 922 maxits = maxit 923 er1 = glrdif(u1,e1,n) 924 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 925 926 if (test.eq.1.and.nid.eq.0) then 927 if (maxit>=100) then 928 write(6,*) "UNCONVERGED CG" 929 endif 930 if (dabs(er1)>1e-5) then 931 write(6,*) "ERROR IS TOO LARGE" 932 endif 933 endif 934 935 nx = nx1-1 936 nnode = nelgt ! nnodes 937 nnode = nnode*(nx**ldim) ! nnodes 938 nppp = nnode/np ! nnodes/proc 939 940 dofps = nnode/telaps ! DOF/sec - scalar form 941 titers = telaps/maxits ! time per iteration 942 tppp_s = titers/nppp ! time per iteraton per local point 943 944 if (nid.eq.0) write(6,1) 'case scalar:' 945 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 946 947C Solve Nek5000 algebraic RHS 948 if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS" 949 maxit = 100 950 tstart = dnekclock() 951 call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_mass, 952 $ vec_p1,vec_ap1,maxit,'bp1') 953 tstop = dnekclock() 954 955C Output 956 telaps = (tstop-tstart) 957 maxits = maxit 958 er1 = glrdif(u1,e1,n) 959 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 960 961 if (test.eq.1.and.nid.eq.0) then 962 if (maxit>=100) then 963 write(6,*) "UNCONVERGED CG" 964 endif 965 if (dabs(er1)>1e-5) then 966 write(6,*) "ERROR IS TOO LARGE" 967 endif 968 endif 969 970 nx = nx1-1 971 nnode = nelgt ! nnodes 972 nnode = nnode*(nx**ldim) ! nnodes 973 nppp = nnode/np ! nnodes/proc 974 975 dofps = nnode/telaps ! DOF/sec - scalar form 976 titers = telaps/maxits ! time per iteration 977 tppp_s = titers/nppp ! time per iteraton per local point 978 979 if (nid.eq.0) write(6,1) 'case scalar:' 980 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 981 982 1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5) 983 3 format(i3,i9,e12.4,1x,a8,i9) 984 985C Destroy ceed handles 986 call ceedvectordestroy(vec_p1,err) 987 call ceedvectordestroy(vec_ap1,err) 988 call ceedvectordestroy(vec_rhs,err) 989 call ceedvectordestroy(vec_qdata,err) 990 call ceedvectordestroy(vec_coords,err) 991 call ceedelemrestrictiondestroy(erstrctu,err) 992 call ceedelemrestrictiondestroy(erstrctx,err) 993 call ceedelemrestrictiondestroy(erstrctw,err) 994 call ceedbasisdestroy(basisu,err) 995 call ceedbasisdestroy(basisx,err) 996 call ceedqfunctiondestroy(qf_setup,err) 997 call ceedqfunctiondestroy(qf_mass,err) 998 call ceedoperatordestroy(op_setup,err) 999 call ceedoperatordestroy(op_mass,err) 1000 call ceeddestroy(ceed,err) 1001 1002 return 1003 end 1004C----------------------------------------------------------------------- 1005 subroutine bp3 1006C Solution to BP3 using libCEED 1007 include 'SIZE' 1008 include 'TOTAL' 1009 include 'CTIMER' ! ifsync 1010 include 'FDMH1' 1011 include 'ceed/fortran.h' 1012 1013 parameter (lzq=lx1+1) 1014 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 1015 common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq) 1016 1017 parameter (lt=lx1*ly1*lz1*lelt) 1018 parameter (ld=lxd*lyd*lzd*lelt) 1019 common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt) 1020 common /vcrny/ e1(lt) 1021 common /vcrvh/ h1(lt),h2(ld),pap(3) 1022 real*8 coords(ldim*lx*lelt) 1023 1024 logical ifh3 1025 integer*8 nnode 1026 integer ceed,err,test 1027 character*64 spec 1028 1029 integer p,q,ncompx,ncompu,enode,lnode 1030 integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs 1031 integer stridesu(3),stridesx(3),stridesw(3) 1032 integer erstrctu,erstrctx,erstrctw 1033 integer basisu,basisx 1034 integer qf_diffusion,qf_setup 1035 integer op_diffusion,op_setup 1036 integer ii,i,ngeo 1037 real*8 x,y,z 1038 integer*8 offset 1039 1040 external diffusionf,diffsetupf 1041 1042 ifield = 1 1043 nxq = nx1+1 1044 n = nx1*ny1*nz1*nelt 1045 1046 ifsync = .false. 1047 1048C Set up coordinates and mask 1049 ii=0 1050 do j=0,nelt-1 1051 do i=1,lx 1052 ii=ii+1 1053 x = xm1(ii,1,1,1) 1054 y = ym1(ii,1,1,1) 1055 z = zm1(ii,1,1,1) 1056 coords(i+0*lx+3*j*lx)=x 1057 coords(i+1*lx+3*j*lx)=y 1058 coords(i+2*lx+3*j*lx)=z 1059 if ( x.eq.0.or.x.eq.1 1060 $ .or.y.eq.0.or.y.eq.1 1061 $ .or.z.eq.0.or.z.eq.1 ) then 1062 h2(ii)=0. 1063 else 1064 h2(ii)=1. 1065 endif 1066 enddo 1067 enddo 1068 1069C Init ceed library 1070 call get_spec(spec) 1071 call ceedinit(trim(spec)//char(0),ceed,err) 1072 1073 call get_test(test) 1074 1075C Set up Nek geometry data 1076 call geodatq (gf,bmq,w3mq,nxq) ! Set up gf() arrays 1077 1078C Set up true soln 1079 call sin_fld_h1 (e1) 1080 call xmask1 (e1,h2,nelt) 1081 call copy (h1,e1,n) ! Save exact soln in h1 1082 1083C Set up solver parameters 1084 tol = 1e-10 1085 param(22) = tol 1086 maxit = 100 1087 1088 call nekgsync() 1089 1090C Create ceed basis for mesh and computation 1091 p=nx1 1092 q=p+1 1093 ncompu=1 1094 ncompx=ldim 1095 call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompx,p,q, 1096 $ ceed_gauss,basisx,err) 1097 call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompu,p,q, 1098 $ ceed_gauss,basisu,err) 1099 1100C Create ceed element restrictions for mesh and computation 1101 enode=p**ldim 1102 lnode=enode*nelt*ncompu 1103 ngeo=(ldim*(ldim+1))/2 1104 stridesx=[1,enode,enode*ldim] 1105 call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ldim, 1106 $ ldim*lnode,stridesx,erstrctx,err) 1107 stridesu=[1,enode,enode*ncompu] 1108 call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ncompu, 1109 $ ncompu*lnode,stridesu,erstrctu,err) 1110 stridesw=[1,q**ldim,ngeo*q**ldim] 1111 call ceedelemrestrictioncreatestrided(ceed,nelt,q**ldim, 1112 $ ngeo,nelt*q**ldim*ngeo,stridesw,erstrctw,err) 1113 1114C Create ceed vectors 1115 call ceedvectorcreate(ceed,lnode,vec_p1,err) 1116 call ceedvectorcreate(ceed,lnode,vec_ap1,err) 1117 call ceedvectorcreate(ceed,lnode,vec_rhs,err) 1118 call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err) 1119 call ceedvectorcreate(ceed,nelt*ngeo*q**ldim,vec_qdata,err) 1120 1121 offset=0 1122 call ceedvectorsetarray(vec_coords,ceed_mem_host, 1123 $ ceed_use_pointer,coords,offset,err) 1124 1125C Create ceed qfunctions for diffsetupf and diffusionf 1126 call ceedqfunctioncreateinterior(ceed,1,diffsetupf, 1127 $ EXAMPLE_DIR 1128 $ //'bps/bps.h:diffsetupf'//char(0),qf_setup,err) 1129 call ceedqfunctionaddinput(qf_setup,'x',ncompx, 1130 $ ceed_eval_interp,err) 1131 call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim, 1132 $ ceed_eval_grad,err) 1133 call ceedqfunctionaddinput(qf_setup,'weight',ncompu, 1134 $ ceed_eval_weight,err) 1135 call ceedqfunctionaddoutput(qf_setup,'qdata',ngeo, 1136 $ ceed_eval_none,err) 1137 call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu, 1138 $ ceed_eval_interp,err) 1139 1140 call ceedqfunctioncreateinterior(ceed,1,diffusionf, 1141 $ EXAMPLE_DIR 1142 $ //'bps/bps.h:diffusionf'//char(0),qf_diffusion,err) 1143 call ceedqfunctionaddinput(qf_diffusion,'u',ncompu*ldim, 1144 $ ceed_eval_grad,err) 1145 call ceedqfunctionaddinput(qf_diffusion,'qdata',ngeo, 1146 $ ceed_eval_none,err) 1147 call ceedqfunctionaddoutput(qf_diffusion,'v',ncompu*ldim, 1148 $ ceed_eval_grad,err) 1149 1150C Create ceed operators 1151 call ceedoperatorcreate(ceed,qf_setup, 1152 $ ceed_qfunction_none,ceed_qfunction_none,op_setup,err) 1153 call ceedoperatorsetfield(op_setup,'x',erstrctx, 1154 $ basisx,ceed_vector_active,err) 1155 call ceedoperatorsetfield(op_setup,'dx',erstrctx, 1156 $ basisx,ceed_vector_active,err) 1157 call ceedoperatorsetfield(op_setup,'weight', 1158 $ ceed_elemrestriction_none,basisx,ceed_vector_none,err) 1159 call ceedoperatorsetfield(op_setup,'qdata',erstrctw, 1160 $ CEED_BASIS_NONE,ceed_vector_active,err) 1161 call ceedoperatorsetfield(op_setup,'rhs',erstrctu, 1162 $ basisu,vec_rhs,err) 1163 1164 call ceedoperatorcreate(ceed,qf_diffusion, 1165 $ ceed_qfunction_none,ceed_qfunction_none,op_diffusion,err) 1166 call ceedoperatorsetfield(op_diffusion,'u',erstrctu, 1167 $ basisu,ceed_vector_active,err) 1168 call ceedoperatorsetfield(op_diffusion,'qdata',erstrctw, 1169 $ CEED_BASIS_NONE,vec_qdata,err) 1170 call ceedoperatorsetfield(op_diffusion,'v',erstrctu, 1171 $ basisu,ceed_vector_active,err) 1172 1173C Compute setup data 1174 call ceedvectorsetarray(vec_rhs,ceed_mem_host, 1175 $ ceed_use_pointer,r1,offset,err) 1176 call ceedoperatorapply(op_setup,vec_coords,vec_qdata, 1177 $ ceed_request_immediate,err) 1178 1179C Set up true RHS 1180 call dssum (r1,nx1,ny1,nz1) ! r1 1181 call xmask1 (r1,h2,nelt) 1182 1183C Set up algebraic RHS with libCEED 1184 call ceedvectorsetarray(vec_p1,ceed_mem_host, 1185 $ ceed_use_pointer,h1,offset,err) 1186 call ceedvectorsetarray(vec_ap1,ceed_mem_host, 1187 $ ceed_use_pointer,r2,offset,err) 1188 call ceedoperatorapply(op_diffusion,vec_p1,vec_ap1, 1189 $ ceed_request_immediate,err) ! r2 = A_ceed*h1 1190 call dssum (r2,nx1,ny1,nz1) 1191 call xmask1 (r2,h2,nelt) 1192 1193C Set up algebraic RHS with Nek5000 1194 call axhm1 (pap,r3,h1,h1,h2,'bp3') ! r3 = A_nek5k*h1 1195 call dssum (r3,nx1,ny1,nz1) 1196 call xmask1 (r3,h2,nelt) 1197 1198 call nekgsync() 1199 1200C Solve true RHS 1201 if (nid.eq.0) write (6,*) "libCEED true RHS" 1202 tstart = dnekclock() 1203 call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1204 $ vec_p1,vec_ap1,maxit,'bp3') 1205 tstop = dnekclock() 1206 1207C Output 1208 telaps = (tstop-tstart) 1209 maxits = maxit 1210 er1 = glrdif(u1,e1,n) 1211 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1212 1213 if (test.eq.1.and.nid.eq.0) then 1214 if (maxit>=100) then 1215 write(6,*) "UNCONVERGED CG" 1216 endif 1217 if (dabs(er1)>1e-3) then 1218 write(6,*) "ERROR IS TOO LARGE" 1219 endif 1220 endif 1221 1222 nx = nx1-1 1223 nnode = nelgt ! nnodes 1224 nnode = nnode*(nx**ldim) ! nnodes 1225 nppp = nnode/np ! nnodes/proc 1226 1227 dofps = nnode/telaps ! DOF/sec - scalar form 1228 titers = telaps/maxits ! time per iteration 1229 tppp_s = titers/nppp ! time per iteraton per local point 1230 1231 if (nid.eq.0) write(6,1) 'case scalar:' 1232 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 1233 1234C Solve libCEED algebraic RHS 1235 if (nid.eq.0) write (6,*) "libCEED algebraic RHS" 1236 maxit = 100 1237 tstart = dnekclock() 1238 call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1239 $ vec_p1,vec_ap1,maxit,'bp3') 1240 tstop = dnekclock() 1241 1242C Output 1243 telaps = (tstop-tstart) 1244 maxits = maxit 1245 er1 = glrdif(u1,e1,n) 1246 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1247 1248 if (test.eq.1.and.nid.eq.0) then 1249 if (maxit>=100) then 1250 write(6,*) "UNCONVERGED CG" 1251 endif 1252 if (dabs(er1)>1e-9) then 1253 write(6,*) "ERROR IS TOO LARGE" 1254 endif 1255 endif 1256 1257 nx = nx1-1 1258 nnode = nelgt ! nnodes 1259 nnode = nnode*(nx**ldim) ! nnodes 1260 nppp = nnode/np ! nnodes/proc 1261 1262 dofps = nnode/telaps ! DOF/sec - scalar form 1263 titers = telaps/maxits ! time per iteration 1264 tppp_s = titers/nppp ! time per iteraton per local point 1265 1266 if (nid.eq.0) write(6,1) 'case scalar:' 1267 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 1268 1269C Solve Nek5000 algebraic RHS 1270 if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS" 1271 maxit = 100 1272 tstart = dnekclock() 1273 call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_diffusion, 1274 $ vec_p1,vec_ap1,maxit,'bp3') 1275 tstop = dnekclock() 1276 1277C Output 1278 telaps = (tstop-tstart) 1279 maxits = maxit 1280 er1 = glrdif(u1,e1,n) 1281 if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit 1282 1283 if (test.eq.1.and.nid.eq.0) then 1284 if (maxit>=100) then 1285 write(6,*) "UNCONVERGED CG" 1286 endif 1287 if (dabs(er1)>1e-9) then 1288 write(6,*) "ERROR IS TOO LARGE" 1289 endif 1290 endif 1291 1292 nx = nx1-1 1293 nnode = nelgt ! nnodes 1294 nnode = nnode*(nx**ldim) ! nnodes 1295 nppp = nnode/np ! nnodes/proc 1296 1297 dofps = nnode/telaps ! DOF/sec - scalar form 1298 titers = telaps/maxits ! time per iteration 1299 tppp_s = titers/nppp ! time per iteraton per local point 1300 1301 if (nid.eq.0) write(6,1) 'case scalar:' 1302 $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s 1303 1304 1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5) 1305 3 format(i3,i9,e12.4,1x,a8,i9) 1306 1307C Destroy ceed handles 1308 call ceedvectordestroy(vec_p1,err) 1309 call ceedvectordestroy(vec_ap1,err) 1310 call ceedvectordestroy(vec_rhs,err) 1311 call ceedvectordestroy(vec_qdata,err) 1312 call ceedvectordestroy(vec_coords,err) 1313 call ceedelemrestrictiondestroy(erstrctu,err) 1314 call ceedelemrestrictiondestroy(erstrctx,err) 1315 call ceedelemrestrictiondestroy(erstrctw,err) 1316 call ceedbasisdestroy(basisu,err) 1317 call ceedbasisdestroy(basisx,err) 1318 call ceedqfunctiondestroy(qf_setup,err) 1319 call ceedqfunctiondestroy(qf_diffusion,err) 1320 call ceedoperatordestroy(op_setup,err) 1321 call ceedoperatordestroy(op_diffusion,err) 1322 call ceeddestroy(ceed,err) 1323 1324 return 1325 end 1326C----------------------------------------------------------------------- 1327 subroutine cggos(u1,r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1, 1328 $ vec_ap1,maxit,bpname) 1329C Scalar conjugate gradient iteration for solution of uncoupled 1330C Helmholtz equations 1331C Input: r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,vec_ap1,bpname 1332C Output: u1,maxit 1333 include 'SIZE' 1334 include 'TOTAL' 1335 include 'DOMAIN' 1336 include 'FDMH1' 1337 character*3 bpname 1338 1339C INPUT: rhs1 - rhs 1340C h1 - exact solution 1341 1342 parameter (lt=lx1*ly1*lz1*lelt) 1343 parameter (ld=lxd*lyd*lzd*lelt) 1344 real*8 u1(lt),r1(lt),h1(lt),h2(lt) 1345 real*8 rmult(1),binv(1) 1346 integer ceed,ceed_op,vec_ap1,vec_p1 1347 common /scrcg/ dpc(lt),p1(lt),z1(lt) 1348 common /scrca/ wv(4),wk(4),rpp1(4),rpp2(4),alph(4),beta(4),pap(4) 1349 1350 real*8 ap1(lt) 1351 equivalence (ap1,z1) 1352 1353 vol = volfld(ifield) 1354 nel = nelfld(ifield) 1355 nxyz = lx1*ly1*lz1 1356 n = nxyz*nel 1357 nx = nx1-1 ! Polynomial order (just for i/o) 1358 1359 tol=tin 1360 1361 if(bpname.ne.'bp1') then 1362 call setprecn_bp3(dpc,h1,h2) ! Set up diagional pre-conidtioner 1363 else 1364 call setprecn_bp1(dpc,h1,h2) ! Set up diagional pre-conidtioner 1365 endif 1366 1367 call rzero (u1,n) ! Initialize solution 1368 1369 wv(1)=0 1370 do i=1,n 1371 s=rmult(i) ! -1 1372 p1(i)=dpc(i)*r1(i) ! p = M r T 1373 wv(1)=wv(1)+s*p1(i)*r1(i) ! r p 1374 enddo 1375 call gop(wv(1),wk,'+ ',1) 1376 rpp1(1) = wv (1) 1377 1378 do 1000 iter=1,maxit 1379 call axhm1_ceed (pap,ap1,p1,h1,h2,ceed,ceed_op, 1380 $ vec_ap1,vec_p1) 1381 call dssum (ap1,nx1,ny1,nz1) 1382 if (bpname.ne.'bp1') call xmask1(ap1,h2,nel) 1383 1384 call gop (pap,wk,'+ ',1) 1385 alph(1) = rpp1(1)/pap(1) 1386 1387 do i=1,n 1388 u1(i)=u1(i)+alph(1)* p1(i) 1389 r1(i)=r1(i)-alph(1)*ap1(i) 1390 enddo 1391 1392C tolerance check here 1393 call rzero(wv,2) 1394 do i=1,n 1395 wv(1)=wv(1)+r1(i)*r1(i) ! L2 error estimate 1396 z1(i)=dpc(i)*r1(i) ! z = M r 1397 wv(2)=wv(2)+rmult(i)*z1(i)*r1(i) ! r z 1398 enddo 1399 call gop(wv,wk,'+ ',2) 1400 1401C if (nio.eq.0) write(6,1) ifield,istep,iter,nx,(wv(k),k=1,1) 1402 1 format(i2,i9,i5,i4,1p1e12.4,' cggos') 1403 1404 enorm=sqrt(wv(1)) 1405 if (enorm.lt.tol) then 1406 ifin = iter 1407 if (nio.eq.0) write(6,3000) istep,ifin,enorm,tol 1408 goto 9999 1409 endif 1410C if (nio.eq.0) write(6,2) iter,enorm,alph(1),pap(1),'alpha' 1411 2 format(i5,1p3e12.4,2x,a5) 1412 1413 rpp2(1)=rpp1(1) 1414 rpp1(1)=wv (2) 1415 beta1 =rpp1(1)/rpp2(1) 1416 do i=1,n 1417 p1(i)=z1(i) + beta1*p1(i) 1418 enddo 1419 1420 1000 continue 1421 1422 rbnorm=sqrt(wv(1)) 1423 if (nio.eq.0) write (6,3001) istep,iter,rbnorm,tol 1424 iter = iter-1 1425 1426 9999 continue 1427 1428 maxit=iter 1429 1430 3000 format(i12,1x,'cggo scalar:',i6,1p5e13.4) 1431 3001 format(2i6,' Unconverged cggo scalar: rbnorm =',1p2e13.6) 1432 1433 return 1434 end 1435C----------------------------------------------------------------------- 1436 subroutine axhm1_ceed(pap,ap1,p1,h1,h2,ceed,ceed_op, 1437 $ vec_ap1,vec_p1) 1438C Vector conjugate gradient matvec for solution of uncoupled 1439C Helmholtz equations 1440C Input: pap,p1,h1,h2,bpname,ceed,ceed_op,vec_ap1,vec_p1 1441C Output: ap1 1442 include 'SIZE' 1443 include 'TOTAL' 1444 include 'ceed/fortran.h' 1445 1446 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1447 real*8 gf(lg,lx,lelt) ! Equivalence new gf() data 1448 equivalence (gf,g1m1) ! layout to g1m1...g6m1 1449 1450 real*8 pap(3) 1451 real*8 ap1(lx,lelt) 1452 real*8 p1(lx,lelt) 1453 real*8 h1(lx,lelt),h2(lx,lelt) 1454 integer ceed,ceed_op,vec_ap1,vec_p1,err 1455 integer i,e 1456 integer*8 offset 1457 1458 offset=0 1459 call ceedvectorsetarray(vec_p1,ceed_mem_host,ceed_use_pointer, 1460 $ p1,offset,err) 1461 call ceedvectorsetarray(vec_ap1,ceed_mem_host,ceed_use_pointer, 1462 $ ap1,offset,err) 1463 1464 call ceedoperatorapply(ceed_op,vec_p1,vec_ap1, 1465 $ ceed_request_immediate,err) 1466 1467 call ceedvectortakearray(vec_p1,ceed_mem_host,0,offset,err) 1468 call ceedvectortakearray(vec_ap1,ceed_mem_host,0,offset,err) 1469 1470 pap(1)=0. 1471 1472 do e=1,nelt 1473 do i=1,lx 1474 pap(1)=pap(1)+ap1(i,e)*p1(i,e) 1475 enddo 1476 enddo 1477 1478 return 1479 end 1480C----------------------------------------------------------------------- 1481 subroutine ax_e_bp1(w,u,g,h1,h2,b,ju,us,ut) 1482C Local matrix-vector for solution of BP3 (stiffness matrix) 1483C Input: u,g,h1,h2,b,ju,us,ut Output: w 1484 include 'SIZE' 1485 include 'TOTAL' 1486 1487 parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1488 real*8 w(lxyz),u(lxyz),g(lg,lxyz),h1(lxyz),h2(lxyz),b(lxyz) 1489 real*8 ju(lxyz),us(lxyz),ut(lxyz) 1490 1491 nxq = nx1+1 ! Number of quadrature points 1492 1493 lxyzq = nxq**ldim 1494 1495 call intp_rstd (ju,u,lx1,nxq,if3d,0) ! 0 --> Fwd interpolation 1496 do i=1,lxyzq 1497 ju(i)=ju(i)*h2(i) !! h2 must be on the fine grid, w/ quad wts 1498 enddo 1499 call intp_rstd (w,ju,lx1,nxq,if3d,1) ! 1 --> ju-->u 1500 1501 return 1502 end 1503C----------------------------------------------------------------------- 1504 subroutine axhm1_bp1(pap,ap1,p1,h1,h2) 1505C Vector conjugate gradient matvec for solution of BP1 (mass matrix) 1506C Input: pap,p1,h1,h2 Output: ap1 1507 include 'SIZE' 1508 include 'TOTAL' 1509 1510 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2)) 1511 real*8 gf(lg,lx,lelt) ! Equivalence new gf() data 1512 equivalence (gf,g1m1) ! layout to g1m1...g6m1 1513 1514 real*8 pap(3) 1515 real*8 ap1(lx,lelt) 1516 real*8 p1(lx,lelt) 1517 real*8 h1(lx,lelt),h2(lx,lelt) 1518 1519 real*8 ur(lx),us(lx),ut(lx) 1520 common /ctmp1/ ur,us,ut 1521 1522 integer e 1523 1524 pap(1)=0. 1525 1526 k=1 1527 nxq = nx1+1 1528 1529 do e=1,nelt 1530 1531 call ax_e_bp1(ap1(1,e),p1(1,e),gf(1,1,e),h1(1,e),h2(k,1) 1532 $ ,bm1(1,1,1,e),ur,us,ut) 1533 do i=1,lx 1534 pap(1)=pap(1)+ap1(i,e)*p1(i,e) 1535 enddo 1536 k=k+nxq*nxq*nxq 1537 1538 enddo 1539 1540 return 1541 end 1542C----------------------------------------------------------------------- 1543 subroutine ax_e_bp3(w,u,g,ur,us,ut,wk) 1544C Local matrix-vector for solution of BP3 (stiffness matrix) 1545C Input: u,g,ur,us,ut,wk Output: w 1546 include 'SIZE' 1547 include 'TOTAL' 1548 1549 parameter (lzq=lx1+1,lxyz=lx1*lx1*lx1,lxyzq=lzq*lzq*lzq) 1550 1551 common /ctmp0/ tmp(lxyzq) 1552 common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq) 1553 1554 real*8 ur(lxyzq),us(lxyzq),ut(lxyzq),wk(lxyzq) 1555 real*8 w(lxyz),u(lxyz),g(2*ldim,lxyzq) 1556 1557 n = lzq-1 1558 1559 call intp_rstd (wk,u,lx1,lzq,if3d,0) ! 0 --> Fwd interpolation 1560 call loc_grad3 (ur,us,ut,wk,n,dxmq,dxtmq) 1561 1562 do i=1,lxyzq 1563 wr = g(1,i)*ur(i) + g(2,i)*us(i) + g(3,i)*ut(i) 1564 ws = g(2,i)*ur(i) + g(4,i)*us(i) + g(5,i)*ut(i) 1565 wt = g(3,i)*ur(i) + g(5,i)*us(i) + g(6,i)*ut(i) 1566 ur(i) = wr 1567 us(i) = ws 1568 ut(i) = wt 1569 enddo 1570 1571 call loc_grad3t (wk,ur,us,ut,n,dxmq,dxtmq,tmp) 1572 call intp_rstd (w,wk,lx1,lzq,if3d,1) ! 1 --> ju-->u 1573 1574 return 1575 end 1576C----------------------------------------------------------------------- 1577 subroutine axhm1_bp3(pap,ap1,p1,h1,h2) 1578C Vector conjugate gradient matvec for solution of BP3 (stiffness matrix) 1579C Input: pap,p1,h1,h2 Output: ap1 1580 include 'SIZE' 1581 include 'TOTAL' 1582 1583 parameter (lzq=lx1+1) 1584 parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim) 1585 common /bpgfactors/ gf(lg,lq,lelt),bmq(lq,lelt),w3mq(lq) 1586 1587 real*8 pap(3) 1588 real*8 ap1(lx,lelt) 1589 real*8 p1(lx,lelt) 1590 real*8 h1(lx,lelt),h2(lx,lelt) 1591 1592 common /ctmp1/ ur,us,ut,wk 1593 real*8 ur(lq),us(lq),ut(lq),wk(lq) 1594 1595 integer e 1596 1597 pap(1)=0. 1598 1599 do e=1,nelt 1600 1601 call ax_e_bp3(ap1(1,e),p1(1,e),gf(1,1,e),ur,us,ut,wk) 1602 do i=1,lx 1603 pap(1)=pap(1)+p1(i,e)*ap1(i,e) 1604 enddo 1605 1606 enddo 1607 1608 return 1609 end 1610C----------------------------------------------------------------------- 1611 subroutine axhm1(pap,ap1,p1,h1,h2,bpname) 1612C Vector conjugate gradient matvec for solution of uncoupled 1613C Helmholtz equations 1614C Input: pap,p1,h1,h2,bpname Output: ap1 1615 include 'SIZE' 1616 include 'TOTAL' 1617 1618 parameter (lx=lx1*ly1*lz1) 1619 1620 real*8 pap(3),ap1(lx,lelt),p1(lx,lelt) 1621 real*8 h1(lx,lelt),h2(lx,lelt) 1622 1623 character*3 bpname 1624 1625 if (bpname.eq.'bp1') then 1626 call axhm1_bp1(pap,ap1,p1,h1,h2) 1627 1628 elseif (bpname.eq.'bp3') then 1629 call axhm1_bp3(pap,ap1,p1,h1,h2) 1630 1631 else 1632 write(6,*) bpname,' axhm1 bpname error' 1633 stop 1634 1635 endif 1636 1637 return 1638 end 1639C----------------------------------------------------------------------- 1640 subroutine get_bp(bp) 1641C Get BP to run 1642C Input: Output: bp 1643 integer i,bp 1644 character*64 bpval 1645 1646 bp=0 1647 if(iargc().ge.1) then 1648 call getarg(1,bpval) 1649 endif 1650 if(bpval.eq."bp1") then 1651 bp=1 1652 elseif(bpval.eq."bp3") then 1653 bp=3 1654 endif 1655 1656 return 1657 end 1658C----------------------------------------------------------------------- 1659 subroutine get_spec(spec) 1660C Get CEED backend specification 1661C Input: Output: spec 1662 integer i 1663 character*64 spec 1664 1665 spec = '/cpu/self' 1666 if(iargc().ge.2) then 1667 call getarg(2,spec) 1668 endif 1669 1670 return 1671 end 1672C----------------------------------------------------------------------- 1673 subroutine get_test(test) 1674C Get test mode flag 1675C Input: Output: test 1676 integer i,test 1677 character*64 testval 1678 1679 test=0 1680 if(iargc().ge.3) then 1681 call getarg(3,testval) 1682 endif 1683 if(testval.eq."test") then 1684 test=1 1685 endif 1686 1687 return 1688 end 1689C----------------------------------------------------------------------- 1690 1691