xref: /libCEED/examples/nek/bps/bps.usr (revision e1ef875599c3a6b02a7bf1f21ab905966273ff45)
1C Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2C the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3C reserved. See files LICENSE and NOTICE for details.
4C
5C This file is part of CEED, a collection of benchmarks, miniapps, software
6C libraries and APIs for efficient high-order finite element and spectral
7C element discretizations for exascale applications. For more information and
8C source code availability see http://github.com/ceed.
9C
10C The CEED research is supported by the Exascale Computing Project (17-SC-20-SC)
11C a collaborative effort of two U.S. Department of Energy organizations (Office
12C of Science and the National Nuclear Security Administration) responsible for
13C the planning and preparation of a capable exascale ecosystem, including
14C software, applications, hardware, advanced system engineering and early
15C testbed platforms, in support of the nation's exascale computing imperative.
16
17C> @file
18C> Mass and diffusion operators examples using Nek5000
19C_TESTARGS -c {ceed_resource} -e bp1 -n 1 -b 4 -test
20C_TESTARGS -c {ceed_resource} -e bp3 -n 1 -b 4 -test
21
22C-----------------------------------------------------------------------
23      subroutine masssetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
24     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
25     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
26C     Set up mass operator
27C     Input: u1,u2,u3,q             Output: v1,v2,ierr
28      integer q,ierr
29      real*8 ctx(1)
30      real*8 u1(3*q)
31      real*8 u2(9*q)
32      real*8 u3(q)
33      real*8 v1(q)
34      real*8 v2(q)
35      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
36      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
37      real*8 jacmq
38
39C     Quadrature Point Loop
40      do i=1,q
41        a11=u2(i+q*0)
42        a12=u2(i+q*3)
43        a13=u2(i+q*6)
44
45        a21=u2(i+q*1)
46        a22=u2(i+q*4)
47        a23=u2(i+q*7)
48
49        a31=u2(i+q*2)
50        a32=u2(i+q*5)
51        a33=u2(i+q*8)
52
53        g11 = (a22*a33-a23*a32)
54        g12 = (a13*a32-a33*a12)
55        g13 = (a12*a23-a22*a13)
56
57        g21 = (a23*a31-a21*a33)
58        g22 = (a11*a33-a31*a13)
59        g23 = (a13*a21-a23*a11)
60
61        g31 = (a21*a32-a22*a31)
62        g32 = (a12*a31-a32*a11)
63        g33 = (a11*a22-a21*a12)
64
65        jacmq = a11*g11+a21*g12+a31*g13
66
67C       Rho
68        v1(i)=u3(i)*jacmq
69
70C       RHS
71        v2(i)=u3(i)*jacmq
72     $             *dsqrt(u1(i+q*0)*u1(i+q*0)
73     $                   +u1(i+q*1)*u1(i+q*1)
74     $                   +u1(i+q*2)*u1(i+q*2))
75      enddo
76
77      ierr=0
78      end
79C-----------------------------------------------------------------------
80      subroutine massf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
81     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
82     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
83C     Apply mass operator
84C     Input: u1,u2,q                Output: v1,ierr
85      integer q,ierr
86      real*8 ctx(1)
87      real*8 u1(q)
88      real*8 u2(q)
89      real*8 v1(q)
90
91C     Quadrature Point Loop
92      do i=1,q
93        v1(i)=u2(i)*u1(i)
94      enddo
95
96      ierr=0
97      end
98C-----------------------------------------------------------------------
99      subroutine diffsetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
100     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
101     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
102C     Set up diffusion operator
103C     Input: u1,u2,u3,q             Output: v1,v2,ierr
104      integer q,ierr
105      real*8 ctx(1)
106      real*8 u1(3*q)
107      real*8 u2(9*q)
108      real*8 u3(q)
109      real*8 v1(6*q)
110      real*8 v2(q)
111      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
112      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
113      real*8 jacmq,scl
114      real*8 c(3),k(3)
115
116C     Quadrature Point Loop
117      do i=1,q
118        pi = 3.14159265358979323846
119
120        c(1)=0.
121        c(2)=1.
122        c(3)=2.
123        k(1)=1.
124        k(2)=2.
125        k(3)=3.
126
127        a11=u2(i+q*0)
128        a12=u2(i+q*3)
129        a13=u2(i+q*6)
130
131        a21=u2(i+q*1)
132        a22=u2(i+q*4)
133        a23=u2(i+q*7)
134
135        a31=u2(i+q*2)
136        a32=u2(i+q*5)
137        a33=u2(i+q*8)
138
139        g11 = (a22*a33-a23*a32)
140        g12 = (a13*a32-a33*a12)
141        g13 = (a12*a23-a22*a13)
142
143        g21 = (a23*a31-a21*a33)
144        g22 = (a11*a33-a31*a13)
145        g23 = (a13*a21-a23*a11)
146
147        g31 = (a21*a32-a22*a31)
148        g32 = (a12*a31-a32*a11)
149        g33 = (a11*a22-a21*a12)
150
151        jacmq = a11*g11+a21*g12+a31*g13
152
153        scl = u3(i)/jacmq
154
155C       Geometric factors
156C       Stored in Voigt convention
157C       0 5 4
158C       5 1 3
159C       4 3 2
160        v1(i+0*q) = scl*(g11*g11+g12*g12+g13*g13) ! Grr
161        v1(i+1*q) = scl*(g21*g21+g22*g22+g23*g23) ! Gss
162        v1(i+2*q) = scl*(g31*g31+g32*g32+g33*g33) ! Gtt
163        v1(i+3*q) = scl*(g21*g31+g22*g32+g23*g33) ! Gst
164        v1(i+4*q) = scl*(g11*g31+g12*g32+g13*g33) ! Grt
165        v1(i+5*q) = scl*(g11*g21+g12*g22+g13*g23) ! Grs
166
167C       RHS
168        v2(i) = u3(i)*jacmq*pi*pi
169     $            *dsin(pi*(c(1)+k(1)*u1(i+0*q)))
170     $            *dsin(pi*(c(2)+k(2)*u1(i+1*q)))
171     $            *dsin(pi*(c(3)+k(3)*u1(i+2*q)))
172     $            *(k(1)*k(1)+k(2)*k(2)+k(3)*k(3))
173
174      enddo
175
176      ierr=0
177      end
178C-----------------------------------------------------------------------
179      subroutine diffusionf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
180     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
181     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
182C     Apply diffusion operator
183C     Input: u1,u2,q                Output: v1,ierr
184      integer q,ierr
185      real*8 ctx(1)
186      real*8 u1(3*q)
187      real*8 u2(6*q)
188      real*8 v1(3*q)
189
190C     Quadrature Point Loop
191      do i=1,q
192        v1(i+0*q)=
193     $     u2(i+0*q)*u1(i)+u2(i+5*q)*u1(i+q)+u2(i+4*q)*u1(i+2*q)
194        v1(i+1*q)=
195     $     u2(i+5*q)*u1(i)+u2(i+1*q)*u1(i+q)+u2(i+3*q)*u1(i+2*q)
196        v1(i+2*q)=
197     $     u2(i+4*q)*u1(i)+u2(i+3*q)*u1(i+q)+u2(i+2*q)*u1(i+2*q)
198      enddo
199
200      ierr=0
201      end
202C-----------------------------------------------------------------------
203      subroutine set_h2_as_rhoJac_GL(h2,bmq,nxq)
204C     Set h2 as rhoJac
205C     Input: bmq,nxq                Output: h2
206      include 'SIZE'
207      real*8 h2(1),bmq(1)
208
209      common /ctmp77/ wd(lxd),zd(lxd)
210      integer e,i,L
211
212      call zwgl(zd,wd,nxq)  ! nxq = number of points
213
214      q = 1.0               ! Later, this can be a function of position...
215
216      L = 0
217      do e=1,lelt
218      do i=1,nxq**ldim
219         L=L+1
220         h2(L) = q*bmq(L)
221      enddo
222      enddo
223
224      return
225      end
226C-----------------------------------------------------------------------
227      subroutine dist_fld_h1(e)
228C     Set distance initial condition for BP1
229C     Input:                        Output: e
230      include 'SIZE'
231      include 'TOTAL'
232      real*8 x,y,z
233      real*8 e(1)
234
235      n=lx1*ly1*lz1*nelt
236
237      do i=1,n
238        x=xm1(i,1,1,1)
239        y=ym1(i,1,1,1)
240        z=zm1(i,1,1,1)
241
242        e(i) = dsqrt(x*x+y*y+z*z)
243      enddo
244
245      call dsavg(e)  ! This is requisite for random fields
246
247      return
248      end
249C-----------------------------------------------------------------------
250      subroutine sin_fld_h1(e)
251C     Set sine initial condition for BP3
252C     Input:                        Output: e
253      include 'SIZE'
254      include 'TOTAL'
255      real*8 x,y,z
256      real*8 e(1)
257      real*8 c(3),k(3)
258
259      n=lx1*ly1*lz1*nelt
260      pi = 3.14159265358979323846
261
262      c(1)=0.
263      c(2)=1.
264      c(3)=2.
265      k(1)=1.
266      k(2)=2.
267      k(3)=3.
268
269      do i=1,n
270        x=xm1(i,1,1,1)
271        y=ym1(i,1,1,1)
272        z=zm1(i,1,1,1)
273
274        e(i) = dsin(pi*(c(1)+k(1)*x))
275     &        *dsin(pi*(c(2)+k(2)*y))
276     &        *dsin(pi*(c(3)+k(3)*z))
277
278      enddo
279
280      call dsavg(e)  ! This is requisite for random fields
281
282      return
283      end
284C-----------------------------------------------------------------------
285      subroutine uservp(ix,iy,iz,eg) ! set variable properties
286      include 'SIZE'
287      include 'TOTAL'
288      include 'NEKUSE'
289      integer e,f,eg
290C     e = gllel(eg)
291
292      udiff  = 0.0
293      utrans = 0.0
294
295      return
296      end
297C-----------------------------------------------------------------------
298      subroutine userf(ix,iy,iz,eg) ! set acceleration term
299C
300C     Note: this is an acceleration term, NOT a force!
301C     Thus, ffx will subsequently be multiplied by rho(x,t).
302C
303      include 'SIZE'
304      include 'TOTAL'
305      include 'NEKUSE'
306      integer e,f,eg
307C     e = gllel(eg)
308
309      ffx = 0.0
310      ffy = 0.0
311      ffz = 0.0
312
313      return
314      end
315C-----------------------------------------------------------------------
316      subroutine userq(i,j,k,eg) ! set source term
317      include 'SIZE'
318      include 'TOTAL'
319      include 'NEKUSE'
320      integer e,f,eg
321      e = gllel(eg)
322
323      qvol   = 0
324
325      return
326      end
327C-----------------------------------------------------------------------
328      subroutine userbc(ix,iy,iz,f,eg) ! set up boundary conditions
329C     NOTE ::: This subroutine MAY NOT be called by every process
330      include 'SIZE'
331      include 'TOTAL'
332      include 'NEKUSE'
333      integer e,f,eg
334
335      ux   = 0.0
336      uy   = 0.0
337      uz   = 0.0
338      temp = 0.0
339
340      return
341      end
342C-----------------------------------------------------------------------
343      subroutine useric(ix,iy,iz,eg) ! set up initial conditions
344      include 'SIZE'
345      include 'TOTAL'
346      include 'NEKUSE'
347      integer e,f,eg
348
349      ux   = 0.0
350      uy   = 0.0
351      uz   = 0.0
352      temp = 0.0
353
354      return
355      end
356C-----------------------------------------------------------------------
357      subroutine usrdat   ! This routine to modify element vertices
358      include 'SIZE'
359      include 'TOTAL'
360
361      return
362      end
363C-----------------------------------------------------------------------
364      subroutine usrdat2  ! This routine to modify mesh coordinates
365      include 'SIZE'
366      include 'TOTAL'
367
368      x0 = 0
369      x1 = 1
370      call rescale_x(xm1,x0,x1)
371      call rescale_x(ym1,x0,x1)
372      call rescale_x(zm1,x0,x1)
373
374      param(59)=1  ! Force Nek to use the "deformed element" formulation
375
376      return
377      end
378C-----------------------------------------------------------------------
379      subroutine usrdat3
380      include 'SIZE'
381      include 'TOTAL'
382
383      return
384      end
385C-----------------------------------------------------------------------
386      subroutine xmask1   (r1,h2,nel)
387C     Apply zero Dirichlet boundary conditions
388C     Input: h2,nel                 Output: r1
389      include 'SIZE'
390      include 'TOTAL'
391      real*8 r1(1),h2(1)
392
393      n=nx1*ny1*nz1*nel
394      do i=1,n
395         r1(i)=r1(i)*h2(i)
396      enddo
397
398      return
399      end
400C-----------------------------------------------------------------------
401      function glrdif(x,y,n)
402C     Compute Linfty norm of (x-y)
403C     Input: x,y                    Output: n
404      real*8 x(n),y(n)
405
406      dmx=0
407      xmx=0
408      ymx=0
409
410      do i=1,n
411         diff=abs(x(i)-y(i))
412         dmx =max(dmx,diff)
413         xmx =max(xmx,x(i))
414         ymx =max(ymx,y(i))
415      enddo
416
417      xmx = max(xmx,ymx)
418      dmx = glmax(dmx,1) ! max across processors
419      xmx = glmax(xmx,1)
420
421      if (xmx.gt.0) then
422         glrdif = dmx/xmx
423      else
424         glrdif = -dmx   ! Negative indicates something strange
425      endif
426
427      return
428      end
429C-----------------------------------------------------------------------
430      subroutine loc_grad3(ur,us,ut,u,N,D,Dt)
431C     3D transpose of local gradient
432C     Input: u,N,D,Dt               Output: ur,us,ut
433      real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N)
434      real*8 u (0:N,0:N,0:N)
435      real*8 D (0:N,0:N),Dt(0:N,0:N)
436
437      m1 = N+1
438      m2 = m1*m1
439
440      call mxm(D ,m1,u(0,0,0),m1,ur,m2)
441      do k=0,N
442         call mxm(u(0,0,k),m1,Dt,m1,us(0,0,k),m1)
443      enddo
444      call mxm(u(0,0,0),m2,Dt,m1,ut,m1)
445
446      return
447      end
448c-----------------------------------------------------------------------
449      subroutine loc_grad3t(u,ur,us,ut,N,D,Dt,w)
450C     3D transpose of local gradient
451C     Input: ur,us,ut,N,D,Dt        Output: u
452       real*8 u (0:N,0:N,0:N)
453       real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N)
454       real*8 D (0:N,0:N),Dt(0:N,0:N)
455       real*8 w (0:N,0:N,0:N)
456
457       m1 = N+1
458       m2 = m1*m1
459       m3 = m1*m1*m1
460
461       call mxm(Dt,m1,ur,m1,u(0,0,0),m2)
462       do k=0,N
463          call mxm(us(0,0,k),m1,D ,m1,w(0,0,k),m1)
464       enddo
465       call add2(u(0,0,0),w,m3)
466       call mxm(ut,m2,D ,m1,w,m1)
467       call add2(u(0,0,0),w,m3)
468
469      return
470      end
471C-----------------------------------------------------------------------
472      subroutine geodatq(gf,bmq,w3mq,nxq)
473C     Routine to generate elemental geometric matrices on mesh 1
474C     (Gauss-Legendre Lobatto mesh).
475      include 'SIZE'
476      include 'TOTAL'
477
478      parameter (lg=3+3*(ldim-2),lzq=lx1+1,lxyd=lzq**ldim)
479
480      real*8 gf(lg,nxq**ldim,lelt),bmq(nxq**ldim,lelt),w3mq(nxq,nxq,nxq)
481
482      common /ctmp1/ xr(lxyd),xs(lxyd),xt(lxyd)
483      common /sxrns/ yr(lxyd),ys(lxyd),yt(lxyd)
484     $ ,             zr(lxyd),zs(lxyd),zt(lxyd)
485
486      common /ctmp77/ wd(lxd),zd(lxd)
487      common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq)
488
489      integer e
490      real*8 tmp(lxyd)
491      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
492      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
493      real*8 jacmq
494
495      if (nxq.gt.lzq) call exitti('ABORT: recompile with lzq=$',nxq)
496
497      call zwgl    (zd,wd,lzq)                            ! nxq = number of points
498      call gen_dgl (dxmq,dxtmq,lzq,lzq,tmp)
499
500      do k=1,nxq
501      do j=1,nxq
502      do i=1,nxq
503         w3mq(i,j,k) = wd(i)*wd(j)*wd(k)
504      enddo
505      enddo
506      enddo
507
508      nxyzq = nxq**ldim
509      nxqm1 = lzq-1
510
511      do e=1,nelt
512         call intp_rstd (tmp,xm1(1,1,1,e),lx1,lzq,if3d,0) ! 0-->Fwd interpolation
513         call loc_grad3 (xr,xs,xt,tmp,nxqm1,dxmq,dxtmq)
514
515         call intp_rstd (tmp,ym1(1,1,1,e),lx1,lzq,if3d,0)
516         call loc_grad3 (yr,ys,yt,tmp,nxqm1,dxmq,dxtmq)
517
518         call intp_rstd (tmp,zm1(1,1,1,e),lx1,lzq,if3d,0)
519         call loc_grad3 (zr,zs,zt,tmp,nxqm1,dxmq,dxtmq)
520
521         do i=1,nxyzq
522            a11 = xr(i)
523            a12 = xs(i)
524            a13 = xt(i)
525
526            a21 = yr(i)
527            a22 = ys(i)
528            a23 = yt(i)
529
530            a31 = zr(i)
531            a32 = zs(i)
532            a33 = zt(i)
533
534            g11 = (a22*a33-a23*a32)
535            g12 = (a13*a32-a33*a12)
536            g13 = (a12*a23-a22*a13)
537
538            g21 = (a23*a31-a21*a33)
539            g22 = (a11*a33-a31*a13)
540            g23 = (a13*a21-a23*a11)
541
542            g31 = (a21*a32-a22*a31)
543            g32 = (a12*a31-a32*a11)
544            g33 = (a11*a22-a21*a12)
545
546            jacmq = a11*g11+a21*g12+a31*g13
547
548            bmq(i,e)  = w3mq(i,1,1)*jacmq
549            scale     = w3mq(i,1,1)/jacmq
550
551            gf(1,i,e) = scale*(g11*g11+g12*g12+g13*g13) ! Grr
552            gf(2,i,e) = scale*(g11*g21+g12*g22+g13*g23) ! Grs
553            gf(3,i,e) = scale*(g11*g31+g12*g32+g13*g33) ! Grt
554            gf(4,i,e) = scale*(g21*g21+g22*g22+g23*g23) ! Gss
555            gf(5,i,e) = scale*(g21*g31+g22*g32+g23*g33) ! Gst
556            gf(6,i,e) = scale*(g31*g31+g32*g32+g33*g33) ! Gtt
557
558         enddo
559      enddo
560
561      return
562      end
563C-----------------------------------------------------------------------
564      subroutine setprecn_bp1 (d,h1,h2)
565C     Generate diagonal preconditioner for Helmholtz operator
566C     Input: h1,h2                  Output: d
567      include 'SIZE'
568      include 'TOTAL'
569
570      parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2))
571
572      real*8    d(lx1,ly1,lz1,lelt),h1(lxyz,lelt),h2(lxyz,lelt)
573      integer e
574
575      real*8         gf(lg,lx1,ly1,lz1,lelt) ! Equivalence new gf() data
576      equivalence (gf,g1m1)                  ! layout to g1m1...g6m1
577
578      real*8 ysm1(ly1)
579
580      nel   = nelfld(ifield)
581      n     = nel*lx1*ly1*lz1
582      nxyz  = lx1*ly1*lz1
583
584      call copy    (d,bm1,n)   ! Mass matrix preconditioning full mass matrix
585      call dssum   (d,nx1,ny1,nz1)
586      call invcol1 (d,n)
587      return
588
589      call dsset(lx1,ly1,lz1)
590
591      do 1000 e=1,nel
592
593        call rzero(d(1,1,1,e),nxyz)
594
595        do 320 iz=1,lz1
596         do 320 iy=1,ly1
597         do 320 ix=1,lx1
598         do 320 iq=1,lx1
599           d(ix,iy,iz,e) = d(ix,iy,iz,e)
600     $                   + gf(1,iq,iy,iz,e) * dxm1(iq,ix)**2
601     $                   + gf(2,ix,iq,iz,e) * dxm1(iq,iy)**2
602     $                   + gf(3,ix,iy,iq,e) * dxm1(iq,iz)**2
603  320    continue
604C
605C        Add cross terms if element is deformed.
606C
607         if (lxyz.gt.0) then
608
609           do i2=1,ly1,ly1-1
610           do i1=1,lx1,lx1-1
611              d(1,i1,i2,e) = d(1,i1,i2,e)
612     $            + gf(4,1,i1,i2,e) * dxtm1(1,1)*dytm1(i1,i1)
613     $            + gf(5,1,i1,i2,e) * dxtm1(1,1)*dztm1(i2,i2)
614              d(lx1,i1,i2,e) = d(lx1,i1,i2,e)
615     $            + gf(4,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dytm1(i1,i1)
616     $            + gf(5,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dztm1(i2,i2)
617              d(i1,1,i2,e) = d(i1,1,i2,e)
618     $            + gf(4,i1,1,i2,e) * dytm1(1,1)*dxtm1(i1,i1)
619     $            + gf(6,i1,1,i2,e) * dytm1(1,1)*dztm1(i2,i2)
620              d(i1,ly1,i2,e) = d(i1,ly1,i2,e)
621     $            + gf(4,i1,ly1,i2,e) * dytm1(ly1,ly1)*dxtm1(i1,i1)
622     $            + gf(6,i1,ly1,i2,e) * dytm1(ly1,ly1)*dztm1(i2,i2)
623              d(i1,i2,1,e) = d(i1,i2,1,e)
624     $            + gf(5,i1,i2,1,e) * dztm1(1,1)*dxtm1(i1,i1)
625     $            + gf(6,i1,i2,1,e) * dztm1(1,1)*dytm1(i2,i2)
626              d(i1,i2,lz1,e) = d(i1,i2,lz1,e)
627     $            + gf(5,i1,i2,lz1,e) * dztm1(lz1,lz1)*dxtm1(i1,i1)
628     $            + gf(6,i1,i2,lz1,e) * dztm1(lz1,lz1)*dytm1(i2,i2)
629
630           enddo
631           enddo
632         endif
633
634        do i=1,lxyz
635           d(i,1,1,e)=d(i,1,1,e)*h1(i,e)+h2(i,e)*bm1(i,1,1,e)
636        enddo
637
638 1000 continue ! element loop
639
640C     If axisymmetric, add a diagonal term in the radial direction (ISD=2)
641
642      if (ifaxis.and.(isd.eq.2)) then
643         do 1200 e=1,nel
644            if (ifrzer(e)) call mxm(ym1(1,1,1,e),lx1,datm1,ly1,ysm1,1)
645            k=0
646            do 1190 j=1,ly1
647            do 1190 i=1,lx1
648               k=k+1
649               if (ym1(i,j,1,e).ne.0.) then
650                  term1 = bm1(i,j,1,e)/ym1(i,j,1,e)**2
651                  if (ifrzer(e)) then
652                     term2 =  wxm1(i)*wam1(1)*dam1(1,j)
653     $                       *jacm1(i,1,1,e)/ysm1(i)
654                  else
655                     term2 = 0.
656                  endif
657                  d(i,j,1,e) = d(i,j,1,e) + h1(k,e)*(term1+term2)
658               endif
659 1190       continue
660 1200    continue
661
662      endif
663      call dssum   (d,nx1,ny1,nz1)
664      call invcol1 (d,n)
665
666      if (nio.eq.0) write(6,1) n,d(1,1,1,1),h1(1,1),h2(1,1),bm1(1,1,1,1)
667   1  format(i9,1p4e12.4,' diag prec')
668
669      return
670      end
671C-----------------------------------------------------------------------
672      subroutine setprecn_bp3 (d,h1,h2)
673C     Generate dummy diagonal preconditioner for Helmholtz operator
674C     Input: h1,h2                  Output: d
675      include 'SIZE'
676      include 'TOTAL'
677
678      parameter (n=lx1*ly1*lz1*lelt)
679      real*8 d(n),h1(n),h2(n)
680
681      call rone (d,n)
682
683      return
684      end
685C-----------------------------------------------------------------------
686      subroutine userchk
687      include 'SIZE'
688      include 'TOTAL'
689
690      integer bp
691
692      call get_bp(bp)
693
694      if (bp==1) then
695        if (istep.gt.0) call bp1
696      elseif (bp==3) then
697        if (istep.gt.0) call bp3
698      else
699        write(6,*) "INVALID BP SPECIFICED"
700      endif
701
702      return
703      end
704C-----------------------------------------------------------------------
705      subroutine bp1
706C     Solution to BP1 using libCEED
707      include 'SIZE'
708      include 'TOTAL'
709      include 'CTIMER'  ! ifsync
710      include 'FDMH1'
711      include 'ceedf.h'
712
713      parameter (lzq=lx1+1)
714      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
715      common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq)
716
717      parameter (lt=lx1*ly1*lz1*lelt)
718      parameter (ld=lxd*lyd*lzd*lelt)
719      common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt)
720      common /vcrny/ e1(lt)
721      common /vcrvh/ h1(lt),h2(lx*lelt),pap(3)
722      real*8 coords(ldim*lx*lelt)
723
724      logical ifh3
725      integer*8 nnode
726      integer ceed,err,test
727      character*64 spec
728
729      integer p,q,ncompx,ncompu,enode,lnode
730      integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs
731      integer stridesu(3),stridesx(3),stridesw(3)
732      integer erstrctu,erstrctx,erstrctw
733      integer basisu,basisx
734      integer qf_mass,qf_setup
735      integer op_mass,op_setup
736      real*8  x,y,z
737      integer*8 offset
738
739      external massf,masssetupf
740
741      ifield = 1
742      nxq    = nx1+1
743      n = nx1*ny1*nz1*nelt
744
745      ifsync = .false.
746
747C     Set up coordinates
748      ii=0
749      do j=0,nelt-1
750      do i=1,lx
751        ii=ii+1
752        x = xm1(ii,1,1,1)
753        y = ym1(ii,1,1,1)
754        z = zm1(ii,1,1,1)
755        coords(i+0*lx+3*j*lx)=x
756        coords(i+1*lx+3*j*lx)=y
757        coords(i+2*lx+3*j*lx)=z
758      enddo
759      enddo
760
761C     Init ceed library
762      call get_spec(spec)
763      call ceedinit(trim(spec)//char(0),ceed,err)
764
765      call get_test(test)
766
767C     Set up Nek geometry data
768      call geodatq       (gf,bmq,w3mq,nxq)         ! Set up gf() arrays
769      call set_h2_as_rhoJac_GL(h2,bmq,nxq)
770
771C     Set up true soln
772      call dist_fld_h1   (e1)
773      call copy          (h1,e1,n)                 ! Save exact soln in h1
774
775C     Set up solver parameters
776      tol       = 1e-10
777      param(22) = tol
778      maxit     = 100
779
780      call nekgsync()
781
782C     Create ceed basis for mesh and computation
783      p=nx1
784      q=p+1
785      ncompu=1
786      ncompx=ldim
787      call ceedbasiscreatetensorh1lagrange(ceed,ndim,ndim,p,q,
788     $  ceed_gauss,basisx,err)
789      call ceedbasiscreatetensorh1lagrange(ceed,ndim,ncompu,p,q,
790     $  ceed_gauss,basisu,err)
791
792C     Create ceed element restrictions for mesh and computation
793      enode=p**ldim
794      lnode=enode*nelt*ncompu
795      stridesx=[1,enode,enode*ldim]
796      call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ldim,
797     $  ldim*lnode,stridesx,erstrctx,err)
798      stridesu=[1,enode,enode*ncompu]
799      call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ncompu,
800     $  ncompu*lnode,stridesu,erstrctu,err)
801      call ceedelemrestrictioncreatestrided(ceed,nelt,q**ldim,
802     $  1,nelt*q**ldim,ceed_strides_backend,erstrctw,err)
803
804C     Create ceed vectors
805      call ceedvectorcreate(ceed,lnode,vec_p1,err)
806      call ceedvectorcreate(ceed,lnode,vec_ap1,err)
807      call ceedvectorcreate(ceed,lnode,vec_rhs,err)
808      call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err)
809      call ceedvectorcreate(ceed,nelt*q**ldim,vec_qdata,err)
810
811      offset=0
812      call ceedvectorsetarray(vec_coords,ceed_mem_host,
813     $  ceed_use_pointer,coords,offset,err)
814
815C     Create ceed qfunctions for masssetupf and massf
816      call ceedqfunctioncreateinterior(ceed,1,masssetupf,
817     $  EXAMPLE_DIR
818     $  //'bps/bps.h:masssetupf',qf_setup,err)
819      call ceedqfunctionaddinput(qf_setup,'x',ncompx,
820     $  ceed_eval_interp,err)
821      call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim,
822     $  ceed_eval_grad,err)
823      call ceedqfunctionaddinput(qf_setup,'weight',ncompu,
824     $  ceed_eval_weight,err)
825      call ceedqfunctionaddoutput(qf_setup,'qdata',ncompu,
826     $  ceed_eval_none,err)
827      call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu,
828     $  ceed_eval_interp,err)
829
830      call ceedqfunctioncreateinterior(ceed,1,massf,
831     $  EXAMPLE_DIR
832     $  //'bps/bps.h:massf',qf_mass,err)
833      call ceedqfunctionaddinput(qf_mass,'u',ncompu,
834     $  ceed_eval_interp,err)
835      call ceedqfunctionaddinput(qf_mass,'qdata',ncompu,
836     $  ceed_eval_none,err)
837      call ceedqfunctionaddoutput(qf_mass,'v',ncompu,
838     $  ceed_eval_interp,err)
839
840C     Create ceed operators
841      call ceedoperatorcreate(ceed,qf_setup,
842     $  ceed_qfunction_none,ceed_qfunction_none,op_setup,err)
843      call ceedoperatorsetfield(op_setup,'x',erstrctx,
844     $  basisx,ceed_vector_active,err)
845      call ceedoperatorsetfield(op_setup,'dx',erstrctx,
846     $  basisx,ceed_vector_active,err)
847      call ceedoperatorsetfield(op_setup,'weight',
848     $  ceed_elemrestriction_none,basisx,ceed_vector_none,err)
849      call ceedoperatorsetfield(op_setup,'qdata',erstrctw,
850     $  ceed_basis_collocated,ceed_vector_active,err)
851      call ceedoperatorsetfield(op_setup,'rhs',erstrctu,
852     $  basisu,vec_rhs,err)
853
854      call ceedoperatorcreate(ceed,qf_mass,
855     $  ceed_qfunction_none,ceed_qfunction_none,op_mass,err)
856      call ceedoperatorsetfield(op_mass,'u',erstrctu,
857     $  basisu,ceed_vector_active,err)
858      call ceedoperatorsetfield(op_mass,'qdata',erstrctw,
859     $  ceed_basis_collocated,vec_qdata,err)
860      call ceedoperatorsetfield(op_mass,'v',erstrctu,
861     $  basisu,ceed_vector_active,err)
862
863C     Compute setup data
864      call ceedvectorsetarray(vec_rhs,ceed_mem_host,
865     $  ceed_use_pointer,r1,offset,err)
866      call ceedoperatorapply(op_setup,vec_coords,vec_qdata,
867     $  ceed_request_immediate,err)
868
869C     Set up true RHS
870      call dssum         (r1,nx1,ny1,nz1)          ! r1
871
872C     Set up algebraic RHS with libCEED
873      call ceedvectorsetarray(vec_p1,ceed_mem_host,
874     $  ceed_use_pointer,h1,offset,err)
875      call ceedvectorsetarray(vec_ap1,ceed_mem_host,
876     $  ceed_use_pointer,r2,offset,err)
877      call ceedoperatorapply(op_mass,vec_p1,vec_ap1,
878     $  ceed_request_immediate,err)                ! r2 = A_ceed*h1
879      call dssum         (r2,nx1,ny1,nz1)
880
881C     Set up algebraic RHS with Nek5000
882      call axhm1         (pap,r3,h1,h1,h2,'bp1')   ! r3 = A_nek5k*h1
883      call dssum         (r3,nx1,ny1,nz1)
884
885      call nekgsync()
886
887C     Solve true RHS
888      if (nid.eq.0) write (6,*) "libCEED true RHS"
889      tstart = dnekclock()
890      call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_mass,
891     $  vec_p1,vec_ap1,maxit,'bp1')
892      tstop  = dnekclock()
893
894C     Output
895      telaps = (tstop-tstart)
896      maxits = maxit
897      er1 = glrdif(u1,e1,n)
898      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
899
900      if (test.eq.1.and.nid.eq.0) then
901        if (maxit>=100) then
902          write(6,*) "UNCONVERGED CG"
903        endif
904        if (dabs(er1)>5e-3) then
905          write(6,*) "ERROR IS TOO LARGE"
906        endif
907      endif
908
909      nx     = nx1-1
910      nnode  = nelgt            ! nnodes
911      nnode  = nnode*(nx**ldim) ! nnodes
912      nppp   = nnode/np         ! nnodes/proc
913
914      dofps  = nnode/telaps     ! DOF/sec - scalar form
915      titers = telaps/maxits    ! time per iteration
916      tppp_s = titers/nppp      ! time per iteraton per local point
917
918      if (nid.eq.0) write(6,1) 'case scalar:'
919     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
920
921C     Solve libCEED algebraic RHS
922      if (nid.eq.0) write (6,*) "libCEED algebraic RHS"
923      maxit = 100
924      tstart = dnekclock()
925      call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_mass,
926     $  vec_p1,vec_ap1,maxit,'bp1')
927      tstop  = dnekclock()
928
929C     Output
930      telaps = (tstop-tstart)
931      maxits = maxit
932      er1 = glrdif(u1,e1,n)
933      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
934
935      if (test.eq.1.and.nid.eq.0) then
936        if (maxit>=100) then
937          write(6,*) "UNCONVERGED CG"
938        endif
939        if (dabs(er1)>1e-5) then
940          write(6,*) "ERROR IS TOO LARGE"
941        endif
942      endif
943
944      nx      = nx1-1
945      nnode   = nelgt            ! nnodes
946      nnode   = nnode*(nx**ldim) ! nnodes
947      nppp    = nnode/np         ! nnodes/proc
948
949      dofps   = nnode/telaps     ! DOF/sec - scalar form
950      titers  = telaps/maxits    ! time per iteration
951      tppp_s  = titers/nppp      ! time per iteraton per local point
952
953      if (nid.eq.0) write(6,1) 'case scalar:'
954     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
955
956C     Solve Nek5000 algebraic RHS
957      if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS"
958      maxit = 100
959      tstart = dnekclock()
960      call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_mass,
961     $  vec_p1,vec_ap1,maxit,'bp1')
962      tstop  = dnekclock()
963
964C     Output
965      telaps = (tstop-tstart)
966      maxits = maxit
967      er1 = glrdif(u1,e1,n)
968      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
969
970      if (test.eq.1.and.nid.eq.0) then
971        if (maxit>=100) then
972          write(6,*) "UNCONVERGED CG"
973        endif
974        if (dabs(er1)>1e-5) then
975          write(6,*) "ERROR IS TOO LARGE"
976        endif
977      endif
978
979      nx      = nx1-1
980      nnode   = nelgt            ! nnodes
981      nnode   = nnode*(nx**ldim) ! nnodes
982      nppp    = nnode/np         ! nnodes/proc
983
984      dofps   = nnode/telaps     ! DOF/sec - scalar form
985      titers  = telaps/maxits    ! time per iteration
986      tppp_s  = titers/nppp      ! time per iteraton per local point
987
988      if (nid.eq.0) write(6,1) 'case scalar:'
989     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
990
991    1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5)
992    3 format(i3,i9,e12.4,1x,a8,i9)
993
994C     Destroy ceed handles
995      call ceedvectordestroy(vec_p1,err)
996      call ceedvectordestroy(vec_ap1,err)
997      call ceedvectordestroy(vec_rhs,err)
998      call ceedvectordestroy(vec_qdata,err)
999      call ceedvectordestroy(vec_coords,err)
1000      call ceedelemrestrictiondestroy(erstrctu,err)
1001      call ceedelemrestrictiondestroy(erstrctx,err)
1002      call ceedelemrestrictiondestroy(erstrctw,err)
1003      call ceedbasisdestroy(basisu,err)
1004      call ceedbasisdestroy(basisx,err)
1005      call ceedqfunctiondestroy(qf_setup,err)
1006      call ceedqfunctiondestroy(qf_mass,err)
1007      call ceedoperatordestroy(op_setup,err)
1008      call ceedoperatordestroy(op_mass,err)
1009      call ceeddestroy(ceed,err)
1010
1011      return
1012      end
1013C-----------------------------------------------------------------------
1014      subroutine bp3
1015C     Solution to BP3 using libCEED
1016      include 'SIZE'
1017      include 'TOTAL'
1018      include 'CTIMER'  ! ifsync
1019      include 'FDMH1'
1020      include 'ceedf.h'
1021
1022      parameter (lzq=lx1+1)
1023      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
1024      common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq)
1025
1026      parameter (lt=lx1*ly1*lz1*lelt)
1027      parameter (ld=lxd*lyd*lzd*lelt)
1028      common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt)
1029      common /vcrny/ e1(lt)
1030      common /vcrvh/ h1(lt),h2(ld),pap(3)
1031      real*8 coords(ldim*lx*lelt)
1032
1033      logical ifh3
1034      integer*8 nnode
1035      integer ceed,err,test
1036      character*64 spec
1037
1038      integer p,q,ncompx,ncompu,enode,lnode
1039      integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs
1040      integer stridesu(3),stridesx(3),stridesw(3)
1041      integer erstrctu,erstrctx,erstrctw
1042      integer basisu,basisx
1043      integer qf_diffusion,qf_setup
1044      integer op_diffusion,op_setup
1045      integer ii,i,ngeo
1046      real*8  x,y,z
1047      integer*8 offset
1048
1049      external diffusionf,diffsetupf
1050
1051      ifield = 1
1052      nxq    = nx1+1
1053      n = nx1*ny1*nz1*nelt
1054
1055      ifsync = .false.
1056
1057C     Set up coordinates and mask
1058      ii=0
1059      do j=0,nelt-1
1060      do i=1,lx
1061        ii=ii+1
1062        x = xm1(ii,1,1,1)
1063        y = ym1(ii,1,1,1)
1064        z = zm1(ii,1,1,1)
1065        coords(i+0*lx+3*j*lx)=x
1066        coords(i+1*lx+3*j*lx)=y
1067        coords(i+2*lx+3*j*lx)=z
1068        if ( x.eq.0.or.x.eq.1
1069     $   .or.y.eq.0.or.y.eq.1
1070     $   .or.z.eq.0.or.z.eq.1 ) then
1071          h2(ii)=0.
1072        else
1073          h2(ii)=1.
1074        endif
1075      enddo
1076      enddo
1077
1078C     Init ceed library
1079      call get_spec(spec)
1080      call ceedinit(trim(spec)//char(0),ceed,err)
1081
1082      call get_test(test)
1083
1084C     Set up Nek geometry data
1085      call geodatq       (gf,bmq,w3mq,nxq)         ! Set up gf() arrays
1086
1087C     Set up true soln
1088      call sin_fld_h1    (e1)
1089      call xmask1        (e1,h2,nelt)
1090      call copy          (h1,e1,n)                 ! Save exact soln in h1
1091
1092C     Set up solver parameters
1093      tol       = 1e-10
1094      param(22) = tol
1095      maxit     = 100
1096
1097      call nekgsync()
1098
1099C     Create ceed basis for mesh and computation
1100      p=nx1
1101      q=p+1
1102      ncompu=1
1103      ncompx=ldim
1104      call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompx,p,q,
1105     $  ceed_gauss,basisx,err)
1106      call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompu,p,q,
1107     $  ceed_gauss,basisu,err)
1108
1109C     Create ceed element restrictions for mesh and computation
1110      enode=p**ldim
1111      lnode=enode*nelt*ncompu
1112      ngeo=(ldim*(ldim+1))/2
1113      stridesx=[1,enode,enode*ldim]
1114      call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ldim,
1115     $  ldim*lnode,stridesx,erstrctx,err)
1116      stridesu=[1,enode,enode*ncompu]
1117      call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ncompu,
1118     $  ncompu*lnode,stridesu,erstrctu,err)
1119      stridesw=[1,q**ldim,ngeo*q**ldim]
1120      call ceedelemrestrictioncreatestrided(ceed,nelt,q**ldim,
1121     $  ngeo,nelt*q**ldim*ngeo,stridesw,erstrctw,err)
1122
1123C     Create ceed vectors
1124      call ceedvectorcreate(ceed,lnode,vec_p1,err)
1125      call ceedvectorcreate(ceed,lnode,vec_ap1,err)
1126      call ceedvectorcreate(ceed,lnode,vec_rhs,err)
1127      call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err)
1128      call ceedvectorcreate(ceed,nelt*ngeo*q**ldim,vec_qdata,err)
1129
1130      offset=0
1131      call ceedvectorsetarray(vec_coords,ceed_mem_host,
1132     $  ceed_use_pointer,coords,offset,err)
1133
1134C     Create ceed qfunctions for diffsetupf and diffusionf
1135      call ceedqfunctioncreateinterior(ceed,1,diffsetupf,
1136     $  EXAMPLE_DIR
1137     $  //'bps/bps.h:diffsetupf'//char(0),qf_setup,err)
1138      call ceedqfunctionaddinput(qf_setup,'x',ncompx,
1139     $  ceed_eval_interp,err)
1140      call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim,
1141     $  ceed_eval_grad,err)
1142      call ceedqfunctionaddinput(qf_setup,'weight',ncompu,
1143     $  ceed_eval_weight,err)
1144      call ceedqfunctionaddoutput(qf_setup,'qdata',ngeo,
1145     $  ceed_eval_none,err)
1146      call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu,
1147     $  ceed_eval_interp,err)
1148
1149      call ceedqfunctioncreateinterior(ceed,1,diffusionf,
1150     $  EXAMPLE_DIR
1151     $  //'bps/bps.h:diffusionf'//char(0),qf_diffusion,err)
1152      call ceedqfunctionaddinput(qf_diffusion,'u',ncompu*ldim,
1153     $  ceed_eval_grad,err)
1154      call ceedqfunctionaddinput(qf_diffusion,'qdata',ngeo,
1155     $  ceed_eval_none,err)
1156      call ceedqfunctionaddoutput(qf_diffusion,'v',ncompu*ldim,
1157     $  ceed_eval_grad,err)
1158
1159C     Create ceed operators
1160      call ceedoperatorcreate(ceed,qf_setup,
1161     $  ceed_qfunction_none,ceed_qfunction_none,op_setup,err)
1162      call ceedoperatorsetfield(op_setup,'x',erstrctx,
1163     $  basisx,ceed_vector_active,err)
1164      call ceedoperatorsetfield(op_setup,'dx',erstrctx,
1165     $  basisx,ceed_vector_active,err)
1166      call ceedoperatorsetfield(op_setup,'weight',
1167     $  ceed_elemrestriction_none,basisx,ceed_vector_none,err)
1168      call ceedoperatorsetfield(op_setup,'qdata',erstrctw,
1169     $  ceed_basis_collocated,ceed_vector_active,err)
1170      call ceedoperatorsetfield(op_setup,'rhs',erstrctu,
1171     $  basisu,vec_rhs,err)
1172
1173      call ceedoperatorcreate(ceed,qf_diffusion,
1174     $  ceed_qfunction_none,ceed_qfunction_none,op_diffusion,err)
1175      call ceedoperatorsetfield(op_diffusion,'u',erstrctu,
1176     $  basisu,ceed_vector_active,err)
1177      call ceedoperatorsetfield(op_diffusion,'qdata',erstrctw,
1178     $  ceed_basis_collocated,vec_qdata,err)
1179      call ceedoperatorsetfield(op_diffusion,'v',erstrctu,
1180     $  basisu,ceed_vector_active,err)
1181
1182C     Compute setup data
1183      call ceedvectorsetarray(vec_rhs,ceed_mem_host,
1184     $  ceed_use_pointer,r1,offset,err)
1185      call ceedoperatorapply(op_setup,vec_coords,vec_qdata,
1186     $  ceed_request_immediate,err)
1187
1188C     Set up true RHS
1189      call dssum         (r1,nx1,ny1,nz1)          ! r1
1190      call xmask1        (r1,h2,nelt)
1191
1192C     Set up algebraic RHS with libCEED
1193      call ceedvectorsetarray(vec_p1,ceed_mem_host,
1194     $  ceed_use_pointer,h1,offset,err)
1195      call ceedvectorsetarray(vec_ap1,ceed_mem_host,
1196     $  ceed_use_pointer,r2,offset,err)
1197      call ceedoperatorapply(op_diffusion,vec_p1,vec_ap1,
1198     $  ceed_request_immediate,err)                ! r2 = A_ceed*h1
1199      call dssum         (r2,nx1,ny1,nz1)
1200      call xmask1        (r2,h2,nelt)
1201
1202C     Set up algebraic RHS with Nek5000
1203      call axhm1         (pap,r3,h1,h1,h2,'bp3')   ! r3 = A_nek5k*h1
1204      call dssum         (r3,nx1,ny1,nz1)
1205      call xmask1        (r3,h2,nelt)
1206
1207      call nekgsync()
1208
1209C     Solve true RHS
1210      if (nid.eq.0) write (6,*) "libCEED true RHS"
1211      tstart = dnekclock()
1212      call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
1213     $  vec_p1,vec_ap1,maxit,'bp3')
1214      tstop  = dnekclock()
1215
1216C     Output
1217      telaps = (tstop-tstart)
1218      maxits = maxit
1219      er1 = glrdif(u1,e1,n)
1220      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
1221
1222      if (test.eq.1.and.nid.eq.0) then
1223        if (maxit>=100) then
1224          write(6,*) "UNCONVERGED CG"
1225        endif
1226        if (dabs(er1)>1e-3) then
1227          write(6,*) "ERROR IS TOO LARGE"
1228        endif
1229      endif
1230
1231      nx      = nx1-1
1232      nnode   = nelgt            ! nnodes
1233      nnode   = nnode*(nx**ldim) ! nnodes
1234      nppp    = nnode/np         ! nnodes/proc
1235
1236      dofps   = nnode/telaps     ! DOF/sec - scalar form
1237      titers  = telaps/maxits    ! time per iteration
1238      tppp_s  = titers/nppp      ! time per iteraton per local point
1239
1240      if (nid.eq.0) write(6,1) 'case scalar:'
1241     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
1242
1243C     Solve libCEED algebraic RHS
1244      if (nid.eq.0) write (6,*) "libCEED algebraic RHS"
1245      maxit = 100
1246      tstart = dnekclock()
1247      call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
1248     $  vec_p1,vec_ap1,maxit,'bp3')
1249      tstop  = dnekclock()
1250
1251C     Output
1252      telaps = (tstop-tstart)
1253      maxits = maxit
1254      er1 = glrdif(u1,e1,n)
1255      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
1256
1257      if (test.eq.1.and.nid.eq.0) then
1258        if (maxit>=100) then
1259          write(6,*) "UNCONVERGED CG"
1260        endif
1261        if (dabs(er1)>1e-9) then
1262          write(6,*) "ERROR IS TOO LARGE"
1263        endif
1264      endif
1265
1266      nx      = nx1-1
1267      nnode   = nelgt            ! nnodes
1268      nnode   = nnode*(nx**ldim) ! nnodes
1269      nppp    = nnode/np         ! nnodes/proc
1270
1271      dofps   = nnode/telaps     ! DOF/sec - scalar form
1272      titers  = telaps/maxits    ! time per iteration
1273      tppp_s  = titers/nppp      ! time per iteraton per local point
1274
1275      if (nid.eq.0) write(6,1) 'case scalar:'
1276     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
1277
1278C     Solve Nek5000 algebraic RHS
1279      if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS"
1280      maxit = 100
1281      tstart = dnekclock()
1282      call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
1283     $  vec_p1,vec_ap1,maxit,'bp3')
1284      tstop  = dnekclock()
1285
1286C     Output
1287      telaps = (tstop-tstart)
1288      maxits = maxit
1289      er1 = glrdif(u1,e1,n)
1290      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
1291
1292      if (test.eq.1.and.nid.eq.0) then
1293        if (maxit>=100) then
1294          write(6,*) "UNCONVERGED CG"
1295        endif
1296        if (dabs(er1)>1e-9) then
1297          write(6,*) "ERROR IS TOO LARGE"
1298        endif
1299      endif
1300
1301      nx      = nx1-1
1302      nnode   = nelgt            ! nnodes
1303      nnode   = nnode*(nx**ldim) ! nnodes
1304      nppp    = nnode/np         ! nnodes/proc
1305
1306      dofps   = nnode/telaps     ! DOF/sec - scalar form
1307      titers  = telaps/maxits    ! time per iteration
1308      tppp_s  = titers/nppp      ! time per iteraton per local point
1309
1310      if (nid.eq.0) write(6,1) 'case scalar:'
1311     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
1312
1313    1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5)
1314    3 format(i3,i9,e12.4,1x,a8,i9)
1315
1316C     Destroy ceed handles
1317      call ceedvectordestroy(vec_p1,err)
1318      call ceedvectordestroy(vec_ap1,err)
1319      call ceedvectordestroy(vec_rhs,err)
1320      call ceedvectordestroy(vec_qdata,err)
1321      call ceedvectordestroy(vec_coords,err)
1322      call ceedelemrestrictiondestroy(erstrctu,err)
1323      call ceedelemrestrictiondestroy(erstrctx,err)
1324      call ceedelemrestrictiondestroy(erstrctw,err)
1325      call ceedbasisdestroy(basisu,err)
1326      call ceedbasisdestroy(basisx,err)
1327      call ceedqfunctiondestroy(qf_setup,err)
1328      call ceedqfunctiondestroy(qf_diffusion,err)
1329      call ceedoperatordestroy(op_setup,err)
1330      call ceedoperatordestroy(op_diffusion,err)
1331      call ceeddestroy(ceed,err)
1332
1333      return
1334      end
1335C-----------------------------------------------------------------------
1336      subroutine cggos(u1,r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,
1337     $  vec_ap1,maxit,bpname)
1338C     Scalar conjugate gradient iteration for solution of uncoupled
1339C     Helmholtz equations
1340C     Input: r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,vec_ap1,bpname
1341C     Output: u1,maxit
1342      include 'SIZE'
1343      include 'TOTAL'
1344      include 'DOMAIN'
1345      include 'FDMH1'
1346      character*3 bpname
1347
1348C     INPUT:  rhs1 - rhs
1349C             h1   - exact solution
1350
1351      parameter (lt=lx1*ly1*lz1*lelt)
1352      parameter (ld=lxd*lyd*lzd*lelt)
1353      real*8 u1(lt),r1(lt),h1(lt),h2(lt)
1354      real*8 rmult(1),binv(1)
1355      integer ceed,ceed_op,vec_ap1,vec_p1
1356      common /scrcg/ dpc(lt),p1(lt),z1(lt)
1357      common /scrca/ wv(4),wk(4),rpp1(4),rpp2(4),alph(4),beta(4),pap(4)
1358
1359      real*8 ap1(lt)
1360      equivalence (ap1,z1)
1361
1362      vol   = volfld(ifield)
1363      nel   = nelfld(ifield)
1364      nxyz  = lx1*ly1*lz1
1365      n     = nxyz*nel
1366      nx    = nx1-1                  ! Polynomial order (just for i/o)
1367
1368      tol=tin
1369
1370      if(bpname.ne.'bp1') then
1371        call setprecn_bp3(dpc,h1,h2) ! Set up diagional pre-conidtioner
1372      else
1373        call setprecn_bp1(dpc,h1,h2) ! Set up diagional pre-conidtioner
1374      endif
1375
1376      call rzero         (u1,n)      ! Initialize solution
1377
1378      wv(1)=0
1379      do i=1,n
1380         s=rmult(i)                  !      -1
1381         p1(i)=dpc(i)*r1(i)          ! p = M  r      T
1382         wv(1)=wv(1)+s*p1(i)*r1(i)   !              r p
1383      enddo
1384      call gop(wv(1),wk,'+  ',1)
1385      rpp1(1) = wv  (1)
1386
1387      do 1000 iter=1,maxit
1388         call axhm1_ceed (pap,ap1,p1,h1,h2,ceed,ceed_op,
1389     $     vec_ap1,vec_p1)
1390         call dssum    (ap1,nx1,ny1,nz1)
1391         if (bpname.ne.'bp1') call xmask1(ap1,h2,nel)
1392
1393         call gop      (pap,wk,'+  ',1)
1394         alph(1) = rpp1(1)/pap(1)
1395
1396         do i=1,n
1397            u1(i)=u1(i)+alph(1)* p1(i)
1398            r1(i)=r1(i)-alph(1)*ap1(i)
1399         enddo
1400
1401C        tolerance check here
1402         call rzero(wv,2)
1403         do i=1,n
1404            wv(1)=wv(1)+r1(i)*r1(i)            ! L2 error estimate
1405            z1(i)=dpc(i)*r1(i)                 ! z = M  r
1406            wv(2)=wv(2)+rmult(i)*z1(i)*r1(i)   ! r z
1407         enddo
1408         call gop(wv,wk,'+  ',2)
1409
1410C        if (nio.eq.0) write(6,1) ifield,istep,iter,nx,(wv(k),k=1,1)
1411  1     format(i2,i9,i5,i4,1p1e12.4,' cggos')
1412
1413         enorm=sqrt(wv(1))
1414         if (enorm.lt.tol) then
1415            ifin = iter
1416            if (nio.eq.0) write(6,3000) istep,ifin,enorm,tol
1417            goto 9999
1418         endif
1419C        if (nio.eq.0) write(6,2) iter,enorm,alph(1),pap(1),'alpha'
1420 2      format(i5,1p3e12.4,2x,a5)
1421
1422         rpp2(1)=rpp1(1)
1423         rpp1(1)=wv  (2)
1424         beta1  =rpp1(1)/rpp2(1)
1425         do i=1,n
1426            p1(i)=z1(i) + beta1*p1(i)
1427         enddo
1428
1429 1000 continue
1430
1431      rbnorm=sqrt(wv(1))
1432      if (nio.eq.0) write (6,3001) istep,iter,rbnorm,tol
1433      iter = iter-1
1434
1435 9999 continue
1436
1437      maxit=iter
1438
1439 3000 format(i12,1x,'cggo scalar:',i6,1p5e13.4)
1440 3001 format(2i6,' Unconverged cggo scalar: rbnorm =',1p2e13.6)
1441
1442      return
1443      end
1444C-----------------------------------------------------------------------
1445      subroutine axhm1_ceed(pap,ap1,p1,h1,h2,ceed,ceed_op,
1446     $  vec_ap1,vec_p1)
1447C     Vector conjugate gradient matvec for solution of uncoupled
1448C     Helmholtz equations
1449C     Input: pap,p1,h1,h2,bpname,ceed,ceed_op,vec_ap1,vec_p1
1450C     Output: ap1
1451      include 'SIZE'
1452      include 'TOTAL'
1453      include 'ceedf.h'
1454
1455      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2))
1456      real*8       gf(lg,lx,lelt)             ! Equivalence new gf() data
1457      equivalence (gf,g1m1)                   ! layout to g1m1...g6m1
1458
1459      real*8   pap(3)
1460      real*8   ap1(lx,lelt)
1461      real*8    p1(lx,lelt)
1462      real*8    h1(lx,lelt),h2(lx,lelt)
1463      integer ceed,ceed_op,vec_ap1,vec_p1,err
1464      integer i,e
1465      integer*8 offset
1466
1467      offset=0
1468      call ceedvectorsetarray(vec_p1,ceed_mem_host,ceed_use_pointer,
1469     $  p1,offset,err)
1470      call ceedvectorsetarray(vec_ap1,ceed_mem_host,ceed_use_pointer,
1471     $  ap1,offset,err)
1472
1473      call ceedoperatorapply(ceed_op,vec_p1,vec_ap1,
1474     $  ceed_request_immediate,err)
1475
1476      call ceedvectorsyncarray(vec_ap1,ceed_mem_host,err)
1477
1478      pap(1)=0.
1479
1480      do e=1,nelt
1481         do i=1,lx
1482           pap(1)=pap(1)+ap1(i,e)*p1(i,e)
1483         enddo
1484      enddo
1485
1486      return
1487      end
1488C-----------------------------------------------------------------------
1489      subroutine ax_e_bp1(w,u,g,h1,h2,b,ju,us,ut)
1490C     Local matrix-vector for solution of BP3 (stiffness matrix)
1491C     Input: u,g,h1,h2,b,ju,us,ut   Output: w
1492      include 'SIZE'
1493      include 'TOTAL'
1494
1495      parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2))
1496      real*8 w(lxyz),u(lxyz),g(lg,lxyz),h1(lxyz),h2(lxyz),b(lxyz)
1497      real*8 ju(lxyz),us(lxyz),ut(lxyz)
1498
1499      nxq = nx1+1 ! Number of quadrature points
1500
1501      lxyzq = nxq**ldim
1502
1503      call intp_rstd (ju,u,lx1,nxq,if3d,0) ! 0 --> Fwd interpolation
1504      do i=1,lxyzq
1505         ju(i)=ju(i)*h2(i) !! h2 must be on the fine grid, w/ quad wts
1506      enddo
1507      call intp_rstd (w,ju,lx1,nxq,if3d,1) ! 1 --> ju-->u
1508
1509      return
1510      end
1511C-----------------------------------------------------------------------
1512      subroutine axhm1_bp1(pap,ap1,p1,h1,h2)
1513C     Vector conjugate gradient matvec for solution of BP1 (mass matrix)
1514C     Input: pap,p1,h1,h2           Output: ap1
1515      include 'SIZE'
1516      include 'TOTAL'
1517
1518      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2))
1519      real*8         gf(lg,lx,lelt)             ! Equivalence new gf() data
1520      equivalence (gf,g1m1)                     ! layout to g1m1...g6m1
1521
1522      real*8 pap(3)
1523      real*8 ap1(lx,lelt)
1524      real*8  p1(lx,lelt)
1525      real*8  h1(lx,lelt),h2(lx,lelt)
1526
1527      real*8 ur(lx),us(lx),ut(lx)
1528      common /ctmp1/ ur,us,ut
1529
1530      integer e
1531
1532      pap(1)=0.
1533
1534      k=1
1535      nxq = nx1+1
1536
1537      do e=1,nelt
1538
1539         call ax_e_bp1(ap1(1,e),p1(1,e),gf(1,1,e),h1(1,e),h2(k,1)
1540     $                                          ,bm1(1,1,1,e),ur,us,ut)
1541         do i=1,lx
1542           pap(1)=pap(1)+ap1(i,e)*p1(i,e)
1543         enddo
1544         k=k+nxq*nxq*nxq
1545
1546      enddo
1547
1548      return
1549      end
1550C-----------------------------------------------------------------------
1551      subroutine ax_e_bp3(w,u,g,ur,us,ut,wk)
1552C     Local matrix-vector for solution of BP3 (stiffness matrix)
1553C     Input: u,g,ur,us,ut,wk        Output: w
1554      include 'SIZE'
1555      include 'TOTAL'
1556
1557      parameter (lzq=lx1+1,lxyz=lx1*lx1*lx1,lxyzq=lzq*lzq*lzq)
1558
1559      common /ctmp0/ tmp(lxyzq)
1560      common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq)
1561
1562      real*8 ur(lxyzq),us(lxyzq),ut(lxyzq),wk(lxyzq)
1563      real*8 w(lxyz),u(lxyz),g(2*ldim,lxyzq)
1564
1565      n = lzq-1
1566
1567      call intp_rstd  (wk,u,lx1,lzq,if3d,0) ! 0 --> Fwd interpolation
1568      call loc_grad3  (ur,us,ut,wk,n,dxmq,dxtmq)
1569
1570      do i=1,lxyzq
1571         wr = g(1,i)*ur(i) + g(2,i)*us(i) + g(3,i)*ut(i)
1572         ws = g(2,i)*ur(i) + g(4,i)*us(i) + g(5,i)*ut(i)
1573         wt = g(3,i)*ur(i) + g(5,i)*us(i) + g(6,i)*ut(i)
1574         ur(i) = wr
1575         us(i) = ws
1576         ut(i) = wt
1577      enddo
1578
1579      call loc_grad3t (wk,ur,us,ut,n,dxmq,dxtmq,tmp)
1580      call intp_rstd  (w,wk,lx1,lzq,if3d,1) ! 1 --> ju-->u
1581
1582      return
1583      end
1584C-----------------------------------------------------------------------
1585      subroutine axhm1_bp3(pap,ap1,p1,h1,h2)
1586C     Vector conjugate gradient matvec for solution of BP3 (stiffness matrix)
1587C     Input: pap,p1,h1,h2           Output: ap1
1588      include 'SIZE'
1589      include 'TOTAL'
1590
1591      parameter (lzq=lx1+1)
1592      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
1593      common /bpgfactors/ gf(lg,lq,lelt),bmq(lq,lelt),w3mq(lq)
1594
1595      real*8 pap(3)
1596      real*8 ap1(lx,lelt)
1597      real*8  p1(lx,lelt)
1598      real*8  h1(lx,lelt),h2(lx,lelt)
1599
1600      common /ctmp1/ ur,us,ut,wk
1601      real*8 ur(lq),us(lq),ut(lq),wk(lq)
1602
1603      integer e
1604
1605      pap(1)=0.
1606
1607      do e=1,nelt
1608
1609         call ax_e_bp3(ap1(1,e),p1(1,e),gf(1,1,e),ur,us,ut,wk)
1610         do i=1,lx
1611           pap(1)=pap(1)+p1(i,e)*ap1(i,e)
1612         enddo
1613
1614      enddo
1615
1616      return
1617      end
1618C-----------------------------------------------------------------------
1619      subroutine axhm1(pap,ap1,p1,h1,h2,bpname)
1620C     Vector conjugate gradient matvec for solution of uncoupled
1621C     Helmholtz equations
1622C     Input: pap,p1,h1,h2,bpname    Output: ap1
1623      include 'SIZE'
1624      include 'TOTAL'
1625
1626      parameter (lx=lx1*ly1*lz1)
1627
1628      real*8 pap(3),ap1(lx,lelt),p1(lx,lelt)
1629      real*8 h1(lx,lelt),h2(lx,lelt)
1630
1631      character*3 bpname
1632
1633      if (bpname.eq.'bp1') then
1634         call axhm1_bp1(pap,ap1,p1,h1,h2)
1635
1636      elseif (bpname.eq.'bp3') then
1637         call axhm1_bp3(pap,ap1,p1,h1,h2)
1638
1639      else
1640         write(6,*) bpname,' axhm1 bpname error'
1641         stop
1642
1643      endif
1644
1645      return
1646      end
1647C-----------------------------------------------------------------------
1648      subroutine get_bp(bp)
1649C     Get BP to run
1650C     Input:                        Output: bp
1651      integer i,bp
1652      character*64 bpval
1653
1654      bp=0
1655      if(iargc().ge.1) then
1656        call getarg(1,bpval)
1657      endif
1658      if(bpval.eq."bp1") then
1659        bp=1
1660      elseif(bpval.eq."bp3") then
1661        bp=3
1662      endif
1663
1664      return
1665      end
1666C-----------------------------------------------------------------------
1667      subroutine get_spec(spec)
1668C     Get CEED backend specification
1669C     Input:                        Output: spec
1670      integer i
1671      character*64 spec
1672
1673      spec = '/cpu/self'
1674      if(iargc().ge.2) then
1675        call getarg(2,spec)
1676      endif
1677
1678      return
1679      end
1680C-----------------------------------------------------------------------
1681      subroutine get_test(test)
1682C     Get test mode flag
1683C     Input:                        Output: test
1684      integer i,test
1685      character*64 testval
1686
1687      test=0
1688      if(iargc().ge.3) then
1689        call getarg(3,testval)
1690      endif
1691      if(testval.eq."test") then
1692        test=1
1693      endif
1694
1695      return
1696      end
1697C-----------------------------------------------------------------------
1698
1699