xref: /libCEED/examples/nek/bps/bps.usr (revision ee07ded2da68b589c1255126ba09102d164f666f)
1C Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2C the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3C reserved. See files LICENSE and NOTICE for details.
4C
5C This file is part of CEED, a collection of benchmarks, miniapps, software
6C libraries and APIs for efficient high-order finite element and spectral
7C element discretizations for exascale applications. For more information and
8C source code availability see http://github.com/ceed.
9C
10C The CEED research is supported by the Exascale Computing Project (17-SC-20-SC)
11C a collaborative effort of two U.S. Department of Energy organizations (Office
12C of Science and the National Nuclear Security Administration) responsible for
13C the planning and preparation of a capable exascale ecosystem, including
14C software, applications, hardware, advanced system engineering and early
15C testbed platforms, in support of the nation's exascale computing imperative.
16
17C> @file
18C> Mass and diffusion operators examples using Nek5000
19C TESTARGS -c {ceed_resource} -e bp1 -n 1 -b 4 -test
20
21C-----------------------------------------------------------------------
22      subroutine masssetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
23     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
24     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
25C     Set up mass operator
26C     Input: u1,u2,u3,q             Output: v1,v2,ierr
27      integer q,ierr
28      real*8 ctx(1)
29      real*8 u1(3*q)
30      real*8 u2(9*q)
31      real*8 u3(q)
32      real*8 v1(q)
33      real*8 v2(q)
34      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
35      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
36      real*8 jacmq
37
38C     Quadrature Point Loop
39      do i=1,q
40        a11=u2(i+q*0)
41        a12=u2(i+q*3)
42        a13=u2(i+q*6)
43
44        a21=u2(i+q*1)
45        a22=u2(i+q*4)
46        a23=u2(i+q*7)
47
48        a31=u2(i+q*2)
49        a32=u2(i+q*5)
50        a33=u2(i+q*8)
51
52        g11 = (a22*a33-a23*a32)
53        g12 = (a13*a32-a33*a12)
54        g13 = (a12*a23-a22*a13)
55
56        g21 = (a23*a31-a21*a33)
57        g22 = (a11*a33-a31*a13)
58        g23 = (a13*a21-a23*a11)
59
60        g31 = (a21*a32-a22*a31)
61        g32 = (a12*a31-a32*a11)
62        g33 = (a11*a22-a21*a12)
63
64        jacmq = a11*g11+a21*g12+a31*g13
65
66C       Rho
67        v1(i)=u3(i)*jacmq
68
69C       RHS
70        v2(i)=u3(i)*jacmq
71     $             *dsqrt(u1(i+q*0)*u1(i+q*0)
72     $                   +u1(i+q*1)*u1(i+q*1)
73     $                   +u1(i+q*2)*u1(i+q*2))
74      enddo
75
76      ierr=0
77      end
78C-----------------------------------------------------------------------
79      subroutine massf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
80     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
81     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
82C     Apply mass operator
83C     Input: u1,u2,q                Output: v1,ierr
84      integer q,ierr
85      real*8 ctx(1)
86      real*8 u1(q)
87      real*8 u2(q)
88      real*8 v1(q)
89
90C     Quadrature Point Loop
91      do i=1,q
92        v1(i)=u2(i)*u1(i)
93      enddo
94
95      ierr=0
96      end
97C-----------------------------------------------------------------------
98      subroutine diffsetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
99     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
100     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
101C     Set up diffusion operator
102C     Input: u1,u2,u3,q             Output: v1,v2,ierr
103      integer q,ierr
104      real*8 ctx(1)
105      real*8 u1(3*q)
106      real*8 u2(9*q)
107      real*8 u3(q)
108      real*8 v1(6*q)
109      real*8 v2(q)
110      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
111      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
112      real*8 jacmq,scl
113      real*8 c(3),k(3)
114
115C     Quadrature Point Loop
116      do i=1,q
117        pi = 3.14159265358979323846
118
119        c(1)=0.
120        c(2)=1.
121        c(3)=2.
122        k(1)=1.
123        k(2)=2.
124        k(3)=3.
125
126        a11=u2(i+q*0)
127        a12=u2(i+q*3)
128        a13=u2(i+q*6)
129
130        a21=u2(i+q*1)
131        a22=u2(i+q*4)
132        a23=u2(i+q*7)
133
134        a31=u2(i+q*2)
135        a32=u2(i+q*5)
136        a33=u2(i+q*8)
137
138        g11 = (a22*a33-a23*a32)
139        g12 = (a13*a32-a33*a12)
140        g13 = (a12*a23-a22*a13)
141
142        g21 = (a23*a31-a21*a33)
143        g22 = (a11*a33-a31*a13)
144        g23 = (a13*a21-a23*a11)
145
146        g31 = (a21*a32-a22*a31)
147        g32 = (a12*a31-a32*a11)
148        g33 = (a11*a22-a21*a12)
149
150        jacmq = a11*g11+a21*g12+a31*g13
151
152        scl = u3(i)/jacmq
153
154C       Geometric factors
155        v1(i+0*q) = scl*(g11*g11+g12*g12+g13*g13) ! Grr
156        v1(i+1*q) = scl*(g11*g21+g12*g22+g13*g23) ! Grs
157        v1(i+2*q) = scl*(g11*g31+g12*g32+g13*g33) ! Grt
158        v1(i+3*q) = scl*(g21*g21+g22*g22+g23*g23) ! Gss
159        v1(i+4*q) = scl*(g21*g31+g22*g32+g23*g33) ! Gst
160        v1(i+5*q) = scl*(g31*g31+g32*g32+g33*g33) ! Gtt
161
162C       RHS
163        v2(i) = u3(i)*jacmq*pi*pi
164     $            *dsin(pi*(c(1)+k(1)*u1(i+0*q)))
165     $            *dsin(pi*(c(2)+k(2)*u1(i+1*q)))
166     $            *dsin(pi*(c(3)+k(3)*u1(i+2*q)))
167     $            *(k(1)*k(1)+k(2)*k(2)+k(3)*k(3))
168
169      enddo
170
171      ierr=0
172      end
173C-----------------------------------------------------------------------
174      subroutine diffusionf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
175     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
176     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
177C     Apply diffusion operator
178C     Input: u1,u2,q                Output: v1,ierr
179      integer q,ierr
180      real*8 ctx(1)
181      real*8 u1(3*q)
182      real*8 u2(6*q)
183      real*8 v1(3*q)
184
185C     Quadrature Point Loop
186      do i=1,q
187        v1(i+0*q)=
188     $     u2(i+0*q)*u1(i)+u2(i+1*q)*u1(i+q)+u2(i+2*q)*u1(i+2*q)
189        v1(i+1*q)=
190     $     u2(i+1*q)*u1(i)+u2(i+3*q)*u1(i+q)+u2(i+4*q)*u1(i+2*q)
191        v1(i+2*q)=
192     $     u2(i+2*q)*u1(i)+u2(i+4*q)*u1(i+q)+u2(i+5*q)*u1(i+2*q)
193      enddo
194
195      ierr=0
196      end
197C-----------------------------------------------------------------------
198      subroutine set_h2_as_rhoJac_GL(h2,bmq,nxq)
199C     Set h2 as rhoJac
200C     Input: bmq,nxq                Output: h2
201      include 'SIZE'
202      real*8 h2(1),bmq(1)
203
204      common /ctmp77/ wd(lxd),zd(lxd)
205      integer e,i,L
206
207      call zwgl(zd,wd,nxq)  ! nxq = number of points
208
209      q = 1.0               ! Later, this can be a function of position...
210
211      L = 0
212      do e=1,lelt
213      do i=1,nxq**ldim
214         L=L+1
215         h2(L) = q*bmq(L)
216      enddo
217      enddo
218
219      return
220      end
221C-----------------------------------------------------------------------
222      subroutine dist_fld_h1(e)
223C     Set distance initial condition for BP1
224C     Input:                        Output: e
225      include 'SIZE'
226      include 'TOTAL'
227      real*8 x,y,z
228      real*8 e(1)
229
230      n=lx1*ly1*lz1*nelt
231
232      do i=1,n
233        x=xm1(i,1,1,1)
234        y=ym1(i,1,1,1)
235        z=zm1(i,1,1,1)
236
237        e(i) = dsqrt(x*x+y*y+z*z)
238      enddo
239
240      call dsavg(e)  ! This is requisite for random fields
241
242      return
243      end
244C-----------------------------------------------------------------------
245      subroutine sin_fld_h1(e)
246C     Set sine initial condition for BP3
247C     Input:                        Output: e
248      include 'SIZE'
249      include 'TOTAL'
250      real*8 x,y,z
251      real*8 e(1)
252      real*8 c(3),k(3)
253
254      n=lx1*ly1*lz1*nelt
255      pi = 3.14159265358979323846
256
257      c(1)=0.
258      c(2)=1.
259      c(3)=2.
260      k(1)=1.
261      k(2)=2.
262      k(3)=3.
263
264      do i=1,n
265        x=xm1(i,1,1,1)
266        y=ym1(i,1,1,1)
267        z=zm1(i,1,1,1)
268
269        e(i) = dsin(pi*(c(1)+k(1)*x))
270     &        *dsin(pi*(c(2)+k(2)*y))
271     &        *dsin(pi*(c(3)+k(3)*z))
272
273      enddo
274
275      call dsavg(e)  ! This is requisite for random fields
276
277      return
278      end
279C-----------------------------------------------------------------------
280      subroutine uservp(ix,iy,iz,eg) ! set variable properties
281      include 'SIZE'
282      include 'TOTAL'
283      include 'NEKUSE'
284      integer e,f,eg
285C     e = gllel(eg)
286
287      udiff  = 0.0
288      utrans = 0.0
289
290      return
291      end
292C-----------------------------------------------------------------------
293      subroutine userf(ix,iy,iz,eg) ! set acceleration term
294C
295C     Note: this is an acceleration term, NOT a force!
296C     Thus, ffx will subsequently be multiplied by rho(x,t).
297C
298      include 'SIZE'
299      include 'TOTAL'
300      include 'NEKUSE'
301      integer e,f,eg
302C     e = gllel(eg)
303
304      ffx = 0.0
305      ffy = 0.0
306      ffz = 0.0
307
308      return
309      end
310C-----------------------------------------------------------------------
311      subroutine userq(i,j,k,eg) ! set source term
312      include 'SIZE'
313      include 'TOTAL'
314      include 'NEKUSE'
315      integer e,f,eg
316      e = gllel(eg)
317
318      qvol   = 0
319
320      return
321      end
322C-----------------------------------------------------------------------
323      subroutine userbc(ix,iy,iz,f,eg) ! set up boundary conditions
324C     NOTE ::: This subroutine MAY NOT be called by every process
325      include 'SIZE'
326      include 'TOTAL'
327      include 'NEKUSE'
328      integer e,f,eg
329
330      ux   = 0.0
331      uy   = 0.0
332      uz   = 0.0
333      temp = 0.0
334
335      return
336      end
337C-----------------------------------------------------------------------
338      subroutine useric(ix,iy,iz,eg) ! set up initial conditions
339      include 'SIZE'
340      include 'TOTAL'
341      include 'NEKUSE'
342      integer e,f,eg
343
344      ux   = 0.0
345      uy   = 0.0
346      uz   = 0.0
347      temp = 0.0
348
349      return
350      end
351C-----------------------------------------------------------------------
352      subroutine usrdat   ! This routine to modify element vertices
353      include 'SIZE'
354      include 'TOTAL'
355
356      return
357      end
358C-----------------------------------------------------------------------
359      subroutine usrdat2  ! This routine to modify mesh coordinates
360      include 'SIZE'
361      include 'TOTAL'
362
363      x0 = 0
364      x1 = 1
365      call rescale_x(xm1,x0,x1)
366      call rescale_x(ym1,x0,x1)
367      call rescale_x(zm1,x0,x1)
368
369      param(59)=1  ! Force Nek to use the "deformed element" formulation
370
371      return
372      end
373C-----------------------------------------------------------------------
374      subroutine usrdat3
375      include 'SIZE'
376      include 'TOTAL'
377
378      return
379      end
380C-----------------------------------------------------------------------
381      subroutine xmask1   (r1,h2,nel)
382C     Apply zero Dirichlet boundary conditions
383C     Input: h2,nel                 Output: r1
384      include 'SIZE'
385      include 'TOTAL'
386      real*8 r1(1),h2(1)
387
388      n=nx1*ny1*nz1*nel
389      do i=1,n
390         r1(i)=r1(i)*h2(i)
391      enddo
392
393      return
394      end
395C-----------------------------------------------------------------------
396      function glrdif(x,y,n)
397C     Compute Linfty norm of (x-y)
398C     Input: x,y                    Output: n
399      real*8 x(n),y(n)
400
401      dmx=0
402      xmx=0
403      ymx=0
404
405      do i=1,n
406         diff=abs(x(i)-y(i))
407         dmx =max(dmx,diff)
408         xmx =max(xmx,x(i))
409         ymx =max(ymx,y(i))
410      enddo
411
412      xmx = max(xmx,ymx)
413      dmx = glmax(dmx,1) ! max across processors
414      xmx = glmax(xmx,1)
415
416      if (xmx.gt.0) then
417         glrdif = dmx/xmx
418      else
419         glrdif = -dmx   ! Negative indicates something strange
420      endif
421
422      return
423      end
424C-----------------------------------------------------------------------
425      subroutine loc_grad3(ur,us,ut,u,N,D,Dt)
426C     3D transpose of local gradient
427C     Input: u,N,D,Dt               Output: ur,us,ut
428      real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N)
429      real*8 u (0:N,0:N,0:N)
430      real*8 D (0:N,0:N),Dt(0:N,0:N)
431
432      m1 = N+1
433      m2 = m1*m1
434
435      call mxm(D ,m1,u(0,0,0),m1,ur,m2)
436      do k=0,N
437         call mxm(u(0,0,k),m1,Dt,m1,us(0,0,k),m1)
438      enddo
439      call mxm(u(0,0,0),m2,Dt,m1,ut,m1)
440
441      return
442      end
443c-----------------------------------------------------------------------
444      subroutine loc_grad3t(u,ur,us,ut,N,D,Dt,w)
445C     3D transpose of local gradient
446C     Input: ur,us,ut,N,D,Dt        Output: u
447       real*8 u (0:N,0:N,0:N)
448       real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N)
449       real*8 D (0:N,0:N),Dt(0:N,0:N)
450       real*8 w (0:N,0:N,0:N)
451
452       m1 = N+1
453       m2 = m1*m1
454       m3 = m1*m1*m1
455
456       call mxm(Dt,m1,ur,m1,u(0,0,0),m2)
457       do k=0,N
458          call mxm(us(0,0,k),m1,D ,m1,w(0,0,k),m1)
459       enddo
460       call add2(u(0,0,0),w,m3)
461       call mxm(ut,m2,D ,m1,w,m1)
462       call add2(u(0,0,0),w,m3)
463
464      return
465      end
466C-----------------------------------------------------------------------
467      subroutine geodatq(gf,bmq,w3mq,nxq)
468C     Routine to generate elemental geometric matrices on mesh 1
469C     (Gauss-Legendre Lobatto mesh).
470      include 'SIZE'
471      include 'TOTAL'
472
473      parameter (lg=3+3*(ldim-2),lzq=lx1+1,lxyd=lzq**ldim)
474
475      real*8 gf(lg,nxq**ldim,lelt),bmq(nxq**ldim,lelt),w3mq(nxq,nxq,nxq)
476
477      common /ctmp1/ xr(lxyd),xs(lxyd),xt(lxyd)
478      common /sxrns/ yr(lxyd),ys(lxyd),yt(lxyd)
479     $ ,             zr(lxyd),zs(lxyd),zt(lxyd)
480
481      common /ctmp77/ wd(lxd),zd(lxd)
482      common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq)
483
484      integer e
485      real*8 tmp(lxyd)
486      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
487      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
488      real*8 jacmq
489
490      if (nxq.gt.lzq) call exitti('ABORT: recompile with lzq=$',nxq)
491
492      call zwgl    (zd,wd,lzq)                            ! nxq = number of points
493      call gen_dgl (dxmq,dxtmq,lzq,lzq,tmp)
494
495      do k=1,nxq
496      do j=1,nxq
497      do i=1,nxq
498         w3mq(i,j,k) = wd(i)*wd(j)*wd(k)
499      enddo
500      enddo
501      enddo
502
503      nxyzq = nxq**ldim
504      nxqm1 = lzq-1
505
506      do e=1,nelt
507         call intp_rstd (tmp,xm1(1,1,1,e),lx1,lzq,if3d,0) ! 0-->Fwd interpolation
508         call loc_grad3 (xr,xs,xt,tmp,nxqm1,dxmq,dxtmq)
509
510         call intp_rstd (tmp,ym1(1,1,1,e),lx1,lzq,if3d,0)
511         call loc_grad3 (yr,ys,yt,tmp,nxqm1,dxmq,dxtmq)
512
513         call intp_rstd (tmp,zm1(1,1,1,e),lx1,lzq,if3d,0)
514         call loc_grad3 (zr,zs,zt,tmp,nxqm1,dxmq,dxtmq)
515
516         do i=1,nxyzq
517            a11 = xr(i)
518            a12 = xs(i)
519            a13 = xt(i)
520
521            a21 = yr(i)
522            a22 = ys(i)
523            a23 = yt(i)
524
525            a31 = zr(i)
526            a32 = zs(i)
527            a33 = zt(i)
528
529            g11 = (a22*a33-a23*a32)
530            g12 = (a13*a32-a33*a12)
531            g13 = (a12*a23-a22*a13)
532
533            g21 = (a23*a31-a21*a33)
534            g22 = (a11*a33-a31*a13)
535            g23 = (a13*a21-a23*a11)
536
537            g31 = (a21*a32-a22*a31)
538            g32 = (a12*a31-a32*a11)
539            g33 = (a11*a22-a21*a12)
540
541            jacmq = a11*g11+a21*g12+a31*g13
542
543            bmq(i,e)  = w3mq(i,1,1)*jacmq
544            scale     = w3mq(i,1,1)/jacmq
545
546            gf(1,i,e) = scale*(g11*g11+g12*g12+g13*g13) ! Grr
547            gf(2,i,e) = scale*(g11*g21+g12*g22+g13*g23) ! Grs
548            gf(3,i,e) = scale*(g11*g31+g12*g32+g13*g33) ! Grt
549            gf(4,i,e) = scale*(g21*g21+g22*g22+g23*g23) ! Gss
550            gf(5,i,e) = scale*(g21*g31+g22*g32+g23*g33) ! Gst
551            gf(6,i,e) = scale*(g31*g31+g32*g32+g33*g33) ! Gtt
552
553         enddo
554      enddo
555
556      return
557      end
558C-----------------------------------------------------------------------
559      subroutine setprecn_bp1 (d,h1,h2)
560C     Generate diagonal preconditioner for Helmholtz operator
561C     Input: h1,h2                  Output: d
562      include 'SIZE'
563      include 'TOTAL'
564
565      parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2))
566
567      real*8    d(lx1,ly1,lz1,lelt),h1(lxyz,lelt),h2(lxyz,lelt)
568      integer e
569
570      real*8         gf(lg,lx1,ly1,lz1,lelt) ! Equivalence new gf() data
571      equivalence (gf,g1m1)                  ! layout to g1m1...g6m1
572
573      real*8 ysm1(ly1)
574
575      nel   = nelfld(ifield)
576      n     = nel*lx1*ly1*lz1
577      nxyz  = lx1*ly1*lz1
578
579      call copy    (d,bm1,n)   ! Mass matrix preconditioning full mass matrix
580      call dssum   (d,nx1,ny1,nz1)
581      call invcol1 (d,n)
582      return
583
584      call dsset(lx1,ly1,lz1)
585
586      do 1000 e=1,nel
587
588        call rzero(d(1,1,1,e),nxyz)
589
590        do 320 iz=1,lz1
591         do 320 iy=1,ly1
592         do 320 ix=1,lx1
593         do 320 iq=1,lx1
594           d(ix,iy,iz,e) = d(ix,iy,iz,e)
595     $                   + gf(1,iq,iy,iz,e) * dxm1(iq,ix)**2
596     $                   + gf(2,ix,iq,iz,e) * dxm1(iq,iy)**2
597     $                   + gf(3,ix,iy,iq,e) * dxm1(iq,iz)**2
598  320    continue
599C
600C        Add cross terms if element is deformed.
601C
602         if (lxyz.gt.0) then
603
604           do i2=1,ly1,ly1-1
605           do i1=1,lx1,lx1-1
606              d(1,i1,i2,e) = d(1,i1,i2,e)
607     $            + gf(4,1,i1,i2,e) * dxtm1(1,1)*dytm1(i1,i1)
608     $            + gf(5,1,i1,i2,e) * dxtm1(1,1)*dztm1(i2,i2)
609              d(lx1,i1,i2,e) = d(lx1,i1,i2,e)
610     $            + gf(4,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dytm1(i1,i1)
611     $            + gf(5,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dztm1(i2,i2)
612              d(i1,1,i2,e) = d(i1,1,i2,e)
613     $            + gf(4,i1,1,i2,e) * dytm1(1,1)*dxtm1(i1,i1)
614     $            + gf(6,i1,1,i2,e) * dytm1(1,1)*dztm1(i2,i2)
615              d(i1,ly1,i2,e) = d(i1,ly1,i2,e)
616     $            + gf(4,i1,ly1,i2,e) * dytm1(ly1,ly1)*dxtm1(i1,i1)
617     $            + gf(6,i1,ly1,i2,e) * dytm1(ly1,ly1)*dztm1(i2,i2)
618              d(i1,i2,1,e) = d(i1,i2,1,e)
619     $            + gf(5,i1,i2,1,e) * dztm1(1,1)*dxtm1(i1,i1)
620     $            + gf(6,i1,i2,1,e) * dztm1(1,1)*dytm1(i2,i2)
621              d(i1,i2,lz1,e) = d(i1,i2,lz1,e)
622     $            + gf(5,i1,i2,lz1,e) * dztm1(lz1,lz1)*dxtm1(i1,i1)
623     $            + gf(6,i1,i2,lz1,e) * dztm1(lz1,lz1)*dytm1(i2,i2)
624
625           enddo
626           enddo
627         endif
628
629        do i=1,lxyz
630           d(i,1,1,e)=d(i,1,1,e)*h1(i,e)+h2(i,e)*bm1(i,1,1,e)
631        enddo
632
633 1000 continue ! element loop
634
635C     If axisymmetric, add a diagonal term in the radial direction (ISD=2)
636
637      if (ifaxis.and.(isd.eq.2)) then
638         do 1200 e=1,nel
639            if (ifrzer(e)) call mxm(ym1(1,1,1,e),lx1,datm1,ly1,ysm1,1)
640            k=0
641            do 1190 j=1,ly1
642            do 1190 i=1,lx1
643               k=k+1
644               if (ym1(i,j,1,e).ne.0.) then
645                  term1 = bm1(i,j,1,e)/ym1(i,j,1,e)**2
646                  if (ifrzer(e)) then
647                     term2 =  wxm1(i)*wam1(1)*dam1(1,j)
648     $                       *jacm1(i,1,1,e)/ysm1(i)
649                  else
650                     term2 = 0.
651                  endif
652                  d(i,j,1,e) = d(i,j,1,e) + h1(k,e)*(term1+term2)
653               endif
654 1190       continue
655 1200    continue
656
657      endif
658      call dssum   (d,nx1,ny1,nz1)
659      call invcol1 (d,n)
660
661      if (nio.eq.0) write(6,1) n,d(1,1,1,1),h1(1,1),h2(1,1),bm1(1,1,1,1)
662   1  format(i9,1p4e12.4,' diag prec')
663
664      return
665      end
666C-----------------------------------------------------------------------
667      subroutine setprecn_bp3 (d,h1,h2)
668C     Generate dummy diagonal preconditioner for Helmholtz operator
669C     Input: h1,h2                  Output: d
670      include 'SIZE'
671      include 'TOTAL'
672
673      parameter (n=lx1*ly1*lz1*lelt)
674      real*8 d(n),h1(n),h2(n)
675
676      call rone (d,n)
677
678      return
679      end
680C-----------------------------------------------------------------------
681      subroutine userchk
682      include 'SIZE'
683      include 'TOTAL'
684
685      integer bp
686
687      call get_bp(bp)
688
689      if (bp==1) then
690        if (istep.gt.0) call bp1
691      elseif (bp==3) then
692        if (istep.gt.0) call bp3
693      else
694        write(6,*) "INVALID BP SPECIFICED"
695      endif
696
697      return
698      end
699C-----------------------------------------------------------------------
700      subroutine bp1
701C     Solution to BP1 using libCEED
702      include 'SIZE'
703      include 'TOTAL'
704      include 'CTIMER'  ! ifsync
705      include 'FDMH1'
706      include 'ceedf.h'
707
708      parameter (lzq=lx1+1)
709      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
710      common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq)
711
712      parameter (lt=lx1*ly1*lz1*lelt)
713      parameter (ld=lxd*lyd*lzd*lelt)
714      common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt)
715      common /vcrny/ e1(lt)
716      common /vcrvh/ h1(lt),h2(lx*lelt),pap(3)
717      real*8 coords(ldim*lx*lelt)
718
719      logical ifh3
720      integer*8 nnode
721      integer ceed,err,test
722      character*64 spec
723
724      integer p,q,ncompx,ncompu,enode,lnode
725      integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs
726      integer erstrctu,erstrctx,erstrctw
727      integer basisu,basisx
728      integer qf_mass,qf_setup
729      integer op_mass,op_setup
730      real*8  x,y,z
731      integer*8 offset
732
733      external massf,masssetupf
734
735      ifield = 1
736      nxq    = nx1+1
737      n = nx1*ny1*nz1*nelt
738
739      ifsync = .false.
740
741C     Set up coordinates
742      ii=0
743      do j=0,nelt-1
744      do i=1,lx
745        ii=ii+1
746        x = xm1(ii,1,1,1)
747        y = ym1(ii,1,1,1)
748        z = zm1(ii,1,1,1)
749        coords(i+0*lx+3*j*lx)=x
750        coords(i+1*lx+3*j*lx)=y
751        coords(i+2*lx+3*j*lx)=z
752      enddo
753      enddo
754
755C     Init ceed library
756      call get_spec(spec)
757      call ceedinit(trim(spec)//char(0),ceed,err)
758
759      call get_test(test)
760
761C     Set up Nek geometry data
762      call geodatq       (gf,bmq,w3mq,nxq)         ! Set up gf() arrays
763      call set_h2_as_rhoJac_GL(h2,bmq,nxq)
764
765C     Set up true soln
766      call dist_fld_h1   (e1)
767      call copy          (h1,e1,n)                 ! Save exact soln in h1
768
769C     Set up solver parameters
770      tol       = 1e-10
771      param(22) = tol
772      maxit     = 100
773
774      call nekgsync()
775
776C     Create ceed basis for mesh and computation
777      p=nx1
778      q=p+1
779      ncompu=1
780      ncompx=ldim
781      call ceedbasiscreatetensorh1lagrange(ceed,ndim,ndim,p,q,
782     $  ceed_gauss,basisx,err)
783      call ceedbasiscreatetensorh1lagrange(ceed,ndim,ncompu,p,q,
784     $  ceed_gauss,basisu,err)
785
786C     Create ceed element restrictions for mesh and computation
787      enode=p**ldim
788      lnode=enode*nelt*ncompu
789      call ceedelemrestrictioncreateidentity(ceed,nelt,enode,lnode,
790     $  ldim,erstrctx,err)
791      call ceedelemrestrictioncreateidentity(ceed,nelt,enode,lnode,
792     $  ncompu,erstrctu,err)
793      call ceedelemrestrictioncreateidentity(ceed,nelt,q**ldim,
794     $  nelt*q**ldim,1,erstrctw,err)
795
796C     Create ceed vectors
797      call ceedvectorcreate(ceed,lnode,vec_p1,err)
798      call ceedvectorcreate(ceed,lnode,vec_ap1,err)
799      call ceedvectorcreate(ceed,lnode,vec_rhs,err)
800      call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err)
801      call ceedvectorcreate(ceed,nelt*q**ldim,vec_qdata,err)
802
803      offset=0
804      call ceedvectorsetarray(vec_coords,ceed_mem_host,
805     $  ceed_use_pointer,coords,offset,err)
806
807C     Create ceed qfunctions for masssetupf and massf
808      call ceedqfunctioncreateinterior(ceed,1,masssetupf,
809     $  __FILE__
810     $  //':masssetupf',qf_setup,err)
811      call ceedqfunctionaddinput(qf_setup,'x',ncompx,
812     $  ceed_eval_interp,err)
813      call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim,
814     $  ceed_eval_grad,err)
815      call ceedqfunctionaddinput(qf_setup,'weight',ncompu,
816     $  ceed_eval_weight,err)
817      call ceedqfunctionaddoutput(qf_setup,'rho',ncompu,
818     $  ceed_eval_none,err)
819      call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu,
820     $  ceed_eval_interp,err)
821
822      call ceedqfunctioncreateinterior(ceed,1,massf,
823     $  __FILE__
824     $  //':massf',qf_mass,err)
825      call ceedqfunctionaddinput(qf_mass,'u',ncompu,
826     $  ceed_eval_interp,err)
827      call ceedqfunctionaddinput(qf_mass,'rho',ncompu,
828     $  ceed_eval_none,err)
829      call ceedqfunctionaddoutput(qf_mass,'v',ncompu,
830     $  ceed_eval_interp,err)
831
832C     Create ceed operators
833      call ceedoperatorcreate(ceed,qf_setup,
834     $  ceed_null,ceed_null,op_setup,err)
835      call ceedoperatorsetfield(op_setup,'x',erstrctx,
836     $  ceed_notranspose,basisx,ceed_vector_active,err)
837      call ceedoperatorsetfield(op_setup,'dx',erstrctx,
838     $  ceed_notranspose,basisx,ceed_vector_active,err)
839      call ceedoperatorsetfield(op_setup,'weight',erstrctx,
840     $  ceed_notranspose,basisx,ceed_vector_none,err)
841      call ceedoperatorsetfield(op_setup,'rho',erstrctw,
842     $  ceed_notranspose,ceed_basis_collocated,
843     $  ceed_vector_active,err)
844      call ceedoperatorsetfield(op_setup,'rhs',erstrctu,
845     $  ceed_notranspose,basisu,vec_rhs,err)
846
847      call ceedoperatorcreate(ceed,qf_mass,
848     $  ceed_null,ceed_null,op_mass,err)
849      call ceedoperatorsetfield(op_mass,'u',erstrctu,
850     $  ceed_notranspose,basisu,ceed_vector_active,err)
851      call ceedoperatorsetfield(op_mass,'rho',erstrctw,
852     $  ceed_notranspose,ceed_basis_collocated,
853     $  vec_qdata,err)
854      call ceedoperatorsetfield(op_mass,'v',erstrctu,
855     $  ceed_notranspose,basisu,ceed_vector_active,err)
856
857C     Compute setup data
858      call ceedvectorsetarray(vec_rhs,ceed_mem_host,
859     $  ceed_use_pointer,r1,offset,err)
860      call ceedoperatorapply(op_setup,vec_coords,vec_qdata,
861     $  ceed_request_immediate,err)
862
863C     Set up true RHS
864      call dssum         (r1,nx1,ny1,nz1)          ! r1
865
866C     Set up algebraic RHS with libCEED
867      call ceedvectorsetarray(vec_p1,ceed_mem_host,
868     $  ceed_use_pointer,h1,offset,err)
869      call ceedvectorsetarray(vec_ap1,ceed_mem_host,
870     $  ceed_use_pointer,r2,offset,err)
871      call ceedoperatorapply(op_mass,vec_p1,vec_ap1,
872     $  ceed_request_immediate,err)                ! r2 = A_ceed*h1
873      call dssum         (r2,nx1,ny1,nz1)
874
875C     Set up algebraic RHS with Nek5000
876      call axhm1         (pap,r3,h1,h1,h2,'bp1')   ! r3 = A_nek5k*h1
877      call dssum         (r3,nx1,ny1,nz1)
878
879      call nekgsync()
880
881C     Solve true RHS
882      if (nid.eq.0) write (6,*) "libCEED true RHS"
883      tstart = dnekclock()
884      call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_mass,
885     $  vec_p1,vec_ap1,maxit,'bp1')
886      tstop  = dnekclock()
887
888C     Output
889      telaps = (tstop-tstart)
890      maxits = maxit
891      er1 = glrdif(u1,e1,n)
892      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
893
894      if (test.eq.1.and.nid.eq.0) then
895        if (maxit>=100) then
896          write(6,*) "UNCONVERGED CG"
897        endif
898        if (dabs(er1)>5e-3) then
899          write(6,*) "ERROR IS TOO LARGE"
900        endif
901      endif
902
903      nx     = nx1-1
904      nnode  = nelgt            ! nnodes
905      nnode  = nnode*(nx**ldim) ! nnodes
906      nppp   = nnode/np         ! nnodes/proc
907
908      dofps  = nnode/telaps     ! DOF/sec - scalar form
909      titers = telaps/maxits    ! time per iteration
910      tppp_s = titers/nppp      ! time per iteraton per local point
911
912      if (nid.eq.0) write(6,1) 'case scalar:'
913     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
914
915C     Solve libCEED algebraic RHS
916      if (nid.eq.0) write (6,*) "libCEED algebraic RHS"
917      maxit = 100
918      tstart = dnekclock()
919      call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_mass,
920     $  vec_p1,vec_ap1,maxit,'bp1')
921      tstop  = dnekclock()
922
923C     Output
924      telaps = (tstop-tstart)
925      maxits = maxit
926      er1 = glrdif(u1,e1,n)
927      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
928
929      if (test.eq.1.and.nid.eq.0) then
930        if (maxit>=100) then
931          write(6,*) "UNCONVERGED CG"
932        endif
933        if (dabs(er1)>1e-5) then
934          write(6,*) "ERROR IS TOO LARGE"
935        endif
936      endif
937
938      nx      = nx1-1
939      nnode   = nelgt            ! nnodes
940      nnode   = nnode*(nx**ldim) ! nnodes
941      nppp    = nnode/np         ! nnodes/proc
942
943      dofps   = nnode/telaps     ! DOF/sec - scalar form
944      titers  = telaps/maxits    ! time per iteration
945      tppp_s  = titers/nppp      ! time per iteraton per local point
946
947      if (nid.eq.0) write(6,1) 'case scalar:'
948     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
949
950C     Solve Nek5000 algebraic RHS
951      if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS"
952      maxit = 100
953      tstart = dnekclock()
954      call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_mass,
955     $  vec_p1,vec_ap1,maxit,'bp1')
956      tstop  = dnekclock()
957
958C     Output
959      telaps = (tstop-tstart)
960      maxits = maxit
961      er1 = glrdif(u1,e1,n)
962      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
963
964      if (test.eq.1.and.nid.eq.0) then
965        if (maxit>=100) then
966          write(6,*) "UNCONVERGED CG"
967        endif
968        if (dabs(er1)>1e-5) then
969          write(6,*) "ERROR IS TOO LARGE"
970        endif
971      endif
972
973      nx      = nx1-1
974      nnode   = nelgt            ! nnodes
975      nnode   = nnode*(nx**ldim) ! nnodes
976      nppp    = nnode/np         ! nnodes/proc
977
978      dofps   = nnode/telaps     ! DOF/sec - scalar form
979      titers  = telaps/maxits    ! time per iteration
980      tppp_s  = titers/nppp      ! time per iteraton per local point
981
982      if (nid.eq.0) write(6,1) 'case scalar:'
983     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
984
985    1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5)
986    3 format(i3,i9,e12.4,1x,a8,i9)
987
988C     Destroy ceed handles
989      call ceedvectordestroy(vec_p1,err)
990      call ceedvectordestroy(vec_ap1,err)
991      call ceedvectordestroy(vec_rhs,err)
992      call ceedvectordestroy(vec_qdata,err)
993      call ceedvectordestroy(vec_coords,err)
994      call ceedelemrestrictiondestroy(erstrctu,err)
995      call ceedelemrestrictiondestroy(erstrctx,err)
996      call ceedelemrestrictiondestroy(erstrctw,err)
997      call ceedbasisdestroy(basisu,err)
998      call ceedbasisdestroy(basisx,err)
999      call ceedqfunctiondestroy(qf_setup,err)
1000      call ceedqfunctiondestroy(qf_mass,err)
1001      call ceedoperatordestroy(op_setup,err)
1002      call ceedoperatordestroy(op_mass,err)
1003      call ceeddestroy(ceed,err)
1004
1005      return
1006      end
1007C-----------------------------------------------------------------------
1008      subroutine bp3
1009C     Solution to BP3 using libCEED
1010      include 'SIZE'
1011      include 'TOTAL'
1012      include 'CTIMER'  ! ifsync
1013      include 'FDMH1'
1014      include 'ceedf.h'
1015
1016      parameter (lzq=lx1+1)
1017      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
1018      common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq)
1019
1020      parameter (lt=lx1*ly1*lz1*lelt)
1021      parameter (ld=lxd*lyd*lzd*lelt)
1022      common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt)
1023      common /vcrny/ e1(lt)
1024      common /vcrvh/ h1(lt),h2(ld),pap(3)
1025      real*8 coords(ldim*lx*lelt)
1026
1027      logical ifh3
1028      integer*8 nnode
1029      integer ceed,err,test
1030      character*64 spec
1031
1032      integer p,q,ncompx,ncompu,enode,lnode
1033      integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs
1034      integer erstrctu,erstrctx,erstrctw
1035      integer basisu,basisx
1036      integer qf_diffusion,qf_setup
1037      integer op_diffusion,op_setup
1038      integer ii,i,ngeo
1039      real*8  x,y,z
1040      integer*8 offset
1041
1042      external diffusionf,diffsetupf
1043
1044      ifield = 1
1045      nxq    = nx1+1
1046      n = nx1*ny1*nz1*nelt
1047
1048      ifsync = .false.
1049
1050C     Set up coordinates and mask
1051      ii=0
1052      do j=0,nelt-1
1053      do i=1,lx
1054        ii=ii+1
1055        x = xm1(ii,1,1,1)
1056        y = ym1(ii,1,1,1)
1057        z = zm1(ii,1,1,1)
1058        coords(i+0*lx+3*j*lx)=x
1059        coords(i+1*lx+3*j*lx)=y
1060        coords(i+2*lx+3*j*lx)=z
1061        if ( x.eq.0.or.x.eq.1
1062     $   .or.y.eq.0.or.y.eq.1
1063     $   .or.z.eq.0.or.z.eq.1 ) then
1064          h2(ii)=0.
1065        else
1066          h2(ii)=1.
1067        endif
1068      enddo
1069      enddo
1070
1071C     Init ceed library
1072      call get_spec(spec)
1073      call ceedinit(trim(spec)//char(0),ceed,err)
1074
1075      call get_test(test)
1076
1077C     Set up Nek geometry data
1078      call geodatq       (gf,bmq,w3mq,nxq)         ! Set up gf() arrays
1079
1080C     Set up true soln
1081      call sin_fld_h1    (e1)
1082      call xmask1        (e1,h2,nelt)
1083      call copy          (h1,e1,n)                 ! Save exact soln in h1
1084
1085C     Set up solver parameters
1086      tol       = 1e-10
1087      param(22) = tol
1088      maxit     = 100
1089
1090      call nekgsync()
1091
1092C     Create ceed basis for mesh and computation
1093      p=nx1
1094      q=p+1
1095      ncompu=1
1096      ncompx=ldim
1097      call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompx,p,q,
1098     $  ceed_gauss,basisx,err)
1099      call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompu,p,q,
1100     $  ceed_gauss,basisu,err)
1101
1102C     Create ceed element restrictions for mesh and computation
1103      enode=p**ldim
1104      lnode=enode*nelt*ncompu
1105      ngeo=(ldim*(ldim+1))/2
1106      call ceedelemrestrictioncreateidentity(ceed,nelt,enode,lnode,
1107     $  ldim,erstrctx,err)
1108      call ceedelemrestrictioncreateidentity(ceed,nelt,enode,lnode,
1109     $  ncompu,erstrctu,err)
1110      call ceedelemrestrictioncreateidentity(ceed,nelt,q**ldim,
1111     $  nelt*q**ldim,ngeo,erstrctw,err)
1112
1113C     Create ceed vectors
1114      call ceedvectorcreate(ceed,lnode,vec_p1,err)
1115      call ceedvectorcreate(ceed,lnode,vec_ap1,err)
1116      call ceedvectorcreate(ceed,lnode,vec_rhs,err)
1117      call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err)
1118      call ceedvectorcreate(ceed,nelt*ngeo*q**ldim,vec_qdata,err)
1119
1120      offset=0
1121      call ceedvectorsetarray(vec_coords,ceed_mem_host,
1122     $  ceed_use_pointer,coords,offset,err)
1123
1124C     Create ceed qfunctions for diffsetupf and diffusionf
1125      call ceedqfunctioncreateinterior(ceed,1,diffsetupf,
1126     $  __FILE__
1127     $  //':diffsetupf'//char(0),qf_setup,err)
1128      call ceedqfunctionaddinput(qf_setup,'x',ncompx,
1129     $  ceed_eval_interp,err)
1130      call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim,
1131     $  ceed_eval_grad,err)
1132      call ceedqfunctionaddinput(qf_setup,'weight',ncompu,
1133     $  ceed_eval_weight,err)
1134      call ceedqfunctionaddoutput(qf_setup,'rho',ngeo,
1135     $  ceed_eval_none,err)
1136      call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu,
1137     $  ceed_eval_interp,err)
1138
1139      call ceedqfunctioncreateinterior(ceed,1,diffusionf,
1140     $  __FILE__
1141     $  //':diffusionf'//char(0),qf_diffusion,err)
1142      call ceedqfunctionaddinput(qf_diffusion,'u',ncompu*ldim,
1143     $  ceed_eval_grad,err)
1144      call ceedqfunctionaddinput(qf_diffusion,'rho',ngeo,
1145     $  ceed_eval_none,err)
1146      call ceedqfunctionaddoutput(qf_diffusion,'v',ncompu*ldim,
1147     $  ceed_eval_grad,err)
1148
1149C     Create ceed operators
1150      call ceedoperatorcreate(ceed,qf_setup,
1151     $  ceed_null,ceed_null,op_setup,err)
1152      call ceedoperatorsetfield(op_setup,'x',erstrctx,
1153     $  ceed_notranspose,basisx,ceed_vector_active,err)
1154      call ceedoperatorsetfield(op_setup,'dx',erstrctx,
1155     $  ceed_notranspose,basisx,ceed_vector_active,err)
1156      call ceedoperatorsetfield(op_setup,'weight',erstrctx,
1157     $  ceed_notranspose,basisx,ceed_vector_none,err)
1158      call ceedoperatorsetfield(op_setup,'rho',erstrctw,
1159     $  ceed_notranspose,ceed_basis_collocated,
1160     $  ceed_vector_active,err)
1161      call ceedoperatorsetfield(op_setup,'rhs',erstrctu,
1162     $  ceed_notranspose,basisu,vec_rhs,err)
1163
1164      call ceedoperatorcreate(ceed,qf_diffusion,
1165     $  ceed_null,ceed_null,op_diffusion,err)
1166      call ceedoperatorsetfield(op_diffusion,'u',erstrctu,
1167     $  ceed_notranspose,basisu,ceed_vector_active,err)
1168      call ceedoperatorsetfield(op_diffusion,'rho',erstrctw,
1169     $  ceed_notranspose,ceed_basis_collocated,
1170     $  vec_qdata,err)
1171      call ceedoperatorsetfield(op_diffusion,'v',erstrctu,
1172     $  ceed_notranspose,basisu,ceed_vector_active,err)
1173
1174C     Compute setup data
1175      call ceedvectorsetarray(vec_rhs,ceed_mem_host,
1176     $  ceed_use_pointer,r1,offset,err)
1177      call ceedoperatorapply(op_setup,vec_coords,vec_qdata,
1178     $  ceed_request_immediate,err)
1179
1180C     Set up true RHS
1181      call dssum         (r1,nx1,ny1,nz1)          ! r1
1182      call xmask1        (r1,h2,nelt)
1183
1184C     Set up algebraic RHS with libCEED
1185      call ceedvectorsetarray(vec_p1,ceed_mem_host,
1186     $  ceed_use_pointer,h1,offset,err)
1187      call ceedvectorsetarray(vec_ap1,ceed_mem_host,
1188     $  ceed_use_pointer,r2,offset,err)
1189      call ceedoperatorapply(op_diffusion,vec_p1,vec_ap1,
1190     $  ceed_request_immediate,err)                ! r2 = A_ceed*h1
1191      call dssum         (r2,nx1,ny1,nz1)
1192      call xmask1        (r2,h2,nelt)
1193
1194C     Set up algebraic RHS with Nek5000
1195      call axhm1         (pap,r3,h1,h1,h2,'bp3')   ! r3 = A_nek5k*h1
1196      call dssum         (r3,nx1,ny1,nz1)
1197      call xmask1        (r3,h2,nelt)
1198
1199      call nekgsync()
1200
1201C     Solve true RHS
1202      if (nid.eq.0) write (6,*) "libCEED true RHS"
1203      tstart = dnekclock()
1204      call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
1205     $  vec_p1,vec_ap1,maxit,'bp3')
1206      tstop  = dnekclock()
1207
1208C     Output
1209      telaps = (tstop-tstart)
1210      maxits = maxit
1211      er1 = glrdif(u1,e1,n)
1212      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
1213
1214      if (test.eq.1.and.nid.eq.0) then
1215        if (maxit>=100) then
1216          write(6,*) "UNCONVERGED CG"
1217        endif
1218        if (dabs(er1)>1e-3) then
1219          write(6,*) "ERROR IS TOO LARGE"
1220        endif
1221      endif
1222
1223      nx      = nx1-1
1224      nnode   = nelgt            ! nnodes
1225      nnode   = nnode*(nx**ldim) ! nnodes
1226      nppp    = nnode/np         ! nnodes/proc
1227
1228      dofps   = nnode/telaps     ! DOF/sec - scalar form
1229      titers  = telaps/maxits    ! time per iteration
1230      tppp_s  = titers/nppp      ! time per iteraton per local point
1231
1232      if (nid.eq.0) write(6,1) 'case scalar:'
1233     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
1234
1235C     Solve libCEED algebraic RHS
1236      if (nid.eq.0) write (6,*) "libCEED algebraic RHS"
1237      maxit = 100
1238      tstart = dnekclock()
1239      call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
1240     $  vec_p1,vec_ap1,maxit,'bp3')
1241      tstop  = dnekclock()
1242
1243C     Output
1244      telaps = (tstop-tstart)
1245      maxits = maxit
1246      er1 = glrdif(u1,e1,n)
1247      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
1248
1249      if (test.eq.1.and.nid.eq.0) then
1250        if (maxit>=100) then
1251          write(6,*) "UNCONVERGED CG"
1252        endif
1253        if (dabs(er1)>1e-9) then
1254          write(6,*) "ERROR IS TOO LARGE"
1255        endif
1256      endif
1257
1258      nx      = nx1-1
1259      nnode   = nelgt            ! nnodes
1260      nnode   = nnode*(nx**ldim) ! nnodes
1261      nppp    = nnode/np         ! nnodes/proc
1262
1263      dofps   = nnode/telaps     ! DOF/sec - scalar form
1264      titers  = telaps/maxits    ! time per iteration
1265      tppp_s  = titers/nppp      ! time per iteraton per local point
1266
1267      if (nid.eq.0) write(6,1) 'case scalar:'
1268     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
1269
1270C     Solve Nek5000 algebraic RHS
1271      if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS"
1272      maxit = 100
1273      tstart = dnekclock()
1274      call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
1275     $  vec_p1,vec_ap1,maxit,'bp3')
1276      tstop  = dnekclock()
1277
1278C     Output
1279      telaps = (tstop-tstart)
1280      maxits = maxit
1281      er1 = glrdif(u1,e1,n)
1282      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
1283
1284      if (test.eq.1.and.nid.eq.0) then
1285        if (maxit>=100) then
1286          write(6,*) "UNCONVERGED CG"
1287        endif
1288        if (dabs(er1)>1e-9) then
1289          write(6,*) "ERROR IS TOO LARGE"
1290        endif
1291      endif
1292
1293      nx     = nx1-1
1294      nnode   = nelgt           ! nnodes
1295      nnode   = nnode*(nx**ldim) ! nnodes
1296      nppp   = nnode/np         ! nnodes/proc
1297
1298      nodepss = nnode/telaps     ! DOF/sec - scalar form
1299      titers = telaps/maxits   ! time per iteration
1300      tppp_s = titers/nppp     ! time per iteraton per local point
1301
1302      if (nid.eq.0) write(6,1) 'case scalar:'
1303     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,nodepss,titers,tppp_s
1304
1305    1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5)
1306    3 format(i3,i9,e12.4,1x,a8,i9)
1307
1308C     Destroy ceed handles
1309      call ceedvectordestroy(vec_p1,err)
1310      call ceedvectordestroy(vec_ap1,err)
1311      call ceedvectordestroy(vec_rhs,err)
1312      call ceedvectordestroy(vec_qdata,err)
1313      call ceedvectordestroy(vec_coords,err)
1314      call ceedelemrestrictiondestroy(erstrctu,err)
1315      call ceedelemrestrictiondestroy(erstrctx,err)
1316      call ceedelemrestrictiondestroy(erstrctw,err)
1317      call ceedbasisdestroy(basisu,err)
1318      call ceedbasisdestroy(basisx,err)
1319      call ceedqfunctiondestroy(qf_setup,err)
1320      call ceedqfunctiondestroy(qf_diffusion,err)
1321      call ceedoperatordestroy(op_setup,err)
1322      call ceedoperatordestroy(op_diffusion,err)
1323      call ceeddestroy(ceed,err)
1324
1325      return
1326      end
1327C-----------------------------------------------------------------------
1328      subroutine cggos(u1,r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,
1329     $  vec_ap1,maxit,bpname)
1330C     Scalar conjugate gradient iteration for solution of uncoupled
1331C     Helmholtz equations
1332C     Input: r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,vec_ap1,bpname
1333C     Output: u1,maxit
1334      include 'SIZE'
1335      include 'TOTAL'
1336      include 'DOMAIN'
1337      include 'FDMH1'
1338      character*3 bpname
1339
1340C     INPUT:  rhs1 - rhs
1341C             h1   - exact solution
1342
1343      parameter (lt=lx1*ly1*lz1*lelt)
1344      parameter (ld=lxd*lyd*lzd*lelt)
1345      real*8 u1(lt),r1(lt),h1(lt),h2(lt)
1346      real*8 rmult(1),binv(1)
1347      integer ceed,ceed_op,vec_ap1,vec_p1
1348      common /scrcg/ dpc(lt),p1(lt),z1(lt)
1349      common /scrca/ wv(4),wk(4),rpp1(4),rpp2(4),alph(4),beta(4),pap(4)
1350
1351      real*8 ap1(lt)
1352      equivalence (ap1,z1)
1353
1354      vol   = volfld(ifield)
1355      nel   = nelfld(ifield)
1356      nxyz  = lx1*ly1*lz1
1357      n     = nxyz*nel
1358      nx    = nx1-1                  ! Polynomial order (just for i/o)
1359
1360      tol=tin
1361
1362      if(bpname.ne.'bp1') then
1363        call setprecn_bp3(dpc,h1,h2) ! Set up diagional pre-conidtioner
1364      else
1365        call setprecn_bp1(dpc,h1,h2) ! Set up diagional pre-conidtioner
1366      endif
1367
1368      call rzero         (u1,n)      ! Initialize solution
1369
1370      wv(1)=0
1371      do i=1,n
1372         s=rmult(i)                  !      -1
1373         p1(i)=dpc(i)*r1(i)          ! p = M  r      T
1374         wv(1)=wv(1)+s*p1(i)*r1(i)   !              r p
1375      enddo
1376      call gop(wv(1),wk,'+  ',1)
1377      rpp1(1) = wv  (1)
1378
1379      do 1000 iter=1,maxit
1380         call axhm1_ceed (pap,ap1,p1,h1,h2,ceed,ceed_op,
1381     $     vec_ap1,vec_p1)
1382         call dssum    (ap1,nx1,ny1,nz1)
1383         if (bpname.ne.'bp1') call xmask1(ap1,h2,nel)
1384
1385         call gop      (pap,wk,'+  ',1)
1386         alph(1) = rpp1(1)/pap(1)
1387
1388         do i=1,n
1389            u1(i)=u1(i)+alph(1)* p1(i)
1390            r1(i)=r1(i)-alph(1)*ap1(i)
1391         enddo
1392
1393C        tolerance check here
1394         call rzero(wv,2)
1395         do i=1,n
1396            wv(1)=wv(1)+r1(i)*r1(i)            ! L2 error estimate
1397            z1(i)=dpc(i)*r1(i)                 ! z = M  r
1398            wv(2)=wv(2)+rmult(i)*z1(i)*r1(i)   ! r z
1399         enddo
1400         call gop(wv,wk,'+  ',2)
1401
1402C        if (nio.eq.0) write(6,1) ifield,istep,iter,nx,(wv(k),k=1,1)
1403  1     format(i2,i9,i5,i4,1p1e12.4,' cggos')
1404
1405         enorm=sqrt(wv(1))
1406         if (enorm.lt.tol) then
1407            ifin = iter
1408            if (nio.eq.0) write(6,3000) istep,ifin,enorm,tol
1409            goto 9999
1410         endif
1411C        if (nio.eq.0) write(6,2) iter,enorm,alph(1),pap(1),'alpha'
1412 2      format(i5,1p3e12.4,2x,a5)
1413
1414         rpp2(1)=rpp1(1)
1415         rpp1(1)=wv  (2)
1416         beta1  =rpp1(1)/rpp2(1)
1417         do i=1,n
1418            p1(i)=z1(i) + beta1*p1(i)
1419         enddo
1420
1421 1000 continue
1422
1423      rbnorm=sqrt(wv(1))
1424      if (nio.eq.0) write (6,3001) istep,iter,rbnorm,tol
1425      iter = iter-1
1426
1427 9999 continue
1428
1429      maxit=iter
1430
1431 3000 format(i12,1x,'cggo scalar:',i6,1p5e13.4)
1432 3001 format(2i6,' Unconverged cggo scalar: rbnorm =',1p2e13.6)
1433
1434      return
1435      end
1436C-----------------------------------------------------------------------
1437      subroutine axhm1_ceed(pap,ap1,p1,h1,h2,ceed,ceed_op,
1438     $  vec_ap1,vec_p1)
1439C     Vector conjugate gradient matvec for solution of uncoupled
1440C     Helmholtz equations
1441C     Input: pap,p1,h1,h2,bpname,ceed,ceed_op,vec_ap1,vec_p1
1442C     Output: ap1
1443      include 'SIZE'
1444      include 'TOTAL'
1445      include 'ceedf.h'
1446
1447      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2))
1448      real*8       gf(lg,lx,lelt)             ! Equivalence new gf() data
1449      equivalence (gf,g1m1)                   ! layout to g1m1...g6m1
1450
1451      real*8   pap(3)
1452      real*8   ap1(lx,lelt)
1453      real*8    p1(lx,lelt)
1454      real*8    h1(lx,lelt),h2(lx,lelt)
1455      integer ceed,ceed_op,vec_ap1,vec_p1,err
1456      integer i,e
1457      integer*8 offset
1458
1459      offset=0
1460      call ceedvectorsetarray(vec_p1,ceed_mem_host,ceed_use_pointer,
1461     $  p1,offset,err)
1462      call ceedvectorsetarray(vec_ap1,ceed_mem_host,ceed_use_pointer,
1463     $  ap1,offset,err)
1464
1465      call ceedoperatorapply(ceed_op,vec_p1,vec_ap1,
1466     $  ceed_request_immediate,err)
1467
1468      call ceedvectorsyncarray(vec_ap1,ceed_mem_host,err)
1469
1470      pap(1)=0.
1471
1472      do e=1,nelt
1473         do i=1,lx
1474           pap(1)=pap(1)+ap1(i,e)*p1(i,e)
1475         enddo
1476      enddo
1477
1478      return
1479      end
1480C-----------------------------------------------------------------------
1481      subroutine ax_e_bp1(w,u,g,h1,h2,b,ju,us,ut)
1482C     Local matrix-vector for solution of BP3 (stiffness matrix)
1483C     Input: u,g,h1,h2,b,ju,us,ut   Output: w
1484      include 'SIZE'
1485      include 'TOTAL'
1486
1487      parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2))
1488      real*8 w(lxyz),u(lxyz),g(lg,lxyz),h1(lxyz),h2(lxyz),b(lxyz)
1489      real*8 ju(lxyz),us(lxyz),ut(lxyz)
1490
1491      nxq = nx1+1 ! Number of quadrature points
1492
1493      lxyzq = nxq**ldim
1494
1495      call intp_rstd (ju,u,lx1,nxq,if3d,0) ! 0 --> Fwd interpolation
1496      do i=1,lxyzq
1497         ju(i)=ju(i)*h2(i) !! h2 must be on the fine grid, w/ quad wts
1498      enddo
1499      call intp_rstd (w,ju,lx1,nxq,if3d,1) ! 1 --> ju-->u
1500
1501      return
1502      end
1503C-----------------------------------------------------------------------
1504      subroutine axhm1_bp1(pap,ap1,p1,h1,h2)
1505C     Vector conjugate gradient matvec for solution of BP1 (mass matrix)
1506C     Input: pap,p1,h1,h2           Output: ap1
1507      include 'SIZE'
1508      include 'TOTAL'
1509
1510      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2))
1511      real*8         gf(lg,lx,lelt)             ! Equivalence new gf() data
1512      equivalence (gf,g1m1)                     ! layout to g1m1...g6m1
1513
1514      real*8 pap(3)
1515      real*8 ap1(lx,lelt)
1516      real*8  p1(lx,lelt)
1517      real*8  h1(lx,lelt),h2(lx,lelt)
1518
1519      real*8 ur(lx),us(lx),ut(lx)
1520      common /ctmp1/ ur,us,ut
1521
1522      integer e
1523
1524      pap(1)=0.
1525
1526      k=1
1527      nxq = nx1+1
1528
1529      do e=1,nelt
1530
1531         call ax_e_bp1(ap1(1,e),p1(1,e),gf(1,1,e),h1(1,e),h2(k,1)
1532     $                                          ,bm1(1,1,1,e),ur,us,ut)
1533         do i=1,lx
1534           pap(1)=pap(1)+ap1(i,e)*p1(i,e)
1535         enddo
1536         k=k+nxq*nxq*nxq
1537
1538      enddo
1539
1540      return
1541      end
1542C-----------------------------------------------------------------------
1543      subroutine ax_e_bp3(w,u,g,ur,us,ut,wk)
1544C     Local matrix-vector for solution of BP3 (stiffness matrix)
1545C     Input: u,g,ur,us,ut,wk        Output: w
1546      include 'SIZE'
1547      include 'TOTAL'
1548
1549      parameter (lzq=lx1+1,lxyz=lx1*lx1*lx1,lxyzq=lzq*lzq*lzq)
1550
1551      common /ctmp0/ tmp(lxyzq)
1552      common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq)
1553
1554      real*8 ur(lxyzq),us(lxyzq),ut(lxyzq),wk(lxyzq)
1555      real*8 w(lxyz),u(lxyz),g(2*ldim,lxyzq)
1556
1557      n = lzq-1
1558
1559      call intp_rstd  (wk,u,lx1,lzq,if3d,0) ! 0 --> Fwd interpolation
1560      call loc_grad3  (ur,us,ut,wk,n,dxmq,dxtmq)
1561
1562      do i=1,lxyzq
1563         wr = g(1,i)*ur(i) + g(2,i)*us(i) + g(3,i)*ut(i)
1564         ws = g(2,i)*ur(i) + g(4,i)*us(i) + g(5,i)*ut(i)
1565         wt = g(3,i)*ur(i) + g(5,i)*us(i) + g(6,i)*ut(i)
1566         ur(i) = wr
1567         us(i) = ws
1568         ut(i) = wt
1569      enddo
1570
1571      call loc_grad3t (wk,ur,us,ut,n,dxmq,dxtmq,tmp)
1572      call intp_rstd  (w,wk,lx1,lzq,if3d,1) ! 1 --> ju-->u
1573
1574      return
1575      end
1576C-----------------------------------------------------------------------
1577      subroutine axhm1_bp3(pap,ap1,p1,h1,h2)
1578C     Vector conjugate gradient matvec for solution of BP3 (stiffness matrix)
1579C     Input: pap,p1,h1,h2           Output: ap1
1580      include 'SIZE'
1581      include 'TOTAL'
1582
1583      parameter (lzq=lx1+1)
1584      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
1585      common /bpgfactors/ gf(lg,lq,lelt),bmq(lq,lelt),w3mq(lq)
1586
1587      real*8 pap(3)
1588      real*8 ap1(lx,lelt)
1589      real*8  p1(lx,lelt)
1590      real*8  h1(lx,lelt),h2(lx,lelt)
1591
1592      common /ctmp1/ ur,us,ut,wk
1593      real*8 ur(lq),us(lq),ut(lq),wk(lq)
1594
1595      integer e
1596
1597      pap(1)=0.
1598
1599      do e=1,nelt
1600
1601         call ax_e_bp3(ap1(1,e),p1(1,e),gf(1,1,e),ur,us,ut,wk)
1602         do i=1,lx
1603           pap(1)=pap(1)+p1(i,e)*ap1(i,e)
1604         enddo
1605
1606      enddo
1607
1608      return
1609      end
1610C-----------------------------------------------------------------------
1611      subroutine axhm1(pap,ap1,p1,h1,h2,bpname)
1612C     Vector conjugate gradient matvec for solution of uncoupled
1613C     Helmholtz equations
1614C     Input: pap,p1,h1,h2,bpname    Output: ap1
1615      include 'SIZE'
1616      include 'TOTAL'
1617
1618      parameter (lx=lx1*ly1*lz1)
1619
1620      real*8 pap(3),ap1(lx,lelt),p1(lx,lelt)
1621      real*8 h1(lx,lelt),h2(lx,lelt)
1622
1623      character*3 bpname
1624
1625      if (bpname.eq.'bp1') then
1626         call axhm1_bp1(pap,ap1,p1,h1,h2)
1627
1628      elseif (bpname.eq.'bp3') then
1629         call axhm1_bp3(pap,ap1,p1,h1,h2)
1630
1631      else
1632         write(6,*) bpname,' axhm1 bpname error'
1633         stop
1634
1635      endif
1636
1637      return
1638      end
1639C-----------------------------------------------------------------------
1640      subroutine get_bp(bp)
1641C     Get BP to run
1642C     Input:                        Output: bp
1643      integer i,bp
1644      character*64 bpval
1645
1646      bp=0
1647      if(iargc().ge.1) then
1648        call getarg(1,bpval)
1649      endif
1650      if(bpval.eq."bp1") then
1651        bp=1
1652      elseif(bpval.eq."bp3") then
1653        bp=3
1654      endif
1655
1656      return
1657      end
1658C-----------------------------------------------------------------------
1659      subroutine get_spec(spec)
1660C     Get CEED backend specification
1661C     Input:                        Output: spec
1662      integer i
1663      character*64 spec
1664
1665      spec = '/cpu/self'
1666      if(iargc().ge.2) then
1667        call getarg(2,spec)
1668      endif
1669
1670      return
1671      end
1672C-----------------------------------------------------------------------
1673      subroutine get_test(test)
1674C     Get test mode flag
1675C     Input:                        Output: test
1676      integer i,test
1677      character*64 testval
1678
1679      test=0
1680      if(iargc().ge.3) then
1681        call getarg(3,testval)
1682      endif
1683      if(testval.eq."test") then
1684        test=1
1685      endif
1686
1687      return
1688      end
1689C-----------------------------------------------------------------------
1690