xref: /libCEED/examples/nek/bps/bps.usr (revision 93b6d8191bb649fce95198206c6bff32567615d1)
1C Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors
2C All Rights Reserved. See the top-level COPYRIGHT and NOTICE files for details.
3C
4C SPDX-License-Identifier: (BSD-2-Clause)
5C
6C This file is part of CEED:  http://github.com/ceed
7
8C> @file
9C> Mass and diffusion operators examples using Nek5000
10C_TESTARGS(name="BP1") -c {ceed_resource} -e bp1 -n 1 -b 4 -test
11C_TESTARGS(name="BP3") -c {ceed_resource} -e bp3 -n 1 -b 4 -test
12
13C-----------------------------------------------------------------------
14      subroutine masssetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
15     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
16     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
17C     Set up mass operator
18C     Input: u1,u2,u3,q             Output: v1,v2,ierr
19      integer q,ierr
20      real*8 ctx(1)
21      real*8 u1(3*q)
22      real*8 u2(9*q)
23      real*8 u3(q)
24      real*8 v1(q)
25      real*8 v2(q)
26      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
27      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
28      real*8 jacmq
29
30C     Quadrature Point Loop
31      do i=1,q
32        a11=u2(i+q*0)
33        a12=u2(i+q*3)
34        a13=u2(i+q*6)
35
36        a21=u2(i+q*1)
37        a22=u2(i+q*4)
38        a23=u2(i+q*7)
39
40        a31=u2(i+q*2)
41        a32=u2(i+q*5)
42        a33=u2(i+q*8)
43
44        g11 = (a22*a33-a23*a32)
45        g12 = (a13*a32-a33*a12)
46        g13 = (a12*a23-a22*a13)
47
48        g21 = (a23*a31-a21*a33)
49        g22 = (a11*a33-a31*a13)
50        g23 = (a13*a21-a23*a11)
51
52        g31 = (a21*a32-a22*a31)
53        g32 = (a12*a31-a32*a11)
54        g33 = (a11*a22-a21*a12)
55
56        jacmq = a11*g11+a21*g12+a31*g13
57
58C       Rho
59        v1(i)=u3(i)*jacmq
60
61C       RHS
62        v2(i)=u3(i)*jacmq
63     $             *dsqrt(u1(i+q*0)*u1(i+q*0)
64     $                   +u1(i+q*1)*u1(i+q*1)
65     $                   +u1(i+q*2)*u1(i+q*2))
66      enddo
67
68      ierr=0
69      end
70C-----------------------------------------------------------------------
71      subroutine massf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
72     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
73     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
74C     Apply mass operator
75C     Input: u1,u2,q                Output: v1,ierr
76      integer q,ierr
77      real*8 ctx(1)
78      real*8 u1(q)
79      real*8 u2(q)
80      real*8 v1(q)
81
82C     Quadrature Point Loop
83      do i=1,q
84        v1(i)=u2(i)*u1(i)
85      enddo
86
87      ierr=0
88      end
89C-----------------------------------------------------------------------
90      subroutine diffsetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
91     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
92     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
93C     Set up diffusion operator
94C     Input: u1,u2,u3,q             Output: v1,v2,ierr
95      integer q,ierr
96      real*8 ctx(1)
97      real*8 u1(3*q)
98      real*8 u2(9*q)
99      real*8 u3(q)
100      real*8 v1(6*q)
101      real*8 v2(q)
102      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
103      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
104      real*8 jacmq,scl
105      real*8 c(3),k(3)
106
107C     Quadrature Point Loop
108      do i=1,q
109        pi = 3.14159265358979323846
110
111        c(1)=0.
112        c(2)=1.
113        c(3)=2.
114        k(1)=1.
115        k(2)=2.
116        k(3)=3.
117
118        a11=u2(i+q*0)
119        a12=u2(i+q*3)
120        a13=u2(i+q*6)
121
122        a21=u2(i+q*1)
123        a22=u2(i+q*4)
124        a23=u2(i+q*7)
125
126        a31=u2(i+q*2)
127        a32=u2(i+q*5)
128        a33=u2(i+q*8)
129
130        g11 = (a22*a33-a23*a32)
131        g12 = (a13*a32-a33*a12)
132        g13 = (a12*a23-a22*a13)
133
134        g21 = (a23*a31-a21*a33)
135        g22 = (a11*a33-a31*a13)
136        g23 = (a13*a21-a23*a11)
137
138        g31 = (a21*a32-a22*a31)
139        g32 = (a12*a31-a32*a11)
140        g33 = (a11*a22-a21*a12)
141
142        jacmq = a11*g11+a21*g12+a31*g13
143
144        scl = u3(i)/jacmq
145
146C       Geometric factors
147C       Stored in Voigt convention
148C       0 5 4
149C       5 1 3
150C       4 3 2
151        v1(i+0*q) = scl*(g11*g11+g12*g12+g13*g13) ! Grr
152        v1(i+1*q) = scl*(g21*g21+g22*g22+g23*g23) ! Gss
153        v1(i+2*q) = scl*(g31*g31+g32*g32+g33*g33) ! Gtt
154        v1(i+3*q) = scl*(g21*g31+g22*g32+g23*g33) ! Gst
155        v1(i+4*q) = scl*(g11*g31+g12*g32+g13*g33) ! Grt
156        v1(i+5*q) = scl*(g11*g21+g12*g22+g13*g23) ! Grs
157
158C       RHS
159        v2(i) = u3(i)*jacmq*pi*pi
160     $            *dsin(pi*(c(1)+k(1)*u1(i+0*q)))
161     $            *dsin(pi*(c(2)+k(2)*u1(i+1*q)))
162     $            *dsin(pi*(c(3)+k(3)*u1(i+2*q)))
163     $            *(k(1)*k(1)+k(2)*k(2)+k(3)*k(3))
164
165      enddo
166
167      ierr=0
168      end
169C-----------------------------------------------------------------------
170      subroutine diffusionf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
171     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
172     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
173C     Apply diffusion operator
174C     Input: u1,u2,q                Output: v1,ierr
175      integer q,ierr
176      real*8 ctx(1)
177      real*8 u1(3*q)
178      real*8 u2(6*q)
179      real*8 v1(3*q)
180
181C     Quadrature Point Loop
182      do i=1,q
183        v1(i+0*q)=
184     $     u2(i+0*q)*u1(i)+u2(i+5*q)*u1(i+q)+u2(i+4*q)*u1(i+2*q)
185        v1(i+1*q)=
186     $     u2(i+5*q)*u1(i)+u2(i+1*q)*u1(i+q)+u2(i+3*q)*u1(i+2*q)
187        v1(i+2*q)=
188     $     u2(i+4*q)*u1(i)+u2(i+3*q)*u1(i+q)+u2(i+2*q)*u1(i+2*q)
189      enddo
190
191      ierr=0
192      end
193C-----------------------------------------------------------------------
194      subroutine set_h2_as_rhoJac_GL(h2,bmq,nxq)
195C     Set h2 as rhoJac
196C     Input: bmq,nxq                Output: h2
197      include 'SIZE'
198      real*8 h2(1),bmq(1)
199
200      common /ctmp77/ wd(lxd),zd(lxd)
201      integer e,i,L
202
203      call zwgl(zd,wd,nxq)  ! nxq = number of points
204
205      q = 1.0               ! Later, this can be a function of position...
206
207      L = 0
208      do e=1,lelt
209      do i=1,nxq**ldim
210         L=L+1
211         h2(L) = q*bmq(L)
212      enddo
213      enddo
214
215      return
216      end
217C-----------------------------------------------------------------------
218      subroutine dist_fld_h1(e)
219C     Set distance initial condition for BP1
220C     Input:                        Output: e
221      include 'SIZE'
222      include 'TOTAL'
223      real*8 x,y,z
224      real*8 e(1)
225
226      n=lx1*ly1*lz1*nelt
227
228      do i=1,n
229        x=xm1(i,1,1,1)
230        y=ym1(i,1,1,1)
231        z=zm1(i,1,1,1)
232
233        e(i) = dsqrt(x*x+y*y+z*z)
234      enddo
235
236      call dsavg(e)  ! This is requisite for random fields
237
238      return
239      end
240C-----------------------------------------------------------------------
241      subroutine sin_fld_h1(e)
242C     Set sine initial condition for BP3
243C     Input:                        Output: e
244      include 'SIZE'
245      include 'TOTAL'
246      real*8 x,y,z
247      real*8 e(1)
248      real*8 c(3),k(3)
249
250      n=lx1*ly1*lz1*nelt
251      pi = 3.14159265358979323846
252
253      c(1)=0.
254      c(2)=1.
255      c(3)=2.
256      k(1)=1.
257      k(2)=2.
258      k(3)=3.
259
260      do i=1,n
261        x=xm1(i,1,1,1)
262        y=ym1(i,1,1,1)
263        z=zm1(i,1,1,1)
264
265        e(i) = dsin(pi*(c(1)+k(1)*x))
266     &        *dsin(pi*(c(2)+k(2)*y))
267     &        *dsin(pi*(c(3)+k(3)*z))
268
269      enddo
270
271      call dsavg(e)  ! This is requisite for random fields
272
273      return
274      end
275C-----------------------------------------------------------------------
276      subroutine uservp(ix,iy,iz,eg) ! set variable properties
277      include 'SIZE'
278      include 'TOTAL'
279      include 'NEKUSE'
280      integer e,f,eg
281C     e = gllel(eg)
282
283      udiff  = 0.0
284      utrans = 0.0
285
286      return
287      end
288C-----------------------------------------------------------------------
289      subroutine userf(ix,iy,iz,eg) ! set acceleration term
290C
291C     Note: this is an acceleration term, NOT a force!
292C     Thus, ffx will subsequently be multiplied by rho(x,t).
293C
294      include 'SIZE'
295      include 'TOTAL'
296      include 'NEKUSE'
297      integer e,f,eg
298C     e = gllel(eg)
299
300      ffx = 0.0
301      ffy = 0.0
302      ffz = 0.0
303
304      return
305      end
306C-----------------------------------------------------------------------
307      subroutine userq(i,j,k,eg) ! set source term
308      include 'SIZE'
309      include 'TOTAL'
310      include 'NEKUSE'
311      integer e,f,eg
312      e = gllel(eg)
313
314      qvol   = 0
315
316      return
317      end
318C-----------------------------------------------------------------------
319      subroutine userbc(ix,iy,iz,f,eg) ! set up boundary conditions
320C     NOTE ::: This subroutine MAY NOT be called by every process
321      include 'SIZE'
322      include 'TOTAL'
323      include 'NEKUSE'
324      integer e,f,eg
325
326      ux   = 0.0
327      uy   = 0.0
328      uz   = 0.0
329      temp = 0.0
330
331      return
332      end
333C-----------------------------------------------------------------------
334      subroutine useric(ix,iy,iz,eg) ! set up initial conditions
335      include 'SIZE'
336      include 'TOTAL'
337      include 'NEKUSE'
338      integer e,f,eg
339
340      ux   = 0.0
341      uy   = 0.0
342      uz   = 0.0
343      temp = 0.0
344
345      return
346      end
347C-----------------------------------------------------------------------
348      subroutine usrdat   ! This routine to modify element vertices
349      include 'SIZE'
350      include 'TOTAL'
351
352      return
353      end
354C-----------------------------------------------------------------------
355      subroutine usrdat2  ! This routine to modify mesh coordinates
356      include 'SIZE'
357      include 'TOTAL'
358
359      x0 = 0
360      x1 = 1
361      call rescale_x(xm1,x0,x1)
362      call rescale_x(ym1,x0,x1)
363      call rescale_x(zm1,x0,x1)
364
365      param(59)=1  ! Force Nek to use the "deformed element" formulation
366
367      return
368      end
369C-----------------------------------------------------------------------
370      subroutine usrdat3
371      include 'SIZE'
372      include 'TOTAL'
373
374      return
375      end
376C-----------------------------------------------------------------------
377      subroutine xmask1   (r1,h2,nel)
378C     Apply zero Dirichlet boundary conditions
379C     Input: h2,nel                 Output: r1
380      include 'SIZE'
381      include 'TOTAL'
382      real*8 r1(1),h2(1)
383
384      n=nx1*ny1*nz1*nel
385      do i=1,n
386         r1(i)=r1(i)*h2(i)
387      enddo
388
389      return
390      end
391C-----------------------------------------------------------------------
392      function glrdif(x,y,n)
393C     Compute Linfty norm of (x-y)
394C     Input: x,y                    Output: n
395      real*8 x(n),y(n)
396
397      dmx=0
398      xmx=0
399      ymx=0
400
401      do i=1,n
402         diff=abs(x(i)-y(i))
403         dmx =max(dmx,diff)
404         xmx =max(xmx,x(i))
405         ymx =max(ymx,y(i))
406      enddo
407
408      xmx = max(xmx,ymx)
409      dmx = glmax(dmx,1) ! max across processors
410      xmx = glmax(xmx,1)
411
412      if (xmx.gt.0) then
413         glrdif = dmx/xmx
414      else
415         glrdif = -dmx   ! Negative indicates something strange
416      endif
417
418      return
419      end
420C-----------------------------------------------------------------------
421      subroutine loc_grad3(ur,us,ut,u,N,D,Dt)
422C     3D transpose of local gradient
423C     Input: u,N,D,Dt               Output: ur,us,ut
424      real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N)
425      real*8 u (0:N,0:N,0:N)
426      real*8 D (0:N,0:N),Dt(0:N,0:N)
427
428      m1 = N+1
429      m2 = m1*m1
430
431      call mxm(D ,m1,u(0,0,0),m1,ur,m2)
432      do k=0,N
433         call mxm(u(0,0,k),m1,Dt,m1,us(0,0,k),m1)
434      enddo
435      call mxm(u(0,0,0),m2,Dt,m1,ut,m1)
436
437      return
438      end
439c-----------------------------------------------------------------------
440      subroutine loc_grad3t(u,ur,us,ut,N,D,Dt,w)
441C     3D transpose of local gradient
442C     Input: ur,us,ut,N,D,Dt        Output: u
443       real*8 u (0:N,0:N,0:N)
444       real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N)
445       real*8 D (0:N,0:N),Dt(0:N,0:N)
446       real*8 w (0:N,0:N,0:N)
447
448       m1 = N+1
449       m2 = m1*m1
450       m3 = m1*m1*m1
451
452       call mxm(Dt,m1,ur,m1,u(0,0,0),m2)
453       do k=0,N
454          call mxm(us(0,0,k),m1,D ,m1,w(0,0,k),m1)
455       enddo
456       call add2(u(0,0,0),w,m3)
457       call mxm(ut,m2,D ,m1,w,m1)
458       call add2(u(0,0,0),w,m3)
459
460      return
461      end
462C-----------------------------------------------------------------------
463      subroutine geodatq(gf,bmq,w3mq,nxq)
464C     Routine to generate elemental geometric matrices on mesh 1
465C     (Gauss-Legendre Lobatto mesh).
466      include 'SIZE'
467      include 'TOTAL'
468
469      parameter (lg=3+3*(ldim-2),lzq=lx1+1,lxyd=lzq**ldim)
470
471      real*8 gf(lg,nxq**ldim,lelt),bmq(nxq**ldim,lelt),w3mq(nxq,nxq,nxq)
472
473      common /ctmp1/ xr(lxyd),xs(lxyd),xt(lxyd)
474      common /sxrns/ yr(lxyd),ys(lxyd),yt(lxyd)
475     $ ,             zr(lxyd),zs(lxyd),zt(lxyd)
476
477      common /ctmp77/ wd(lxd),zd(lxd)
478      common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq)
479
480      integer e
481      real*8 tmp(lxyd)
482      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
483      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
484      real*8 jacmq
485
486      if (nxq.gt.lzq) call exitti('ABORT: recompile with lzq=$',nxq)
487
488      call zwgl    (zd,wd,lzq)                            ! nxq = number of points
489      call gen_dgl (dxmq,dxtmq,lzq,lzq,tmp)
490
491      do k=1,nxq
492      do j=1,nxq
493      do i=1,nxq
494         w3mq(i,j,k) = wd(i)*wd(j)*wd(k)
495      enddo
496      enddo
497      enddo
498
499      nxyzq = nxq**ldim
500      nxqm1 = lzq-1
501
502      do e=1,nelt
503         call intp_rstd (tmp,xm1(1,1,1,e),lx1,lzq,if3d,0) ! 0-->Fwd interpolation
504         call loc_grad3 (xr,xs,xt,tmp,nxqm1,dxmq,dxtmq)
505
506         call intp_rstd (tmp,ym1(1,1,1,e),lx1,lzq,if3d,0)
507         call loc_grad3 (yr,ys,yt,tmp,nxqm1,dxmq,dxtmq)
508
509         call intp_rstd (tmp,zm1(1,1,1,e),lx1,lzq,if3d,0)
510         call loc_grad3 (zr,zs,zt,tmp,nxqm1,dxmq,dxtmq)
511
512         do i=1,nxyzq
513            a11 = xr(i)
514            a12 = xs(i)
515            a13 = xt(i)
516
517            a21 = yr(i)
518            a22 = ys(i)
519            a23 = yt(i)
520
521            a31 = zr(i)
522            a32 = zs(i)
523            a33 = zt(i)
524
525            g11 = (a22*a33-a23*a32)
526            g12 = (a13*a32-a33*a12)
527            g13 = (a12*a23-a22*a13)
528
529            g21 = (a23*a31-a21*a33)
530            g22 = (a11*a33-a31*a13)
531            g23 = (a13*a21-a23*a11)
532
533            g31 = (a21*a32-a22*a31)
534            g32 = (a12*a31-a32*a11)
535            g33 = (a11*a22-a21*a12)
536
537            jacmq = a11*g11+a21*g12+a31*g13
538
539            bmq(i,e)  = w3mq(i,1,1)*jacmq
540            scale     = w3mq(i,1,1)/jacmq
541
542            gf(1,i,e) = scale*(g11*g11+g12*g12+g13*g13) ! Grr
543            gf(2,i,e) = scale*(g11*g21+g12*g22+g13*g23) ! Grs
544            gf(3,i,e) = scale*(g11*g31+g12*g32+g13*g33) ! Grt
545            gf(4,i,e) = scale*(g21*g21+g22*g22+g23*g23) ! Gss
546            gf(5,i,e) = scale*(g21*g31+g22*g32+g23*g33) ! Gst
547            gf(6,i,e) = scale*(g31*g31+g32*g32+g33*g33) ! Gtt
548
549         enddo
550      enddo
551
552      return
553      end
554C-----------------------------------------------------------------------
555      subroutine setprecn_bp1 (d,h1,h2)
556C     Generate diagonal preconditioner for Helmholtz operator
557C     Input: h1,h2                  Output: d
558      include 'SIZE'
559      include 'TOTAL'
560
561      parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2))
562
563      real*8    d(lx1,ly1,lz1,lelt),h1(lxyz,lelt),h2(lxyz,lelt)
564      integer e
565
566      real*8         gf(lg,lx1,ly1,lz1,lelt) ! Equivalence new gf() data
567      equivalence (gf,g1m1)                  ! layout to g1m1...g6m1
568
569      real*8 ysm1(ly1)
570
571      nel   = nelfld(ifield)
572      n     = nel*lx1*ly1*lz1
573      nxyz  = lx1*ly1*lz1
574
575      call copy    (d,bm1,n)   ! Mass matrix preconditioning full mass matrix
576      call dssum   (d,nx1,ny1,nz1)
577      call invcol1 (d,n)
578      return
579
580      call dsset(lx1,ly1,lz1)
581
582      do 1000 e=1,nel
583
584        call rzero(d(1,1,1,e),nxyz)
585
586        do 320 iz=1,lz1
587         do 320 iy=1,ly1
588         do 320 ix=1,lx1
589         do 320 iq=1,lx1
590           d(ix,iy,iz,e) = d(ix,iy,iz,e)
591     $                   + gf(1,iq,iy,iz,e) * dxm1(iq,ix)**2
592     $                   + gf(2,ix,iq,iz,e) * dxm1(iq,iy)**2
593     $                   + gf(3,ix,iy,iq,e) * dxm1(iq,iz)**2
594  320    continue
595C
596C        Add cross terms if element is deformed.
597C
598         if (lxyz.gt.0) then
599
600           do i2=1,ly1,ly1-1
601           do i1=1,lx1,lx1-1
602              d(1,i1,i2,e) = d(1,i1,i2,e)
603     $            + gf(4,1,i1,i2,e) * dxtm1(1,1)*dytm1(i1,i1)
604     $            + gf(5,1,i1,i2,e) * dxtm1(1,1)*dztm1(i2,i2)
605              d(lx1,i1,i2,e) = d(lx1,i1,i2,e)
606     $            + gf(4,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dytm1(i1,i1)
607     $            + gf(5,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dztm1(i2,i2)
608              d(i1,1,i2,e) = d(i1,1,i2,e)
609     $            + gf(4,i1,1,i2,e) * dytm1(1,1)*dxtm1(i1,i1)
610     $            + gf(6,i1,1,i2,e) * dytm1(1,1)*dztm1(i2,i2)
611              d(i1,ly1,i2,e) = d(i1,ly1,i2,e)
612     $            + gf(4,i1,ly1,i2,e) * dytm1(ly1,ly1)*dxtm1(i1,i1)
613     $            + gf(6,i1,ly1,i2,e) * dytm1(ly1,ly1)*dztm1(i2,i2)
614              d(i1,i2,1,e) = d(i1,i2,1,e)
615     $            + gf(5,i1,i2,1,e) * dztm1(1,1)*dxtm1(i1,i1)
616     $            + gf(6,i1,i2,1,e) * dztm1(1,1)*dytm1(i2,i2)
617              d(i1,i2,lz1,e) = d(i1,i2,lz1,e)
618     $            + gf(5,i1,i2,lz1,e) * dztm1(lz1,lz1)*dxtm1(i1,i1)
619     $            + gf(6,i1,i2,lz1,e) * dztm1(lz1,lz1)*dytm1(i2,i2)
620
621           enddo
622           enddo
623         endif
624
625        do i=1,lxyz
626           d(i,1,1,e)=d(i,1,1,e)*h1(i,e)+h2(i,e)*bm1(i,1,1,e)
627        enddo
628
629 1000 continue ! element loop
630
631C     If axisymmetric, add a diagonal term in the radial direction (ISD=2)
632
633      if (ifaxis.and.(isd.eq.2)) then
634         do 1200 e=1,nel
635            if (ifrzer(e)) call mxm(ym1(1,1,1,e),lx1,datm1,ly1,ysm1,1)
636            k=0
637            do 1190 j=1,ly1
638            do 1190 i=1,lx1
639               k=k+1
640               if (ym1(i,j,1,e).ne.0.) then
641                  term1 = bm1(i,j,1,e)/ym1(i,j,1,e)**2
642                  if (ifrzer(e)) then
643                     term2 =  wxm1(i)*wam1(1)*dam1(1,j)
644     $                       *jacm1(i,1,1,e)/ysm1(i)
645                  else
646                     term2 = 0.
647                  endif
648                  d(i,j,1,e) = d(i,j,1,e) + h1(k,e)*(term1+term2)
649               endif
650 1190       continue
651 1200    continue
652
653      endif
654      call dssum   (d,nx1,ny1,nz1)
655      call invcol1 (d,n)
656
657      if (nio.eq.0) write(6,1) n,d(1,1,1,1),h1(1,1),h2(1,1),bm1(1,1,1,1)
658   1  format(i9,1p4e12.4,' diag prec')
659
660      return
661      end
662C-----------------------------------------------------------------------
663      subroutine setprecn_bp3 (d,h1,h2)
664C     Generate dummy diagonal preconditioner for Helmholtz operator
665C     Input: h1,h2                  Output: d
666      include 'SIZE'
667      include 'TOTAL'
668
669      parameter (n=lx1*ly1*lz1*lelt)
670      real*8 d(n),h1(n),h2(n)
671
672      call rone (d,n)
673
674      return
675      end
676C-----------------------------------------------------------------------
677      subroutine userchk
678      include 'SIZE'
679      include 'TOTAL'
680
681      integer bp
682
683      call get_bp(bp)
684
685      if (bp==1) then
686        if (istep.gt.0) call bp1
687      elseif (bp==3) then
688        if (istep.gt.0) call bp3
689      else
690        write(6,*) "INVALID BP SPECIFICED"
691      endif
692
693      return
694      end
695C-----------------------------------------------------------------------
696      subroutine bp1
697C     Solution to BP1 using libCEED
698      include 'SIZE'
699      include 'TOTAL'
700      include 'CTIMER'  ! ifsync
701      include 'FDMH1'
702      include 'ceed/fortran.h'
703
704      parameter (lzq=lx1+1)
705      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
706      common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq)
707
708      parameter (lt=lx1*ly1*lz1*lelt)
709      parameter (ld=lxd*lyd*lzd*lelt)
710      common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt)
711      common /vcrny/ e1(lt)
712      common /vcrvh/ h1(lt),h2(lx*lelt),pap(3)
713      real*8 coords(ldim*lx*lelt)
714
715      logical ifh3
716      integer*8 nnode
717      integer ceed,err,test
718      character*64 spec
719
720      integer p,q,ncompx,ncompu,enode,lnode
721      integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs
722      integer stridesu(3),stridesx(3),stridesw(3)
723      integer erstrctu,erstrctx,erstrctw
724      integer basisu,basisx
725      integer qf_mass,qf_setup
726      integer op_mass,op_setup
727      real*8  x,y,z
728      integer*8 offset
729
730      external massf,masssetupf
731
732      ifield = 1
733      nxq    = nx1+1
734      n = nx1*ny1*nz1*nelt
735
736      ifsync = .false.
737
738C     Set up coordinates
739      ii=0
740      do j=0,nelt-1
741      do i=1,lx
742        ii=ii+1
743        x = xm1(ii,1,1,1)
744        y = ym1(ii,1,1,1)
745        z = zm1(ii,1,1,1)
746        coords(i+0*lx+3*j*lx)=x
747        coords(i+1*lx+3*j*lx)=y
748        coords(i+2*lx+3*j*lx)=z
749      enddo
750      enddo
751
752C     Init ceed library
753      call get_spec(spec)
754      call ceedinit(trim(spec)//char(0),ceed,err)
755
756      call get_test(test)
757
758C     Set up Nek geometry data
759      call geodatq       (gf,bmq,w3mq,nxq)         ! Set up gf() arrays
760      call set_h2_as_rhoJac_GL(h2,bmq,nxq)
761
762C     Set up true soln
763      call dist_fld_h1   (e1)
764      call copy          (h1,e1,n)                 ! Save exact soln in h1
765
766C     Set up solver parameters
767      tol       = 1e-10
768      param(22) = tol
769      maxit     = 100
770
771      call nekgsync()
772
773C     Create ceed basis for mesh and computation
774      p=nx1
775      q=p+1
776      ncompu=1
777      ncompx=ldim
778      call ceedbasiscreatetensorh1lagrange(ceed,ndim,ndim,p,q,
779     $  ceed_gauss,basisx,err)
780      call ceedbasiscreatetensorh1lagrange(ceed,ndim,ncompu,p,q,
781     $  ceed_gauss,basisu,err)
782
783C     Create ceed element restrictions for mesh and computation
784      enode=p**ldim
785      lnode=enode*nelt*ncompu
786      stridesx=[1,enode,enode*ldim]
787      call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ldim,
788     $  ldim*lnode,stridesx,erstrctx,err)
789      stridesu=[1,enode,enode*ncompu]
790      call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ncompu,
791     $  ncompu*lnode,stridesu,erstrctu,err)
792      call ceedelemrestrictioncreatestrided(ceed,nelt,q**ldim,
793     $  1,nelt*q**ldim,ceed_strides_backend,erstrctw,err)
794
795C     Create ceed vectors
796      call ceedvectorcreate(ceed,lnode,vec_p1,err)
797      call ceedvectorcreate(ceed,lnode,vec_ap1,err)
798      call ceedvectorcreate(ceed,lnode,vec_rhs,err)
799      call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err)
800      call ceedvectorcreate(ceed,nelt*q**ldim,vec_qdata,err)
801
802      offset=0
803      call ceedvectorsetarray(vec_coords,ceed_mem_host,
804     $  ceed_use_pointer,coords,offset,err)
805
806C     Create ceed qfunctions for masssetupf and massf
807      call ceedqfunctioncreateinterior(ceed,1,masssetupf,
808     $  EXAMPLE_DIR
809     $  //'bps/bps.h:masssetupf',qf_setup,err)
810      call ceedqfunctionaddinput(qf_setup,'x',ncompx,
811     $  ceed_eval_interp,err)
812      call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim,
813     $  ceed_eval_grad,err)
814      call ceedqfunctionaddinput(qf_setup,'weight',ncompu,
815     $  ceed_eval_weight,err)
816      call ceedqfunctionaddoutput(qf_setup,'qdata',ncompu,
817     $  ceed_eval_none,err)
818      call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu,
819     $  ceed_eval_interp,err)
820
821      call ceedqfunctioncreateinterior(ceed,1,massf,
822     $  EXAMPLE_DIR
823     $  //'bps/bps.h:massf',qf_mass,err)
824      call ceedqfunctionaddinput(qf_mass,'u',ncompu,
825     $  ceed_eval_interp,err)
826      call ceedqfunctionaddinput(qf_mass,'qdata',ncompu,
827     $  ceed_eval_none,err)
828      call ceedqfunctionaddoutput(qf_mass,'v',ncompu,
829     $  ceed_eval_interp,err)
830
831C     Create ceed operators
832      call ceedoperatorcreate(ceed,qf_setup,
833     $  ceed_qfunction_none,ceed_qfunction_none,op_setup,err)
834      call ceedoperatorsetfield(op_setup,'x',erstrctx,
835     $  basisx,ceed_vector_active,err)
836      call ceedoperatorsetfield(op_setup,'dx',erstrctx,
837     $  basisx,ceed_vector_active,err)
838      call ceedoperatorsetfield(op_setup,'weight',
839     $  ceed_elemrestriction_none,basisx,ceed_vector_none,err)
840      call ceedoperatorsetfield(op_setup,'qdata',erstrctw,
841     $  ceed_basis_collocated,ceed_vector_active,err)
842      call ceedoperatorsetfield(op_setup,'rhs',erstrctu,
843     $  basisu,vec_rhs,err)
844
845      call ceedoperatorcreate(ceed,qf_mass,
846     $  ceed_qfunction_none,ceed_qfunction_none,op_mass,err)
847      call ceedoperatorsetfield(op_mass,'u',erstrctu,
848     $  basisu,ceed_vector_active,err)
849      call ceedoperatorsetfield(op_mass,'qdata',erstrctw,
850     $  ceed_basis_collocated,vec_qdata,err)
851      call ceedoperatorsetfield(op_mass,'v',erstrctu,
852     $  basisu,ceed_vector_active,err)
853
854C     Compute setup data
855      call ceedvectorsetarray(vec_rhs,ceed_mem_host,
856     $  ceed_use_pointer,r1,offset,err)
857      call ceedoperatorapply(op_setup,vec_coords,vec_qdata,
858     $  ceed_request_immediate,err)
859
860C     Set up true RHS
861      call dssum         (r1,nx1,ny1,nz1)          ! r1
862
863C     Set up algebraic RHS with libCEED
864      call ceedvectorsetarray(vec_p1,ceed_mem_host,
865     $  ceed_use_pointer,h1,offset,err)
866      call ceedvectorsetarray(vec_ap1,ceed_mem_host,
867     $  ceed_use_pointer,r2,offset,err)
868      call ceedoperatorapply(op_mass,vec_p1,vec_ap1,
869     $  ceed_request_immediate,err)                ! r2 = A_ceed*h1
870      call dssum         (r2,nx1,ny1,nz1)
871
872C     Set up algebraic RHS with Nek5000
873      call axhm1         (pap,r3,h1,h1,h2,'bp1')   ! r3 = A_nek5k*h1
874      call dssum         (r3,nx1,ny1,nz1)
875
876      call nekgsync()
877
878C     Solve true RHS
879      if (nid.eq.0) write (6,*) "libCEED true RHS"
880      tstart = dnekclock()
881      call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_mass,
882     $  vec_p1,vec_ap1,maxit,'bp1')
883      tstop  = dnekclock()
884
885C     Output
886      telaps = (tstop-tstart)
887      maxits = maxit
888      er1 = glrdif(u1,e1,n)
889      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
890
891      if (test.eq.1.and.nid.eq.0) then
892        if (maxit>=100) then
893          write(6,*) "UNCONVERGED CG"
894        endif
895        if (dabs(er1)>5e-3) then
896          write(6,*) "ERROR IS TOO LARGE"
897        endif
898      endif
899
900      nx     = nx1-1
901      nnode  = nelgt            ! nnodes
902      nnode  = nnode*(nx**ldim) ! nnodes
903      nppp   = nnode/np         ! nnodes/proc
904
905      dofps  = nnode/telaps     ! DOF/sec - scalar form
906      titers = telaps/maxits    ! time per iteration
907      tppp_s = titers/nppp      ! time per iteraton per local point
908
909      if (nid.eq.0) write(6,1) 'case scalar:'
910     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
911
912C     Solve libCEED algebraic RHS
913      if (nid.eq.0) write (6,*) "libCEED algebraic RHS"
914      maxit = 100
915      tstart = dnekclock()
916      call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_mass,
917     $  vec_p1,vec_ap1,maxit,'bp1')
918      tstop  = dnekclock()
919
920C     Output
921      telaps = (tstop-tstart)
922      maxits = maxit
923      er1 = glrdif(u1,e1,n)
924      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
925
926      if (test.eq.1.and.nid.eq.0) then
927        if (maxit>=100) then
928          write(6,*) "UNCONVERGED CG"
929        endif
930        if (dabs(er1)>1e-5) then
931          write(6,*) "ERROR IS TOO LARGE"
932        endif
933      endif
934
935      nx      = nx1-1
936      nnode   = nelgt            ! nnodes
937      nnode   = nnode*(nx**ldim) ! nnodes
938      nppp    = nnode/np         ! nnodes/proc
939
940      dofps   = nnode/telaps     ! DOF/sec - scalar form
941      titers  = telaps/maxits    ! time per iteration
942      tppp_s  = titers/nppp      ! time per iteraton per local point
943
944      if (nid.eq.0) write(6,1) 'case scalar:'
945     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
946
947C     Solve Nek5000 algebraic RHS
948      if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS"
949      maxit = 100
950      tstart = dnekclock()
951      call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_mass,
952     $  vec_p1,vec_ap1,maxit,'bp1')
953      tstop  = dnekclock()
954
955C     Output
956      telaps = (tstop-tstart)
957      maxits = maxit
958      er1 = glrdif(u1,e1,n)
959      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
960
961      if (test.eq.1.and.nid.eq.0) then
962        if (maxit>=100) then
963          write(6,*) "UNCONVERGED CG"
964        endif
965        if (dabs(er1)>1e-5) then
966          write(6,*) "ERROR IS TOO LARGE"
967        endif
968      endif
969
970      nx      = nx1-1
971      nnode   = nelgt            ! nnodes
972      nnode   = nnode*(nx**ldim) ! nnodes
973      nppp    = nnode/np         ! nnodes/proc
974
975      dofps   = nnode/telaps     ! DOF/sec - scalar form
976      titers  = telaps/maxits    ! time per iteration
977      tppp_s  = titers/nppp      ! time per iteraton per local point
978
979      if (nid.eq.0) write(6,1) 'case scalar:'
980     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
981
982    1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5)
983    3 format(i3,i9,e12.4,1x,a8,i9)
984
985C     Destroy ceed handles
986      call ceedvectordestroy(vec_p1,err)
987      call ceedvectordestroy(vec_ap1,err)
988      call ceedvectordestroy(vec_rhs,err)
989      call ceedvectordestroy(vec_qdata,err)
990      call ceedvectordestroy(vec_coords,err)
991      call ceedelemrestrictiondestroy(erstrctu,err)
992      call ceedelemrestrictiondestroy(erstrctx,err)
993      call ceedelemrestrictiondestroy(erstrctw,err)
994      call ceedbasisdestroy(basisu,err)
995      call ceedbasisdestroy(basisx,err)
996      call ceedqfunctiondestroy(qf_setup,err)
997      call ceedqfunctiondestroy(qf_mass,err)
998      call ceedoperatordestroy(op_setup,err)
999      call ceedoperatordestroy(op_mass,err)
1000      call ceeddestroy(ceed,err)
1001
1002      return
1003      end
1004C-----------------------------------------------------------------------
1005      subroutine bp3
1006C     Solution to BP3 using libCEED
1007      include 'SIZE'
1008      include 'TOTAL'
1009      include 'CTIMER'  ! ifsync
1010      include 'FDMH1'
1011      include 'ceed/fortran.h'
1012
1013      parameter (lzq=lx1+1)
1014      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
1015      common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq)
1016
1017      parameter (lt=lx1*ly1*lz1*lelt)
1018      parameter (ld=lxd*lyd*lzd*lelt)
1019      common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt)
1020      common /vcrny/ e1(lt)
1021      common /vcrvh/ h1(lt),h2(ld),pap(3)
1022      real*8 coords(ldim*lx*lelt)
1023
1024      logical ifh3
1025      integer*8 nnode
1026      integer ceed,err,test
1027      character*64 spec
1028
1029      integer p,q,ncompx,ncompu,enode,lnode
1030      integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs
1031      integer stridesu(3),stridesx(3),stridesw(3)
1032      integer erstrctu,erstrctx,erstrctw
1033      integer basisu,basisx
1034      integer qf_diffusion,qf_setup
1035      integer op_diffusion,op_setup
1036      integer ii,i,ngeo
1037      real*8  x,y,z
1038      integer*8 offset
1039
1040      external diffusionf,diffsetupf
1041
1042      ifield = 1
1043      nxq    = nx1+1
1044      n = nx1*ny1*nz1*nelt
1045
1046      ifsync = .false.
1047
1048C     Set up coordinates and mask
1049      ii=0
1050      do j=0,nelt-1
1051      do i=1,lx
1052        ii=ii+1
1053        x = xm1(ii,1,1,1)
1054        y = ym1(ii,1,1,1)
1055        z = zm1(ii,1,1,1)
1056        coords(i+0*lx+3*j*lx)=x
1057        coords(i+1*lx+3*j*lx)=y
1058        coords(i+2*lx+3*j*lx)=z
1059        if ( x.eq.0.or.x.eq.1
1060     $   .or.y.eq.0.or.y.eq.1
1061     $   .or.z.eq.0.or.z.eq.1 ) then
1062          h2(ii)=0.
1063        else
1064          h2(ii)=1.
1065        endif
1066      enddo
1067      enddo
1068
1069C     Init ceed library
1070      call get_spec(spec)
1071      call ceedinit(trim(spec)//char(0),ceed,err)
1072
1073      call get_test(test)
1074
1075C     Set up Nek geometry data
1076      call geodatq       (gf,bmq,w3mq,nxq)         ! Set up gf() arrays
1077
1078C     Set up true soln
1079      call sin_fld_h1    (e1)
1080      call xmask1        (e1,h2,nelt)
1081      call copy          (h1,e1,n)                 ! Save exact soln in h1
1082
1083C     Set up solver parameters
1084      tol       = 1e-10
1085      param(22) = tol
1086      maxit     = 100
1087
1088      call nekgsync()
1089
1090C     Create ceed basis for mesh and computation
1091      p=nx1
1092      q=p+1
1093      ncompu=1
1094      ncompx=ldim
1095      call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompx,p,q,
1096     $  ceed_gauss,basisx,err)
1097      call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompu,p,q,
1098     $  ceed_gauss,basisu,err)
1099
1100C     Create ceed element restrictions for mesh and computation
1101      enode=p**ldim
1102      lnode=enode*nelt*ncompu
1103      ngeo=(ldim*(ldim+1))/2
1104      stridesx=[1,enode,enode*ldim]
1105      call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ldim,
1106     $  ldim*lnode,stridesx,erstrctx,err)
1107      stridesu=[1,enode,enode*ncompu]
1108      call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ncompu,
1109     $  ncompu*lnode,stridesu,erstrctu,err)
1110      stridesw=[1,q**ldim,ngeo*q**ldim]
1111      call ceedelemrestrictioncreatestrided(ceed,nelt,q**ldim,
1112     $  ngeo,nelt*q**ldim*ngeo,stridesw,erstrctw,err)
1113
1114C     Create ceed vectors
1115      call ceedvectorcreate(ceed,lnode,vec_p1,err)
1116      call ceedvectorcreate(ceed,lnode,vec_ap1,err)
1117      call ceedvectorcreate(ceed,lnode,vec_rhs,err)
1118      call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err)
1119      call ceedvectorcreate(ceed,nelt*ngeo*q**ldim,vec_qdata,err)
1120
1121      offset=0
1122      call ceedvectorsetarray(vec_coords,ceed_mem_host,
1123     $  ceed_use_pointer,coords,offset,err)
1124
1125C     Create ceed qfunctions for diffsetupf and diffusionf
1126      call ceedqfunctioncreateinterior(ceed,1,diffsetupf,
1127     $  EXAMPLE_DIR
1128     $  //'bps/bps.h:diffsetupf'//char(0),qf_setup,err)
1129      call ceedqfunctionaddinput(qf_setup,'x',ncompx,
1130     $  ceed_eval_interp,err)
1131      call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim,
1132     $  ceed_eval_grad,err)
1133      call ceedqfunctionaddinput(qf_setup,'weight',ncompu,
1134     $  ceed_eval_weight,err)
1135      call ceedqfunctionaddoutput(qf_setup,'qdata',ngeo,
1136     $  ceed_eval_none,err)
1137      call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu,
1138     $  ceed_eval_interp,err)
1139
1140      call ceedqfunctioncreateinterior(ceed,1,diffusionf,
1141     $  EXAMPLE_DIR
1142     $  //'bps/bps.h:diffusionf'//char(0),qf_diffusion,err)
1143      call ceedqfunctionaddinput(qf_diffusion,'u',ncompu*ldim,
1144     $  ceed_eval_grad,err)
1145      call ceedqfunctionaddinput(qf_diffusion,'qdata',ngeo,
1146     $  ceed_eval_none,err)
1147      call ceedqfunctionaddoutput(qf_diffusion,'v',ncompu*ldim,
1148     $  ceed_eval_grad,err)
1149
1150C     Create ceed operators
1151      call ceedoperatorcreate(ceed,qf_setup,
1152     $  ceed_qfunction_none,ceed_qfunction_none,op_setup,err)
1153      call ceedoperatorsetfield(op_setup,'x',erstrctx,
1154     $  basisx,ceed_vector_active,err)
1155      call ceedoperatorsetfield(op_setup,'dx',erstrctx,
1156     $  basisx,ceed_vector_active,err)
1157      call ceedoperatorsetfield(op_setup,'weight',
1158     $  ceed_elemrestriction_none,basisx,ceed_vector_none,err)
1159      call ceedoperatorsetfield(op_setup,'qdata',erstrctw,
1160     $  ceed_basis_collocated,ceed_vector_active,err)
1161      call ceedoperatorsetfield(op_setup,'rhs',erstrctu,
1162     $  basisu,vec_rhs,err)
1163
1164      call ceedoperatorcreate(ceed,qf_diffusion,
1165     $  ceed_qfunction_none,ceed_qfunction_none,op_diffusion,err)
1166      call ceedoperatorsetfield(op_diffusion,'u',erstrctu,
1167     $  basisu,ceed_vector_active,err)
1168      call ceedoperatorsetfield(op_diffusion,'qdata',erstrctw,
1169     $  ceed_basis_collocated,vec_qdata,err)
1170      call ceedoperatorsetfield(op_diffusion,'v',erstrctu,
1171     $  basisu,ceed_vector_active,err)
1172
1173C     Compute setup data
1174      call ceedvectorsetarray(vec_rhs,ceed_mem_host,
1175     $  ceed_use_pointer,r1,offset,err)
1176      call ceedoperatorapply(op_setup,vec_coords,vec_qdata,
1177     $  ceed_request_immediate,err)
1178
1179C     Set up true RHS
1180      call dssum         (r1,nx1,ny1,nz1)          ! r1
1181      call xmask1        (r1,h2,nelt)
1182
1183C     Set up algebraic RHS with libCEED
1184      call ceedvectorsetarray(vec_p1,ceed_mem_host,
1185     $  ceed_use_pointer,h1,offset,err)
1186      call ceedvectorsetarray(vec_ap1,ceed_mem_host,
1187     $  ceed_use_pointer,r2,offset,err)
1188      call ceedoperatorapply(op_diffusion,vec_p1,vec_ap1,
1189     $  ceed_request_immediate,err)                ! r2 = A_ceed*h1
1190      call dssum         (r2,nx1,ny1,nz1)
1191      call xmask1        (r2,h2,nelt)
1192
1193C     Set up algebraic RHS with Nek5000
1194      call axhm1         (pap,r3,h1,h1,h2,'bp3')   ! r3 = A_nek5k*h1
1195      call dssum         (r3,nx1,ny1,nz1)
1196      call xmask1        (r3,h2,nelt)
1197
1198      call nekgsync()
1199
1200C     Solve true RHS
1201      if (nid.eq.0) write (6,*) "libCEED true RHS"
1202      tstart = dnekclock()
1203      call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
1204     $  vec_p1,vec_ap1,maxit,'bp3')
1205      tstop  = dnekclock()
1206
1207C     Output
1208      telaps = (tstop-tstart)
1209      maxits = maxit
1210      er1 = glrdif(u1,e1,n)
1211      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
1212
1213      if (test.eq.1.and.nid.eq.0) then
1214        if (maxit>=100) then
1215          write(6,*) "UNCONVERGED CG"
1216        endif
1217        if (dabs(er1)>1e-3) then
1218          write(6,*) "ERROR IS TOO LARGE"
1219        endif
1220      endif
1221
1222      nx      = nx1-1
1223      nnode   = nelgt            ! nnodes
1224      nnode   = nnode*(nx**ldim) ! nnodes
1225      nppp    = nnode/np         ! nnodes/proc
1226
1227      dofps   = nnode/telaps     ! DOF/sec - scalar form
1228      titers  = telaps/maxits    ! time per iteration
1229      tppp_s  = titers/nppp      ! time per iteraton per local point
1230
1231      if (nid.eq.0) write(6,1) 'case scalar:'
1232     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
1233
1234C     Solve libCEED algebraic RHS
1235      if (nid.eq.0) write (6,*) "libCEED algebraic RHS"
1236      maxit = 100
1237      tstart = dnekclock()
1238      call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
1239     $  vec_p1,vec_ap1,maxit,'bp3')
1240      tstop  = dnekclock()
1241
1242C     Output
1243      telaps = (tstop-tstart)
1244      maxits = maxit
1245      er1 = glrdif(u1,e1,n)
1246      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
1247
1248      if (test.eq.1.and.nid.eq.0) then
1249        if (maxit>=100) then
1250          write(6,*) "UNCONVERGED CG"
1251        endif
1252        if (dabs(er1)>1e-9) then
1253          write(6,*) "ERROR IS TOO LARGE"
1254        endif
1255      endif
1256
1257      nx      = nx1-1
1258      nnode   = nelgt            ! nnodes
1259      nnode   = nnode*(nx**ldim) ! nnodes
1260      nppp    = nnode/np         ! nnodes/proc
1261
1262      dofps   = nnode/telaps     ! DOF/sec - scalar form
1263      titers  = telaps/maxits    ! time per iteration
1264      tppp_s  = titers/nppp      ! time per iteraton per local point
1265
1266      if (nid.eq.0) write(6,1) 'case scalar:'
1267     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
1268
1269C     Solve Nek5000 algebraic RHS
1270      if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS"
1271      maxit = 100
1272      tstart = dnekclock()
1273      call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
1274     $  vec_p1,vec_ap1,maxit,'bp3')
1275      tstop  = dnekclock()
1276
1277C     Output
1278      telaps = (tstop-tstart)
1279      maxits = maxit
1280      er1 = glrdif(u1,e1,n)
1281      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
1282
1283      if (test.eq.1.and.nid.eq.0) then
1284        if (maxit>=100) then
1285          write(6,*) "UNCONVERGED CG"
1286        endif
1287        if (dabs(er1)>1e-9) then
1288          write(6,*) "ERROR IS TOO LARGE"
1289        endif
1290      endif
1291
1292      nx      = nx1-1
1293      nnode   = nelgt            ! nnodes
1294      nnode   = nnode*(nx**ldim) ! nnodes
1295      nppp    = nnode/np         ! nnodes/proc
1296
1297      dofps   = nnode/telaps     ! DOF/sec - scalar form
1298      titers  = telaps/maxits    ! time per iteration
1299      tppp_s  = titers/nppp      ! time per iteraton per local point
1300
1301      if (nid.eq.0) write(6,1) 'case scalar:'
1302     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
1303
1304    1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5)
1305    3 format(i3,i9,e12.4,1x,a8,i9)
1306
1307C     Destroy ceed handles
1308      call ceedvectordestroy(vec_p1,err)
1309      call ceedvectordestroy(vec_ap1,err)
1310      call ceedvectordestroy(vec_rhs,err)
1311      call ceedvectordestroy(vec_qdata,err)
1312      call ceedvectordestroy(vec_coords,err)
1313      call ceedelemrestrictiondestroy(erstrctu,err)
1314      call ceedelemrestrictiondestroy(erstrctx,err)
1315      call ceedelemrestrictiondestroy(erstrctw,err)
1316      call ceedbasisdestroy(basisu,err)
1317      call ceedbasisdestroy(basisx,err)
1318      call ceedqfunctiondestroy(qf_setup,err)
1319      call ceedqfunctiondestroy(qf_diffusion,err)
1320      call ceedoperatordestroy(op_setup,err)
1321      call ceedoperatordestroy(op_diffusion,err)
1322      call ceeddestroy(ceed,err)
1323
1324      return
1325      end
1326C-----------------------------------------------------------------------
1327      subroutine cggos(u1,r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,
1328     $  vec_ap1,maxit,bpname)
1329C     Scalar conjugate gradient iteration for solution of uncoupled
1330C     Helmholtz equations
1331C     Input: r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,vec_ap1,bpname
1332C     Output: u1,maxit
1333      include 'SIZE'
1334      include 'TOTAL'
1335      include 'DOMAIN'
1336      include 'FDMH1'
1337      character*3 bpname
1338
1339C     INPUT:  rhs1 - rhs
1340C             h1   - exact solution
1341
1342      parameter (lt=lx1*ly1*lz1*lelt)
1343      parameter (ld=lxd*lyd*lzd*lelt)
1344      real*8 u1(lt),r1(lt),h1(lt),h2(lt)
1345      real*8 rmult(1),binv(1)
1346      integer ceed,ceed_op,vec_ap1,vec_p1
1347      common /scrcg/ dpc(lt),p1(lt),z1(lt)
1348      common /scrca/ wv(4),wk(4),rpp1(4),rpp2(4),alph(4),beta(4),pap(4)
1349
1350      real*8 ap1(lt)
1351      equivalence (ap1,z1)
1352
1353      vol   = volfld(ifield)
1354      nel   = nelfld(ifield)
1355      nxyz  = lx1*ly1*lz1
1356      n     = nxyz*nel
1357      nx    = nx1-1                  ! Polynomial order (just for i/o)
1358
1359      tol=tin
1360
1361      if(bpname.ne.'bp1') then
1362        call setprecn_bp3(dpc,h1,h2) ! Set up diagional pre-conidtioner
1363      else
1364        call setprecn_bp1(dpc,h1,h2) ! Set up diagional pre-conidtioner
1365      endif
1366
1367      call rzero         (u1,n)      ! Initialize solution
1368
1369      wv(1)=0
1370      do i=1,n
1371         s=rmult(i)                  !      -1
1372         p1(i)=dpc(i)*r1(i)          ! p = M  r      T
1373         wv(1)=wv(1)+s*p1(i)*r1(i)   !              r p
1374      enddo
1375      call gop(wv(1),wk,'+  ',1)
1376      rpp1(1) = wv  (1)
1377
1378      do 1000 iter=1,maxit
1379         call axhm1_ceed (pap,ap1,p1,h1,h2,ceed,ceed_op,
1380     $     vec_ap1,vec_p1)
1381         call dssum    (ap1,nx1,ny1,nz1)
1382         if (bpname.ne.'bp1') call xmask1(ap1,h2,nel)
1383
1384         call gop      (pap,wk,'+  ',1)
1385         alph(1) = rpp1(1)/pap(1)
1386
1387         do i=1,n
1388            u1(i)=u1(i)+alph(1)* p1(i)
1389            r1(i)=r1(i)-alph(1)*ap1(i)
1390         enddo
1391
1392C        tolerance check here
1393         call rzero(wv,2)
1394         do i=1,n
1395            wv(1)=wv(1)+r1(i)*r1(i)            ! L2 error estimate
1396            z1(i)=dpc(i)*r1(i)                 ! z = M  r
1397            wv(2)=wv(2)+rmult(i)*z1(i)*r1(i)   ! r z
1398         enddo
1399         call gop(wv,wk,'+  ',2)
1400
1401C        if (nio.eq.0) write(6,1) ifield,istep,iter,nx,(wv(k),k=1,1)
1402  1     format(i2,i9,i5,i4,1p1e12.4,' cggos')
1403
1404         enorm=sqrt(wv(1))
1405         if (enorm.lt.tol) then
1406            ifin = iter
1407            if (nio.eq.0) write(6,3000) istep,ifin,enorm,tol
1408            goto 9999
1409         endif
1410C        if (nio.eq.0) write(6,2) iter,enorm,alph(1),pap(1),'alpha'
1411 2      format(i5,1p3e12.4,2x,a5)
1412
1413         rpp2(1)=rpp1(1)
1414         rpp1(1)=wv  (2)
1415         beta1  =rpp1(1)/rpp2(1)
1416         do i=1,n
1417            p1(i)=z1(i) + beta1*p1(i)
1418         enddo
1419
1420 1000 continue
1421
1422      rbnorm=sqrt(wv(1))
1423      if (nio.eq.0) write (6,3001) istep,iter,rbnorm,tol
1424      iter = iter-1
1425
1426 9999 continue
1427
1428      maxit=iter
1429
1430 3000 format(i12,1x,'cggo scalar:',i6,1p5e13.4)
1431 3001 format(2i6,' Unconverged cggo scalar: rbnorm =',1p2e13.6)
1432
1433      return
1434      end
1435C-----------------------------------------------------------------------
1436      subroutine axhm1_ceed(pap,ap1,p1,h1,h2,ceed,ceed_op,
1437     $  vec_ap1,vec_p1)
1438C     Vector conjugate gradient matvec for solution of uncoupled
1439C     Helmholtz equations
1440C     Input: pap,p1,h1,h2,bpname,ceed,ceed_op,vec_ap1,vec_p1
1441C     Output: ap1
1442      include 'SIZE'
1443      include 'TOTAL'
1444      include 'ceed/fortran.h'
1445
1446      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2))
1447      real*8       gf(lg,lx,lelt)             ! Equivalence new gf() data
1448      equivalence (gf,g1m1)                   ! layout to g1m1...g6m1
1449
1450      real*8   pap(3)
1451      real*8   ap1(lx,lelt)
1452      real*8    p1(lx,lelt)
1453      real*8    h1(lx,lelt),h2(lx,lelt)
1454      integer ceed,ceed_op,vec_ap1,vec_p1,err
1455      integer i,e
1456      integer*8 offset
1457
1458      offset=0
1459      call ceedvectorsetarray(vec_p1,ceed_mem_host,ceed_use_pointer,
1460     $  p1,offset,err)
1461      call ceedvectorsetarray(vec_ap1,ceed_mem_host,ceed_use_pointer,
1462     $  ap1,offset,err)
1463
1464      call ceedoperatorapply(ceed_op,vec_p1,vec_ap1,
1465     $  ceed_request_immediate,err)
1466
1467      call ceedvectortakearray(vec_p1,ceed_mem_host,0,offset,err)
1468      call ceedvectortakearray(vec_ap1,ceed_mem_host,0,offset,err)
1469
1470      pap(1)=0.
1471
1472      do e=1,nelt
1473         do i=1,lx
1474           pap(1)=pap(1)+ap1(i,e)*p1(i,e)
1475         enddo
1476      enddo
1477
1478      return
1479      end
1480C-----------------------------------------------------------------------
1481      subroutine ax_e_bp1(w,u,g,h1,h2,b,ju,us,ut)
1482C     Local matrix-vector for solution of BP3 (stiffness matrix)
1483C     Input: u,g,h1,h2,b,ju,us,ut   Output: w
1484      include 'SIZE'
1485      include 'TOTAL'
1486
1487      parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2))
1488      real*8 w(lxyz),u(lxyz),g(lg,lxyz),h1(lxyz),h2(lxyz),b(lxyz)
1489      real*8 ju(lxyz),us(lxyz),ut(lxyz)
1490
1491      nxq = nx1+1 ! Number of quadrature points
1492
1493      lxyzq = nxq**ldim
1494
1495      call intp_rstd (ju,u,lx1,nxq,if3d,0) ! 0 --> Fwd interpolation
1496      do i=1,lxyzq
1497         ju(i)=ju(i)*h2(i) !! h2 must be on the fine grid, w/ quad wts
1498      enddo
1499      call intp_rstd (w,ju,lx1,nxq,if3d,1) ! 1 --> ju-->u
1500
1501      return
1502      end
1503C-----------------------------------------------------------------------
1504      subroutine axhm1_bp1(pap,ap1,p1,h1,h2)
1505C     Vector conjugate gradient matvec for solution of BP1 (mass matrix)
1506C     Input: pap,p1,h1,h2           Output: ap1
1507      include 'SIZE'
1508      include 'TOTAL'
1509
1510      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2))
1511      real*8         gf(lg,lx,lelt)             ! Equivalence new gf() data
1512      equivalence (gf,g1m1)                     ! layout to g1m1...g6m1
1513
1514      real*8 pap(3)
1515      real*8 ap1(lx,lelt)
1516      real*8  p1(lx,lelt)
1517      real*8  h1(lx,lelt),h2(lx,lelt)
1518
1519      real*8 ur(lx),us(lx),ut(lx)
1520      common /ctmp1/ ur,us,ut
1521
1522      integer e
1523
1524      pap(1)=0.
1525
1526      k=1
1527      nxq = nx1+1
1528
1529      do e=1,nelt
1530
1531         call ax_e_bp1(ap1(1,e),p1(1,e),gf(1,1,e),h1(1,e),h2(k,1)
1532     $                                          ,bm1(1,1,1,e),ur,us,ut)
1533         do i=1,lx
1534           pap(1)=pap(1)+ap1(i,e)*p1(i,e)
1535         enddo
1536         k=k+nxq*nxq*nxq
1537
1538      enddo
1539
1540      return
1541      end
1542C-----------------------------------------------------------------------
1543      subroutine ax_e_bp3(w,u,g,ur,us,ut,wk)
1544C     Local matrix-vector for solution of BP3 (stiffness matrix)
1545C     Input: u,g,ur,us,ut,wk        Output: w
1546      include 'SIZE'
1547      include 'TOTAL'
1548
1549      parameter (lzq=lx1+1,lxyz=lx1*lx1*lx1,lxyzq=lzq*lzq*lzq)
1550
1551      common /ctmp0/ tmp(lxyzq)
1552      common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq)
1553
1554      real*8 ur(lxyzq),us(lxyzq),ut(lxyzq),wk(lxyzq)
1555      real*8 w(lxyz),u(lxyz),g(2*ldim,lxyzq)
1556
1557      n = lzq-1
1558
1559      call intp_rstd  (wk,u,lx1,lzq,if3d,0) ! 0 --> Fwd interpolation
1560      call loc_grad3  (ur,us,ut,wk,n,dxmq,dxtmq)
1561
1562      do i=1,lxyzq
1563         wr = g(1,i)*ur(i) + g(2,i)*us(i) + g(3,i)*ut(i)
1564         ws = g(2,i)*ur(i) + g(4,i)*us(i) + g(5,i)*ut(i)
1565         wt = g(3,i)*ur(i) + g(5,i)*us(i) + g(6,i)*ut(i)
1566         ur(i) = wr
1567         us(i) = ws
1568         ut(i) = wt
1569      enddo
1570
1571      call loc_grad3t (wk,ur,us,ut,n,dxmq,dxtmq,tmp)
1572      call intp_rstd  (w,wk,lx1,lzq,if3d,1) ! 1 --> ju-->u
1573
1574      return
1575      end
1576C-----------------------------------------------------------------------
1577      subroutine axhm1_bp3(pap,ap1,p1,h1,h2)
1578C     Vector conjugate gradient matvec for solution of BP3 (stiffness matrix)
1579C     Input: pap,p1,h1,h2           Output: ap1
1580      include 'SIZE'
1581      include 'TOTAL'
1582
1583      parameter (lzq=lx1+1)
1584      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
1585      common /bpgfactors/ gf(lg,lq,lelt),bmq(lq,lelt),w3mq(lq)
1586
1587      real*8 pap(3)
1588      real*8 ap1(lx,lelt)
1589      real*8  p1(lx,lelt)
1590      real*8  h1(lx,lelt),h2(lx,lelt)
1591
1592      common /ctmp1/ ur,us,ut,wk
1593      real*8 ur(lq),us(lq),ut(lq),wk(lq)
1594
1595      integer e
1596
1597      pap(1)=0.
1598
1599      do e=1,nelt
1600
1601         call ax_e_bp3(ap1(1,e),p1(1,e),gf(1,1,e),ur,us,ut,wk)
1602         do i=1,lx
1603           pap(1)=pap(1)+p1(i,e)*ap1(i,e)
1604         enddo
1605
1606      enddo
1607
1608      return
1609      end
1610C-----------------------------------------------------------------------
1611      subroutine axhm1(pap,ap1,p1,h1,h2,bpname)
1612C     Vector conjugate gradient matvec for solution of uncoupled
1613C     Helmholtz equations
1614C     Input: pap,p1,h1,h2,bpname    Output: ap1
1615      include 'SIZE'
1616      include 'TOTAL'
1617
1618      parameter (lx=lx1*ly1*lz1)
1619
1620      real*8 pap(3),ap1(lx,lelt),p1(lx,lelt)
1621      real*8 h1(lx,lelt),h2(lx,lelt)
1622
1623      character*3 bpname
1624
1625      if (bpname.eq.'bp1') then
1626         call axhm1_bp1(pap,ap1,p1,h1,h2)
1627
1628      elseif (bpname.eq.'bp3') then
1629         call axhm1_bp3(pap,ap1,p1,h1,h2)
1630
1631      else
1632         write(6,*) bpname,' axhm1 bpname error'
1633         stop
1634
1635      endif
1636
1637      return
1638      end
1639C-----------------------------------------------------------------------
1640      subroutine get_bp(bp)
1641C     Get BP to run
1642C     Input:                        Output: bp
1643      integer i,bp
1644      character*64 bpval
1645
1646      bp=0
1647      if(iargc().ge.1) then
1648        call getarg(1,bpval)
1649      endif
1650      if(bpval.eq."bp1") then
1651        bp=1
1652      elseif(bpval.eq."bp3") then
1653        bp=3
1654      endif
1655
1656      return
1657      end
1658C-----------------------------------------------------------------------
1659      subroutine get_spec(spec)
1660C     Get CEED backend specification
1661C     Input:                        Output: spec
1662      integer i
1663      character*64 spec
1664
1665      spec = '/cpu/self'
1666      if(iargc().ge.2) then
1667        call getarg(2,spec)
1668      endif
1669
1670      return
1671      end
1672C-----------------------------------------------------------------------
1673      subroutine get_test(test)
1674C     Get test mode flag
1675C     Input:                        Output: test
1676      integer i,test
1677      character*64 testval
1678
1679      test=0
1680      if(iargc().ge.3) then
1681        call getarg(3,testval)
1682      endif
1683      if(testval.eq."test") then
1684        test=1
1685      endif
1686
1687      return
1688      end
1689C-----------------------------------------------------------------------
1690
1691