xref: /libCEED/examples/nek/bps/bps.usr (revision 86a4271f02baafab377de162285a9b890a25166a)
1*86a4271fSThilina RathnayakeC Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
2*86a4271fSThilina RathnayakeC the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
3*86a4271fSThilina RathnayakeC reserved. See files LICENSE and NOTICE for details.
4*86a4271fSThilina RathnayakeC
5*86a4271fSThilina RathnayakeC This file is part of CEED, a collection of benchmarks, miniapps, software
6*86a4271fSThilina RathnayakeC libraries and APIs for efficient high-order finite element and spectral
7*86a4271fSThilina RathnayakeC element discretizations for exascale applications. For more information and
8*86a4271fSThilina RathnayakeC source code availability see http://github.com/ceed.
9*86a4271fSThilina RathnayakeC
10*86a4271fSThilina RathnayakeC The CEED research is supported by the Exascale Computing Project (17-SC-20-SC)
11*86a4271fSThilina RathnayakeC a collaborative effort of two U.S. Department of Energy organizations (Office
12*86a4271fSThilina RathnayakeC of Science and the National Nuclear Security Administration) responsible for
13*86a4271fSThilina RathnayakeC the planning and preparation of a capable exascale ecosystem, including
14*86a4271fSThilina RathnayakeC software, applications, hardware, advanced system engineering and early
15*86a4271fSThilina RathnayakeC testbed platforms, in support of the nation's exascale computing imperative.
16*86a4271fSThilina Rathnayake
17*86a4271fSThilina RathnayakeC> @file
18*86a4271fSThilina RathnayakeC> Mass and diffusion operators examples using Nek5000
19*86a4271fSThilina RathnayakeC TESTARGS -c {ceed_resource} -e bp1 -n 1 -b 4 -test
20*86a4271fSThilina Rathnayake
21*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
22*86a4271fSThilina Rathnayake      subroutine masssetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
23*86a4271fSThilina Rathnayake     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
24*86a4271fSThilina Rathnayake     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
25*86a4271fSThilina RathnayakeC     Set up mass operator
26*86a4271fSThilina RathnayakeC     Input: u1,u2,u3,q             Output: v1,v2,ierr
27*86a4271fSThilina Rathnayake      integer q,ierr
28*86a4271fSThilina Rathnayake      real*8 ctx(1)
29*86a4271fSThilina Rathnayake      real*8 u1(3*q)
30*86a4271fSThilina Rathnayake      real*8 u2(9*q)
31*86a4271fSThilina Rathnayake      real*8 u3(q)
32*86a4271fSThilina Rathnayake      real*8 v1(q)
33*86a4271fSThilina Rathnayake      real*8 v2(q)
34*86a4271fSThilina Rathnayake      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
35*86a4271fSThilina Rathnayake      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
36*86a4271fSThilina Rathnayake      real*8 jacmq
37*86a4271fSThilina Rathnayake
38*86a4271fSThilina Rathnayake      do i=1,q
39*86a4271fSThilina Rathnayake        a11=u2(i+q*0)
40*86a4271fSThilina Rathnayake        a12=u2(i+q*3)
41*86a4271fSThilina Rathnayake        a13=u2(i+q*6)
42*86a4271fSThilina Rathnayake
43*86a4271fSThilina Rathnayake        a21=u2(i+q*1)
44*86a4271fSThilina Rathnayake        a22=u2(i+q*4)
45*86a4271fSThilina Rathnayake        a23=u2(i+q*7)
46*86a4271fSThilina Rathnayake
47*86a4271fSThilina Rathnayake        a31=u2(i+q*2)
48*86a4271fSThilina Rathnayake        a32=u2(i+q*5)
49*86a4271fSThilina Rathnayake        a33=u2(i+q*8)
50*86a4271fSThilina Rathnayake
51*86a4271fSThilina Rathnayake        g11 = (a22*a33-a23*a32)
52*86a4271fSThilina Rathnayake        g12 = (a13*a32-a33*a12)
53*86a4271fSThilina Rathnayake        g13 = (a12*a23-a22*a13)
54*86a4271fSThilina Rathnayake
55*86a4271fSThilina Rathnayake        g21 = (a23*a31-a21*a33)
56*86a4271fSThilina Rathnayake        g22 = (a11*a33-a31*a13)
57*86a4271fSThilina Rathnayake        g23 = (a13*a21-a23*a11)
58*86a4271fSThilina Rathnayake
59*86a4271fSThilina Rathnayake        g31 = (a21*a32-a22*a31)
60*86a4271fSThilina Rathnayake        g32 = (a12*a31-a32*a11)
61*86a4271fSThilina Rathnayake        g33 = (a11*a22-a21*a12)
62*86a4271fSThilina Rathnayake
63*86a4271fSThilina Rathnayake        jacmq = a11*g11+a21*g12+a31*g13
64*86a4271fSThilina Rathnayake
65*86a4271fSThilina RathnayakeC       Rho
66*86a4271fSThilina Rathnayake        v1(i)=u3(i)*jacmq
67*86a4271fSThilina Rathnayake
68*86a4271fSThilina RathnayakeC       RHS
69*86a4271fSThilina Rathnayake        v2(i)=u3(i)*jacmq
70*86a4271fSThilina Rathnayake     $             *dsqrt(u1(i+q*0)*u1(i+q*0)
71*86a4271fSThilina Rathnayake     $                   +u1(i+q*1)*u1(i+q*1)
72*86a4271fSThilina Rathnayake     $                   +u1(i+q*2)*u1(i+q*2))
73*86a4271fSThilina Rathnayake      enddo
74*86a4271fSThilina Rathnayake
75*86a4271fSThilina Rathnayake      ierr=0
76*86a4271fSThilina Rathnayake      end
77*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
78*86a4271fSThilina Rathnayake      subroutine massf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
79*86a4271fSThilina Rathnayake     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
80*86a4271fSThilina Rathnayake     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
81*86a4271fSThilina RathnayakeC     Apply mass operator
82*86a4271fSThilina RathnayakeC     Input: u1,u2,q                Output: v1,ierr
83*86a4271fSThilina Rathnayake      integer q,ierr
84*86a4271fSThilina Rathnayake      real*8 ctx(1)
85*86a4271fSThilina Rathnayake      real*8 u1(q)
86*86a4271fSThilina Rathnayake      real*8 u2(q)
87*86a4271fSThilina Rathnayake      real*8 v1(q)
88*86a4271fSThilina Rathnayake
89*86a4271fSThilina Rathnayake      do i=1,q
90*86a4271fSThilina Rathnayake        v1(i)=u2(i)*u1(i)
91*86a4271fSThilina Rathnayake      enddo
92*86a4271fSThilina Rathnayake
93*86a4271fSThilina Rathnayake      ierr=0
94*86a4271fSThilina Rathnayake      end
95*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
96*86a4271fSThilina Rathnayake      subroutine diffsetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
97*86a4271fSThilina Rathnayake     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
98*86a4271fSThilina Rathnayake     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
99*86a4271fSThilina RathnayakeC     Set up diffusion operator
100*86a4271fSThilina RathnayakeC     Input: u1,u2,u3,q             Output: v1,v2,ierr
101*86a4271fSThilina Rathnayake      integer q,ierr
102*86a4271fSThilina Rathnayake      real*8 ctx(1)
103*86a4271fSThilina Rathnayake      real*8 u1(3*q)
104*86a4271fSThilina Rathnayake      real*8 u2(9*q)
105*86a4271fSThilina Rathnayake      real*8 u3(q)
106*86a4271fSThilina Rathnayake      real*8 v1(6*q)
107*86a4271fSThilina Rathnayake      real*8 v2(q)
108*86a4271fSThilina Rathnayake      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
109*86a4271fSThilina Rathnayake      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
110*86a4271fSThilina Rathnayake      real*8 jacmq,scl
111*86a4271fSThilina Rathnayake      real*8 c(3),k(3)
112*86a4271fSThilina Rathnayake
113*86a4271fSThilina Rathnayake      do i=1,q
114*86a4271fSThilina Rathnayake        pi = 3.14159265358979323846
115*86a4271fSThilina Rathnayake
116*86a4271fSThilina Rathnayake        c(1)=0.
117*86a4271fSThilina Rathnayake        c(2)=1.
118*86a4271fSThilina Rathnayake        c(3)=2.
119*86a4271fSThilina Rathnayake        k(1)=1.
120*86a4271fSThilina Rathnayake        k(2)=2.
121*86a4271fSThilina Rathnayake        k(3)=3.
122*86a4271fSThilina Rathnayake
123*86a4271fSThilina Rathnayake        a11=u2(i+q*0)
124*86a4271fSThilina Rathnayake        a12=u2(i+q*3)
125*86a4271fSThilina Rathnayake        a13=u2(i+q*6)
126*86a4271fSThilina Rathnayake
127*86a4271fSThilina Rathnayake        a21=u2(i+q*1)
128*86a4271fSThilina Rathnayake        a22=u2(i+q*4)
129*86a4271fSThilina Rathnayake        a23=u2(i+q*7)
130*86a4271fSThilina Rathnayake
131*86a4271fSThilina Rathnayake        a31=u2(i+q*2)
132*86a4271fSThilina Rathnayake        a32=u2(i+q*5)
133*86a4271fSThilina Rathnayake        a33=u2(i+q*8)
134*86a4271fSThilina Rathnayake
135*86a4271fSThilina Rathnayake        g11 = (a22*a33-a23*a32)
136*86a4271fSThilina Rathnayake        g12 = (a13*a32-a33*a12)
137*86a4271fSThilina Rathnayake        g13 = (a12*a23-a22*a13)
138*86a4271fSThilina Rathnayake
139*86a4271fSThilina Rathnayake        g21 = (a23*a31-a21*a33)
140*86a4271fSThilina Rathnayake        g22 = (a11*a33-a31*a13)
141*86a4271fSThilina Rathnayake        g23 = (a13*a21-a23*a11)
142*86a4271fSThilina Rathnayake
143*86a4271fSThilina Rathnayake        g31 = (a21*a32-a22*a31)
144*86a4271fSThilina Rathnayake        g32 = (a12*a31-a32*a11)
145*86a4271fSThilina Rathnayake        g33 = (a11*a22-a21*a12)
146*86a4271fSThilina Rathnayake
147*86a4271fSThilina Rathnayake        jacmq = a11*g11+a21*g12+a31*g13
148*86a4271fSThilina Rathnayake
149*86a4271fSThilina Rathnayake        scl = u3(i)/jacmq
150*86a4271fSThilina Rathnayake
151*86a4271fSThilina RathnayakeC       Geometric factors
152*86a4271fSThilina Rathnayake        v1(i+0*q) = scl*(g11*g11+g12*g12+g13*g13) ! Grr
153*86a4271fSThilina Rathnayake        v1(i+1*q) = scl*(g11*g21+g12*g22+g13*g23) ! Grs
154*86a4271fSThilina Rathnayake        v1(i+2*q) = scl*(g11*g31+g12*g32+g13*g33) ! Grt
155*86a4271fSThilina Rathnayake        v1(i+3*q) = scl*(g21*g21+g22*g22+g23*g23) ! Gss
156*86a4271fSThilina Rathnayake        v1(i+4*q) = scl*(g21*g31+g22*g32+g23*g33) ! Gst
157*86a4271fSThilina Rathnayake        v1(i+5*q) = scl*(g31*g31+g32*g32+g33*g33) ! Gtt
158*86a4271fSThilina Rathnayake
159*86a4271fSThilina RathnayakeC       RHS
160*86a4271fSThilina Rathnayake        v2(i) = u3(i)*jacmq*pi*pi
161*86a4271fSThilina Rathnayake     $            *dsin(pi*(c(1)+k(1)*u1(i+0*q)))
162*86a4271fSThilina Rathnayake     $            *dsin(pi*(c(2)+k(2)*u1(i+1*q)))
163*86a4271fSThilina Rathnayake     $            *dsin(pi*(c(3)+k(3)*u1(i+2*q)))
164*86a4271fSThilina Rathnayake     $            *(k(1)*k(1)+k(2)*k(2)+k(3)*k(3))
165*86a4271fSThilina Rathnayake
166*86a4271fSThilina Rathnayake      enddo
167*86a4271fSThilina Rathnayake
168*86a4271fSThilina Rathnayake      ierr=0
169*86a4271fSThilina Rathnayake      end
170*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
171*86a4271fSThilina Rathnayake      subroutine diffusionf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
172*86a4271fSThilina Rathnayake     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
173*86a4271fSThilina Rathnayake     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
174*86a4271fSThilina RathnayakeC     Apply diffusion operator
175*86a4271fSThilina RathnayakeC     Input: u1,u2,q                Output: v1,ierr
176*86a4271fSThilina Rathnayake      integer q,ierr
177*86a4271fSThilina Rathnayake      real*8 ctx(1)
178*86a4271fSThilina Rathnayake      real*8 u1(3*q)
179*86a4271fSThilina Rathnayake      real*8 u2(6*q)
180*86a4271fSThilina Rathnayake      real*8 v1(3*q)
181*86a4271fSThilina Rathnayake
182*86a4271fSThilina Rathnayake      do i=1,q
183*86a4271fSThilina Rathnayake        v1(i+0*q)=
184*86a4271fSThilina Rathnayake     $     u2(i+0*q)*u1(i)+u2(i+1*q)*u1(i+q)+u2(i+2*q)*u1(i+2*q)
185*86a4271fSThilina Rathnayake        v1(i+1*q)=
186*86a4271fSThilina Rathnayake     $     u2(i+1*q)*u1(i)+u2(i+3*q)*u1(i+q)+u2(i+4*q)*u1(i+2*q)
187*86a4271fSThilina Rathnayake        v1(i+2*q)=
188*86a4271fSThilina Rathnayake     $     u2(i+2*q)*u1(i)+u2(i+4*q)*u1(i+q)+u2(i+5*q)*u1(i+2*q)
189*86a4271fSThilina Rathnayake      enddo
190*86a4271fSThilina Rathnayake
191*86a4271fSThilina Rathnayake      ierr=0
192*86a4271fSThilina Rathnayake      end
193*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
194*86a4271fSThilina Rathnayake      subroutine set_h2_as_rhoJac_GL(h2,bmq,nxq)
195*86a4271fSThilina RathnayakeC     Set h2 as rhoJac
196*86a4271fSThilina RathnayakeC     Input: bmq,nxq                Output: h2
197*86a4271fSThilina Rathnayake      include 'SIZE'
198*86a4271fSThilina Rathnayake      real*8 h2(1),bmq(1)
199*86a4271fSThilina Rathnayake
200*86a4271fSThilina Rathnayake      common /ctmp77/ wd(lxd),zd(lxd)
201*86a4271fSThilina Rathnayake      integer e,i,L
202*86a4271fSThilina Rathnayake
203*86a4271fSThilina Rathnayake      call zwgl(zd,wd,nxq)  ! nxq = number of points
204*86a4271fSThilina Rathnayake
205*86a4271fSThilina Rathnayake      q = 1.0               ! Later, this can be a function of position...
206*86a4271fSThilina Rathnayake
207*86a4271fSThilina Rathnayake      L = 0
208*86a4271fSThilina Rathnayake      do e=1,lelt
209*86a4271fSThilina Rathnayake      do i=1,nxq**ldim
210*86a4271fSThilina Rathnayake         L=L+1
211*86a4271fSThilina Rathnayake         h2(L) = q*bmq(L)
212*86a4271fSThilina Rathnayake      enddo
213*86a4271fSThilina Rathnayake      enddo
214*86a4271fSThilina Rathnayake
215*86a4271fSThilina Rathnayake      return
216*86a4271fSThilina Rathnayake      end
217*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
218*86a4271fSThilina Rathnayake      subroutine dist_fld_h1(e)
219*86a4271fSThilina RathnayakeC     Set distance initial condition for BP1
220*86a4271fSThilina RathnayakeC     Input:                        Output: e
221*86a4271fSThilina Rathnayake      include 'SIZE'
222*86a4271fSThilina Rathnayake      include 'TOTAL'
223*86a4271fSThilina Rathnayake      real*8 x,y,z
224*86a4271fSThilina Rathnayake      real*8 e(1)
225*86a4271fSThilina Rathnayake
226*86a4271fSThilina Rathnayake      n=lx1*ly1*lz1*nelt
227*86a4271fSThilina Rathnayake
228*86a4271fSThilina Rathnayake      do i=1,n
229*86a4271fSThilina Rathnayake        x=xm1(i,1,1,1)
230*86a4271fSThilina Rathnayake        y=ym1(i,1,1,1)
231*86a4271fSThilina Rathnayake        z=zm1(i,1,1,1)
232*86a4271fSThilina Rathnayake
233*86a4271fSThilina Rathnayake        e(i) = dsqrt(x*x+y*y+z*z)
234*86a4271fSThilina Rathnayake      enddo
235*86a4271fSThilina Rathnayake
236*86a4271fSThilina Rathnayake      call dsavg(e)  ! This is requisite for random fields
237*86a4271fSThilina Rathnayake
238*86a4271fSThilina Rathnayake      return
239*86a4271fSThilina Rathnayake      end
240*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
241*86a4271fSThilina Rathnayake      subroutine sin_fld_h1(e)
242*86a4271fSThilina RathnayakeC     Set sine initial condition for BP3
243*86a4271fSThilina RathnayakeC     Input:                        Output: e
244*86a4271fSThilina Rathnayake      include 'SIZE'
245*86a4271fSThilina Rathnayake      include 'TOTAL'
246*86a4271fSThilina Rathnayake      real*8 x,y,z
247*86a4271fSThilina Rathnayake      real*8 e(1)
248*86a4271fSThilina Rathnayake      real*8 c(3),k(3)
249*86a4271fSThilina Rathnayake
250*86a4271fSThilina Rathnayake      n=lx1*ly1*lz1*nelt
251*86a4271fSThilina Rathnayake      pi = 3.14159265358979323846
252*86a4271fSThilina Rathnayake
253*86a4271fSThilina Rathnayake      c(1)=0.
254*86a4271fSThilina Rathnayake      c(2)=1.
255*86a4271fSThilina Rathnayake      c(3)=2.
256*86a4271fSThilina Rathnayake      k(1)=1.
257*86a4271fSThilina Rathnayake      k(2)=2.
258*86a4271fSThilina Rathnayake      k(3)=3.
259*86a4271fSThilina Rathnayake
260*86a4271fSThilina Rathnayake      do i=1,n
261*86a4271fSThilina Rathnayake        x=xm1(i,1,1,1)
262*86a4271fSThilina Rathnayake        y=ym1(i,1,1,1)
263*86a4271fSThilina Rathnayake        z=zm1(i,1,1,1)
264*86a4271fSThilina Rathnayake
265*86a4271fSThilina Rathnayake        e(i) = dsin(pi*(c(1)+k(1)*x))
266*86a4271fSThilina Rathnayake     &        *dsin(pi*(c(2)+k(2)*y))
267*86a4271fSThilina Rathnayake     &        *dsin(pi*(c(3)+k(3)*z))
268*86a4271fSThilina Rathnayake
269*86a4271fSThilina Rathnayake      enddo
270*86a4271fSThilina Rathnayake
271*86a4271fSThilina Rathnayake      call dsavg(e)  ! This is requisite for random fields
272*86a4271fSThilina Rathnayake
273*86a4271fSThilina Rathnayake      return
274*86a4271fSThilina Rathnayake      end
275*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
276*86a4271fSThilina Rathnayake      subroutine uservp(ix,iy,iz,eg) ! set variable properties
277*86a4271fSThilina Rathnayake      include 'SIZE'
278*86a4271fSThilina Rathnayake      include 'TOTAL'
279*86a4271fSThilina Rathnayake      include 'NEKUSE'
280*86a4271fSThilina Rathnayake      integer e,f,eg
281*86a4271fSThilina RathnayakeC     e = gllel(eg)
282*86a4271fSThilina Rathnayake
283*86a4271fSThilina Rathnayake      udiff  = 0.0
284*86a4271fSThilina Rathnayake      utrans = 0.0
285*86a4271fSThilina Rathnayake
286*86a4271fSThilina Rathnayake      return
287*86a4271fSThilina Rathnayake      end
288*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
289*86a4271fSThilina Rathnayake      subroutine userf(ix,iy,iz,eg) ! set acceleration term
290*86a4271fSThilina RathnayakeC
291*86a4271fSThilina RathnayakeC     Note: this is an acceleration term, NOT a force!
292*86a4271fSThilina RathnayakeC     Thus, ffx will subsequently be multiplied by rho(x,t).
293*86a4271fSThilina RathnayakeC
294*86a4271fSThilina Rathnayake      include 'SIZE'
295*86a4271fSThilina Rathnayake      include 'TOTAL'
296*86a4271fSThilina Rathnayake      include 'NEKUSE'
297*86a4271fSThilina Rathnayake      integer e,f,eg
298*86a4271fSThilina RathnayakeC     e = gllel(eg)
299*86a4271fSThilina Rathnayake
300*86a4271fSThilina Rathnayake      ffx = 0.0
301*86a4271fSThilina Rathnayake      ffy = 0.0
302*86a4271fSThilina Rathnayake      ffz = 0.0
303*86a4271fSThilina Rathnayake
304*86a4271fSThilina Rathnayake      return
305*86a4271fSThilina Rathnayake      end
306*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
307*86a4271fSThilina Rathnayake      subroutine userq(i,j,k,eg) ! set source term
308*86a4271fSThilina Rathnayake      include 'SIZE'
309*86a4271fSThilina Rathnayake      include 'TOTAL'
310*86a4271fSThilina Rathnayake      include 'NEKUSE'
311*86a4271fSThilina Rathnayake      integer e,f,eg
312*86a4271fSThilina Rathnayake      e = gllel(eg)
313*86a4271fSThilina Rathnayake
314*86a4271fSThilina Rathnayake      qvol   = 0
315*86a4271fSThilina Rathnayake
316*86a4271fSThilina Rathnayake      return
317*86a4271fSThilina Rathnayake      end
318*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
319*86a4271fSThilina Rathnayake      subroutine userbc(ix,iy,iz,f,eg) ! set up boundary conditions
320*86a4271fSThilina RathnayakeC     NOTE ::: This subroutine MAY NOT be called by every process
321*86a4271fSThilina Rathnayake      include 'SIZE'
322*86a4271fSThilina Rathnayake      include 'TOTAL'
323*86a4271fSThilina Rathnayake      include 'NEKUSE'
324*86a4271fSThilina Rathnayake      integer e,f,eg
325*86a4271fSThilina Rathnayake
326*86a4271fSThilina Rathnayake      ux   = 0.0
327*86a4271fSThilina Rathnayake      uy   = 0.0
328*86a4271fSThilina Rathnayake      uz   = 0.0
329*86a4271fSThilina Rathnayake      temp = 0.0
330*86a4271fSThilina Rathnayake
331*86a4271fSThilina Rathnayake      return
332*86a4271fSThilina Rathnayake      end
333*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
334*86a4271fSThilina Rathnayake      subroutine useric(ix,iy,iz,eg) ! set up initial conditions
335*86a4271fSThilina Rathnayake      include 'SIZE'
336*86a4271fSThilina Rathnayake      include 'TOTAL'
337*86a4271fSThilina Rathnayake      include 'NEKUSE'
338*86a4271fSThilina Rathnayake      integer e,f,eg
339*86a4271fSThilina Rathnayake
340*86a4271fSThilina Rathnayake      ux   = 0.0
341*86a4271fSThilina Rathnayake      uy   = 0.0
342*86a4271fSThilina Rathnayake      uz   = 0.0
343*86a4271fSThilina Rathnayake      temp = 0.0
344*86a4271fSThilina Rathnayake
345*86a4271fSThilina Rathnayake      return
346*86a4271fSThilina Rathnayake      end
347*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
348*86a4271fSThilina Rathnayake      subroutine usrdat   ! This routine to modify element vertices
349*86a4271fSThilina Rathnayake      include 'SIZE'
350*86a4271fSThilina Rathnayake      include 'TOTAL'
351*86a4271fSThilina Rathnayake
352*86a4271fSThilina Rathnayake      return
353*86a4271fSThilina Rathnayake      end
354*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
355*86a4271fSThilina Rathnayake      subroutine usrdat2  ! This routine to modify mesh coordinates
356*86a4271fSThilina Rathnayake      include 'SIZE'
357*86a4271fSThilina Rathnayake      include 'TOTAL'
358*86a4271fSThilina Rathnayake
359*86a4271fSThilina Rathnayake      x0 = 0
360*86a4271fSThilina Rathnayake      x1 = 1
361*86a4271fSThilina Rathnayake      call rescale_x(xm1,x0,x1)
362*86a4271fSThilina Rathnayake      call rescale_x(ym1,x0,x1)
363*86a4271fSThilina Rathnayake      call rescale_x(zm1,x0,x1)
364*86a4271fSThilina Rathnayake
365*86a4271fSThilina Rathnayake      param(59)=1  ! Force Nek to use the "deformed element" formulation
366*86a4271fSThilina Rathnayake
367*86a4271fSThilina Rathnayake      return
368*86a4271fSThilina Rathnayake      end
369*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
370*86a4271fSThilina Rathnayake      subroutine usrdat3
371*86a4271fSThilina Rathnayake      include 'SIZE'
372*86a4271fSThilina Rathnayake      include 'TOTAL'
373*86a4271fSThilina Rathnayake
374*86a4271fSThilina Rathnayake      return
375*86a4271fSThilina Rathnayake      end
376*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
377*86a4271fSThilina Rathnayake      subroutine xmask1   (r1,h2,nel)
378*86a4271fSThilina RathnayakeC     Apply zero Dirichlet boundary conditions
379*86a4271fSThilina RathnayakeC     Input: h2,nel                 Output: r1
380*86a4271fSThilina Rathnayake      include 'SIZE'
381*86a4271fSThilina Rathnayake      include 'TOTAL'
382*86a4271fSThilina Rathnayake      real*8 r1(1),h2(1)
383*86a4271fSThilina Rathnayake
384*86a4271fSThilina Rathnayake      n=nx1*ny1*nz1*nel
385*86a4271fSThilina Rathnayake      do i=1,n
386*86a4271fSThilina Rathnayake         r1(i)=r1(i)*h2(i)
387*86a4271fSThilina Rathnayake      enddo
388*86a4271fSThilina Rathnayake
389*86a4271fSThilina Rathnayake      return
390*86a4271fSThilina Rathnayake      end
391*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
392*86a4271fSThilina Rathnayake      function glrdif(x,y,n)
393*86a4271fSThilina RathnayakeC     Compute Linfty norm of (x-y)
394*86a4271fSThilina RathnayakeC     Input: x,y                    Output: n
395*86a4271fSThilina Rathnayake      real*8 x(n),y(n)
396*86a4271fSThilina Rathnayake
397*86a4271fSThilina Rathnayake      dmx=0
398*86a4271fSThilina Rathnayake      xmx=0
399*86a4271fSThilina Rathnayake      ymx=0
400*86a4271fSThilina Rathnayake
401*86a4271fSThilina Rathnayake      do i=1,n
402*86a4271fSThilina Rathnayake         diff=abs(x(i)-y(i))
403*86a4271fSThilina Rathnayake         dmx =max(dmx,diff)
404*86a4271fSThilina Rathnayake         xmx =max(xmx,x(i))
405*86a4271fSThilina Rathnayake         ymx =max(ymx,y(i))
406*86a4271fSThilina Rathnayake      enddo
407*86a4271fSThilina Rathnayake
408*86a4271fSThilina Rathnayake      xmx = max(xmx,ymx)
409*86a4271fSThilina Rathnayake      dmx = glmax(dmx,1) ! max across processors
410*86a4271fSThilina Rathnayake      xmx = glmax(xmx,1)
411*86a4271fSThilina Rathnayake
412*86a4271fSThilina Rathnayake      if (xmx.gt.0) then
413*86a4271fSThilina Rathnayake         glrdif = dmx/xmx
414*86a4271fSThilina Rathnayake      else
415*86a4271fSThilina Rathnayake         glrdif = -dmx   ! Negative indicates something strange
416*86a4271fSThilina Rathnayake      endif
417*86a4271fSThilina Rathnayake
418*86a4271fSThilina Rathnayake      return
419*86a4271fSThilina Rathnayake      end
420*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
421*86a4271fSThilina Rathnayake      subroutine loc_grad3(ur,us,ut,u,N,D,Dt)
422*86a4271fSThilina RathnayakeC     3D transpose of local gradient
423*86a4271fSThilina RathnayakeC     Input: u,N,D,Dt               Output: ur,us,ut
424*86a4271fSThilina Rathnayake      real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N)
425*86a4271fSThilina Rathnayake      real*8 u (0:N,0:N,0:N)
426*86a4271fSThilina Rathnayake      real*8 D (0:N,0:N),Dt(0:N,0:N)
427*86a4271fSThilina Rathnayake
428*86a4271fSThilina Rathnayake      m1 = N+1
429*86a4271fSThilina Rathnayake      m2 = m1*m1
430*86a4271fSThilina Rathnayake
431*86a4271fSThilina Rathnayake      call mxm(D ,m1,u(0,0,0),m1,ur,m2)
432*86a4271fSThilina Rathnayake      do k=0,N
433*86a4271fSThilina Rathnayake         call mxm(u(0,0,k),m1,Dt,m1,us(0,0,k),m1)
434*86a4271fSThilina Rathnayake      enddo
435*86a4271fSThilina Rathnayake      call mxm(u(0,0,0),m2,Dt,m1,ut,m1)
436*86a4271fSThilina Rathnayake
437*86a4271fSThilina Rathnayake      return
438*86a4271fSThilina Rathnayake      end
439*86a4271fSThilina Rathnayakec-----------------------------------------------------------------------
440*86a4271fSThilina Rathnayake      subroutine loc_grad3t(u,ur,us,ut,N,D,Dt,w)
441*86a4271fSThilina RathnayakeC     3D transpose of local gradient
442*86a4271fSThilina RathnayakeC     Input: ur,us,ut,N,D,Dt        Output: u
443*86a4271fSThilina Rathnayake       real*8 u (0:N,0:N,0:N)
444*86a4271fSThilina Rathnayake       real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N)
445*86a4271fSThilina Rathnayake       real*8 D (0:N,0:N),Dt(0:N,0:N)
446*86a4271fSThilina Rathnayake       real*8 w (0:N,0:N,0:N)
447*86a4271fSThilina Rathnayake
448*86a4271fSThilina Rathnayake       m1 = N+1
449*86a4271fSThilina Rathnayake       m2 = m1*m1
450*86a4271fSThilina Rathnayake       m3 = m1*m1*m1
451*86a4271fSThilina Rathnayake
452*86a4271fSThilina Rathnayake       call mxm(Dt,m1,ur,m1,u(0,0,0),m2)
453*86a4271fSThilina Rathnayake       do k=0,N
454*86a4271fSThilina Rathnayake          call mxm(us(0,0,k),m1,D ,m1,w(0,0,k),m1)
455*86a4271fSThilina Rathnayake       enddo
456*86a4271fSThilina Rathnayake       call add2(u(0,0,0),w,m3)
457*86a4271fSThilina Rathnayake       call mxm(ut,m2,D ,m1,w,m1)
458*86a4271fSThilina Rathnayake       call add2(u(0,0,0),w,m3)
459*86a4271fSThilina Rathnayake
460*86a4271fSThilina Rathnayake      return
461*86a4271fSThilina Rathnayake      end
462*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
463*86a4271fSThilina Rathnayake      subroutine geodatq(gf,bmq,w3mq,nxq)
464*86a4271fSThilina RathnayakeC     Routine to generate elemental geometric matrices on mesh 1
465*86a4271fSThilina RathnayakeC     (Gauss-Legendre Lobatto mesh).
466*86a4271fSThilina Rathnayake      include 'SIZE'
467*86a4271fSThilina Rathnayake      include 'TOTAL'
468*86a4271fSThilina Rathnayake
469*86a4271fSThilina Rathnayake      parameter (lg=3+3*(ldim-2),lzq=lx1+1,lxyd=lzq**ldim)
470*86a4271fSThilina Rathnayake
471*86a4271fSThilina Rathnayake      real*8 gf(lg,nxq**ldim,lelt),bmq(nxq**ldim,lelt),w3mq(nxq,nxq,nxq)
472*86a4271fSThilina Rathnayake
473*86a4271fSThilina Rathnayake      common /ctmp1/ xr(lxyd),xs(lxyd),xt(lxyd)
474*86a4271fSThilina Rathnayake      common /sxrns/ yr(lxyd),ys(lxyd),yt(lxyd)
475*86a4271fSThilina Rathnayake     $ ,             zr(lxyd),zs(lxyd),zt(lxyd)
476*86a4271fSThilina Rathnayake
477*86a4271fSThilina Rathnayake      common /ctmp77/ wd(lxd),zd(lxd)
478*86a4271fSThilina Rathnayake      common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq)
479*86a4271fSThilina Rathnayake
480*86a4271fSThilina Rathnayake      integer e
481*86a4271fSThilina Rathnayake      real*8 tmp(lxyd)
482*86a4271fSThilina Rathnayake      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
483*86a4271fSThilina Rathnayake      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
484*86a4271fSThilina Rathnayake      real*8 jacmq
485*86a4271fSThilina Rathnayake
486*86a4271fSThilina Rathnayake      if (nxq.gt.lzq) call exitti('ABORT: recompile with lzq=$',nxq)
487*86a4271fSThilina Rathnayake
488*86a4271fSThilina Rathnayake      call zwgl    (zd,wd,lzq)                            ! nxq = number of points
489*86a4271fSThilina Rathnayake      call gen_dgl (dxmq,dxtmq,lzq,lzq,tmp)
490*86a4271fSThilina Rathnayake
491*86a4271fSThilina Rathnayake      do k=1,nxq
492*86a4271fSThilina Rathnayake      do j=1,nxq
493*86a4271fSThilina Rathnayake      do i=1,nxq
494*86a4271fSThilina Rathnayake         w3mq(i,j,k) = wd(i)*wd(j)*wd(k)
495*86a4271fSThilina Rathnayake      enddo
496*86a4271fSThilina Rathnayake      enddo
497*86a4271fSThilina Rathnayake      enddo
498*86a4271fSThilina Rathnayake
499*86a4271fSThilina Rathnayake      nxyzq = nxq**ldim
500*86a4271fSThilina Rathnayake      nxqm1 = lzq-1
501*86a4271fSThilina Rathnayake
502*86a4271fSThilina Rathnayake      do e=1,nelt
503*86a4271fSThilina Rathnayake         call intp_rstd (tmp,xm1(1,1,1,e),lx1,lzq,if3d,0) ! 0-->Fwd interpolation
504*86a4271fSThilina Rathnayake         call loc_grad3 (xr,xs,xt,tmp,nxqm1,dxmq,dxtmq)
505*86a4271fSThilina Rathnayake
506*86a4271fSThilina Rathnayake         call intp_rstd (tmp,ym1(1,1,1,e),lx1,lzq,if3d,0)
507*86a4271fSThilina Rathnayake         call loc_grad3 (yr,ys,yt,tmp,nxqm1,dxmq,dxtmq)
508*86a4271fSThilina Rathnayake
509*86a4271fSThilina Rathnayake         call intp_rstd (tmp,zm1(1,1,1,e),lx1,lzq,if3d,0)
510*86a4271fSThilina Rathnayake         call loc_grad3 (zr,zs,zt,tmp,nxqm1,dxmq,dxtmq)
511*86a4271fSThilina Rathnayake
512*86a4271fSThilina Rathnayake         do i=1,nxyzq
513*86a4271fSThilina Rathnayake            a11 = xr(i)
514*86a4271fSThilina Rathnayake            a12 = xs(i)
515*86a4271fSThilina Rathnayake            a13 = xt(i)
516*86a4271fSThilina Rathnayake
517*86a4271fSThilina Rathnayake            a21 = yr(i)
518*86a4271fSThilina Rathnayake            a22 = ys(i)
519*86a4271fSThilina Rathnayake            a23 = yt(i)
520*86a4271fSThilina Rathnayake
521*86a4271fSThilina Rathnayake            a31 = zr(i)
522*86a4271fSThilina Rathnayake            a32 = zs(i)
523*86a4271fSThilina Rathnayake            a33 = zt(i)
524*86a4271fSThilina Rathnayake
525*86a4271fSThilina Rathnayake            g11 = (a22*a33-a23*a32)
526*86a4271fSThilina Rathnayake            g12 = (a13*a32-a33*a12)
527*86a4271fSThilina Rathnayake            g13 = (a12*a23-a22*a13)
528*86a4271fSThilina Rathnayake
529*86a4271fSThilina Rathnayake            g21 = (a23*a31-a21*a33)
530*86a4271fSThilina Rathnayake            g22 = (a11*a33-a31*a13)
531*86a4271fSThilina Rathnayake            g23 = (a13*a21-a23*a11)
532*86a4271fSThilina Rathnayake
533*86a4271fSThilina Rathnayake            g31 = (a21*a32-a22*a31)
534*86a4271fSThilina Rathnayake            g32 = (a12*a31-a32*a11)
535*86a4271fSThilina Rathnayake            g33 = (a11*a22-a21*a12)
536*86a4271fSThilina Rathnayake
537*86a4271fSThilina Rathnayake            jacmq = a11*g11+a21*g12+a31*g13
538*86a4271fSThilina Rathnayake
539*86a4271fSThilina Rathnayake            bmq(i,e)  = w3mq(i,1,1)*jacmq
540*86a4271fSThilina Rathnayake            scale     = w3mq(i,1,1)/jacmq
541*86a4271fSThilina Rathnayake
542*86a4271fSThilina Rathnayake            gf(1,i,e) = scale*(g11*g11+g12*g12+g13*g13) ! Grr
543*86a4271fSThilina Rathnayake            gf(2,i,e) = scale*(g11*g21+g12*g22+g13*g23) ! Grs
544*86a4271fSThilina Rathnayake            gf(3,i,e) = scale*(g11*g31+g12*g32+g13*g33) ! Grt
545*86a4271fSThilina Rathnayake            gf(4,i,e) = scale*(g21*g21+g22*g22+g23*g23) ! Gss
546*86a4271fSThilina Rathnayake            gf(5,i,e) = scale*(g21*g31+g22*g32+g23*g33) ! Gst
547*86a4271fSThilina Rathnayake            gf(6,i,e) = scale*(g31*g31+g32*g32+g33*g33) ! Gtt
548*86a4271fSThilina Rathnayake
549*86a4271fSThilina Rathnayake         enddo
550*86a4271fSThilina Rathnayake      enddo
551*86a4271fSThilina Rathnayake
552*86a4271fSThilina Rathnayake      return
553*86a4271fSThilina Rathnayake      end
554*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
555*86a4271fSThilina Rathnayake      subroutine setprecn_bp1 (d,h1,h2)
556*86a4271fSThilina RathnayakeC     Generate diagonal preconditioner for Helmholtz operator
557*86a4271fSThilina RathnayakeC     Input: h1,h2                  Output: d
558*86a4271fSThilina Rathnayake      include 'SIZE'
559*86a4271fSThilina Rathnayake      include 'TOTAL'
560*86a4271fSThilina Rathnayake
561*86a4271fSThilina Rathnayake      parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2))
562*86a4271fSThilina Rathnayake
563*86a4271fSThilina Rathnayake      real*8    d(lx1,ly1,lz1,lelt),h1(lxyz,lelt),h2(lxyz,lelt)
564*86a4271fSThilina Rathnayake      integer e
565*86a4271fSThilina Rathnayake
566*86a4271fSThilina Rathnayake      real*8         gf(lg,lx1,ly1,lz1,lelt) ! Equivalence new gf() data
567*86a4271fSThilina Rathnayake      equivalence (gf,g1m1)                  ! layout to g1m1...g6m1
568*86a4271fSThilina Rathnayake
569*86a4271fSThilina Rathnayake      real*8 ysm1(ly1)
570*86a4271fSThilina Rathnayake
571*86a4271fSThilina Rathnayake      nel   = nelfld(ifield)
572*86a4271fSThilina Rathnayake      n     = nel*lx1*ly1*lz1
573*86a4271fSThilina Rathnayake      nxyz  = lx1*ly1*lz1
574*86a4271fSThilina Rathnayake
575*86a4271fSThilina Rathnayake      call copy    (d,bm1,n)   ! Mass matrix preconditioning full mass matrix
576*86a4271fSThilina Rathnayake      call dssum   (d,nx1,ny1,nz1)
577*86a4271fSThilina Rathnayake      call invcol1 (d,n)
578*86a4271fSThilina Rathnayake      return
579*86a4271fSThilina Rathnayake
580*86a4271fSThilina Rathnayake      call dsset(lx1,ly1,lz1)
581*86a4271fSThilina Rathnayake
582*86a4271fSThilina Rathnayake      do 1000 e=1,nel
583*86a4271fSThilina Rathnayake
584*86a4271fSThilina Rathnayake        call rzero(d(1,1,1,e),nxyz)
585*86a4271fSThilina Rathnayake
586*86a4271fSThilina Rathnayake        do 320 iz=1,lz1
587*86a4271fSThilina Rathnayake         do 320 iy=1,ly1
588*86a4271fSThilina Rathnayake         do 320 ix=1,lx1
589*86a4271fSThilina Rathnayake         do 320 iq=1,lx1
590*86a4271fSThilina Rathnayake           d(ix,iy,iz,e) = d(ix,iy,iz,e)
591*86a4271fSThilina Rathnayake     $                   + gf(1,iq,iy,iz,e) * dxm1(iq,ix)**2
592*86a4271fSThilina Rathnayake     $                   + gf(2,ix,iq,iz,e) * dxm1(iq,iy)**2
593*86a4271fSThilina Rathnayake     $                   + gf(3,ix,iy,iq,e) * dxm1(iq,iz)**2
594*86a4271fSThilina Rathnayake  320    continue
595*86a4271fSThilina RathnayakeC
596*86a4271fSThilina RathnayakeC        Add cross terms if element is deformed.
597*86a4271fSThilina RathnayakeC
598*86a4271fSThilina Rathnayake         if (lxyz.gt.0) then
599*86a4271fSThilina Rathnayake
600*86a4271fSThilina Rathnayake           do i2=1,ly1,ly1-1
601*86a4271fSThilina Rathnayake           do i1=1,lx1,lx1-1
602*86a4271fSThilina Rathnayake              d(1,i1,i2,e) = d(1,i1,i2,e)
603*86a4271fSThilina Rathnayake     $            + gf(4,1,i1,i2,e) * dxtm1(1,1)*dytm1(i1,i1)
604*86a4271fSThilina Rathnayake     $            + gf(5,1,i1,i2,e) * dxtm1(1,1)*dztm1(i2,i2)
605*86a4271fSThilina Rathnayake              d(lx1,i1,i2,e) = d(lx1,i1,i2,e)
606*86a4271fSThilina Rathnayake     $            + gf(4,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dytm1(i1,i1)
607*86a4271fSThilina Rathnayake     $            + gf(5,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dztm1(i2,i2)
608*86a4271fSThilina Rathnayake              d(i1,1,i2,e) = d(i1,1,i2,e)
609*86a4271fSThilina Rathnayake     $            + gf(4,i1,1,i2,e) * dytm1(1,1)*dxtm1(i1,i1)
610*86a4271fSThilina Rathnayake     $            + gf(6,i1,1,i2,e) * dytm1(1,1)*dztm1(i2,i2)
611*86a4271fSThilina Rathnayake              d(i1,ly1,i2,e) = d(i1,ly1,i2,e)
612*86a4271fSThilina Rathnayake     $            + gf(4,i1,ly1,i2,e) * dytm1(ly1,ly1)*dxtm1(i1,i1)
613*86a4271fSThilina Rathnayake     $            + gf(6,i1,ly1,i2,e) * dytm1(ly1,ly1)*dztm1(i2,i2)
614*86a4271fSThilina Rathnayake              d(i1,i2,1,e) = d(i1,i2,1,e)
615*86a4271fSThilina Rathnayake     $            + gf(5,i1,i2,1,e) * dztm1(1,1)*dxtm1(i1,i1)
616*86a4271fSThilina Rathnayake     $            + gf(6,i1,i2,1,e) * dztm1(1,1)*dytm1(i2,i2)
617*86a4271fSThilina Rathnayake              d(i1,i2,lz1,e) = d(i1,i2,lz1,e)
618*86a4271fSThilina Rathnayake     $            + gf(5,i1,i2,lz1,e) * dztm1(lz1,lz1)*dxtm1(i1,i1)
619*86a4271fSThilina Rathnayake     $            + gf(6,i1,i2,lz1,e) * dztm1(lz1,lz1)*dytm1(i2,i2)
620*86a4271fSThilina Rathnayake
621*86a4271fSThilina Rathnayake           enddo
622*86a4271fSThilina Rathnayake           enddo
623*86a4271fSThilina Rathnayake         endif
624*86a4271fSThilina Rathnayake
625*86a4271fSThilina Rathnayake        do i=1,lxyz
626*86a4271fSThilina Rathnayake           d(i,1,1,e)=d(i,1,1,e)*h1(i,e)+h2(i,e)*bm1(i,1,1,e)
627*86a4271fSThilina Rathnayake        enddo
628*86a4271fSThilina Rathnayake
629*86a4271fSThilina Rathnayake 1000 continue ! element loop
630*86a4271fSThilina Rathnayake
631*86a4271fSThilina RathnayakeC     If axisymmetric, add a diagonal term in the radial direction (ISD=2)
632*86a4271fSThilina Rathnayake
633*86a4271fSThilina Rathnayake      if (ifaxis.and.(isd.eq.2)) then
634*86a4271fSThilina Rathnayake         do 1200 e=1,nel
635*86a4271fSThilina Rathnayake            if (ifrzer(e)) call mxm(ym1(1,1,1,e),lx1,datm1,ly1,ysm1,1)
636*86a4271fSThilina Rathnayake            k=0
637*86a4271fSThilina Rathnayake            do 1190 j=1,ly1
638*86a4271fSThilina Rathnayake            do 1190 i=1,lx1
639*86a4271fSThilina Rathnayake               k=k+1
640*86a4271fSThilina Rathnayake               if (ym1(i,j,1,e).ne.0.) then
641*86a4271fSThilina Rathnayake                  term1 = bm1(i,j,1,e)/ym1(i,j,1,e)**2
642*86a4271fSThilina Rathnayake                  if (ifrzer(e)) then
643*86a4271fSThilina Rathnayake                     term2 =  wxm1(i)*wam1(1)*dam1(1,j)
644*86a4271fSThilina Rathnayake     $                       *jacm1(i,1,1,e)/ysm1(i)
645*86a4271fSThilina Rathnayake                  else
646*86a4271fSThilina Rathnayake                     term2 = 0.
647*86a4271fSThilina Rathnayake                  endif
648*86a4271fSThilina Rathnayake                  d(i,j,1,e) = d(i,j,1,e) + h1(k,e)*(term1+term2)
649*86a4271fSThilina Rathnayake               endif
650*86a4271fSThilina Rathnayake 1190       continue
651*86a4271fSThilina Rathnayake 1200    continue
652*86a4271fSThilina Rathnayake
653*86a4271fSThilina Rathnayake      endif
654*86a4271fSThilina Rathnayake      call dssum   (d,nx1,ny1,nz1)
655*86a4271fSThilina Rathnayake      call invcol1 (d,n)
656*86a4271fSThilina Rathnayake
657*86a4271fSThilina Rathnayake      if (nio.eq.0) write(6,1) n,d(1,1,1,1),h1(1,1),h2(1,1),bm1(1,1,1,1)
658*86a4271fSThilina Rathnayake   1  format(i9,1p4e12.4,' diag prec')
659*86a4271fSThilina Rathnayake
660*86a4271fSThilina Rathnayake      return
661*86a4271fSThilina Rathnayake      end
662*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
663*86a4271fSThilina Rathnayake      subroutine setprecn_bp3 (d,h1,h2)
664*86a4271fSThilina RathnayakeC     Generate dummy diagonal preconditioner for Helmholtz operator
665*86a4271fSThilina RathnayakeC     Input: h1,h2                  Output: d
666*86a4271fSThilina Rathnayake      include 'SIZE'
667*86a4271fSThilina Rathnayake      include 'TOTAL'
668*86a4271fSThilina Rathnayake
669*86a4271fSThilina Rathnayake      parameter (n=lx1*ly1*lz1*lelt)
670*86a4271fSThilina Rathnayake      real*8 d(n),h1(n),h2(n)
671*86a4271fSThilina Rathnayake
672*86a4271fSThilina Rathnayake      call rone (d,n)
673*86a4271fSThilina Rathnayake
674*86a4271fSThilina Rathnayake      return
675*86a4271fSThilina Rathnayake      end
676*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
677*86a4271fSThilina Rathnayake      subroutine userchk
678*86a4271fSThilina Rathnayake      include 'SIZE'
679*86a4271fSThilina Rathnayake      include 'TOTAL'
680*86a4271fSThilina Rathnayake
681*86a4271fSThilina Rathnayake      integer bp
682*86a4271fSThilina Rathnayake
683*86a4271fSThilina Rathnayake      call get_bp(bp)
684*86a4271fSThilina Rathnayake
685*86a4271fSThilina Rathnayake      if (bp==1) then
686*86a4271fSThilina Rathnayake        if (istep.gt.0) call bp1
687*86a4271fSThilina Rathnayake      elseif (bp==3) then
688*86a4271fSThilina Rathnayake        if (istep.gt.0) call bp3
689*86a4271fSThilina Rathnayake      else
690*86a4271fSThilina Rathnayake        write(6,*) "INVALID BP SPECIFICED"
691*86a4271fSThilina Rathnayake      endif
692*86a4271fSThilina Rathnayake
693*86a4271fSThilina Rathnayake      return
694*86a4271fSThilina Rathnayake      end
695*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
696*86a4271fSThilina Rathnayake      subroutine bp1
697*86a4271fSThilina RathnayakeC     Solution to BP1 using libCEED
698*86a4271fSThilina Rathnayake      include 'SIZE'
699*86a4271fSThilina Rathnayake      include 'TOTAL'
700*86a4271fSThilina Rathnayake      include 'CTIMER'  ! ifsync
701*86a4271fSThilina Rathnayake      include 'FDMH1'
702*86a4271fSThilina Rathnayake      include 'ceedf.h'
703*86a4271fSThilina Rathnayake
704*86a4271fSThilina Rathnayake      parameter (lzq=lx1+1)
705*86a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
706*86a4271fSThilina Rathnayake      common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq)
707*86a4271fSThilina Rathnayake
708*86a4271fSThilina Rathnayake      parameter (lt=lx1*ly1*lz1*lelt)
709*86a4271fSThilina Rathnayake      parameter (ld=lxd*lyd*lzd*lelt)
710*86a4271fSThilina Rathnayake      common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt)
711*86a4271fSThilina Rathnayake      common /vcrny/ e1(lt)
712*86a4271fSThilina Rathnayake      common /vcrvh/ h1(lt),h2(lx*lelt),pap(3)
713*86a4271fSThilina Rathnayake      real*8 coords(ldim*lx*lelt)
714*86a4271fSThilina Rathnayake
715*86a4271fSThilina Rathnayake      logical ifh3
716*86a4271fSThilina Rathnayake      integer*8 ndof
717*86a4271fSThilina Rathnayake      integer ceed,err,test
718*86a4271fSThilina Rathnayake      character*64 spec
719*86a4271fSThilina Rathnayake
720*86a4271fSThilina Rathnayake      integer p,q,ncomp,edof,ldof
721*86a4271fSThilina Rathnayake      integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs
722*86a4271fSThilina Rathnayake      integer erstrctu,erstrctx,erstrctw
723*86a4271fSThilina Rathnayake      integer basisu,basisx
724*86a4271fSThilina Rathnayake      integer qf_mass,qf_setup
725*86a4271fSThilina Rathnayake      integer op_mass,op_setup
726*86a4271fSThilina Rathnayake      real*8  x,y,z
727*86a4271fSThilina Rathnayake      integer*8 offset
728*86a4271fSThilina Rathnayake
729*86a4271fSThilina Rathnayake      external massf,masssetupf
730*86a4271fSThilina Rathnayake
731*86a4271fSThilina Rathnayake      ifield = 1
732*86a4271fSThilina Rathnayake      nxq    = nx1+1
733*86a4271fSThilina Rathnayake      n = nx1*ny1*nz1*nelt
734*86a4271fSThilina Rathnayake
735*86a4271fSThilina Rathnayake      ifsync = .false.
736*86a4271fSThilina Rathnayake
737*86a4271fSThilina RathnayakeC     Set up coordinates
738*86a4271fSThilina Rathnayake      ii=0
739*86a4271fSThilina Rathnayake      do j=0,nelt-1
740*86a4271fSThilina Rathnayake      do i=1,lx
741*86a4271fSThilina Rathnayake        ii=ii+1
742*86a4271fSThilina Rathnayake        x = xm1(ii,1,1,1)
743*86a4271fSThilina Rathnayake        y = ym1(ii,1,1,1)
744*86a4271fSThilina Rathnayake        z = zm1(ii,1,1,1)
745*86a4271fSThilina Rathnayake        coords(i+0*lx+3*j*lx)=x
746*86a4271fSThilina Rathnayake        coords(i+1*lx+3*j*lx)=y
747*86a4271fSThilina Rathnayake        coords(i+2*lx+3*j*lx)=z
748*86a4271fSThilina Rathnayake      enddo
749*86a4271fSThilina Rathnayake      enddo
750*86a4271fSThilina Rathnayake
751*86a4271fSThilina RathnayakeC     Init ceed library
752*86a4271fSThilina Rathnayake      call get_spec(spec)
753*86a4271fSThilina Rathnayake      call ceedinit(trim(spec)//char(0),ceed,err)
754*86a4271fSThilina Rathnayake
755*86a4271fSThilina Rathnayake      call get_test(test)
756*86a4271fSThilina Rathnayake
757*86a4271fSThilina RathnayakeC     Set up Nek geometry data
758*86a4271fSThilina Rathnayake      call geodatq       (gf,bmq,w3mq,nxq)         ! Set up gf() arrays
759*86a4271fSThilina Rathnayake      call set_h2_as_rhoJac_GL(h2,bmq,nxq)
760*86a4271fSThilina Rathnayake
761*86a4271fSThilina RathnayakeC     Set up true soln
762*86a4271fSThilina Rathnayake      call dist_fld_h1   (e1)
763*86a4271fSThilina Rathnayake      call copy          (h1,e1,n)                 ! Save exact soln in h1
764*86a4271fSThilina Rathnayake
765*86a4271fSThilina RathnayakeC     Set up solver parameters
766*86a4271fSThilina Rathnayake      tol       = 1e-10
767*86a4271fSThilina Rathnayake      param(22) = tol
768*86a4271fSThilina Rathnayake      maxit     = 100
769*86a4271fSThilina Rathnayake
770*86a4271fSThilina Rathnayake      call nekgsync()
771*86a4271fSThilina Rathnayake
772*86a4271fSThilina RathnayakeC     Create ceed basis for mesh and computation
773*86a4271fSThilina Rathnayake      p=nx1
774*86a4271fSThilina Rathnayake      q=p+1
775*86a4271fSThilina Rathnayake      ncomp=1
776*86a4271fSThilina Rathnayake      call ceedbasiscreatetensorh1lagrange(ceed,ndim,ndim,p,q,
777*86a4271fSThilina Rathnayake     $  ceed_gauss,basisx,err)
778*86a4271fSThilina Rathnayake      call ceedbasiscreatetensorh1lagrange(ceed,ndim,ncomp,p,q,
779*86a4271fSThilina Rathnayake     $  ceed_gauss,basisu,err)
780*86a4271fSThilina Rathnayake
781*86a4271fSThilina RathnayakeC     Create ceed element restrictions for mesh and computation
782*86a4271fSThilina Rathnayake      edof=p**ldim
783*86a4271fSThilina Rathnayake      ldof=edof*nelt*ncomp
784*86a4271fSThilina Rathnayake      call ceedelemrestrictioncreateidentity(ceed,nelt,edof,ldof,
785*86a4271fSThilina Rathnayake     $  ldim,erstrctx,err)
786*86a4271fSThilina Rathnayake      call ceedelemrestrictioncreateidentity(ceed,nelt,edof,ldof,
787*86a4271fSThilina Rathnayake     $  ncomp,erstrctu,err)
788*86a4271fSThilina Rathnayake      call ceedelemrestrictioncreateidentity(ceed,nelt,q**ldim,
789*86a4271fSThilina Rathnayake     $  nelt*q**ldim,1,erstrctw,err)
790*86a4271fSThilina Rathnayake
791*86a4271fSThilina RathnayakeC     Create ceed vectors
792*86a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,ldof,vec_p1,err)
793*86a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,ldof,vec_ap1,err)
794*86a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,ldof,vec_rhs,err)
795*86a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err)
796*86a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,nelt*q**ldim,vec_qdata,err)
797*86a4271fSThilina Rathnayake
798*86a4271fSThilina Rathnayake      offset=0
799*86a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_coords,ceed_mem_host,
800*86a4271fSThilina Rathnayake     $  ceed_use_pointer,coords,offset,err)
801*86a4271fSThilina Rathnayake
802*86a4271fSThilina RathnayakeC     Create ceed qfunctions for masssetupf and massf
803*86a4271fSThilina Rathnayake      call ceedqfunctioncreateinterior(ceed,1,masssetupf,
804*86a4271fSThilina Rathnayake     $  __FILE__
805*86a4271fSThilina Rathnayake     $  //':masssetupf',qf_setup,err)
806*86a4271fSThilina Rathnayake      call ceedqfunctionaddinput(qf_setup,'x',ldim,
807*86a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
808*86a4271fSThilina Rathnayake      call ceedqfunctionaddinput(qf_setup,'dx',ldim,
809*86a4271fSThilina Rathnayake     $  ceed_eval_grad,err)
810*86a4271fSThilina Rathnayake      call ceedqfunctionaddinput(qf_setup,'weight',1,
811*86a4271fSThilina Rathnayake     $  ceed_eval_weight,err)
812*86a4271fSThilina Rathnayake      call ceedqfunctionaddoutput(qf_setup,'rho',1,
813*86a4271fSThilina Rathnayake     $  ceed_eval_none,err)
814*86a4271fSThilina Rathnayake      call ceedqfunctionaddoutput(qf_setup,'rhs',1,
815*86a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
816*86a4271fSThilina Rathnayake
817*86a4271fSThilina Rathnayake      call ceedqfunctioncreateinterior(ceed,1,massf,
818*86a4271fSThilina Rathnayake     $  __FILE__
819*86a4271fSThilina Rathnayake     $  //':massf',qf_mass,err)
820*86a4271fSThilina Rathnayake      call ceedqfunctionaddinput(qf_mass,'u',1,
821*86a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
822*86a4271fSThilina Rathnayake      call ceedqfunctionaddinput(qf_mass,'rho',1,
823*86a4271fSThilina Rathnayake     $  ceed_eval_none,err)
824*86a4271fSThilina Rathnayake      call ceedqfunctionaddoutput(qf_mass,'v',1,
825*86a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
826*86a4271fSThilina Rathnayake
827*86a4271fSThilina RathnayakeC     Create ceed operators
828*86a4271fSThilina Rathnayake      call ceedoperatorcreate(ceed,qf_setup,
829*86a4271fSThilina Rathnayake     $  ceed_null,ceed_null,op_setup,err)
830*86a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'x',erstrctx,
831*86a4271fSThilina Rathnayake     $  ceed_notranspose,basisx,ceed_vector_active,err)
832*86a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'dx',erstrctx,
833*86a4271fSThilina Rathnayake     $  ceed_notranspose,basisx,ceed_vector_active,err)
834*86a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'weight',erstrctx,
835*86a4271fSThilina Rathnayake     $  ceed_notranspose,basisx,ceed_vector_none,err)
836*86a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'rho',erstrctw,
837*86a4271fSThilina Rathnayake     $  ceed_notranspose,ceed_basis_collocated,
838*86a4271fSThilina Rathnayake     $  ceed_vector_active,err)
839*86a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'rhs',erstrctu,
840*86a4271fSThilina Rathnayake     $  ceed_notranspose,basisu,vec_rhs,err)
841*86a4271fSThilina Rathnayake
842*86a4271fSThilina Rathnayake      call ceedoperatorcreate(ceed,qf_mass,
843*86a4271fSThilina Rathnayake     $  ceed_null,ceed_null,op_mass,err)
844*86a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_mass,'u',erstrctu,
845*86a4271fSThilina Rathnayake     $  ceed_notranspose,basisu,ceed_vector_active,err)
846*86a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_mass,'rho',erstrctw,
847*86a4271fSThilina Rathnayake     $  ceed_notranspose,ceed_basis_collocated,
848*86a4271fSThilina Rathnayake     $  vec_qdata,err)
849*86a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_mass,'v',erstrctu,
850*86a4271fSThilina Rathnayake     $  ceed_notranspose,basisu,ceed_vector_active,err)
851*86a4271fSThilina Rathnayake
852*86a4271fSThilina RathnayakeC     Compute setup data
853*86a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_rhs,ceed_mem_host,
854*86a4271fSThilina Rathnayake     $  ceed_use_pointer,r1,offset,err)
855*86a4271fSThilina Rathnayake      call ceedoperatorapply(op_setup,vec_coords,vec_qdata,
856*86a4271fSThilina Rathnayake     $  ceed_request_immediate,err)
857*86a4271fSThilina Rathnayake
858*86a4271fSThilina RathnayakeC     Set up true RHS
859*86a4271fSThilina Rathnayake      call dssum         (r1,nx1,ny1,nz1)          ! r1
860*86a4271fSThilina Rathnayake
861*86a4271fSThilina RathnayakeC     Set up algebraic RHS with libCEED
862*86a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_p1,ceed_mem_host,
863*86a4271fSThilina Rathnayake     $  ceed_use_pointer,h1,offset,err)
864*86a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_ap1,ceed_mem_host,
865*86a4271fSThilina Rathnayake     $  ceed_use_pointer,r2,offset,err)
866*86a4271fSThilina Rathnayake      call ceedoperatorapply(op_mass,vec_p1,vec_ap1,
867*86a4271fSThilina Rathnayake     $  ceed_request_immediate,err)                ! r2 = A_ceed*h1
868*86a4271fSThilina Rathnayake      call dssum         (r2,nx1,ny1,nz1)
869*86a4271fSThilina Rathnayake
870*86a4271fSThilina RathnayakeC     Set up algebraic RHS with Nek5000
871*86a4271fSThilina Rathnayake      call axhm1         (pap,r3,h1,h1,h2,'bp1')   ! r3 = A_nek5k*h1
872*86a4271fSThilina Rathnayake      call dssum         (r3,nx1,ny1,nz1)
873*86a4271fSThilina Rathnayake
874*86a4271fSThilina Rathnayake      call nekgsync()
875*86a4271fSThilina Rathnayake
876*86a4271fSThilina RathnayakeC     Solve true RHS
877*86a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "libCEED true RHS"
878*86a4271fSThilina Rathnayake      tstart = dnekclock()
879*86a4271fSThilina Rathnayake      call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_mass,
880*86a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp1')
881*86a4271fSThilina Rathnayake      tstop  = dnekclock()
882*86a4271fSThilina Rathnayake
883*86a4271fSThilina RathnayakeC     Output
884*86a4271fSThilina Rathnayake      telaps = (tstop-tstart)
885*86a4271fSThilina Rathnayake      maxits = maxit
886*86a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
887*86a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
888*86a4271fSThilina Rathnayake
889*86a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
890*86a4271fSThilina Rathnayake        if (maxit>=100) then
891*86a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
892*86a4271fSThilina Rathnayake        endif
893*86a4271fSThilina Rathnayake        if (dabs(er1)>5e-3) then
894*86a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
895*86a4271fSThilina Rathnayake        endif
896*86a4271fSThilina Rathnayake      endif
897*86a4271fSThilina Rathnayake
898*86a4271fSThilina Rathnayake      nx     = nx1-1
899*86a4271fSThilina Rathnayake      ndof   = nelgt           ! ndofs
900*86a4271fSThilina Rathnayake      ndof   = ndof*(nx**ldim) ! ndofs
901*86a4271fSThilina Rathnayake      nppp   = ndof/np         ! ndofs/proc
902*86a4271fSThilina Rathnayake
903*86a4271fSThilina Rathnayake      dofpss = ndof/telaps     ! DOF/sec - scalar form
904*86a4271fSThilina Rathnayake      titers = telaps/maxits   ! time per iteration
905*86a4271fSThilina Rathnayake      tppp_s = titers/nppp     ! time per iteraton per local point
906*86a4271fSThilina Rathnayake
907*86a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
908*86a4271fSThilina Rathnayake     $ ,np,nx,nelt,nelgt,ndof,nppp,maxits,telaps,dofpss,titers,tppp_s
909*86a4271fSThilina Rathnayake
910*86a4271fSThilina RathnayakeC     Solve libCEED algebraic RHS
911*86a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "libCEED algebraic RHS"
912*86a4271fSThilina Rathnayake      maxit = 100
913*86a4271fSThilina Rathnayake      tstart = dnekclock()
914*86a4271fSThilina Rathnayake      call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_mass,
915*86a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp1')
916*86a4271fSThilina Rathnayake      tstop  = dnekclock()
917*86a4271fSThilina Rathnayake
918*86a4271fSThilina RathnayakeC     Output
919*86a4271fSThilina Rathnayake      telaps = (tstop-tstart)
920*86a4271fSThilina Rathnayake      maxits = maxit
921*86a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
922*86a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
923*86a4271fSThilina Rathnayake
924*86a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
925*86a4271fSThilina Rathnayake        if (maxit>=100) then
926*86a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
927*86a4271fSThilina Rathnayake        endif
928*86a4271fSThilina Rathnayake        if (dabs(er1)>1e-5) then
929*86a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
930*86a4271fSThilina Rathnayake        endif
931*86a4271fSThilina Rathnayake      endif
932*86a4271fSThilina Rathnayake
933*86a4271fSThilina Rathnayake      nx     = nx1-1
934*86a4271fSThilina Rathnayake      ndof   = nelgt           ! ndofs
935*86a4271fSThilina Rathnayake      ndof   = ndof*(nx**ldim) ! ndofs
936*86a4271fSThilina Rathnayake      nppp   = ndof/np         ! ndofs/proc
937*86a4271fSThilina Rathnayake
938*86a4271fSThilina Rathnayake      dofpss = ndof/telaps     ! DOF/sec - scalar form
939*86a4271fSThilina Rathnayake      titers = telaps/maxits   ! time per iteration
940*86a4271fSThilina Rathnayake      tppp_s = titers/nppp     ! time per iteraton per local point
941*86a4271fSThilina Rathnayake
942*86a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
943*86a4271fSThilina Rathnayake     $ ,np,nx,nelt,nelgt,ndof,nppp,maxits,telaps,dofpss,titers,tppp_s
944*86a4271fSThilina Rathnayake
945*86a4271fSThilina RathnayakeC     Solve Nek5000 algebraic RHS
946*86a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS"
947*86a4271fSThilina Rathnayake      maxit = 100
948*86a4271fSThilina Rathnayake      tstart = dnekclock()
949*86a4271fSThilina Rathnayake      call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_mass,
950*86a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp1')
951*86a4271fSThilina Rathnayake      tstop  = dnekclock()
952*86a4271fSThilina Rathnayake
953*86a4271fSThilina RathnayakeC     Output
954*86a4271fSThilina Rathnayake      telaps = (tstop-tstart)
955*86a4271fSThilina Rathnayake      maxits = maxit
956*86a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
957*86a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
958*86a4271fSThilina Rathnayake
959*86a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
960*86a4271fSThilina Rathnayake        if (maxit>=100) then
961*86a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
962*86a4271fSThilina Rathnayake        endif
963*86a4271fSThilina Rathnayake        if (dabs(er1)>1e-5) then
964*86a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
965*86a4271fSThilina Rathnayake        endif
966*86a4271fSThilina Rathnayake      endif
967*86a4271fSThilina Rathnayake
968*86a4271fSThilina Rathnayake      nx     = nx1-1
969*86a4271fSThilina Rathnayake      ndof   = nelgt           ! ndofs
970*86a4271fSThilina Rathnayake      ndof   = ndof*(nx**ldim) ! ndofs
971*86a4271fSThilina Rathnayake      nppp   = ndof/np         ! ndofs/proc
972*86a4271fSThilina Rathnayake
973*86a4271fSThilina Rathnayake      dofpss = ndof/telaps     ! DOF/sec - scalar form
974*86a4271fSThilina Rathnayake      titers = telaps/maxits   ! time per iteration
975*86a4271fSThilina Rathnayake      tppp_s = titers/nppp     ! time per iteraton per local point
976*86a4271fSThilina Rathnayake
977*86a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
978*86a4271fSThilina Rathnayake     $ ,np,nx,nelt,nelgt,ndof,nppp,maxits,telaps,dofpss,titers,tppp_s
979*86a4271fSThilina Rathnayake
980*86a4271fSThilina Rathnayake    1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5)
981*86a4271fSThilina Rathnayake    3 format(i3,i9,e12.4,1x,a8,i9)
982*86a4271fSThilina Rathnayake
983*86a4271fSThilina RathnayakeC     Destroy ceed handles
984*86a4271fSThilina Rathnayake      call ceedvectordestroy(vec_p1,err)
985*86a4271fSThilina Rathnayake      call ceedvectordestroy(vec_ap1,err)
986*86a4271fSThilina Rathnayake      call ceedvectordestroy(vec_rhs,err)
987*86a4271fSThilina Rathnayake      call ceedvectordestroy(vec_qdata,err)
988*86a4271fSThilina Rathnayake      call ceedvectordestroy(vec_coords,err)
989*86a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctu,err)
990*86a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctx,err)
991*86a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctw,err)
992*86a4271fSThilina Rathnayake      call ceedbasisdestroy(basisu,err)
993*86a4271fSThilina Rathnayake      call ceedbasisdestroy(basisx,err)
994*86a4271fSThilina Rathnayake      call ceedqfunctiondestroy(qf_setup,err)
995*86a4271fSThilina Rathnayake      call ceedqfunctiondestroy(qf_mass,err)
996*86a4271fSThilina Rathnayake      call ceedoperatordestroy(op_setup,err)
997*86a4271fSThilina Rathnayake      call ceedoperatordestroy(op_mass,err)
998*86a4271fSThilina Rathnayake      call ceeddestroy(ceed,err)
999*86a4271fSThilina Rathnayake
1000*86a4271fSThilina Rathnayake      return
1001*86a4271fSThilina Rathnayake      end
1002*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
1003*86a4271fSThilina Rathnayake      subroutine bp3
1004*86a4271fSThilina RathnayakeC     Solution to BP3 using libCEED
1005*86a4271fSThilina Rathnayake      include 'SIZE'
1006*86a4271fSThilina Rathnayake      include 'TOTAL'
1007*86a4271fSThilina Rathnayake      include 'CTIMER'  ! ifsync
1008*86a4271fSThilina Rathnayake      include 'FDMH1'
1009*86a4271fSThilina Rathnayake      include 'ceedf.h'
1010*86a4271fSThilina Rathnayake
1011*86a4271fSThilina Rathnayake      parameter (lzq=lx1+1)
1012*86a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
1013*86a4271fSThilina Rathnayake      common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq)
1014*86a4271fSThilina Rathnayake
1015*86a4271fSThilina Rathnayake      parameter (lt=lx1*ly1*lz1*lelt)
1016*86a4271fSThilina Rathnayake      parameter (ld=lxd*lyd*lzd*lelt)
1017*86a4271fSThilina Rathnayake      common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt)
1018*86a4271fSThilina Rathnayake      common /vcrny/ e1(lt)
1019*86a4271fSThilina Rathnayake      common /vcrvh/ h1(lt),h2(ld),pap(3)
1020*86a4271fSThilina Rathnayake      real*8 coords(ldim*lx*lelt)
1021*86a4271fSThilina Rathnayake
1022*86a4271fSThilina Rathnayake      logical ifh3
1023*86a4271fSThilina Rathnayake      integer*8 ndof
1024*86a4271fSThilina Rathnayake      integer ceed,err,test
1025*86a4271fSThilina Rathnayake      character*64 spec
1026*86a4271fSThilina Rathnayake
1027*86a4271fSThilina Rathnayake      integer p,q,ncomp,edof,ldof
1028*86a4271fSThilina Rathnayake      integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs
1029*86a4271fSThilina Rathnayake      integer erstrctu,erstrctx,erstrctw
1030*86a4271fSThilina Rathnayake      integer basisu,basisx
1031*86a4271fSThilina Rathnayake      integer qf_diffusion,qf_setup
1032*86a4271fSThilina Rathnayake      integer op_diffusion,op_setup
1033*86a4271fSThilina Rathnayake      integer ii,i,ngeo
1034*86a4271fSThilina Rathnayake      real*8  x,y,z
1035*86a4271fSThilina Rathnayake      integer*8 offset
1036*86a4271fSThilina Rathnayake
1037*86a4271fSThilina Rathnayake      external diffusionf,diffsetupf
1038*86a4271fSThilina Rathnayake
1039*86a4271fSThilina Rathnayake      ifield = 1
1040*86a4271fSThilina Rathnayake      nxq    = nx1+1
1041*86a4271fSThilina Rathnayake      n = nx1*ny1*nz1*nelt
1042*86a4271fSThilina Rathnayake
1043*86a4271fSThilina Rathnayake      ifsync = .false.
1044*86a4271fSThilina Rathnayake
1045*86a4271fSThilina RathnayakeC     Set up coordinates and mask
1046*86a4271fSThilina Rathnayake      ii=0
1047*86a4271fSThilina Rathnayake      do j=0,nelt-1
1048*86a4271fSThilina Rathnayake      do i=1,lx
1049*86a4271fSThilina Rathnayake        ii=ii+1
1050*86a4271fSThilina Rathnayake        x = xm1(ii,1,1,1)
1051*86a4271fSThilina Rathnayake        y = ym1(ii,1,1,1)
1052*86a4271fSThilina Rathnayake        z = zm1(ii,1,1,1)
1053*86a4271fSThilina Rathnayake        coords(i+0*lx+3*j*lx)=x
1054*86a4271fSThilina Rathnayake        coords(i+1*lx+3*j*lx)=y
1055*86a4271fSThilina Rathnayake        coords(i+2*lx+3*j*lx)=z
1056*86a4271fSThilina Rathnayake        if ( x.eq.0.or.x.eq.1
1057*86a4271fSThilina Rathnayake     $   .or.y.eq.0.or.y.eq.1
1058*86a4271fSThilina Rathnayake     $   .or.z.eq.0.or.z.eq.1 ) then
1059*86a4271fSThilina Rathnayake          h2(ii)=0.
1060*86a4271fSThilina Rathnayake        else
1061*86a4271fSThilina Rathnayake          h2(ii)=1.
1062*86a4271fSThilina Rathnayake        endif
1063*86a4271fSThilina Rathnayake      enddo
1064*86a4271fSThilina Rathnayake      enddo
1065*86a4271fSThilina Rathnayake
1066*86a4271fSThilina RathnayakeC     Init ceed library
1067*86a4271fSThilina Rathnayake      call get_spec(spec)
1068*86a4271fSThilina Rathnayake      call ceedinit(trim(spec)//char(0),ceed,err)
1069*86a4271fSThilina Rathnayake
1070*86a4271fSThilina Rathnayake      call get_test(test)
1071*86a4271fSThilina Rathnayake
1072*86a4271fSThilina RathnayakeC     Set up Nek geometry data
1073*86a4271fSThilina Rathnayake      call geodatq       (gf,bmq,w3mq,nxq)         ! Set up gf() arrays
1074*86a4271fSThilina Rathnayake
1075*86a4271fSThilina RathnayakeC     Set up true soln
1076*86a4271fSThilina Rathnayake      call sin_fld_h1    (e1)
1077*86a4271fSThilina Rathnayake      call xmask1        (e1,h2,nelt)
1078*86a4271fSThilina Rathnayake      call copy          (h1,e1,n)                 ! Save exact soln in h1
1079*86a4271fSThilina Rathnayake
1080*86a4271fSThilina RathnayakeC     Set up solver parameters
1081*86a4271fSThilina Rathnayake      tol       = 1e-10
1082*86a4271fSThilina Rathnayake      param(22) = tol
1083*86a4271fSThilina Rathnayake      maxit     = 100
1084*86a4271fSThilina Rathnayake
1085*86a4271fSThilina Rathnayake      call nekgsync()
1086*86a4271fSThilina Rathnayake
1087*86a4271fSThilina RathnayakeC     Create ceed basis for mesh and computation
1088*86a4271fSThilina Rathnayake      p=nx1
1089*86a4271fSThilina Rathnayake      q=p+1
1090*86a4271fSThilina Rathnayake      ncomp=1
1091*86a4271fSThilina Rathnayake      call ceedbasiscreatetensorh1lagrange(ceed,ldim,3*ncomp,p,q,
1092*86a4271fSThilina Rathnayake     $  ceed_gauss,basisx,err)
1093*86a4271fSThilina Rathnayake      call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncomp,p,q,
1094*86a4271fSThilina Rathnayake     $  ceed_gauss,basisu,err)
1095*86a4271fSThilina Rathnayake
1096*86a4271fSThilina RathnayakeC     Create ceed element restrictions for mesh and computation
1097*86a4271fSThilina Rathnayake      edof=p**ldim
1098*86a4271fSThilina Rathnayake      ldof=edof*nelt*ncomp
1099*86a4271fSThilina Rathnayake      ngeo=(ldim*(ldim+1))/2
1100*86a4271fSThilina Rathnayake      call ceedelemrestrictioncreateidentity(ceed,nelt,edof,ldof,
1101*86a4271fSThilina Rathnayake     $  ldim,erstrctx,err)
1102*86a4271fSThilina Rathnayake      call ceedelemrestrictioncreateidentity(ceed,nelt,edof,ldof,
1103*86a4271fSThilina Rathnayake     $  ncomp,erstrctu,err)
1104*86a4271fSThilina Rathnayake      call ceedelemrestrictioncreateidentity(ceed,nelt,q**ldim,
1105*86a4271fSThilina Rathnayake     $  nelt*q**ldim,ngeo,erstrctw,err)
1106*86a4271fSThilina Rathnayake
1107*86a4271fSThilina RathnayakeC     Create ceed vectors
1108*86a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,ldof,vec_p1,err)
1109*86a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,ldof,vec_ap1,err)
1110*86a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,ldof,vec_rhs,err)
1111*86a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err)
1112*86a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,nelt*ngeo*q**ldim,vec_qdata,err)
1113*86a4271fSThilina Rathnayake
1114*86a4271fSThilina Rathnayake      offset=0
1115*86a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_coords,ceed_mem_host,
1116*86a4271fSThilina Rathnayake     $  ceed_use_pointer,coords,offset,err)
1117*86a4271fSThilina Rathnayake
1118*86a4271fSThilina RathnayakeC     Create ceed qfunctions for diffsetupf and diffusionf
1119*86a4271fSThilina Rathnayake      call ceedqfunctioncreateinterior(ceed,1,diffsetupf,
1120*86a4271fSThilina Rathnayake     $  __FILE__
1121*86a4271fSThilina Rathnayake     $  //':diffsetupf'//char(0),qf_setup,err)
1122*86a4271fSThilina Rathnayake      call ceedqfunctionaddinput(qf_setup,'x',ldim,
1123*86a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
1124*86a4271fSThilina Rathnayake      call ceedqfunctionaddinput(qf_setup,'dx',ldim,
1125*86a4271fSThilina Rathnayake     $  ceed_eval_grad,err)
1126*86a4271fSThilina Rathnayake      call ceedqfunctionaddinput(qf_setup,'weight',1,
1127*86a4271fSThilina Rathnayake     $  ceed_eval_weight,err)
1128*86a4271fSThilina Rathnayake      call ceedqfunctionaddoutput(qf_setup,'rho',ngeo,
1129*86a4271fSThilina Rathnayake     $  ceed_eval_none,err)
1130*86a4271fSThilina Rathnayake      call ceedqfunctionaddoutput(qf_setup,'rhs',1,
1131*86a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
1132*86a4271fSThilina Rathnayake
1133*86a4271fSThilina Rathnayake      call ceedqfunctioncreateinterior(ceed,1,diffusionf,
1134*86a4271fSThilina Rathnayake     $  __FILE__
1135*86a4271fSThilina Rathnayake     $  //':diffusionf'//char(0),qf_diffusion,err)
1136*86a4271fSThilina Rathnayake      call ceedqfunctionaddinput(qf_diffusion,'u',1,
1137*86a4271fSThilina Rathnayake     $  ceed_eval_grad,err)
1138*86a4271fSThilina Rathnayake      call ceedqfunctionaddinput(qf_diffusion,'rho',ngeo,
1139*86a4271fSThilina Rathnayake     $  ceed_eval_none,err)
1140*86a4271fSThilina Rathnayake      call ceedqfunctionaddoutput(qf_diffusion,'v',1,
1141*86a4271fSThilina Rathnayake     $  ceed_eval_grad,err)
1142*86a4271fSThilina Rathnayake
1143*86a4271fSThilina RathnayakeC     Create ceed operators
1144*86a4271fSThilina Rathnayake      call ceedoperatorcreate(ceed,qf_setup,
1145*86a4271fSThilina Rathnayake     $  ceed_null,ceed_null,op_setup,err)
1146*86a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'x',erstrctx,
1147*86a4271fSThilina Rathnayake     $  ceed_notranspose,basisx,ceed_vector_active,err)
1148*86a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'dx',erstrctx,
1149*86a4271fSThilina Rathnayake     $  ceed_notranspose,basisx,ceed_vector_active,err)
1150*86a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'weight',erstrctx,
1151*86a4271fSThilina Rathnayake     $  ceed_notranspose,basisx,ceed_vector_none,err)
1152*86a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'rho',erstrctw,
1153*86a4271fSThilina Rathnayake     $  ceed_notranspose,ceed_basis_collocated,
1154*86a4271fSThilina Rathnayake     $  ceed_vector_active,err)
1155*86a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'rhs',erstrctu,
1156*86a4271fSThilina Rathnayake     $  ceed_notranspose,basisu,vec_rhs,err)
1157*86a4271fSThilina Rathnayake
1158*86a4271fSThilina Rathnayake      call ceedoperatorcreate(ceed,qf_diffusion,
1159*86a4271fSThilina Rathnayake     $  ceed_null,ceed_null,op_diffusion,err)
1160*86a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_diffusion,'u',erstrctu,
1161*86a4271fSThilina Rathnayake     $  ceed_notranspose,basisu,ceed_vector_active,err)
1162*86a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_diffusion,'rho',erstrctw,
1163*86a4271fSThilina Rathnayake     $  ceed_notranspose,ceed_basis_collocated,
1164*86a4271fSThilina Rathnayake     $  vec_qdata,err)
1165*86a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_diffusion,'v',erstrctu,
1166*86a4271fSThilina Rathnayake     $  ceed_notranspose,basisu,ceed_vector_active,err)
1167*86a4271fSThilina Rathnayake
1168*86a4271fSThilina RathnayakeC     Compute setup data
1169*86a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_rhs,ceed_mem_host,
1170*86a4271fSThilina Rathnayake     $  ceed_use_pointer,r1,offset,err)
1171*86a4271fSThilina Rathnayake      call ceedoperatorapply(op_setup,vec_coords,vec_qdata,
1172*86a4271fSThilina Rathnayake     $  ceed_request_immediate,err)
1173*86a4271fSThilina Rathnayake
1174*86a4271fSThilina RathnayakeC     Set up true RHS
1175*86a4271fSThilina Rathnayake      call dssum         (r1,nx1,ny1,nz1)          ! r1
1176*86a4271fSThilina Rathnayake      call xmask1        (r1,h2,nelt)
1177*86a4271fSThilina Rathnayake
1178*86a4271fSThilina RathnayakeC     Set up algebraic RHS with libCEED
1179*86a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_p1,ceed_mem_host,
1180*86a4271fSThilina Rathnayake     $  ceed_use_pointer,h1,offset,err)
1181*86a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_ap1,ceed_mem_host,
1182*86a4271fSThilina Rathnayake     $  ceed_use_pointer,r2,offset,err)
1183*86a4271fSThilina Rathnayake      call ceedoperatorapply(op_diffusion,vec_p1,vec_ap1,
1184*86a4271fSThilina Rathnayake     $  ceed_request_immediate,err)                ! r2 = A_ceed*h1
1185*86a4271fSThilina Rathnayake      call dssum         (r2,nx1,ny1,nz1)
1186*86a4271fSThilina Rathnayake      call xmask1        (r2,h2,nelt)
1187*86a4271fSThilina Rathnayake
1188*86a4271fSThilina RathnayakeC     Set up algebraic RHS with Nek5000
1189*86a4271fSThilina Rathnayake      call axhm1         (pap,r3,h1,h1,h2,'bp3')   ! r3 = A_nek5k*h1
1190*86a4271fSThilina Rathnayake      call dssum         (r3,nx1,ny1,nz1)
1191*86a4271fSThilina Rathnayake      call xmask1        (r3,h2,nelt)
1192*86a4271fSThilina Rathnayake
1193*86a4271fSThilina Rathnayake      call nekgsync()
1194*86a4271fSThilina Rathnayake
1195*86a4271fSThilina RathnayakeC     Solve true RHS
1196*86a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "libCEED true RHS"
1197*86a4271fSThilina Rathnayake      tstart = dnekclock()
1198*86a4271fSThilina Rathnayake      call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
1199*86a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp3')
1200*86a4271fSThilina Rathnayake      tstop  = dnekclock()
1201*86a4271fSThilina Rathnayake
1202*86a4271fSThilina RathnayakeC     Output
1203*86a4271fSThilina Rathnayake      telaps = (tstop-tstart)
1204*86a4271fSThilina Rathnayake      maxits = maxit
1205*86a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
1206*86a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
1207*86a4271fSThilina Rathnayake
1208*86a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
1209*86a4271fSThilina Rathnayake        if (maxit>=100) then
1210*86a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
1211*86a4271fSThilina Rathnayake        endif
1212*86a4271fSThilina Rathnayake        if (dabs(er1)>1e-3) then
1213*86a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
1214*86a4271fSThilina Rathnayake        endif
1215*86a4271fSThilina Rathnayake      endif
1216*86a4271fSThilina Rathnayake
1217*86a4271fSThilina Rathnayake      nx     = nx1-1
1218*86a4271fSThilina Rathnayake      ndof   = nelgt           ! ndofs
1219*86a4271fSThilina Rathnayake      ndof   = ndof*(nx**ldim) ! ndofs
1220*86a4271fSThilina Rathnayake      nppp   = ndof/np         ! ndofs/proc
1221*86a4271fSThilina Rathnayake
1222*86a4271fSThilina Rathnayake      dofpss = ndof/telaps     ! DOF/sec - scalar form
1223*86a4271fSThilina Rathnayake      titers = telaps/maxits   ! time per iteration
1224*86a4271fSThilina Rathnayake      tppp_s = titers/nppp     ! time per iteraton per local point
1225*86a4271fSThilina Rathnayake
1226*86a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
1227*86a4271fSThilina Rathnayake     $ ,np,nx,nelt,nelgt,ndof,nppp,maxits,telaps,dofpss,titers,tppp_s
1228*86a4271fSThilina Rathnayake
1229*86a4271fSThilina RathnayakeC     Solve libCEED algebraic RHS
1230*86a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "libCEED algebraic RHS"
1231*86a4271fSThilina Rathnayake      maxit = 100
1232*86a4271fSThilina Rathnayake      tstart = dnekclock()
1233*86a4271fSThilina Rathnayake      call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
1234*86a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp3')
1235*86a4271fSThilina Rathnayake      tstop  = dnekclock()
1236*86a4271fSThilina Rathnayake
1237*86a4271fSThilina RathnayakeC     Output
1238*86a4271fSThilina Rathnayake      telaps = (tstop-tstart)
1239*86a4271fSThilina Rathnayake      maxits = maxit
1240*86a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
1241*86a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
1242*86a4271fSThilina Rathnayake
1243*86a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
1244*86a4271fSThilina Rathnayake        if (maxit>=100) then
1245*86a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
1246*86a4271fSThilina Rathnayake        endif
1247*86a4271fSThilina Rathnayake        if (dabs(er1)>1e-9) then
1248*86a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
1249*86a4271fSThilina Rathnayake        endif
1250*86a4271fSThilina Rathnayake      endif
1251*86a4271fSThilina Rathnayake
1252*86a4271fSThilina Rathnayake      nx     = nx1-1
1253*86a4271fSThilina Rathnayake      ndof   = nelgt           ! ndofs
1254*86a4271fSThilina Rathnayake      ndof   = ndof*(nx**ldim) ! ndofs
1255*86a4271fSThilina Rathnayake      nppp   = ndof/np         ! ndofs/proc
1256*86a4271fSThilina Rathnayake
1257*86a4271fSThilina Rathnayake      dofpss = ndof/telaps     ! DOF/sec - scalar form
1258*86a4271fSThilina Rathnayake      titers = telaps/maxits   ! time per iteration
1259*86a4271fSThilina Rathnayake      tppp_s = titers/nppp     ! time per iteraton per local point
1260*86a4271fSThilina Rathnayake
1261*86a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
1262*86a4271fSThilina Rathnayake     $ ,np,nx,nelt,nelgt,ndof,nppp,maxits,telaps,dofpss,titers,tppp_s
1263*86a4271fSThilina Rathnayake
1264*86a4271fSThilina RathnayakeC     Solve Nek5000 algebraic RHS
1265*86a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS"
1266*86a4271fSThilina Rathnayake      maxit = 100
1267*86a4271fSThilina Rathnayake      tstart = dnekclock()
1268*86a4271fSThilina Rathnayake      call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
1269*86a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp3')
1270*86a4271fSThilina Rathnayake      tstop  = dnekclock()
1271*86a4271fSThilina Rathnayake
1272*86a4271fSThilina RathnayakeC     Output
1273*86a4271fSThilina Rathnayake      telaps = (tstop-tstart)
1274*86a4271fSThilina Rathnayake      maxits = maxit
1275*86a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
1276*86a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
1277*86a4271fSThilina Rathnayake
1278*86a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
1279*86a4271fSThilina Rathnayake        if (maxit>=100) then
1280*86a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
1281*86a4271fSThilina Rathnayake        endif
1282*86a4271fSThilina Rathnayake        if (dabs(er1)>1e-9) then
1283*86a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
1284*86a4271fSThilina Rathnayake        endif
1285*86a4271fSThilina Rathnayake      endif
1286*86a4271fSThilina Rathnayake
1287*86a4271fSThilina Rathnayake      nx     = nx1-1
1288*86a4271fSThilina Rathnayake      ndof   = nelgt           ! ndofs
1289*86a4271fSThilina Rathnayake      ndof   = ndof*(nx**ldim) ! ndofs
1290*86a4271fSThilina Rathnayake      nppp   = ndof/np         ! ndofs/proc
1291*86a4271fSThilina Rathnayake
1292*86a4271fSThilina Rathnayake      dofpss = ndof/telaps     ! DOF/sec - scalar form
1293*86a4271fSThilina Rathnayake      titers = telaps/maxits   ! time per iteration
1294*86a4271fSThilina Rathnayake      tppp_s = titers/nppp     ! time per iteraton per local point
1295*86a4271fSThilina Rathnayake
1296*86a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
1297*86a4271fSThilina Rathnayake     $ ,np,nx,nelt,nelgt,ndof,nppp,maxits,telaps,dofpss,titers,tppp_s
1298*86a4271fSThilina Rathnayake
1299*86a4271fSThilina Rathnayake    1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5)
1300*86a4271fSThilina Rathnayake    3 format(i3,i9,e12.4,1x,a8,i9)
1301*86a4271fSThilina Rathnayake
1302*86a4271fSThilina RathnayakeC     Destroy ceed handles
1303*86a4271fSThilina Rathnayake      call ceedvectordestroy(vec_p1,err)
1304*86a4271fSThilina Rathnayake      call ceedvectordestroy(vec_ap1,err)
1305*86a4271fSThilina Rathnayake      call ceedvectordestroy(vec_rhs,err)
1306*86a4271fSThilina Rathnayake      call ceedvectordestroy(vec_qdata,err)
1307*86a4271fSThilina Rathnayake      call ceedvectordestroy(vec_coords,err)
1308*86a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctu,err)
1309*86a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctx,err)
1310*86a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctw,err)
1311*86a4271fSThilina Rathnayake      call ceedbasisdestroy(basisu,err)
1312*86a4271fSThilina Rathnayake      call ceedbasisdestroy(basisx,err)
1313*86a4271fSThilina Rathnayake      call ceedqfunctiondestroy(qf_setup,err)
1314*86a4271fSThilina Rathnayake      call ceedqfunctiondestroy(qf_diffusion,err)
1315*86a4271fSThilina Rathnayake      call ceedoperatordestroy(op_setup,err)
1316*86a4271fSThilina Rathnayake      call ceedoperatordestroy(op_diffusion,err)
1317*86a4271fSThilina Rathnayake      call ceeddestroy(ceed,err)
1318*86a4271fSThilina Rathnayake
1319*86a4271fSThilina Rathnayake      return
1320*86a4271fSThilina Rathnayake      end
1321*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
1322*86a4271fSThilina Rathnayake      subroutine cggos(u1,r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,
1323*86a4271fSThilina Rathnayake     $  vec_ap1,maxit,bpname)
1324*86a4271fSThilina RathnayakeC     Scalar conjugate gradient iteration for solution of uncoupled
1325*86a4271fSThilina RathnayakeC     Helmholtz equations
1326*86a4271fSThilina RathnayakeC     Input: r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,vec_ap1,bpname
1327*86a4271fSThilina RathnayakeC     Output: u1,maxit
1328*86a4271fSThilina Rathnayake      include 'SIZE'
1329*86a4271fSThilina Rathnayake      include 'TOTAL'
1330*86a4271fSThilina Rathnayake      include 'DOMAIN'
1331*86a4271fSThilina Rathnayake      include 'FDMH1'
1332*86a4271fSThilina Rathnayake      character*3 bpname
1333*86a4271fSThilina Rathnayake
1334*86a4271fSThilina RathnayakeC     INPUT:  rhs1 - rhs
1335*86a4271fSThilina RathnayakeC             h1   - exact solution
1336*86a4271fSThilina Rathnayake
1337*86a4271fSThilina Rathnayake      parameter (lt=lx1*ly1*lz1*lelt)
1338*86a4271fSThilina Rathnayake      parameter (ld=lxd*lyd*lzd*lelt)
1339*86a4271fSThilina Rathnayake      real*8 u1(lt),r1(lt),h1(lt),h2(lt)
1340*86a4271fSThilina Rathnayake      real*8 rmult(1),binv(1)
1341*86a4271fSThilina Rathnayake      integer ceed,ceed_op,vec_ap1,vec_p1
1342*86a4271fSThilina Rathnayake      common /scrcg/ dpc(lt),p1(lt),z1(lt)
1343*86a4271fSThilina Rathnayake      common /scrca/ wv(4),wk(4),rpp1(4),rpp2(4),alph(4),beta(4),pap(4)
1344*86a4271fSThilina Rathnayake
1345*86a4271fSThilina Rathnayake      real*8 ap1(lt)
1346*86a4271fSThilina Rathnayake      equivalence (ap1,z1)
1347*86a4271fSThilina Rathnayake
1348*86a4271fSThilina Rathnayake      vol   = volfld(ifield)
1349*86a4271fSThilina Rathnayake      nel   = nelfld(ifield)
1350*86a4271fSThilina Rathnayake      nxyz  = lx1*ly1*lz1
1351*86a4271fSThilina Rathnayake      n     = nxyz*nel
1352*86a4271fSThilina Rathnayake      nx    = nx1-1                  ! Polynomial order (just for i/o)
1353*86a4271fSThilina Rathnayake
1354*86a4271fSThilina Rathnayake      tol=tin
1355*86a4271fSThilina Rathnayake
1356*86a4271fSThilina Rathnayake      if(bpname.ne.'bp1') then
1357*86a4271fSThilina Rathnayake        call setprecn_bp3(dpc,h1,h2) ! Set up diagional pre-conidtioner
1358*86a4271fSThilina Rathnayake      else
1359*86a4271fSThilina Rathnayake        call setprecn_bp1(dpc,h1,h2) ! Set up diagional pre-conidtioner
1360*86a4271fSThilina Rathnayake      endif
1361*86a4271fSThilina Rathnayake
1362*86a4271fSThilina Rathnayake      call rzero         (u1,n)      ! Initialize solution
1363*86a4271fSThilina Rathnayake
1364*86a4271fSThilina Rathnayake      wv(1)=0
1365*86a4271fSThilina Rathnayake      do i=1,n
1366*86a4271fSThilina Rathnayake         s=rmult(i)                  !      -1
1367*86a4271fSThilina Rathnayake         p1(i)=dpc(i)*r1(i)          ! p = M  r      T
1368*86a4271fSThilina Rathnayake         wv(1)=wv(1)+s*p1(i)*r1(i)   !              r p
1369*86a4271fSThilina Rathnayake      enddo
1370*86a4271fSThilina Rathnayake      call gop(wv(1),wk,'+  ',1)
1371*86a4271fSThilina Rathnayake      rpp1(1) = wv  (1)
1372*86a4271fSThilina Rathnayake
1373*86a4271fSThilina Rathnayake      do 1000 iter=1,maxit
1374*86a4271fSThilina Rathnayake         call axhm1_ceed (pap,ap1,p1,h1,h2,ceed,ceed_op,
1375*86a4271fSThilina Rathnayake     $     vec_ap1,vec_p1)
1376*86a4271fSThilina Rathnayake         call dssum    (ap1,nx1,ny1,nz1)
1377*86a4271fSThilina Rathnayake         if (bpname.ne.'bp1') call xmask1(ap1,h2,nel)
1378*86a4271fSThilina Rathnayake
1379*86a4271fSThilina Rathnayake         call gop      (pap,wk,'+  ',1)
1380*86a4271fSThilina Rathnayake         alph(1) = rpp1(1)/pap(1)
1381*86a4271fSThilina Rathnayake
1382*86a4271fSThilina Rathnayake         do i=1,n
1383*86a4271fSThilina Rathnayake            u1(i)=u1(i)+alph(1)* p1(i)
1384*86a4271fSThilina Rathnayake            r1(i)=r1(i)-alph(1)*ap1(i)
1385*86a4271fSThilina Rathnayake         enddo
1386*86a4271fSThilina Rathnayake
1387*86a4271fSThilina RathnayakeC        tolerance check here
1388*86a4271fSThilina Rathnayake         call rzero(wv,2)
1389*86a4271fSThilina Rathnayake         do i=1,n
1390*86a4271fSThilina Rathnayake            wv(1)=wv(1)+r1(i)*r1(i)            ! L2 error estimate
1391*86a4271fSThilina Rathnayake            z1(i)=dpc(i)*r1(i)                 ! z = M  r
1392*86a4271fSThilina Rathnayake            wv(2)=wv(2)+rmult(i)*z1(i)*r1(i)   ! r z
1393*86a4271fSThilina Rathnayake         enddo
1394*86a4271fSThilina Rathnayake         call gop(wv,wk,'+  ',2)
1395*86a4271fSThilina Rathnayake
1396*86a4271fSThilina RathnayakeC        if (nio.eq.0) write(6,1) ifield,istep,iter,nx,(wv(k),k=1,1)
1397*86a4271fSThilina Rathnayake  1     format(i2,i9,i5,i4,1p1e12.4,' cggos')
1398*86a4271fSThilina Rathnayake
1399*86a4271fSThilina Rathnayake         enorm=sqrt(wv(1))
1400*86a4271fSThilina Rathnayake         if (enorm.lt.tol) then
1401*86a4271fSThilina Rathnayake            ifin = iter
1402*86a4271fSThilina Rathnayake            if (nio.eq.0) write(6,3000) istep,ifin,enorm,tol
1403*86a4271fSThilina Rathnayake            goto 9999
1404*86a4271fSThilina Rathnayake         endif
1405*86a4271fSThilina RathnayakeC        if (nio.eq.0) write(6,2) iter,enorm,alph(1),pap(1),'alpha'
1406*86a4271fSThilina Rathnayake 2      format(i5,1p3e12.4,2x,a5)
1407*86a4271fSThilina Rathnayake
1408*86a4271fSThilina Rathnayake         rpp2(1)=rpp1(1)
1409*86a4271fSThilina Rathnayake         rpp1(1)=wv  (2)
1410*86a4271fSThilina Rathnayake         beta1  =rpp1(1)/rpp2(1)
1411*86a4271fSThilina Rathnayake         do i=1,n
1412*86a4271fSThilina Rathnayake            p1(i)=z1(i) + beta1*p1(i)
1413*86a4271fSThilina Rathnayake         enddo
1414*86a4271fSThilina Rathnayake
1415*86a4271fSThilina Rathnayake 1000 continue
1416*86a4271fSThilina Rathnayake
1417*86a4271fSThilina Rathnayake      rbnorm=sqrt(wv(1))
1418*86a4271fSThilina Rathnayake      if (nio.eq.0) write (6,3001) istep,iter,rbnorm,tol
1419*86a4271fSThilina Rathnayake      iter = iter-1
1420*86a4271fSThilina Rathnayake
1421*86a4271fSThilina Rathnayake 9999 continue
1422*86a4271fSThilina Rathnayake
1423*86a4271fSThilina Rathnayake      maxit=iter
1424*86a4271fSThilina Rathnayake
1425*86a4271fSThilina Rathnayake 3000 format(i12,1x,'cggo scalar:',i6,1p5e13.4)
1426*86a4271fSThilina Rathnayake 3001 format(2i6,' Unconverged cggo scalar: rbnorm =',1p2e13.6)
1427*86a4271fSThilina Rathnayake
1428*86a4271fSThilina Rathnayake      return
1429*86a4271fSThilina Rathnayake      end
1430*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
1431*86a4271fSThilina Rathnayake      subroutine axhm1_ceed(pap,ap1,p1,h1,h2,ceed,ceed_op,
1432*86a4271fSThilina Rathnayake     $  vec_ap1,vec_p1)
1433*86a4271fSThilina RathnayakeC     Vector conjugate gradient matvec for solution of uncoupled
1434*86a4271fSThilina RathnayakeC     Helmholtz equations
1435*86a4271fSThilina RathnayakeC     Input: pap,p1,h1,h2,bpname,ceed,ceed_op,vec_ap1,vec_p1
1436*86a4271fSThilina RathnayakeC     Output: ap1
1437*86a4271fSThilina Rathnayake      include 'SIZE'
1438*86a4271fSThilina Rathnayake      include 'TOTAL'
1439*86a4271fSThilina Rathnayake      include 'ceedf.h'
1440*86a4271fSThilina Rathnayake
1441*86a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2))
1442*86a4271fSThilina Rathnayake      real*8       gf(lg,lx,lelt)             ! Equivalence new gf() data
1443*86a4271fSThilina Rathnayake      equivalence (gf,g1m1)                   ! layout to g1m1...g6m1
1444*86a4271fSThilina Rathnayake
1445*86a4271fSThilina Rathnayake      real*8   pap(3)
1446*86a4271fSThilina Rathnayake      real*8   ap1(lx,lelt)
1447*86a4271fSThilina Rathnayake      real*8    p1(lx,lelt)
1448*86a4271fSThilina Rathnayake      real*8    h1(lx,lelt),h2(lx,lelt)
1449*86a4271fSThilina Rathnayake      integer ceed,ceed_op,vec_ap1,vec_p1,err
1450*86a4271fSThilina Rathnayake      integer i,e
1451*86a4271fSThilina Rathnayake      integer*8 offset
1452*86a4271fSThilina Rathnayake
1453*86a4271fSThilina Rathnayake      offset=0
1454*86a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_p1,ceed_mem_host,ceed_use_pointer,
1455*86a4271fSThilina Rathnayake     $  p1,offset,err)
1456*86a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_ap1,ceed_mem_host,ceed_use_pointer,
1457*86a4271fSThilina Rathnayake     $  ap1,offset,err)
1458*86a4271fSThilina Rathnayake
1459*86a4271fSThilina Rathnayake      call ceedoperatorapply(ceed_op,vec_p1,vec_ap1,
1460*86a4271fSThilina Rathnayake     $  ceed_request_immediate,err)
1461*86a4271fSThilina Rathnayake
1462*86a4271fSThilina Rathnayake      call ceedvectorsyncarray(vec_ap1,ceed_mem_host,err)
1463*86a4271fSThilina Rathnayake
1464*86a4271fSThilina Rathnayake      pap(1)=0.
1465*86a4271fSThilina Rathnayake
1466*86a4271fSThilina Rathnayake      do e=1,nelt
1467*86a4271fSThilina Rathnayake         do i=1,lx
1468*86a4271fSThilina Rathnayake           pap(1)=pap(1)+ap1(i,e)*p1(i,e)
1469*86a4271fSThilina Rathnayake         enddo
1470*86a4271fSThilina Rathnayake      enddo
1471*86a4271fSThilina Rathnayake
1472*86a4271fSThilina Rathnayake      return
1473*86a4271fSThilina Rathnayake      end
1474*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
1475*86a4271fSThilina Rathnayake      subroutine ax_e_bp1(w,u,g,h1,h2,b,ju,us,ut)
1476*86a4271fSThilina RathnayakeC     Local matrix-vector for solution of BP3 (stiffness matrix)
1477*86a4271fSThilina RathnayakeC     Input: u,g,h1,h2,b,ju,us,ut   Output: w
1478*86a4271fSThilina Rathnayake      include 'SIZE'
1479*86a4271fSThilina Rathnayake      include 'TOTAL'
1480*86a4271fSThilina Rathnayake
1481*86a4271fSThilina Rathnayake      parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2))
1482*86a4271fSThilina Rathnayake      real*8 w(lxyz),u(lxyz),g(lg,lxyz),h1(lxyz),h2(lxyz),b(lxyz)
1483*86a4271fSThilina Rathnayake      real*8 ju(lxyz),us(lxyz),ut(lxyz)
1484*86a4271fSThilina Rathnayake
1485*86a4271fSThilina Rathnayake      nxq = nx1+1 ! Number of quadrature points
1486*86a4271fSThilina Rathnayake
1487*86a4271fSThilina Rathnayake      lxyzq = nxq**ldim
1488*86a4271fSThilina Rathnayake
1489*86a4271fSThilina Rathnayake      call intp_rstd (ju,u,lx1,nxq,if3d,0) ! 0 --> Fwd interpolation
1490*86a4271fSThilina Rathnayake      do i=1,lxyzq
1491*86a4271fSThilina Rathnayake         ju(i)=ju(i)*h2(i) !! h2 must be on the fine grid, w/ quad wts
1492*86a4271fSThilina Rathnayake      enddo
1493*86a4271fSThilina Rathnayake      call intp_rstd (w,ju,lx1,nxq,if3d,1) ! 1 --> ju-->u
1494*86a4271fSThilina Rathnayake
1495*86a4271fSThilina Rathnayake      return
1496*86a4271fSThilina Rathnayake      end
1497*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
1498*86a4271fSThilina Rathnayake      subroutine axhm1_bp1(pap,ap1,p1,h1,h2)
1499*86a4271fSThilina RathnayakeC     Vector conjugate gradient matvec for solution of BP1 (mass matrix)
1500*86a4271fSThilina RathnayakeC     Input: pap,p1,h1,h2           Output: ap1
1501*86a4271fSThilina Rathnayake      include 'SIZE'
1502*86a4271fSThilina Rathnayake      include 'TOTAL'
1503*86a4271fSThilina Rathnayake
1504*86a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2))
1505*86a4271fSThilina Rathnayake      real*8         gf(lg,lx,lelt)             ! Equivalence new gf() data
1506*86a4271fSThilina Rathnayake      equivalence (gf,g1m1)                     ! layout to g1m1...g6m1
1507*86a4271fSThilina Rathnayake
1508*86a4271fSThilina Rathnayake      real*8 pap(3)
1509*86a4271fSThilina Rathnayake      real*8 ap1(lx,lelt)
1510*86a4271fSThilina Rathnayake      real*8  p1(lx,lelt)
1511*86a4271fSThilina Rathnayake      real*8  h1(lx,lelt),h2(lx,lelt)
1512*86a4271fSThilina Rathnayake
1513*86a4271fSThilina Rathnayake      real*8 ur(lx),us(lx),ut(lx)
1514*86a4271fSThilina Rathnayake      common /ctmp1/ ur,us,ut
1515*86a4271fSThilina Rathnayake
1516*86a4271fSThilina Rathnayake      integer e
1517*86a4271fSThilina Rathnayake
1518*86a4271fSThilina Rathnayake      pap(1)=0.
1519*86a4271fSThilina Rathnayake
1520*86a4271fSThilina Rathnayake      k=1
1521*86a4271fSThilina Rathnayake      nxq = nx1+1
1522*86a4271fSThilina Rathnayake
1523*86a4271fSThilina Rathnayake      do e=1,nelt
1524*86a4271fSThilina Rathnayake
1525*86a4271fSThilina Rathnayake         call ax_e_bp1(ap1(1,e),p1(1,e),gf(1,1,e),h1(1,e),h2(k,1)
1526*86a4271fSThilina Rathnayake     $                                          ,bm1(1,1,1,e),ur,us,ut)
1527*86a4271fSThilina Rathnayake         do i=1,lx
1528*86a4271fSThilina Rathnayake           pap(1)=pap(1)+ap1(i,e)*p1(i,e)
1529*86a4271fSThilina Rathnayake         enddo
1530*86a4271fSThilina Rathnayake         k=k+nxq*nxq*nxq
1531*86a4271fSThilina Rathnayake
1532*86a4271fSThilina Rathnayake      enddo
1533*86a4271fSThilina Rathnayake
1534*86a4271fSThilina Rathnayake      return
1535*86a4271fSThilina Rathnayake      end
1536*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
1537*86a4271fSThilina Rathnayake      subroutine ax_e_bp3(w,u,g,ur,us,ut,wk)
1538*86a4271fSThilina RathnayakeC     Local matrix-vector for solution of BP3 (stiffness matrix)
1539*86a4271fSThilina RathnayakeC     Input: u,g,ur,us,ut,wk        Output: w
1540*86a4271fSThilina Rathnayake      include 'SIZE'
1541*86a4271fSThilina Rathnayake      include 'TOTAL'
1542*86a4271fSThilina Rathnayake
1543*86a4271fSThilina Rathnayake      parameter (lzq=lx1+1,lxyz=lx1*lx1*lx1,lxyzq=lzq*lzq*lzq)
1544*86a4271fSThilina Rathnayake
1545*86a4271fSThilina Rathnayake      common /ctmp0/ tmp(lxyzq)
1546*86a4271fSThilina Rathnayake      common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq)
1547*86a4271fSThilina Rathnayake
1548*86a4271fSThilina Rathnayake      real*8 ur(lxyzq),us(lxyzq),ut(lxyzq),wk(lxyzq)
1549*86a4271fSThilina Rathnayake      real*8 w(lxyz),u(lxyz),g(2*ldim,lxyzq)
1550*86a4271fSThilina Rathnayake
1551*86a4271fSThilina Rathnayake      n = lzq-1
1552*86a4271fSThilina Rathnayake
1553*86a4271fSThilina Rathnayake      call intp_rstd  (wk,u,lx1,lzq,if3d,0) ! 0 --> Fwd interpolation
1554*86a4271fSThilina Rathnayake      call loc_grad3  (ur,us,ut,wk,n,dxmq,dxtmq)
1555*86a4271fSThilina Rathnayake
1556*86a4271fSThilina Rathnayake      do i=1,lxyzq
1557*86a4271fSThilina Rathnayake         wr = g(1,i)*ur(i) + g(2,i)*us(i) + g(3,i)*ut(i)
1558*86a4271fSThilina Rathnayake         ws = g(2,i)*ur(i) + g(4,i)*us(i) + g(5,i)*ut(i)
1559*86a4271fSThilina Rathnayake         wt = g(3,i)*ur(i) + g(5,i)*us(i) + g(6,i)*ut(i)
1560*86a4271fSThilina Rathnayake         ur(i) = wr
1561*86a4271fSThilina Rathnayake         us(i) = ws
1562*86a4271fSThilina Rathnayake         ut(i) = wt
1563*86a4271fSThilina Rathnayake      enddo
1564*86a4271fSThilina Rathnayake
1565*86a4271fSThilina Rathnayake      call loc_grad3t (wk,ur,us,ut,n,dxmq,dxtmq,tmp)
1566*86a4271fSThilina Rathnayake      call intp_rstd  (w,wk,lx1,lzq,if3d,1) ! 1 --> ju-->u
1567*86a4271fSThilina Rathnayake
1568*86a4271fSThilina Rathnayake      return
1569*86a4271fSThilina Rathnayake      end
1570*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
1571*86a4271fSThilina Rathnayake      subroutine axhm1_bp3(pap,ap1,p1,h1,h2)
1572*86a4271fSThilina RathnayakeC     Vector conjugate gradient matvec for solution of BP3 (stiffness matrix)
1573*86a4271fSThilina RathnayakeC     Input: pap,p1,h1,h2           Output: ap1
1574*86a4271fSThilina Rathnayake      include 'SIZE'
1575*86a4271fSThilina Rathnayake      include 'TOTAL'
1576*86a4271fSThilina Rathnayake
1577*86a4271fSThilina Rathnayake      parameter (lzq=lx1+1)
1578*86a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
1579*86a4271fSThilina Rathnayake      common /bpgfactors/ gf(lg,lq,lelt),bmq(lq,lelt),w3mq(lq)
1580*86a4271fSThilina Rathnayake
1581*86a4271fSThilina Rathnayake      real*8 pap(3)
1582*86a4271fSThilina Rathnayake      real*8 ap1(lx,lelt)
1583*86a4271fSThilina Rathnayake      real*8  p1(lx,lelt)
1584*86a4271fSThilina Rathnayake      real*8  h1(lx,lelt),h2(lx,lelt)
1585*86a4271fSThilina Rathnayake
1586*86a4271fSThilina Rathnayake      common /ctmp1/ ur,us,ut,wk
1587*86a4271fSThilina Rathnayake      real*8 ur(lq),us(lq),ut(lq),wk(lq)
1588*86a4271fSThilina Rathnayake
1589*86a4271fSThilina Rathnayake      integer e
1590*86a4271fSThilina Rathnayake
1591*86a4271fSThilina Rathnayake      pap(1)=0.
1592*86a4271fSThilina Rathnayake
1593*86a4271fSThilina Rathnayake      do e=1,nelt
1594*86a4271fSThilina Rathnayake
1595*86a4271fSThilina Rathnayake         call ax_e_bp3(ap1(1,e),p1(1,e),gf(1,1,e),ur,us,ut,wk)
1596*86a4271fSThilina Rathnayake         do i=1,lx
1597*86a4271fSThilina Rathnayake           pap(1)=pap(1)+p1(i,e)*ap1(i,e)
1598*86a4271fSThilina Rathnayake         enddo
1599*86a4271fSThilina Rathnayake
1600*86a4271fSThilina Rathnayake      enddo
1601*86a4271fSThilina Rathnayake
1602*86a4271fSThilina Rathnayake      return
1603*86a4271fSThilina Rathnayake      end
1604*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
1605*86a4271fSThilina Rathnayake      subroutine axhm1(pap,ap1,p1,h1,h2,bpname)
1606*86a4271fSThilina RathnayakeC     Vector conjugate gradient matvec for solution of uncoupled
1607*86a4271fSThilina RathnayakeC     Helmholtz equations
1608*86a4271fSThilina RathnayakeC     Input: pap,p1,h1,h2,bpname    Output: ap1
1609*86a4271fSThilina Rathnayake      include 'SIZE'
1610*86a4271fSThilina Rathnayake      include 'TOTAL'
1611*86a4271fSThilina Rathnayake
1612*86a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1)
1613*86a4271fSThilina Rathnayake
1614*86a4271fSThilina Rathnayake      real*8 pap(3),ap1(lx,lelt),p1(lx,lelt)
1615*86a4271fSThilina Rathnayake      real*8 h1(lx,lelt),h2(lx,lelt)
1616*86a4271fSThilina Rathnayake
1617*86a4271fSThilina Rathnayake      character*3 bpname
1618*86a4271fSThilina Rathnayake
1619*86a4271fSThilina Rathnayake      if (bpname.eq.'bp1') then
1620*86a4271fSThilina Rathnayake         call axhm1_bp1(pap,ap1,p1,h1,h2)
1621*86a4271fSThilina Rathnayake
1622*86a4271fSThilina Rathnayake      elseif (bpname.eq.'bp3') then
1623*86a4271fSThilina Rathnayake         call axhm1_bp3(pap,ap1,p1,h1,h2)
1624*86a4271fSThilina Rathnayake
1625*86a4271fSThilina Rathnayake      else
1626*86a4271fSThilina Rathnayake         write(6,*) bpname,' axhm1 bpname error'
1627*86a4271fSThilina Rathnayake         stop
1628*86a4271fSThilina Rathnayake
1629*86a4271fSThilina Rathnayake      endif
1630*86a4271fSThilina Rathnayake
1631*86a4271fSThilina Rathnayake      return
1632*86a4271fSThilina Rathnayake      end
1633*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
1634*86a4271fSThilina Rathnayake      subroutine get_bp(bp)
1635*86a4271fSThilina RathnayakeC     Get BP to run
1636*86a4271fSThilina RathnayakeC     Input:                        Output: bp
1637*86a4271fSThilina Rathnayake      integer i,bp
1638*86a4271fSThilina Rathnayake      character*64 bpval
1639*86a4271fSThilina Rathnayake
1640*86a4271fSThilina Rathnayake      bp=0
1641*86a4271fSThilina Rathnayake      if(iargc().ge.1) then
1642*86a4271fSThilina Rathnayake        call getarg(1,bpval)
1643*86a4271fSThilina Rathnayake      endif
1644*86a4271fSThilina Rathnayake      if(bpval.eq."bp1") then
1645*86a4271fSThilina Rathnayake        bp=1
1646*86a4271fSThilina Rathnayake      elseif(bpval.eq."bp3") then
1647*86a4271fSThilina Rathnayake        bp=3
1648*86a4271fSThilina Rathnayake      endif
1649*86a4271fSThilina Rathnayake
1650*86a4271fSThilina Rathnayake      return
1651*86a4271fSThilina Rathnayake      end
1652*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
1653*86a4271fSThilina Rathnayake      subroutine get_spec(spec)
1654*86a4271fSThilina RathnayakeC     Get CEED backend specification
1655*86a4271fSThilina RathnayakeC     Input:                        Output: spec
1656*86a4271fSThilina Rathnayake      integer i
1657*86a4271fSThilina Rathnayake      character*64 spec
1658*86a4271fSThilina Rathnayake
1659*86a4271fSThilina Rathnayake      spec = '/cpu/self'
1660*86a4271fSThilina Rathnayake      if(iargc().ge.2) then
1661*86a4271fSThilina Rathnayake        call getarg(2,spec)
1662*86a4271fSThilina Rathnayake      endif
1663*86a4271fSThilina Rathnayake
1664*86a4271fSThilina Rathnayake      return
1665*86a4271fSThilina Rathnayake      end
1666*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
1667*86a4271fSThilina Rathnayake      subroutine get_test(test)
1668*86a4271fSThilina RathnayakeC     Get test mode flag
1669*86a4271fSThilina RathnayakeC     Input:                        Output: test
1670*86a4271fSThilina Rathnayake      integer i,test
1671*86a4271fSThilina Rathnayake      character*64 testval
1672*86a4271fSThilina Rathnayake
1673*86a4271fSThilina Rathnayake      test=0
1674*86a4271fSThilina Rathnayake      if(iargc().ge.3) then
1675*86a4271fSThilina Rathnayake        call getarg(3,testval)
1676*86a4271fSThilina Rathnayake      endif
1677*86a4271fSThilina Rathnayake      if(testval.eq."test") then
1678*86a4271fSThilina Rathnayake        test=1
1679*86a4271fSThilina Rathnayake      endif
1680*86a4271fSThilina Rathnayake
1681*86a4271fSThilina Rathnayake      return
1682*86a4271fSThilina Rathnayake      end
1683*86a4271fSThilina RathnayakeC-----------------------------------------------------------------------
1684