xref: /libCEED/examples/nek/bps/bps.usr (revision ec3da8bcb94d9f0073544b37b5081a06981a86f7)
186a4271fSThilina RathnayakeC Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
286a4271fSThilina RathnayakeC the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
386a4271fSThilina RathnayakeC reserved. See files LICENSE and NOTICE for details.
486a4271fSThilina RathnayakeC
586a4271fSThilina RathnayakeC This file is part of CEED, a collection of benchmarks, miniapps, software
686a4271fSThilina RathnayakeC libraries and APIs for efficient high-order finite element and spectral
786a4271fSThilina RathnayakeC element discretizations for exascale applications. For more information and
886a4271fSThilina RathnayakeC source code availability see http://github.com/ceed.
986a4271fSThilina RathnayakeC
1086a4271fSThilina RathnayakeC The CEED research is supported by the Exascale Computing Project (17-SC-20-SC)
1186a4271fSThilina RathnayakeC a collaborative effort of two U.S. Department of Energy organizations (Office
1286a4271fSThilina RathnayakeC of Science and the National Nuclear Security Administration) responsible for
1386a4271fSThilina RathnayakeC the planning and preparation of a capable exascale ecosystem, including
1486a4271fSThilina RathnayakeC software, applications, hardware, advanced system engineering and early
1586a4271fSThilina RathnayakeC testbed platforms, in support of the nation's exascale computing imperative.
1686a4271fSThilina Rathnayake
1786a4271fSThilina RathnayakeC> @file
1886a4271fSThilina RathnayakeC> Mass and diffusion operators examples using Nek5000
19dc8efd83SLeila GhaffariC_TESTARGS(name="BP1") -c {ceed_resource} -e bp1 -n 1 -b 4 -test
20dc8efd83SLeila GhaffariC_TESTARGS(name="BP3") -c {ceed_resource} -e bp3 -n 1 -b 4 -test
2186a4271fSThilina Rathnayake
2286a4271fSThilina RathnayakeC-----------------------------------------------------------------------
2386a4271fSThilina Rathnayake      subroutine masssetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
2486a4271fSThilina Rathnayake     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
2586a4271fSThilina Rathnayake     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
2686a4271fSThilina RathnayakeC     Set up mass operator
2786a4271fSThilina RathnayakeC     Input: u1,u2,u3,q             Output: v1,v2,ierr
2886a4271fSThilina Rathnayake      integer q,ierr
2986a4271fSThilina Rathnayake      real*8 ctx(1)
3086a4271fSThilina Rathnayake      real*8 u1(3*q)
3186a4271fSThilina Rathnayake      real*8 u2(9*q)
3286a4271fSThilina Rathnayake      real*8 u3(q)
3386a4271fSThilina Rathnayake      real*8 v1(q)
3486a4271fSThilina Rathnayake      real*8 v2(q)
3586a4271fSThilina Rathnayake      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
3686a4271fSThilina Rathnayake      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
3786a4271fSThilina Rathnayake      real*8 jacmq
3886a4271fSThilina Rathnayake
39ee07ded2SValeria BarraC     Quadrature Point Loop
4086a4271fSThilina Rathnayake      do i=1,q
4186a4271fSThilina Rathnayake        a11=u2(i+q*0)
4286a4271fSThilina Rathnayake        a12=u2(i+q*3)
4386a4271fSThilina Rathnayake        a13=u2(i+q*6)
4486a4271fSThilina Rathnayake
4586a4271fSThilina Rathnayake        a21=u2(i+q*1)
4686a4271fSThilina Rathnayake        a22=u2(i+q*4)
4786a4271fSThilina Rathnayake        a23=u2(i+q*7)
4886a4271fSThilina Rathnayake
4986a4271fSThilina Rathnayake        a31=u2(i+q*2)
5086a4271fSThilina Rathnayake        a32=u2(i+q*5)
5186a4271fSThilina Rathnayake        a33=u2(i+q*8)
5286a4271fSThilina Rathnayake
5386a4271fSThilina Rathnayake        g11 = (a22*a33-a23*a32)
5486a4271fSThilina Rathnayake        g12 = (a13*a32-a33*a12)
5586a4271fSThilina Rathnayake        g13 = (a12*a23-a22*a13)
5686a4271fSThilina Rathnayake
5786a4271fSThilina Rathnayake        g21 = (a23*a31-a21*a33)
5886a4271fSThilina Rathnayake        g22 = (a11*a33-a31*a13)
5986a4271fSThilina Rathnayake        g23 = (a13*a21-a23*a11)
6086a4271fSThilina Rathnayake
6186a4271fSThilina Rathnayake        g31 = (a21*a32-a22*a31)
6286a4271fSThilina Rathnayake        g32 = (a12*a31-a32*a11)
6386a4271fSThilina Rathnayake        g33 = (a11*a22-a21*a12)
6486a4271fSThilina Rathnayake
6586a4271fSThilina Rathnayake        jacmq = a11*g11+a21*g12+a31*g13
6686a4271fSThilina Rathnayake
6786a4271fSThilina RathnayakeC       Rho
6886a4271fSThilina Rathnayake        v1(i)=u3(i)*jacmq
6986a4271fSThilina Rathnayake
7086a4271fSThilina RathnayakeC       RHS
7186a4271fSThilina Rathnayake        v2(i)=u3(i)*jacmq
7286a4271fSThilina Rathnayake     $             *dsqrt(u1(i+q*0)*u1(i+q*0)
7386a4271fSThilina Rathnayake     $                   +u1(i+q*1)*u1(i+q*1)
7486a4271fSThilina Rathnayake     $                   +u1(i+q*2)*u1(i+q*2))
7586a4271fSThilina Rathnayake      enddo
7686a4271fSThilina Rathnayake
7786a4271fSThilina Rathnayake      ierr=0
7886a4271fSThilina Rathnayake      end
7986a4271fSThilina RathnayakeC-----------------------------------------------------------------------
8086a4271fSThilina Rathnayake      subroutine massf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
8186a4271fSThilina Rathnayake     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
8286a4271fSThilina Rathnayake     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
8386a4271fSThilina RathnayakeC     Apply mass operator
8486a4271fSThilina RathnayakeC     Input: u1,u2,q                Output: v1,ierr
8586a4271fSThilina Rathnayake      integer q,ierr
8686a4271fSThilina Rathnayake      real*8 ctx(1)
8786a4271fSThilina Rathnayake      real*8 u1(q)
8886a4271fSThilina Rathnayake      real*8 u2(q)
8986a4271fSThilina Rathnayake      real*8 v1(q)
9086a4271fSThilina Rathnayake
91ee07ded2SValeria BarraC     Quadrature Point Loop
9286a4271fSThilina Rathnayake      do i=1,q
9386a4271fSThilina Rathnayake        v1(i)=u2(i)*u1(i)
9486a4271fSThilina Rathnayake      enddo
9586a4271fSThilina Rathnayake
9686a4271fSThilina Rathnayake      ierr=0
9786a4271fSThilina Rathnayake      end
9886a4271fSThilina RathnayakeC-----------------------------------------------------------------------
9986a4271fSThilina Rathnayake      subroutine diffsetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
10086a4271fSThilina Rathnayake     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
10186a4271fSThilina Rathnayake     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
10286a4271fSThilina RathnayakeC     Set up diffusion operator
10386a4271fSThilina RathnayakeC     Input: u1,u2,u3,q             Output: v1,v2,ierr
10486a4271fSThilina Rathnayake      integer q,ierr
10586a4271fSThilina Rathnayake      real*8 ctx(1)
10686a4271fSThilina Rathnayake      real*8 u1(3*q)
10786a4271fSThilina Rathnayake      real*8 u2(9*q)
10886a4271fSThilina Rathnayake      real*8 u3(q)
10986a4271fSThilina Rathnayake      real*8 v1(6*q)
11086a4271fSThilina Rathnayake      real*8 v2(q)
11186a4271fSThilina Rathnayake      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
11286a4271fSThilina Rathnayake      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
11386a4271fSThilina Rathnayake      real*8 jacmq,scl
11486a4271fSThilina Rathnayake      real*8 c(3),k(3)
11586a4271fSThilina Rathnayake
116ee07ded2SValeria BarraC     Quadrature Point Loop
11786a4271fSThilina Rathnayake      do i=1,q
11886a4271fSThilina Rathnayake        pi = 3.14159265358979323846
11986a4271fSThilina Rathnayake
12086a4271fSThilina Rathnayake        c(1)=0.
12186a4271fSThilina Rathnayake        c(2)=1.
12286a4271fSThilina Rathnayake        c(3)=2.
12386a4271fSThilina Rathnayake        k(1)=1.
12486a4271fSThilina Rathnayake        k(2)=2.
12586a4271fSThilina Rathnayake        k(3)=3.
12686a4271fSThilina Rathnayake
12786a4271fSThilina Rathnayake        a11=u2(i+q*0)
12886a4271fSThilina Rathnayake        a12=u2(i+q*3)
12986a4271fSThilina Rathnayake        a13=u2(i+q*6)
13086a4271fSThilina Rathnayake
13186a4271fSThilina Rathnayake        a21=u2(i+q*1)
13286a4271fSThilina Rathnayake        a22=u2(i+q*4)
13386a4271fSThilina Rathnayake        a23=u2(i+q*7)
13486a4271fSThilina Rathnayake
13586a4271fSThilina Rathnayake        a31=u2(i+q*2)
13686a4271fSThilina Rathnayake        a32=u2(i+q*5)
13786a4271fSThilina Rathnayake        a33=u2(i+q*8)
13886a4271fSThilina Rathnayake
13986a4271fSThilina Rathnayake        g11 = (a22*a33-a23*a32)
14086a4271fSThilina Rathnayake        g12 = (a13*a32-a33*a12)
14186a4271fSThilina Rathnayake        g13 = (a12*a23-a22*a13)
14286a4271fSThilina Rathnayake
14386a4271fSThilina Rathnayake        g21 = (a23*a31-a21*a33)
14486a4271fSThilina Rathnayake        g22 = (a11*a33-a31*a13)
14586a4271fSThilina Rathnayake        g23 = (a13*a21-a23*a11)
14686a4271fSThilina Rathnayake
14786a4271fSThilina Rathnayake        g31 = (a21*a32-a22*a31)
14886a4271fSThilina Rathnayake        g32 = (a12*a31-a32*a11)
14986a4271fSThilina Rathnayake        g33 = (a11*a22-a21*a12)
15086a4271fSThilina Rathnayake
15186a4271fSThilina Rathnayake        jacmq = a11*g11+a21*g12+a31*g13
15286a4271fSThilina Rathnayake
15386a4271fSThilina Rathnayake        scl = u3(i)/jacmq
15486a4271fSThilina Rathnayake
15586a4271fSThilina RathnayakeC       Geometric factors
156288c0443SJeremy L ThompsonC       Stored in Voigt convention
157288c0443SJeremy L ThompsonC       0 5 4
158288c0443SJeremy L ThompsonC       5 1 3
159288c0443SJeremy L ThompsonC       4 3 2
16086a4271fSThilina Rathnayake        v1(i+0*q) = scl*(g11*g11+g12*g12+g13*g13) ! Grr
161288c0443SJeremy L Thompson        v1(i+1*q) = scl*(g21*g21+g22*g22+g23*g23) ! Gss
162288c0443SJeremy L Thompson        v1(i+2*q) = scl*(g31*g31+g32*g32+g33*g33) ! Gtt
163288c0443SJeremy L Thompson        v1(i+3*q) = scl*(g21*g31+g22*g32+g23*g33) ! Gst
164288c0443SJeremy L Thompson        v1(i+4*q) = scl*(g11*g31+g12*g32+g13*g33) ! Grt
165288c0443SJeremy L Thompson        v1(i+5*q) = scl*(g11*g21+g12*g22+g13*g23) ! Grs
16686a4271fSThilina Rathnayake
16786a4271fSThilina RathnayakeC       RHS
16886a4271fSThilina Rathnayake        v2(i) = u3(i)*jacmq*pi*pi
16986a4271fSThilina Rathnayake     $            *dsin(pi*(c(1)+k(1)*u1(i+0*q)))
17086a4271fSThilina Rathnayake     $            *dsin(pi*(c(2)+k(2)*u1(i+1*q)))
17186a4271fSThilina Rathnayake     $            *dsin(pi*(c(3)+k(3)*u1(i+2*q)))
17286a4271fSThilina Rathnayake     $            *(k(1)*k(1)+k(2)*k(2)+k(3)*k(3))
17386a4271fSThilina Rathnayake
17486a4271fSThilina Rathnayake      enddo
17586a4271fSThilina Rathnayake
17686a4271fSThilina Rathnayake      ierr=0
17786a4271fSThilina Rathnayake      end
17886a4271fSThilina RathnayakeC-----------------------------------------------------------------------
17986a4271fSThilina Rathnayake      subroutine diffusionf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
18086a4271fSThilina Rathnayake     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
18186a4271fSThilina Rathnayake     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
18286a4271fSThilina RathnayakeC     Apply diffusion operator
18386a4271fSThilina RathnayakeC     Input: u1,u2,q                Output: v1,ierr
18486a4271fSThilina Rathnayake      integer q,ierr
18586a4271fSThilina Rathnayake      real*8 ctx(1)
18686a4271fSThilina Rathnayake      real*8 u1(3*q)
18786a4271fSThilina Rathnayake      real*8 u2(6*q)
18886a4271fSThilina Rathnayake      real*8 v1(3*q)
18986a4271fSThilina Rathnayake
190ee07ded2SValeria BarraC     Quadrature Point Loop
19186a4271fSThilina Rathnayake      do i=1,q
19286a4271fSThilina Rathnayake        v1(i+0*q)=
193288c0443SJeremy L Thompson     $     u2(i+0*q)*u1(i)+u2(i+5*q)*u1(i+q)+u2(i+4*q)*u1(i+2*q)
19486a4271fSThilina Rathnayake        v1(i+1*q)=
195288c0443SJeremy L Thompson     $     u2(i+5*q)*u1(i)+u2(i+1*q)*u1(i+q)+u2(i+3*q)*u1(i+2*q)
19686a4271fSThilina Rathnayake        v1(i+2*q)=
197288c0443SJeremy L Thompson     $     u2(i+4*q)*u1(i)+u2(i+3*q)*u1(i+q)+u2(i+2*q)*u1(i+2*q)
19886a4271fSThilina Rathnayake      enddo
19986a4271fSThilina Rathnayake
20086a4271fSThilina Rathnayake      ierr=0
20186a4271fSThilina Rathnayake      end
20286a4271fSThilina RathnayakeC-----------------------------------------------------------------------
20386a4271fSThilina Rathnayake      subroutine set_h2_as_rhoJac_GL(h2,bmq,nxq)
20486a4271fSThilina RathnayakeC     Set h2 as rhoJac
20586a4271fSThilina RathnayakeC     Input: bmq,nxq                Output: h2
20686a4271fSThilina Rathnayake      include 'SIZE'
20786a4271fSThilina Rathnayake      real*8 h2(1),bmq(1)
20886a4271fSThilina Rathnayake
20986a4271fSThilina Rathnayake      common /ctmp77/ wd(lxd),zd(lxd)
21086a4271fSThilina Rathnayake      integer e,i,L
21186a4271fSThilina Rathnayake
21286a4271fSThilina Rathnayake      call zwgl(zd,wd,nxq)  ! nxq = number of points
21386a4271fSThilina Rathnayake
21486a4271fSThilina Rathnayake      q = 1.0               ! Later, this can be a function of position...
21586a4271fSThilina Rathnayake
21686a4271fSThilina Rathnayake      L = 0
21786a4271fSThilina Rathnayake      do e=1,lelt
21886a4271fSThilina Rathnayake      do i=1,nxq**ldim
21986a4271fSThilina Rathnayake         L=L+1
22086a4271fSThilina Rathnayake         h2(L) = q*bmq(L)
22186a4271fSThilina Rathnayake      enddo
22286a4271fSThilina Rathnayake      enddo
22386a4271fSThilina Rathnayake
22486a4271fSThilina Rathnayake      return
22586a4271fSThilina Rathnayake      end
22686a4271fSThilina RathnayakeC-----------------------------------------------------------------------
22786a4271fSThilina Rathnayake      subroutine dist_fld_h1(e)
22886a4271fSThilina RathnayakeC     Set distance initial condition for BP1
22986a4271fSThilina RathnayakeC     Input:                        Output: e
23086a4271fSThilina Rathnayake      include 'SIZE'
23186a4271fSThilina Rathnayake      include 'TOTAL'
23286a4271fSThilina Rathnayake      real*8 x,y,z
23386a4271fSThilina Rathnayake      real*8 e(1)
23486a4271fSThilina Rathnayake
23586a4271fSThilina Rathnayake      n=lx1*ly1*lz1*nelt
23686a4271fSThilina Rathnayake
23786a4271fSThilina Rathnayake      do i=1,n
23886a4271fSThilina Rathnayake        x=xm1(i,1,1,1)
23986a4271fSThilina Rathnayake        y=ym1(i,1,1,1)
24086a4271fSThilina Rathnayake        z=zm1(i,1,1,1)
24186a4271fSThilina Rathnayake
24286a4271fSThilina Rathnayake        e(i) = dsqrt(x*x+y*y+z*z)
24386a4271fSThilina Rathnayake      enddo
24486a4271fSThilina Rathnayake
24586a4271fSThilina Rathnayake      call dsavg(e)  ! This is requisite for random fields
24686a4271fSThilina Rathnayake
24786a4271fSThilina Rathnayake      return
24886a4271fSThilina Rathnayake      end
24986a4271fSThilina RathnayakeC-----------------------------------------------------------------------
25086a4271fSThilina Rathnayake      subroutine sin_fld_h1(e)
25186a4271fSThilina RathnayakeC     Set sine initial condition for BP3
25286a4271fSThilina RathnayakeC     Input:                        Output: e
25386a4271fSThilina Rathnayake      include 'SIZE'
25486a4271fSThilina Rathnayake      include 'TOTAL'
25586a4271fSThilina Rathnayake      real*8 x,y,z
25686a4271fSThilina Rathnayake      real*8 e(1)
25786a4271fSThilina Rathnayake      real*8 c(3),k(3)
25886a4271fSThilina Rathnayake
25986a4271fSThilina Rathnayake      n=lx1*ly1*lz1*nelt
26086a4271fSThilina Rathnayake      pi = 3.14159265358979323846
26186a4271fSThilina Rathnayake
26286a4271fSThilina Rathnayake      c(1)=0.
26386a4271fSThilina Rathnayake      c(2)=1.
26486a4271fSThilina Rathnayake      c(3)=2.
26586a4271fSThilina Rathnayake      k(1)=1.
26686a4271fSThilina Rathnayake      k(2)=2.
26786a4271fSThilina Rathnayake      k(3)=3.
26886a4271fSThilina Rathnayake
26986a4271fSThilina Rathnayake      do i=1,n
27086a4271fSThilina Rathnayake        x=xm1(i,1,1,1)
27186a4271fSThilina Rathnayake        y=ym1(i,1,1,1)
27286a4271fSThilina Rathnayake        z=zm1(i,1,1,1)
27386a4271fSThilina Rathnayake
27486a4271fSThilina Rathnayake        e(i) = dsin(pi*(c(1)+k(1)*x))
27586a4271fSThilina Rathnayake     &        *dsin(pi*(c(2)+k(2)*y))
27686a4271fSThilina Rathnayake     &        *dsin(pi*(c(3)+k(3)*z))
27786a4271fSThilina Rathnayake
27886a4271fSThilina Rathnayake      enddo
27986a4271fSThilina Rathnayake
28086a4271fSThilina Rathnayake      call dsavg(e)  ! This is requisite for random fields
28186a4271fSThilina Rathnayake
28286a4271fSThilina Rathnayake      return
28386a4271fSThilina Rathnayake      end
28486a4271fSThilina RathnayakeC-----------------------------------------------------------------------
28586a4271fSThilina Rathnayake      subroutine uservp(ix,iy,iz,eg) ! set variable properties
28686a4271fSThilina Rathnayake      include 'SIZE'
28786a4271fSThilina Rathnayake      include 'TOTAL'
28886a4271fSThilina Rathnayake      include 'NEKUSE'
28986a4271fSThilina Rathnayake      integer e,f,eg
29086a4271fSThilina RathnayakeC     e = gllel(eg)
29186a4271fSThilina Rathnayake
29286a4271fSThilina Rathnayake      udiff  = 0.0
29386a4271fSThilina Rathnayake      utrans = 0.0
29486a4271fSThilina Rathnayake
29586a4271fSThilina Rathnayake      return
29686a4271fSThilina Rathnayake      end
29786a4271fSThilina RathnayakeC-----------------------------------------------------------------------
29886a4271fSThilina Rathnayake      subroutine userf(ix,iy,iz,eg) ! set acceleration term
29986a4271fSThilina RathnayakeC
30086a4271fSThilina RathnayakeC     Note: this is an acceleration term, NOT a force!
30186a4271fSThilina RathnayakeC     Thus, ffx will subsequently be multiplied by rho(x,t).
30286a4271fSThilina RathnayakeC
30386a4271fSThilina Rathnayake      include 'SIZE'
30486a4271fSThilina Rathnayake      include 'TOTAL'
30586a4271fSThilina Rathnayake      include 'NEKUSE'
30686a4271fSThilina Rathnayake      integer e,f,eg
30786a4271fSThilina RathnayakeC     e = gllel(eg)
30886a4271fSThilina Rathnayake
30986a4271fSThilina Rathnayake      ffx = 0.0
31086a4271fSThilina Rathnayake      ffy = 0.0
31186a4271fSThilina Rathnayake      ffz = 0.0
31286a4271fSThilina Rathnayake
31386a4271fSThilina Rathnayake      return
31486a4271fSThilina Rathnayake      end
31586a4271fSThilina RathnayakeC-----------------------------------------------------------------------
31686a4271fSThilina Rathnayake      subroutine userq(i,j,k,eg) ! set source term
31786a4271fSThilina Rathnayake      include 'SIZE'
31886a4271fSThilina Rathnayake      include 'TOTAL'
31986a4271fSThilina Rathnayake      include 'NEKUSE'
32086a4271fSThilina Rathnayake      integer e,f,eg
32186a4271fSThilina Rathnayake      e = gllel(eg)
32286a4271fSThilina Rathnayake
32386a4271fSThilina Rathnayake      qvol   = 0
32486a4271fSThilina Rathnayake
32586a4271fSThilina Rathnayake      return
32686a4271fSThilina Rathnayake      end
32786a4271fSThilina RathnayakeC-----------------------------------------------------------------------
32886a4271fSThilina Rathnayake      subroutine userbc(ix,iy,iz,f,eg) ! set up boundary conditions
32986a4271fSThilina RathnayakeC     NOTE ::: This subroutine MAY NOT be called by every process
33086a4271fSThilina Rathnayake      include 'SIZE'
33186a4271fSThilina Rathnayake      include 'TOTAL'
33286a4271fSThilina Rathnayake      include 'NEKUSE'
33386a4271fSThilina Rathnayake      integer e,f,eg
33486a4271fSThilina Rathnayake
33586a4271fSThilina Rathnayake      ux   = 0.0
33686a4271fSThilina Rathnayake      uy   = 0.0
33786a4271fSThilina Rathnayake      uz   = 0.0
33886a4271fSThilina Rathnayake      temp = 0.0
33986a4271fSThilina Rathnayake
34086a4271fSThilina Rathnayake      return
34186a4271fSThilina Rathnayake      end
34286a4271fSThilina RathnayakeC-----------------------------------------------------------------------
34386a4271fSThilina Rathnayake      subroutine useric(ix,iy,iz,eg) ! set up initial conditions
34486a4271fSThilina Rathnayake      include 'SIZE'
34586a4271fSThilina Rathnayake      include 'TOTAL'
34686a4271fSThilina Rathnayake      include 'NEKUSE'
34786a4271fSThilina Rathnayake      integer e,f,eg
34886a4271fSThilina Rathnayake
34986a4271fSThilina Rathnayake      ux   = 0.0
35086a4271fSThilina Rathnayake      uy   = 0.0
35186a4271fSThilina Rathnayake      uz   = 0.0
35286a4271fSThilina Rathnayake      temp = 0.0
35386a4271fSThilina Rathnayake
35486a4271fSThilina Rathnayake      return
35586a4271fSThilina Rathnayake      end
35686a4271fSThilina RathnayakeC-----------------------------------------------------------------------
35786a4271fSThilina Rathnayake      subroutine usrdat   ! This routine to modify element vertices
35886a4271fSThilina Rathnayake      include 'SIZE'
35986a4271fSThilina Rathnayake      include 'TOTAL'
36086a4271fSThilina Rathnayake
36186a4271fSThilina Rathnayake      return
36286a4271fSThilina Rathnayake      end
36386a4271fSThilina RathnayakeC-----------------------------------------------------------------------
36486a4271fSThilina Rathnayake      subroutine usrdat2  ! This routine to modify mesh coordinates
36586a4271fSThilina Rathnayake      include 'SIZE'
36686a4271fSThilina Rathnayake      include 'TOTAL'
36786a4271fSThilina Rathnayake
36886a4271fSThilina Rathnayake      x0 = 0
36986a4271fSThilina Rathnayake      x1 = 1
37086a4271fSThilina Rathnayake      call rescale_x(xm1,x0,x1)
37186a4271fSThilina Rathnayake      call rescale_x(ym1,x0,x1)
37286a4271fSThilina Rathnayake      call rescale_x(zm1,x0,x1)
37386a4271fSThilina Rathnayake
37486a4271fSThilina Rathnayake      param(59)=1  ! Force Nek to use the "deformed element" formulation
37586a4271fSThilina Rathnayake
37686a4271fSThilina Rathnayake      return
37786a4271fSThilina Rathnayake      end
37886a4271fSThilina RathnayakeC-----------------------------------------------------------------------
37986a4271fSThilina Rathnayake      subroutine usrdat3
38086a4271fSThilina Rathnayake      include 'SIZE'
38186a4271fSThilina Rathnayake      include 'TOTAL'
38286a4271fSThilina Rathnayake
38386a4271fSThilina Rathnayake      return
38486a4271fSThilina Rathnayake      end
38586a4271fSThilina RathnayakeC-----------------------------------------------------------------------
38686a4271fSThilina Rathnayake      subroutine xmask1   (r1,h2,nel)
38786a4271fSThilina RathnayakeC     Apply zero Dirichlet boundary conditions
38886a4271fSThilina RathnayakeC     Input: h2,nel                 Output: r1
38986a4271fSThilina Rathnayake      include 'SIZE'
39086a4271fSThilina Rathnayake      include 'TOTAL'
39186a4271fSThilina Rathnayake      real*8 r1(1),h2(1)
39286a4271fSThilina Rathnayake
39386a4271fSThilina Rathnayake      n=nx1*ny1*nz1*nel
39486a4271fSThilina Rathnayake      do i=1,n
39586a4271fSThilina Rathnayake         r1(i)=r1(i)*h2(i)
39686a4271fSThilina Rathnayake      enddo
39786a4271fSThilina Rathnayake
39886a4271fSThilina Rathnayake      return
39986a4271fSThilina Rathnayake      end
40086a4271fSThilina RathnayakeC-----------------------------------------------------------------------
40186a4271fSThilina Rathnayake      function glrdif(x,y,n)
40286a4271fSThilina RathnayakeC     Compute Linfty norm of (x-y)
40386a4271fSThilina RathnayakeC     Input: x,y                    Output: n
40486a4271fSThilina Rathnayake      real*8 x(n),y(n)
40586a4271fSThilina Rathnayake
40686a4271fSThilina Rathnayake      dmx=0
40786a4271fSThilina Rathnayake      xmx=0
40886a4271fSThilina Rathnayake      ymx=0
40986a4271fSThilina Rathnayake
41086a4271fSThilina Rathnayake      do i=1,n
41186a4271fSThilina Rathnayake         diff=abs(x(i)-y(i))
41286a4271fSThilina Rathnayake         dmx =max(dmx,diff)
41386a4271fSThilina Rathnayake         xmx =max(xmx,x(i))
41486a4271fSThilina Rathnayake         ymx =max(ymx,y(i))
41586a4271fSThilina Rathnayake      enddo
41686a4271fSThilina Rathnayake
41786a4271fSThilina Rathnayake      xmx = max(xmx,ymx)
41886a4271fSThilina Rathnayake      dmx = glmax(dmx,1) ! max across processors
41986a4271fSThilina Rathnayake      xmx = glmax(xmx,1)
42086a4271fSThilina Rathnayake
42186a4271fSThilina Rathnayake      if (xmx.gt.0) then
42286a4271fSThilina Rathnayake         glrdif = dmx/xmx
42386a4271fSThilina Rathnayake      else
42486a4271fSThilina Rathnayake         glrdif = -dmx   ! Negative indicates something strange
42586a4271fSThilina Rathnayake      endif
42686a4271fSThilina Rathnayake
42786a4271fSThilina Rathnayake      return
42886a4271fSThilina Rathnayake      end
42986a4271fSThilina RathnayakeC-----------------------------------------------------------------------
43086a4271fSThilina Rathnayake      subroutine loc_grad3(ur,us,ut,u,N,D,Dt)
43186a4271fSThilina RathnayakeC     3D transpose of local gradient
43286a4271fSThilina RathnayakeC     Input: u,N,D,Dt               Output: ur,us,ut
43386a4271fSThilina Rathnayake      real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N)
43486a4271fSThilina Rathnayake      real*8 u (0:N,0:N,0:N)
43586a4271fSThilina Rathnayake      real*8 D (0:N,0:N),Dt(0:N,0:N)
43686a4271fSThilina Rathnayake
43786a4271fSThilina Rathnayake      m1 = N+1
43886a4271fSThilina Rathnayake      m2 = m1*m1
43986a4271fSThilina Rathnayake
44086a4271fSThilina Rathnayake      call mxm(D ,m1,u(0,0,0),m1,ur,m2)
44186a4271fSThilina Rathnayake      do k=0,N
44286a4271fSThilina Rathnayake         call mxm(u(0,0,k),m1,Dt,m1,us(0,0,k),m1)
44386a4271fSThilina Rathnayake      enddo
44486a4271fSThilina Rathnayake      call mxm(u(0,0,0),m2,Dt,m1,ut,m1)
44586a4271fSThilina Rathnayake
44686a4271fSThilina Rathnayake      return
44786a4271fSThilina Rathnayake      end
44886a4271fSThilina Rathnayakec-----------------------------------------------------------------------
44986a4271fSThilina Rathnayake      subroutine loc_grad3t(u,ur,us,ut,N,D,Dt,w)
45086a4271fSThilina RathnayakeC     3D transpose of local gradient
45186a4271fSThilina RathnayakeC     Input: ur,us,ut,N,D,Dt        Output: u
45286a4271fSThilina Rathnayake       real*8 u (0:N,0:N,0:N)
45386a4271fSThilina Rathnayake       real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N)
45486a4271fSThilina Rathnayake       real*8 D (0:N,0:N),Dt(0:N,0:N)
45586a4271fSThilina Rathnayake       real*8 w (0:N,0:N,0:N)
45686a4271fSThilina Rathnayake
45786a4271fSThilina Rathnayake       m1 = N+1
45886a4271fSThilina Rathnayake       m2 = m1*m1
45986a4271fSThilina Rathnayake       m3 = m1*m1*m1
46086a4271fSThilina Rathnayake
46186a4271fSThilina Rathnayake       call mxm(Dt,m1,ur,m1,u(0,0,0),m2)
46286a4271fSThilina Rathnayake       do k=0,N
46386a4271fSThilina Rathnayake          call mxm(us(0,0,k),m1,D ,m1,w(0,0,k),m1)
46486a4271fSThilina Rathnayake       enddo
46586a4271fSThilina Rathnayake       call add2(u(0,0,0),w,m3)
46686a4271fSThilina Rathnayake       call mxm(ut,m2,D ,m1,w,m1)
46786a4271fSThilina Rathnayake       call add2(u(0,0,0),w,m3)
46886a4271fSThilina Rathnayake
46986a4271fSThilina Rathnayake      return
47086a4271fSThilina Rathnayake      end
47186a4271fSThilina RathnayakeC-----------------------------------------------------------------------
47286a4271fSThilina Rathnayake      subroutine geodatq(gf,bmq,w3mq,nxq)
47386a4271fSThilina RathnayakeC     Routine to generate elemental geometric matrices on mesh 1
47486a4271fSThilina RathnayakeC     (Gauss-Legendre Lobatto mesh).
47586a4271fSThilina Rathnayake      include 'SIZE'
47686a4271fSThilina Rathnayake      include 'TOTAL'
47786a4271fSThilina Rathnayake
47886a4271fSThilina Rathnayake      parameter (lg=3+3*(ldim-2),lzq=lx1+1,lxyd=lzq**ldim)
47986a4271fSThilina Rathnayake
48086a4271fSThilina Rathnayake      real*8 gf(lg,nxq**ldim,lelt),bmq(nxq**ldim,lelt),w3mq(nxq,nxq,nxq)
48186a4271fSThilina Rathnayake
48286a4271fSThilina Rathnayake      common /ctmp1/ xr(lxyd),xs(lxyd),xt(lxyd)
48386a4271fSThilina Rathnayake      common /sxrns/ yr(lxyd),ys(lxyd),yt(lxyd)
48486a4271fSThilina Rathnayake     $ ,             zr(lxyd),zs(lxyd),zt(lxyd)
48586a4271fSThilina Rathnayake
48686a4271fSThilina Rathnayake      common /ctmp77/ wd(lxd),zd(lxd)
48786a4271fSThilina Rathnayake      common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq)
48886a4271fSThilina Rathnayake
48986a4271fSThilina Rathnayake      integer e
49086a4271fSThilina Rathnayake      real*8 tmp(lxyd)
49186a4271fSThilina Rathnayake      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
49286a4271fSThilina Rathnayake      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
49386a4271fSThilina Rathnayake      real*8 jacmq
49486a4271fSThilina Rathnayake
49586a4271fSThilina Rathnayake      if (nxq.gt.lzq) call exitti('ABORT: recompile with lzq=$',nxq)
49686a4271fSThilina Rathnayake
49786a4271fSThilina Rathnayake      call zwgl    (zd,wd,lzq)                            ! nxq = number of points
49886a4271fSThilina Rathnayake      call gen_dgl (dxmq,dxtmq,lzq,lzq,tmp)
49986a4271fSThilina Rathnayake
50086a4271fSThilina Rathnayake      do k=1,nxq
50186a4271fSThilina Rathnayake      do j=1,nxq
50286a4271fSThilina Rathnayake      do i=1,nxq
50386a4271fSThilina Rathnayake         w3mq(i,j,k) = wd(i)*wd(j)*wd(k)
50486a4271fSThilina Rathnayake      enddo
50586a4271fSThilina Rathnayake      enddo
50686a4271fSThilina Rathnayake      enddo
50786a4271fSThilina Rathnayake
50886a4271fSThilina Rathnayake      nxyzq = nxq**ldim
50986a4271fSThilina Rathnayake      nxqm1 = lzq-1
51086a4271fSThilina Rathnayake
51186a4271fSThilina Rathnayake      do e=1,nelt
51286a4271fSThilina Rathnayake         call intp_rstd (tmp,xm1(1,1,1,e),lx1,lzq,if3d,0) ! 0-->Fwd interpolation
51386a4271fSThilina Rathnayake         call loc_grad3 (xr,xs,xt,tmp,nxqm1,dxmq,dxtmq)
51486a4271fSThilina Rathnayake
51586a4271fSThilina Rathnayake         call intp_rstd (tmp,ym1(1,1,1,e),lx1,lzq,if3d,0)
51686a4271fSThilina Rathnayake         call loc_grad3 (yr,ys,yt,tmp,nxqm1,dxmq,dxtmq)
51786a4271fSThilina Rathnayake
51886a4271fSThilina Rathnayake         call intp_rstd (tmp,zm1(1,1,1,e),lx1,lzq,if3d,0)
51986a4271fSThilina Rathnayake         call loc_grad3 (zr,zs,zt,tmp,nxqm1,dxmq,dxtmq)
52086a4271fSThilina Rathnayake
52186a4271fSThilina Rathnayake         do i=1,nxyzq
52286a4271fSThilina Rathnayake            a11 = xr(i)
52386a4271fSThilina Rathnayake            a12 = xs(i)
52486a4271fSThilina Rathnayake            a13 = xt(i)
52586a4271fSThilina Rathnayake
52686a4271fSThilina Rathnayake            a21 = yr(i)
52786a4271fSThilina Rathnayake            a22 = ys(i)
52886a4271fSThilina Rathnayake            a23 = yt(i)
52986a4271fSThilina Rathnayake
53086a4271fSThilina Rathnayake            a31 = zr(i)
53186a4271fSThilina Rathnayake            a32 = zs(i)
53286a4271fSThilina Rathnayake            a33 = zt(i)
53386a4271fSThilina Rathnayake
53486a4271fSThilina Rathnayake            g11 = (a22*a33-a23*a32)
53586a4271fSThilina Rathnayake            g12 = (a13*a32-a33*a12)
53686a4271fSThilina Rathnayake            g13 = (a12*a23-a22*a13)
53786a4271fSThilina Rathnayake
53886a4271fSThilina Rathnayake            g21 = (a23*a31-a21*a33)
53986a4271fSThilina Rathnayake            g22 = (a11*a33-a31*a13)
54086a4271fSThilina Rathnayake            g23 = (a13*a21-a23*a11)
54186a4271fSThilina Rathnayake
54286a4271fSThilina Rathnayake            g31 = (a21*a32-a22*a31)
54386a4271fSThilina Rathnayake            g32 = (a12*a31-a32*a11)
54486a4271fSThilina Rathnayake            g33 = (a11*a22-a21*a12)
54586a4271fSThilina Rathnayake
54686a4271fSThilina Rathnayake            jacmq = a11*g11+a21*g12+a31*g13
54786a4271fSThilina Rathnayake
54886a4271fSThilina Rathnayake            bmq(i,e)  = w3mq(i,1,1)*jacmq
54986a4271fSThilina Rathnayake            scale     = w3mq(i,1,1)/jacmq
55086a4271fSThilina Rathnayake
55186a4271fSThilina Rathnayake            gf(1,i,e) = scale*(g11*g11+g12*g12+g13*g13) ! Grr
55286a4271fSThilina Rathnayake            gf(2,i,e) = scale*(g11*g21+g12*g22+g13*g23) ! Grs
55386a4271fSThilina Rathnayake            gf(3,i,e) = scale*(g11*g31+g12*g32+g13*g33) ! Grt
55486a4271fSThilina Rathnayake            gf(4,i,e) = scale*(g21*g21+g22*g22+g23*g23) ! Gss
55586a4271fSThilina Rathnayake            gf(5,i,e) = scale*(g21*g31+g22*g32+g23*g33) ! Gst
55686a4271fSThilina Rathnayake            gf(6,i,e) = scale*(g31*g31+g32*g32+g33*g33) ! Gtt
55786a4271fSThilina Rathnayake
55886a4271fSThilina Rathnayake         enddo
55986a4271fSThilina Rathnayake      enddo
56086a4271fSThilina Rathnayake
56186a4271fSThilina Rathnayake      return
56286a4271fSThilina Rathnayake      end
56386a4271fSThilina RathnayakeC-----------------------------------------------------------------------
56486a4271fSThilina Rathnayake      subroutine setprecn_bp1 (d,h1,h2)
56586a4271fSThilina RathnayakeC     Generate diagonal preconditioner for Helmholtz operator
56686a4271fSThilina RathnayakeC     Input: h1,h2                  Output: d
56786a4271fSThilina Rathnayake      include 'SIZE'
56886a4271fSThilina Rathnayake      include 'TOTAL'
56986a4271fSThilina Rathnayake
57086a4271fSThilina Rathnayake      parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2))
57186a4271fSThilina Rathnayake
57286a4271fSThilina Rathnayake      real*8    d(lx1,ly1,lz1,lelt),h1(lxyz,lelt),h2(lxyz,lelt)
57386a4271fSThilina Rathnayake      integer e
57486a4271fSThilina Rathnayake
57586a4271fSThilina Rathnayake      real*8         gf(lg,lx1,ly1,lz1,lelt) ! Equivalence new gf() data
57686a4271fSThilina Rathnayake      equivalence (gf,g1m1)                  ! layout to g1m1...g6m1
57786a4271fSThilina Rathnayake
57886a4271fSThilina Rathnayake      real*8 ysm1(ly1)
57986a4271fSThilina Rathnayake
58086a4271fSThilina Rathnayake      nel   = nelfld(ifield)
58186a4271fSThilina Rathnayake      n     = nel*lx1*ly1*lz1
58286a4271fSThilina Rathnayake      nxyz  = lx1*ly1*lz1
58386a4271fSThilina Rathnayake
58486a4271fSThilina Rathnayake      call copy    (d,bm1,n)   ! Mass matrix preconditioning full mass matrix
58586a4271fSThilina Rathnayake      call dssum   (d,nx1,ny1,nz1)
58686a4271fSThilina Rathnayake      call invcol1 (d,n)
58786a4271fSThilina Rathnayake      return
58886a4271fSThilina Rathnayake
58986a4271fSThilina Rathnayake      call dsset(lx1,ly1,lz1)
59086a4271fSThilina Rathnayake
59186a4271fSThilina Rathnayake      do 1000 e=1,nel
59286a4271fSThilina Rathnayake
59386a4271fSThilina Rathnayake        call rzero(d(1,1,1,e),nxyz)
59486a4271fSThilina Rathnayake
59586a4271fSThilina Rathnayake        do 320 iz=1,lz1
59686a4271fSThilina Rathnayake         do 320 iy=1,ly1
59786a4271fSThilina Rathnayake         do 320 ix=1,lx1
59886a4271fSThilina Rathnayake         do 320 iq=1,lx1
59986a4271fSThilina Rathnayake           d(ix,iy,iz,e) = d(ix,iy,iz,e)
60086a4271fSThilina Rathnayake     $                   + gf(1,iq,iy,iz,e) * dxm1(iq,ix)**2
60186a4271fSThilina Rathnayake     $                   + gf(2,ix,iq,iz,e) * dxm1(iq,iy)**2
60286a4271fSThilina Rathnayake     $                   + gf(3,ix,iy,iq,e) * dxm1(iq,iz)**2
60386a4271fSThilina Rathnayake  320    continue
60486a4271fSThilina RathnayakeC
60586a4271fSThilina RathnayakeC        Add cross terms if element is deformed.
60686a4271fSThilina RathnayakeC
60786a4271fSThilina Rathnayake         if (lxyz.gt.0) then
60886a4271fSThilina Rathnayake
60986a4271fSThilina Rathnayake           do i2=1,ly1,ly1-1
61086a4271fSThilina Rathnayake           do i1=1,lx1,lx1-1
61186a4271fSThilina Rathnayake              d(1,i1,i2,e) = d(1,i1,i2,e)
61286a4271fSThilina Rathnayake     $            + gf(4,1,i1,i2,e) * dxtm1(1,1)*dytm1(i1,i1)
61386a4271fSThilina Rathnayake     $            + gf(5,1,i1,i2,e) * dxtm1(1,1)*dztm1(i2,i2)
61486a4271fSThilina Rathnayake              d(lx1,i1,i2,e) = d(lx1,i1,i2,e)
61586a4271fSThilina Rathnayake     $            + gf(4,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dytm1(i1,i1)
61686a4271fSThilina Rathnayake     $            + gf(5,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dztm1(i2,i2)
61786a4271fSThilina Rathnayake              d(i1,1,i2,e) = d(i1,1,i2,e)
61886a4271fSThilina Rathnayake     $            + gf(4,i1,1,i2,e) * dytm1(1,1)*dxtm1(i1,i1)
61986a4271fSThilina Rathnayake     $            + gf(6,i1,1,i2,e) * dytm1(1,1)*dztm1(i2,i2)
62086a4271fSThilina Rathnayake              d(i1,ly1,i2,e) = d(i1,ly1,i2,e)
62186a4271fSThilina Rathnayake     $            + gf(4,i1,ly1,i2,e) * dytm1(ly1,ly1)*dxtm1(i1,i1)
62286a4271fSThilina Rathnayake     $            + gf(6,i1,ly1,i2,e) * dytm1(ly1,ly1)*dztm1(i2,i2)
62386a4271fSThilina Rathnayake              d(i1,i2,1,e) = d(i1,i2,1,e)
62486a4271fSThilina Rathnayake     $            + gf(5,i1,i2,1,e) * dztm1(1,1)*dxtm1(i1,i1)
62586a4271fSThilina Rathnayake     $            + gf(6,i1,i2,1,e) * dztm1(1,1)*dytm1(i2,i2)
62686a4271fSThilina Rathnayake              d(i1,i2,lz1,e) = d(i1,i2,lz1,e)
62786a4271fSThilina Rathnayake     $            + gf(5,i1,i2,lz1,e) * dztm1(lz1,lz1)*dxtm1(i1,i1)
62886a4271fSThilina Rathnayake     $            + gf(6,i1,i2,lz1,e) * dztm1(lz1,lz1)*dytm1(i2,i2)
62986a4271fSThilina Rathnayake
63086a4271fSThilina Rathnayake           enddo
63186a4271fSThilina Rathnayake           enddo
63286a4271fSThilina Rathnayake         endif
63386a4271fSThilina Rathnayake
63486a4271fSThilina Rathnayake        do i=1,lxyz
63586a4271fSThilina Rathnayake           d(i,1,1,e)=d(i,1,1,e)*h1(i,e)+h2(i,e)*bm1(i,1,1,e)
63686a4271fSThilina Rathnayake        enddo
63786a4271fSThilina Rathnayake
63886a4271fSThilina Rathnayake 1000 continue ! element loop
63986a4271fSThilina Rathnayake
64086a4271fSThilina RathnayakeC     If axisymmetric, add a diagonal term in the radial direction (ISD=2)
64186a4271fSThilina Rathnayake
64286a4271fSThilina Rathnayake      if (ifaxis.and.(isd.eq.2)) then
64386a4271fSThilina Rathnayake         do 1200 e=1,nel
64486a4271fSThilina Rathnayake            if (ifrzer(e)) call mxm(ym1(1,1,1,e),lx1,datm1,ly1,ysm1,1)
64586a4271fSThilina Rathnayake            k=0
64686a4271fSThilina Rathnayake            do 1190 j=1,ly1
64786a4271fSThilina Rathnayake            do 1190 i=1,lx1
64886a4271fSThilina Rathnayake               k=k+1
64986a4271fSThilina Rathnayake               if (ym1(i,j,1,e).ne.0.) then
65086a4271fSThilina Rathnayake                  term1 = bm1(i,j,1,e)/ym1(i,j,1,e)**2
65186a4271fSThilina Rathnayake                  if (ifrzer(e)) then
65286a4271fSThilina Rathnayake                     term2 =  wxm1(i)*wam1(1)*dam1(1,j)
65386a4271fSThilina Rathnayake     $                       *jacm1(i,1,1,e)/ysm1(i)
65486a4271fSThilina Rathnayake                  else
65586a4271fSThilina Rathnayake                     term2 = 0.
65686a4271fSThilina Rathnayake                  endif
65786a4271fSThilina Rathnayake                  d(i,j,1,e) = d(i,j,1,e) + h1(k,e)*(term1+term2)
65886a4271fSThilina Rathnayake               endif
65986a4271fSThilina Rathnayake 1190       continue
66086a4271fSThilina Rathnayake 1200    continue
66186a4271fSThilina Rathnayake
66286a4271fSThilina Rathnayake      endif
66386a4271fSThilina Rathnayake      call dssum   (d,nx1,ny1,nz1)
66486a4271fSThilina Rathnayake      call invcol1 (d,n)
66586a4271fSThilina Rathnayake
66686a4271fSThilina Rathnayake      if (nio.eq.0) write(6,1) n,d(1,1,1,1),h1(1,1),h2(1,1),bm1(1,1,1,1)
66786a4271fSThilina Rathnayake   1  format(i9,1p4e12.4,' diag prec')
66886a4271fSThilina Rathnayake
66986a4271fSThilina Rathnayake      return
67086a4271fSThilina Rathnayake      end
67186a4271fSThilina RathnayakeC-----------------------------------------------------------------------
67286a4271fSThilina Rathnayake      subroutine setprecn_bp3 (d,h1,h2)
67386a4271fSThilina RathnayakeC     Generate dummy diagonal preconditioner for Helmholtz operator
67486a4271fSThilina RathnayakeC     Input: h1,h2                  Output: d
67586a4271fSThilina Rathnayake      include 'SIZE'
67686a4271fSThilina Rathnayake      include 'TOTAL'
67786a4271fSThilina Rathnayake
67886a4271fSThilina Rathnayake      parameter (n=lx1*ly1*lz1*lelt)
67986a4271fSThilina Rathnayake      real*8 d(n),h1(n),h2(n)
68086a4271fSThilina Rathnayake
68186a4271fSThilina Rathnayake      call rone (d,n)
68286a4271fSThilina Rathnayake
68386a4271fSThilina Rathnayake      return
68486a4271fSThilina Rathnayake      end
68586a4271fSThilina RathnayakeC-----------------------------------------------------------------------
68686a4271fSThilina Rathnayake      subroutine userchk
68786a4271fSThilina Rathnayake      include 'SIZE'
68886a4271fSThilina Rathnayake      include 'TOTAL'
68986a4271fSThilina Rathnayake
69086a4271fSThilina Rathnayake      integer bp
69186a4271fSThilina Rathnayake
69286a4271fSThilina Rathnayake      call get_bp(bp)
69386a4271fSThilina Rathnayake
69486a4271fSThilina Rathnayake      if (bp==1) then
69586a4271fSThilina Rathnayake        if (istep.gt.0) call bp1
69686a4271fSThilina Rathnayake      elseif (bp==3) then
69786a4271fSThilina Rathnayake        if (istep.gt.0) call bp3
69886a4271fSThilina Rathnayake      else
69986a4271fSThilina Rathnayake        write(6,*) "INVALID BP SPECIFICED"
70086a4271fSThilina Rathnayake      endif
70186a4271fSThilina Rathnayake
70286a4271fSThilina Rathnayake      return
70386a4271fSThilina Rathnayake      end
70486a4271fSThilina RathnayakeC-----------------------------------------------------------------------
70586a4271fSThilina Rathnayake      subroutine bp1
70686a4271fSThilina RathnayakeC     Solution to BP1 using libCEED
70786a4271fSThilina Rathnayake      include 'SIZE'
70886a4271fSThilina Rathnayake      include 'TOTAL'
70986a4271fSThilina Rathnayake      include 'CTIMER'  ! ifsync
71086a4271fSThilina Rathnayake      include 'FDMH1'
711*ec3da8bcSJed Brown      include 'ceed/fortran.h'
71286a4271fSThilina Rathnayake
71386a4271fSThilina Rathnayake      parameter (lzq=lx1+1)
71486a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
71586a4271fSThilina Rathnayake      common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq)
71686a4271fSThilina Rathnayake
71786a4271fSThilina Rathnayake      parameter (lt=lx1*ly1*lz1*lelt)
71886a4271fSThilina Rathnayake      parameter (ld=lxd*lyd*lzd*lelt)
71986a4271fSThilina Rathnayake      common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt)
72086a4271fSThilina Rathnayake      common /vcrny/ e1(lt)
72186a4271fSThilina Rathnayake      common /vcrvh/ h1(lt),h2(lx*lelt),pap(3)
72286a4271fSThilina Rathnayake      real*8 coords(ldim*lx*lelt)
72386a4271fSThilina Rathnayake
72486a4271fSThilina Rathnayake      logical ifh3
725e2b2c771Svaleria      integer*8 nnode
72686a4271fSThilina Rathnayake      integer ceed,err,test
72786a4271fSThilina Rathnayake      character*64 spec
72886a4271fSThilina Rathnayake
72934d77899SValeria Barra      integer p,q,ncompx,ncompu,enode,lnode
73086a4271fSThilina Rathnayake      integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs
7317509a596Sjeremylt      integer stridesu(3),stridesx(3),stridesw(3)
73286a4271fSThilina Rathnayake      integer erstrctu,erstrctx,erstrctw
73386a4271fSThilina Rathnayake      integer basisu,basisx
73486a4271fSThilina Rathnayake      integer qf_mass,qf_setup
73586a4271fSThilina Rathnayake      integer op_mass,op_setup
73686a4271fSThilina Rathnayake      real*8  x,y,z
73786a4271fSThilina Rathnayake      integer*8 offset
73886a4271fSThilina Rathnayake
73986a4271fSThilina Rathnayake      external massf,masssetupf
74086a4271fSThilina Rathnayake
74186a4271fSThilina Rathnayake      ifield = 1
74286a4271fSThilina Rathnayake      nxq    = nx1+1
74386a4271fSThilina Rathnayake      n = nx1*ny1*nz1*nelt
74486a4271fSThilina Rathnayake
74586a4271fSThilina Rathnayake      ifsync = .false.
74686a4271fSThilina Rathnayake
74786a4271fSThilina RathnayakeC     Set up coordinates
74886a4271fSThilina Rathnayake      ii=0
74986a4271fSThilina Rathnayake      do j=0,nelt-1
75086a4271fSThilina Rathnayake      do i=1,lx
75186a4271fSThilina Rathnayake        ii=ii+1
75286a4271fSThilina Rathnayake        x = xm1(ii,1,1,1)
75386a4271fSThilina Rathnayake        y = ym1(ii,1,1,1)
75486a4271fSThilina Rathnayake        z = zm1(ii,1,1,1)
75586a4271fSThilina Rathnayake        coords(i+0*lx+3*j*lx)=x
75686a4271fSThilina Rathnayake        coords(i+1*lx+3*j*lx)=y
75786a4271fSThilina Rathnayake        coords(i+2*lx+3*j*lx)=z
75886a4271fSThilina Rathnayake      enddo
75986a4271fSThilina Rathnayake      enddo
76086a4271fSThilina Rathnayake
76186a4271fSThilina RathnayakeC     Init ceed library
76286a4271fSThilina Rathnayake      call get_spec(spec)
76386a4271fSThilina Rathnayake      call ceedinit(trim(spec)//char(0),ceed,err)
76486a4271fSThilina Rathnayake
76586a4271fSThilina Rathnayake      call get_test(test)
76686a4271fSThilina Rathnayake
76786a4271fSThilina RathnayakeC     Set up Nek geometry data
76886a4271fSThilina Rathnayake      call geodatq       (gf,bmq,w3mq,nxq)         ! Set up gf() arrays
76986a4271fSThilina Rathnayake      call set_h2_as_rhoJac_GL(h2,bmq,nxq)
77086a4271fSThilina Rathnayake
77186a4271fSThilina RathnayakeC     Set up true soln
77286a4271fSThilina Rathnayake      call dist_fld_h1   (e1)
77386a4271fSThilina Rathnayake      call copy          (h1,e1,n)                 ! Save exact soln in h1
77486a4271fSThilina Rathnayake
77586a4271fSThilina RathnayakeC     Set up solver parameters
77686a4271fSThilina Rathnayake      tol       = 1e-10
77786a4271fSThilina Rathnayake      param(22) = tol
77886a4271fSThilina Rathnayake      maxit     = 100
77986a4271fSThilina Rathnayake
78086a4271fSThilina Rathnayake      call nekgsync()
78186a4271fSThilina Rathnayake
78286a4271fSThilina RathnayakeC     Create ceed basis for mesh and computation
78386a4271fSThilina Rathnayake      p=nx1
78486a4271fSThilina Rathnayake      q=p+1
78534d77899SValeria Barra      ncompu=1
78634d77899SValeria Barra      ncompx=ldim
78786a4271fSThilina Rathnayake      call ceedbasiscreatetensorh1lagrange(ceed,ndim,ndim,p,q,
78886a4271fSThilina Rathnayake     $  ceed_gauss,basisx,err)
78934d77899SValeria Barra      call ceedbasiscreatetensorh1lagrange(ceed,ndim,ncompu,p,q,
79086a4271fSThilina Rathnayake     $  ceed_gauss,basisu,err)
79186a4271fSThilina Rathnayake
79286a4271fSThilina RathnayakeC     Create ceed element restrictions for mesh and computation
793e2b2c771Svaleria      enode=p**ldim
79434d77899SValeria Barra      lnode=enode*nelt*ncompu
7957509a596Sjeremylt      stridesx=[1,enode,enode*ldim]
796d979a051Sjeremylt      call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ldim,
797d979a051Sjeremylt     $  ldim*lnode,stridesx,erstrctx,err)
7987509a596Sjeremylt      stridesu=[1,enode,enode*ncompu]
799d979a051Sjeremylt      call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ncompu,
800d979a051Sjeremylt     $  ncompu*lnode,stridesu,erstrctu,err)
8017509a596Sjeremylt      call ceedelemrestrictioncreatestrided(ceed,nelt,q**ldim,
802d979a051Sjeremylt     $  1,nelt*q**ldim,ceed_strides_backend,erstrctw,err)
80386a4271fSThilina Rathnayake
80486a4271fSThilina RathnayakeC     Create ceed vectors
805e2b2c771Svaleria      call ceedvectorcreate(ceed,lnode,vec_p1,err)
806e2b2c771Svaleria      call ceedvectorcreate(ceed,lnode,vec_ap1,err)
807e2b2c771Svaleria      call ceedvectorcreate(ceed,lnode,vec_rhs,err)
80886a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err)
80986a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,nelt*q**ldim,vec_qdata,err)
81086a4271fSThilina Rathnayake
81186a4271fSThilina Rathnayake      offset=0
81286a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_coords,ceed_mem_host,
81386a4271fSThilina Rathnayake     $  ceed_use_pointer,coords,offset,err)
81486a4271fSThilina Rathnayake
81586a4271fSThilina RathnayakeC     Create ceed qfunctions for masssetupf and massf
81686a4271fSThilina Rathnayake      call ceedqfunctioncreateinterior(ceed,1,masssetupf,
8172d50dd3dSjeremylt     $  EXAMPLE_DIR
8182d50dd3dSjeremylt     $  //'bps/bps.h:masssetupf',qf_setup,err)
81934d77899SValeria Barra      call ceedqfunctionaddinput(qf_setup,'x',ncompx,
82086a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
82134d77899SValeria Barra      call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim,
82286a4271fSThilina Rathnayake     $  ceed_eval_grad,err)
82334d77899SValeria Barra      call ceedqfunctionaddinput(qf_setup,'weight',ncompu,
82486a4271fSThilina Rathnayake     $  ceed_eval_weight,err)
825a2fa7910SValeria Barra      call ceedqfunctionaddoutput(qf_setup,'qdata',ncompu,
82686a4271fSThilina Rathnayake     $  ceed_eval_none,err)
82734d77899SValeria Barra      call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu,
82886a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
82986a4271fSThilina Rathnayake
83086a4271fSThilina Rathnayake      call ceedqfunctioncreateinterior(ceed,1,massf,
8312d50dd3dSjeremylt     $  EXAMPLE_DIR
8322d50dd3dSjeremylt     $  //'bps/bps.h:massf',qf_mass,err)
83334d77899SValeria Barra      call ceedqfunctionaddinput(qf_mass,'u',ncompu,
83486a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
835a2fa7910SValeria Barra      call ceedqfunctionaddinput(qf_mass,'qdata',ncompu,
83686a4271fSThilina Rathnayake     $  ceed_eval_none,err)
83734d77899SValeria Barra      call ceedqfunctionaddoutput(qf_mass,'v',ncompu,
83886a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
83986a4271fSThilina Rathnayake
84086a4271fSThilina RathnayakeC     Create ceed operators
84186a4271fSThilina Rathnayake      call ceedoperatorcreate(ceed,qf_setup,
842442e7f0bSjeremylt     $  ceed_qfunction_none,ceed_qfunction_none,op_setup,err)
84386a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'x',erstrctx,
844a8d32208Sjeremylt     $  basisx,ceed_vector_active,err)
84586a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'dx',erstrctx,
846a8d32208Sjeremylt     $  basisx,ceed_vector_active,err)
84715910d16Sjeremylt      call ceedoperatorsetfield(op_setup,'weight',
84815910d16Sjeremylt     $  ceed_elemrestriction_none,basisx,ceed_vector_none,err)
849a2fa7910SValeria Barra      call ceedoperatorsetfield(op_setup,'qdata',erstrctw,
850a8d32208Sjeremylt     $  ceed_basis_collocated,ceed_vector_active,err)
85186a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'rhs',erstrctu,
852a8d32208Sjeremylt     $  basisu,vec_rhs,err)
85386a4271fSThilina Rathnayake
85486a4271fSThilina Rathnayake      call ceedoperatorcreate(ceed,qf_mass,
855442e7f0bSjeremylt     $  ceed_qfunction_none,ceed_qfunction_none,op_mass,err)
85686a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_mass,'u',erstrctu,
857a8d32208Sjeremylt     $  basisu,ceed_vector_active,err)
858a2fa7910SValeria Barra      call ceedoperatorsetfield(op_mass,'qdata',erstrctw,
859a8d32208Sjeremylt     $  ceed_basis_collocated,vec_qdata,err)
86086a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_mass,'v',erstrctu,
861a8d32208Sjeremylt     $  basisu,ceed_vector_active,err)
86286a4271fSThilina Rathnayake
86386a4271fSThilina RathnayakeC     Compute setup data
86486a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_rhs,ceed_mem_host,
86586a4271fSThilina Rathnayake     $  ceed_use_pointer,r1,offset,err)
86686a4271fSThilina Rathnayake      call ceedoperatorapply(op_setup,vec_coords,vec_qdata,
86786a4271fSThilina Rathnayake     $  ceed_request_immediate,err)
86886a4271fSThilina Rathnayake
86986a4271fSThilina RathnayakeC     Set up true RHS
87086a4271fSThilina Rathnayake      call dssum         (r1,nx1,ny1,nz1)          ! r1
87186a4271fSThilina Rathnayake
87286a4271fSThilina RathnayakeC     Set up algebraic RHS with libCEED
87386a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_p1,ceed_mem_host,
87486a4271fSThilina Rathnayake     $  ceed_use_pointer,h1,offset,err)
87586a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_ap1,ceed_mem_host,
87686a4271fSThilina Rathnayake     $  ceed_use_pointer,r2,offset,err)
87786a4271fSThilina Rathnayake      call ceedoperatorapply(op_mass,vec_p1,vec_ap1,
87886a4271fSThilina Rathnayake     $  ceed_request_immediate,err)                ! r2 = A_ceed*h1
87986a4271fSThilina Rathnayake      call dssum         (r2,nx1,ny1,nz1)
88086a4271fSThilina Rathnayake
88186a4271fSThilina RathnayakeC     Set up algebraic RHS with Nek5000
88286a4271fSThilina Rathnayake      call axhm1         (pap,r3,h1,h1,h2,'bp1')   ! r3 = A_nek5k*h1
88386a4271fSThilina Rathnayake      call dssum         (r3,nx1,ny1,nz1)
88486a4271fSThilina Rathnayake
88586a4271fSThilina Rathnayake      call nekgsync()
88686a4271fSThilina Rathnayake
88786a4271fSThilina RathnayakeC     Solve true RHS
88886a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "libCEED true RHS"
88986a4271fSThilina Rathnayake      tstart = dnekclock()
89086a4271fSThilina Rathnayake      call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_mass,
89186a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp1')
89286a4271fSThilina Rathnayake      tstop  = dnekclock()
89386a4271fSThilina Rathnayake
89486a4271fSThilina RathnayakeC     Output
89586a4271fSThilina Rathnayake      telaps = (tstop-tstart)
89686a4271fSThilina Rathnayake      maxits = maxit
89786a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
89886a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
89986a4271fSThilina Rathnayake
90086a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
90186a4271fSThilina Rathnayake        if (maxit>=100) then
90286a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
90386a4271fSThilina Rathnayake        endif
90486a4271fSThilina Rathnayake        if (dabs(er1)>5e-3) then
90586a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
90686a4271fSThilina Rathnayake        endif
90786a4271fSThilina Rathnayake      endif
90886a4271fSThilina Rathnayake
90986a4271fSThilina Rathnayake      nx     = nx1-1
910e2b2c771Svaleria      nnode  = nelgt            ! nnodes
911e2b2c771Svaleria      nnode  = nnode*(nx**ldim) ! nnodes
912e2b2c771Svaleria      nppp   = nnode/np         ! nnodes/proc
91386a4271fSThilina Rathnayake
91405939c60Sjeremylt      dofps  = nnode/telaps     ! DOF/sec - scalar form
91586a4271fSThilina Rathnayake      titers = telaps/maxits    ! time per iteration
91686a4271fSThilina Rathnayake      tppp_s = titers/nppp      ! time per iteraton per local point
91786a4271fSThilina Rathnayake
91886a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
91905939c60Sjeremylt     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
92086a4271fSThilina Rathnayake
92186a4271fSThilina RathnayakeC     Solve libCEED algebraic RHS
92286a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "libCEED algebraic RHS"
92386a4271fSThilina Rathnayake      maxit = 100
92486a4271fSThilina Rathnayake      tstart = dnekclock()
92586a4271fSThilina Rathnayake      call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_mass,
92686a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp1')
92786a4271fSThilina Rathnayake      tstop  = dnekclock()
92886a4271fSThilina Rathnayake
92986a4271fSThilina RathnayakeC     Output
93086a4271fSThilina Rathnayake      telaps = (tstop-tstart)
93186a4271fSThilina Rathnayake      maxits = maxit
93286a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
93386a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
93486a4271fSThilina Rathnayake
93586a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
93686a4271fSThilina Rathnayake        if (maxit>=100) then
93786a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
93886a4271fSThilina Rathnayake        endif
93986a4271fSThilina Rathnayake        if (dabs(er1)>1e-5) then
94086a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
94186a4271fSThilina Rathnayake        endif
94286a4271fSThilina Rathnayake      endif
94386a4271fSThilina Rathnayake
94486a4271fSThilina Rathnayake      nx      = nx1-1
945e2b2c771Svaleria      nnode   = nelgt            ! nnodes
946e2b2c771Svaleria      nnode   = nnode*(nx**ldim) ! nnodes
947e2b2c771Svaleria      nppp    = nnode/np         ! nnodes/proc
94886a4271fSThilina Rathnayake
94905939c60Sjeremylt      dofps   = nnode/telaps     ! DOF/sec - scalar form
95086a4271fSThilina Rathnayake      titers  = telaps/maxits    ! time per iteration
95186a4271fSThilina Rathnayake      tppp_s  = titers/nppp      ! time per iteraton per local point
95286a4271fSThilina Rathnayake
95386a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
95405939c60Sjeremylt     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
95586a4271fSThilina Rathnayake
95686a4271fSThilina RathnayakeC     Solve Nek5000 algebraic RHS
95786a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS"
95886a4271fSThilina Rathnayake      maxit = 100
95986a4271fSThilina Rathnayake      tstart = dnekclock()
96086a4271fSThilina Rathnayake      call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_mass,
96186a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp1')
96286a4271fSThilina Rathnayake      tstop  = dnekclock()
96386a4271fSThilina Rathnayake
96486a4271fSThilina RathnayakeC     Output
96586a4271fSThilina Rathnayake      telaps = (tstop-tstart)
96686a4271fSThilina Rathnayake      maxits = maxit
96786a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
96886a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
96986a4271fSThilina Rathnayake
97086a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
97186a4271fSThilina Rathnayake        if (maxit>=100) then
97286a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
97386a4271fSThilina Rathnayake        endif
97486a4271fSThilina Rathnayake        if (dabs(er1)>1e-5) then
97586a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
97686a4271fSThilina Rathnayake        endif
97786a4271fSThilina Rathnayake      endif
97886a4271fSThilina Rathnayake
97986a4271fSThilina Rathnayake      nx      = nx1-1
980e2b2c771Svaleria      nnode   = nelgt            ! nnodes
981e2b2c771Svaleria      nnode   = nnode*(nx**ldim) ! nnodes
982e2b2c771Svaleria      nppp    = nnode/np         ! nnodes/proc
98386a4271fSThilina Rathnayake
98405939c60Sjeremylt      dofps   = nnode/telaps     ! DOF/sec - scalar form
98586a4271fSThilina Rathnayake      titers  = telaps/maxits    ! time per iteration
98686a4271fSThilina Rathnayake      tppp_s  = titers/nppp      ! time per iteraton per local point
98786a4271fSThilina Rathnayake
98886a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
98905939c60Sjeremylt     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
99086a4271fSThilina Rathnayake
99186a4271fSThilina Rathnayake    1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5)
99286a4271fSThilina Rathnayake    3 format(i3,i9,e12.4,1x,a8,i9)
99386a4271fSThilina Rathnayake
99486a4271fSThilina RathnayakeC     Destroy ceed handles
99586a4271fSThilina Rathnayake      call ceedvectordestroy(vec_p1,err)
99686a4271fSThilina Rathnayake      call ceedvectordestroy(vec_ap1,err)
99786a4271fSThilina Rathnayake      call ceedvectordestroy(vec_rhs,err)
99886a4271fSThilina Rathnayake      call ceedvectordestroy(vec_qdata,err)
99986a4271fSThilina Rathnayake      call ceedvectordestroy(vec_coords,err)
100086a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctu,err)
100186a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctx,err)
100286a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctw,err)
100386a4271fSThilina Rathnayake      call ceedbasisdestroy(basisu,err)
100486a4271fSThilina Rathnayake      call ceedbasisdestroy(basisx,err)
100586a4271fSThilina Rathnayake      call ceedqfunctiondestroy(qf_setup,err)
100686a4271fSThilina Rathnayake      call ceedqfunctiondestroy(qf_mass,err)
100786a4271fSThilina Rathnayake      call ceedoperatordestroy(op_setup,err)
100886a4271fSThilina Rathnayake      call ceedoperatordestroy(op_mass,err)
100986a4271fSThilina Rathnayake      call ceeddestroy(ceed,err)
101086a4271fSThilina Rathnayake
101186a4271fSThilina Rathnayake      return
101286a4271fSThilina Rathnayake      end
101386a4271fSThilina RathnayakeC-----------------------------------------------------------------------
101486a4271fSThilina Rathnayake      subroutine bp3
101586a4271fSThilina RathnayakeC     Solution to BP3 using libCEED
101686a4271fSThilina Rathnayake      include 'SIZE'
101786a4271fSThilina Rathnayake      include 'TOTAL'
101886a4271fSThilina Rathnayake      include 'CTIMER'  ! ifsync
101986a4271fSThilina Rathnayake      include 'FDMH1'
1020*ec3da8bcSJed Brown      include 'ceed/fortran.h'
102186a4271fSThilina Rathnayake
102286a4271fSThilina Rathnayake      parameter (lzq=lx1+1)
102386a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
102486a4271fSThilina Rathnayake      common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq)
102586a4271fSThilina Rathnayake
102686a4271fSThilina Rathnayake      parameter (lt=lx1*ly1*lz1*lelt)
102786a4271fSThilina Rathnayake      parameter (ld=lxd*lyd*lzd*lelt)
102886a4271fSThilina Rathnayake      common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt)
102986a4271fSThilina Rathnayake      common /vcrny/ e1(lt)
103086a4271fSThilina Rathnayake      common /vcrvh/ h1(lt),h2(ld),pap(3)
103186a4271fSThilina Rathnayake      real*8 coords(ldim*lx*lelt)
103286a4271fSThilina Rathnayake
103386a4271fSThilina Rathnayake      logical ifh3
1034e2b2c771Svaleria      integer*8 nnode
103586a4271fSThilina Rathnayake      integer ceed,err,test
103686a4271fSThilina Rathnayake      character*64 spec
103786a4271fSThilina Rathnayake
103834d77899SValeria Barra      integer p,q,ncompx,ncompu,enode,lnode
103986a4271fSThilina Rathnayake      integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs
10407509a596Sjeremylt      integer stridesu(3),stridesx(3),stridesw(3)
104186a4271fSThilina Rathnayake      integer erstrctu,erstrctx,erstrctw
104286a4271fSThilina Rathnayake      integer basisu,basisx
104386a4271fSThilina Rathnayake      integer qf_diffusion,qf_setup
104486a4271fSThilina Rathnayake      integer op_diffusion,op_setup
104586a4271fSThilina Rathnayake      integer ii,i,ngeo
104686a4271fSThilina Rathnayake      real*8  x,y,z
104786a4271fSThilina Rathnayake      integer*8 offset
104886a4271fSThilina Rathnayake
104986a4271fSThilina Rathnayake      external diffusionf,diffsetupf
105086a4271fSThilina Rathnayake
105186a4271fSThilina Rathnayake      ifield = 1
105286a4271fSThilina Rathnayake      nxq    = nx1+1
105386a4271fSThilina Rathnayake      n = nx1*ny1*nz1*nelt
105486a4271fSThilina Rathnayake
105586a4271fSThilina Rathnayake      ifsync = .false.
105686a4271fSThilina Rathnayake
105786a4271fSThilina RathnayakeC     Set up coordinates and mask
105886a4271fSThilina Rathnayake      ii=0
105986a4271fSThilina Rathnayake      do j=0,nelt-1
106086a4271fSThilina Rathnayake      do i=1,lx
106186a4271fSThilina Rathnayake        ii=ii+1
106286a4271fSThilina Rathnayake        x = xm1(ii,1,1,1)
106386a4271fSThilina Rathnayake        y = ym1(ii,1,1,1)
106486a4271fSThilina Rathnayake        z = zm1(ii,1,1,1)
106586a4271fSThilina Rathnayake        coords(i+0*lx+3*j*lx)=x
106686a4271fSThilina Rathnayake        coords(i+1*lx+3*j*lx)=y
106786a4271fSThilina Rathnayake        coords(i+2*lx+3*j*lx)=z
106886a4271fSThilina Rathnayake        if ( x.eq.0.or.x.eq.1
106986a4271fSThilina Rathnayake     $   .or.y.eq.0.or.y.eq.1
107086a4271fSThilina Rathnayake     $   .or.z.eq.0.or.z.eq.1 ) then
107186a4271fSThilina Rathnayake          h2(ii)=0.
107286a4271fSThilina Rathnayake        else
107386a4271fSThilina Rathnayake          h2(ii)=1.
107486a4271fSThilina Rathnayake        endif
107586a4271fSThilina Rathnayake      enddo
107686a4271fSThilina Rathnayake      enddo
107786a4271fSThilina Rathnayake
107886a4271fSThilina RathnayakeC     Init ceed library
107986a4271fSThilina Rathnayake      call get_spec(spec)
108086a4271fSThilina Rathnayake      call ceedinit(trim(spec)//char(0),ceed,err)
108186a4271fSThilina Rathnayake
108286a4271fSThilina Rathnayake      call get_test(test)
108386a4271fSThilina Rathnayake
108486a4271fSThilina RathnayakeC     Set up Nek geometry data
108586a4271fSThilina Rathnayake      call geodatq       (gf,bmq,w3mq,nxq)         ! Set up gf() arrays
108686a4271fSThilina Rathnayake
108786a4271fSThilina RathnayakeC     Set up true soln
108886a4271fSThilina Rathnayake      call sin_fld_h1    (e1)
108986a4271fSThilina Rathnayake      call xmask1        (e1,h2,nelt)
109086a4271fSThilina Rathnayake      call copy          (h1,e1,n)                 ! Save exact soln in h1
109186a4271fSThilina Rathnayake
109286a4271fSThilina RathnayakeC     Set up solver parameters
109386a4271fSThilina Rathnayake      tol       = 1e-10
109486a4271fSThilina Rathnayake      param(22) = tol
109586a4271fSThilina Rathnayake      maxit     = 100
109686a4271fSThilina Rathnayake
109786a4271fSThilina Rathnayake      call nekgsync()
109886a4271fSThilina Rathnayake
109986a4271fSThilina RathnayakeC     Create ceed basis for mesh and computation
110086a4271fSThilina Rathnayake      p=nx1
110186a4271fSThilina Rathnayake      q=p+1
110234d77899SValeria Barra      ncompu=1
110334d77899SValeria Barra      ncompx=ldim
110434d77899SValeria Barra      call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompx,p,q,
110586a4271fSThilina Rathnayake     $  ceed_gauss,basisx,err)
110634d77899SValeria Barra      call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompu,p,q,
110786a4271fSThilina Rathnayake     $  ceed_gauss,basisu,err)
110886a4271fSThilina Rathnayake
110986a4271fSThilina RathnayakeC     Create ceed element restrictions for mesh and computation
1110e2b2c771Svaleria      enode=p**ldim
111134d77899SValeria Barra      lnode=enode*nelt*ncompu
111286a4271fSThilina Rathnayake      ngeo=(ldim*(ldim+1))/2
11137509a596Sjeremylt      stridesx=[1,enode,enode*ldim]
1114d979a051Sjeremylt      call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ldim,
1115d979a051Sjeremylt     $  ldim*lnode,stridesx,erstrctx,err)
11167509a596Sjeremylt      stridesu=[1,enode,enode*ncompu]
1117d979a051Sjeremylt      call ceedelemrestrictioncreatestrided(ceed,nelt,enode,ncompu,
1118d979a051Sjeremylt     $  ncompu*lnode,stridesu,erstrctu,err)
11197509a596Sjeremylt      stridesw=[1,q**ldim,ngeo*q**ldim]
11207509a596Sjeremylt      call ceedelemrestrictioncreatestrided(ceed,nelt,q**ldim,
1121d979a051Sjeremylt     $  ngeo,nelt*q**ldim*ngeo,stridesw,erstrctw,err)
112286a4271fSThilina Rathnayake
112386a4271fSThilina RathnayakeC     Create ceed vectors
1124e2b2c771Svaleria      call ceedvectorcreate(ceed,lnode,vec_p1,err)
1125e2b2c771Svaleria      call ceedvectorcreate(ceed,lnode,vec_ap1,err)
1126e2b2c771Svaleria      call ceedvectorcreate(ceed,lnode,vec_rhs,err)
112786a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err)
112886a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,nelt*ngeo*q**ldim,vec_qdata,err)
112986a4271fSThilina Rathnayake
113086a4271fSThilina Rathnayake      offset=0
113186a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_coords,ceed_mem_host,
113286a4271fSThilina Rathnayake     $  ceed_use_pointer,coords,offset,err)
113386a4271fSThilina Rathnayake
113486a4271fSThilina RathnayakeC     Create ceed qfunctions for diffsetupf and diffusionf
113586a4271fSThilina Rathnayake      call ceedqfunctioncreateinterior(ceed,1,diffsetupf,
11362d50dd3dSjeremylt     $  EXAMPLE_DIR
11372d50dd3dSjeremylt     $  //'bps/bps.h:diffsetupf'//char(0),qf_setup,err)
113834d77899SValeria Barra      call ceedqfunctionaddinput(qf_setup,'x',ncompx,
113986a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
114034d77899SValeria Barra      call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim,
114186a4271fSThilina Rathnayake     $  ceed_eval_grad,err)
114234d77899SValeria Barra      call ceedqfunctionaddinput(qf_setup,'weight',ncompu,
114386a4271fSThilina Rathnayake     $  ceed_eval_weight,err)
1144a2fa7910SValeria Barra      call ceedqfunctionaddoutput(qf_setup,'qdata',ngeo,
114586a4271fSThilina Rathnayake     $  ceed_eval_none,err)
114634d77899SValeria Barra      call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu,
114786a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
114886a4271fSThilina Rathnayake
114986a4271fSThilina Rathnayake      call ceedqfunctioncreateinterior(ceed,1,diffusionf,
11502d50dd3dSjeremylt     $  EXAMPLE_DIR
11512d50dd3dSjeremylt     $  //'bps/bps.h:diffusionf'//char(0),qf_diffusion,err)
115234d77899SValeria Barra      call ceedqfunctionaddinput(qf_diffusion,'u',ncompu*ldim,
115386a4271fSThilina Rathnayake     $  ceed_eval_grad,err)
1154a2fa7910SValeria Barra      call ceedqfunctionaddinput(qf_diffusion,'qdata',ngeo,
115586a4271fSThilina Rathnayake     $  ceed_eval_none,err)
115634d77899SValeria Barra      call ceedqfunctionaddoutput(qf_diffusion,'v',ncompu*ldim,
115786a4271fSThilina Rathnayake     $  ceed_eval_grad,err)
115886a4271fSThilina Rathnayake
115986a4271fSThilina RathnayakeC     Create ceed operators
116086a4271fSThilina Rathnayake      call ceedoperatorcreate(ceed,qf_setup,
1161442e7f0bSjeremylt     $  ceed_qfunction_none,ceed_qfunction_none,op_setup,err)
116286a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'x',erstrctx,
1163a8d32208Sjeremylt     $  basisx,ceed_vector_active,err)
116486a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'dx',erstrctx,
1165a8d32208Sjeremylt     $  basisx,ceed_vector_active,err)
116615910d16Sjeremylt      call ceedoperatorsetfield(op_setup,'weight',
116715910d16Sjeremylt     $  ceed_elemrestriction_none,basisx,ceed_vector_none,err)
1168a2fa7910SValeria Barra      call ceedoperatorsetfield(op_setup,'qdata',erstrctw,
1169a8d32208Sjeremylt     $  ceed_basis_collocated,ceed_vector_active,err)
117086a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'rhs',erstrctu,
1171a8d32208Sjeremylt     $  basisu,vec_rhs,err)
117286a4271fSThilina Rathnayake
117386a4271fSThilina Rathnayake      call ceedoperatorcreate(ceed,qf_diffusion,
117461dbc9d2Sjeremylt     $  ceed_qfunction_none,ceed_qfunction_none,op_diffusion,err)
117586a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_diffusion,'u',erstrctu,
1176a8d32208Sjeremylt     $  basisu,ceed_vector_active,err)
1177a2fa7910SValeria Barra      call ceedoperatorsetfield(op_diffusion,'qdata',erstrctw,
1178a8d32208Sjeremylt     $  ceed_basis_collocated,vec_qdata,err)
117986a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_diffusion,'v',erstrctu,
1180a8d32208Sjeremylt     $  basisu,ceed_vector_active,err)
118186a4271fSThilina Rathnayake
118286a4271fSThilina RathnayakeC     Compute setup data
118386a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_rhs,ceed_mem_host,
118486a4271fSThilina Rathnayake     $  ceed_use_pointer,r1,offset,err)
118586a4271fSThilina Rathnayake      call ceedoperatorapply(op_setup,vec_coords,vec_qdata,
118686a4271fSThilina Rathnayake     $  ceed_request_immediate,err)
118786a4271fSThilina Rathnayake
118886a4271fSThilina RathnayakeC     Set up true RHS
118986a4271fSThilina Rathnayake      call dssum         (r1,nx1,ny1,nz1)          ! r1
119086a4271fSThilina Rathnayake      call xmask1        (r1,h2,nelt)
119186a4271fSThilina Rathnayake
119286a4271fSThilina RathnayakeC     Set up algebraic RHS with libCEED
119386a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_p1,ceed_mem_host,
119486a4271fSThilina Rathnayake     $  ceed_use_pointer,h1,offset,err)
119586a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_ap1,ceed_mem_host,
119686a4271fSThilina Rathnayake     $  ceed_use_pointer,r2,offset,err)
119786a4271fSThilina Rathnayake      call ceedoperatorapply(op_diffusion,vec_p1,vec_ap1,
119886a4271fSThilina Rathnayake     $  ceed_request_immediate,err)                ! r2 = A_ceed*h1
119986a4271fSThilina Rathnayake      call dssum         (r2,nx1,ny1,nz1)
120086a4271fSThilina Rathnayake      call xmask1        (r2,h2,nelt)
120186a4271fSThilina Rathnayake
120286a4271fSThilina RathnayakeC     Set up algebraic RHS with Nek5000
120386a4271fSThilina Rathnayake      call axhm1         (pap,r3,h1,h1,h2,'bp3')   ! r3 = A_nek5k*h1
120486a4271fSThilina Rathnayake      call dssum         (r3,nx1,ny1,nz1)
120586a4271fSThilina Rathnayake      call xmask1        (r3,h2,nelt)
120686a4271fSThilina Rathnayake
120786a4271fSThilina Rathnayake      call nekgsync()
120886a4271fSThilina Rathnayake
120986a4271fSThilina RathnayakeC     Solve true RHS
121086a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "libCEED true RHS"
121186a4271fSThilina Rathnayake      tstart = dnekclock()
121286a4271fSThilina Rathnayake      call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
121386a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp3')
121486a4271fSThilina Rathnayake      tstop  = dnekclock()
121586a4271fSThilina Rathnayake
121686a4271fSThilina RathnayakeC     Output
121786a4271fSThilina Rathnayake      telaps = (tstop-tstart)
121886a4271fSThilina Rathnayake      maxits = maxit
121986a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
122086a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
122186a4271fSThilina Rathnayake
122286a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
122386a4271fSThilina Rathnayake        if (maxit>=100) then
122486a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
122586a4271fSThilina Rathnayake        endif
122686a4271fSThilina Rathnayake        if (dabs(er1)>1e-3) then
122786a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
122886a4271fSThilina Rathnayake        endif
122986a4271fSThilina Rathnayake      endif
123086a4271fSThilina Rathnayake
123186a4271fSThilina Rathnayake      nx      = nx1-1
1232e2b2c771Svaleria      nnode   = nelgt            ! nnodes
1233e2b2c771Svaleria      nnode   = nnode*(nx**ldim) ! nnodes
1234e2b2c771Svaleria      nppp    = nnode/np         ! nnodes/proc
123586a4271fSThilina Rathnayake
123605939c60Sjeremylt      dofps   = nnode/telaps     ! DOF/sec - scalar form
123786a4271fSThilina Rathnayake      titers  = telaps/maxits    ! time per iteration
123886a4271fSThilina Rathnayake      tppp_s  = titers/nppp      ! time per iteraton per local point
123986a4271fSThilina Rathnayake
124086a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
124105939c60Sjeremylt     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
124286a4271fSThilina Rathnayake
124386a4271fSThilina RathnayakeC     Solve libCEED algebraic RHS
124486a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "libCEED algebraic RHS"
124586a4271fSThilina Rathnayake      maxit = 100
124686a4271fSThilina Rathnayake      tstart = dnekclock()
124786a4271fSThilina Rathnayake      call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
124886a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp3')
124986a4271fSThilina Rathnayake      tstop  = dnekclock()
125086a4271fSThilina Rathnayake
125186a4271fSThilina RathnayakeC     Output
125286a4271fSThilina Rathnayake      telaps = (tstop-tstart)
125386a4271fSThilina Rathnayake      maxits = maxit
125486a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
125586a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
125686a4271fSThilina Rathnayake
125786a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
125886a4271fSThilina Rathnayake        if (maxit>=100) then
125986a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
126086a4271fSThilina Rathnayake        endif
126186a4271fSThilina Rathnayake        if (dabs(er1)>1e-9) then
126286a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
126386a4271fSThilina Rathnayake        endif
126486a4271fSThilina Rathnayake      endif
126586a4271fSThilina Rathnayake
126686a4271fSThilina Rathnayake      nx      = nx1-1
1267e2b2c771Svaleria      nnode   = nelgt            ! nnodes
1268e2b2c771Svaleria      nnode   = nnode*(nx**ldim) ! nnodes
1269e2b2c771Svaleria      nppp    = nnode/np         ! nnodes/proc
127086a4271fSThilina Rathnayake
127105939c60Sjeremylt      dofps   = nnode/telaps     ! DOF/sec - scalar form
127286a4271fSThilina Rathnayake      titers  = telaps/maxits    ! time per iteration
127386a4271fSThilina Rathnayake      tppp_s  = titers/nppp      ! time per iteraton per local point
127486a4271fSThilina Rathnayake
127586a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
127605939c60Sjeremylt     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
127786a4271fSThilina Rathnayake
127886a4271fSThilina RathnayakeC     Solve Nek5000 algebraic RHS
127986a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS"
128086a4271fSThilina Rathnayake      maxit = 100
128186a4271fSThilina Rathnayake      tstart = dnekclock()
128286a4271fSThilina Rathnayake      call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
128386a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp3')
128486a4271fSThilina Rathnayake      tstop  = dnekclock()
128586a4271fSThilina Rathnayake
128686a4271fSThilina RathnayakeC     Output
128786a4271fSThilina Rathnayake      telaps = (tstop-tstart)
128886a4271fSThilina Rathnayake      maxits = maxit
128986a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
129086a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
129186a4271fSThilina Rathnayake
129286a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
129386a4271fSThilina Rathnayake        if (maxit>=100) then
129486a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
129586a4271fSThilina Rathnayake        endif
129686a4271fSThilina Rathnayake        if (dabs(er1)>1e-9) then
129786a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
129886a4271fSThilina Rathnayake        endif
129986a4271fSThilina Rathnayake      endif
130086a4271fSThilina Rathnayake
130186a4271fSThilina Rathnayake      nx      = nx1-1
1302e2b2c771Svaleria      nnode   = nelgt            ! nnodes
1303e2b2c771Svaleria      nnode   = nnode*(nx**ldim) ! nnodes
1304e2b2c771Svaleria      nppp    = nnode/np         ! nnodes/proc
130586a4271fSThilina Rathnayake
130661dbc9d2Sjeremylt      dofps   = nnode/telaps     ! DOF/sec - scalar form
130786a4271fSThilina Rathnayake      titers  = telaps/maxits    ! time per iteration
130886a4271fSThilina Rathnayake      tppp_s  = titers/nppp      ! time per iteraton per local point
130986a4271fSThilina Rathnayake
131086a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
131161dbc9d2Sjeremylt     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
131286a4271fSThilina Rathnayake
131386a4271fSThilina Rathnayake    1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5)
131486a4271fSThilina Rathnayake    3 format(i3,i9,e12.4,1x,a8,i9)
131586a4271fSThilina Rathnayake
131686a4271fSThilina RathnayakeC     Destroy ceed handles
131786a4271fSThilina Rathnayake      call ceedvectordestroy(vec_p1,err)
131886a4271fSThilina Rathnayake      call ceedvectordestroy(vec_ap1,err)
131986a4271fSThilina Rathnayake      call ceedvectordestroy(vec_rhs,err)
132086a4271fSThilina Rathnayake      call ceedvectordestroy(vec_qdata,err)
132186a4271fSThilina Rathnayake      call ceedvectordestroy(vec_coords,err)
132286a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctu,err)
132386a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctx,err)
132486a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctw,err)
132586a4271fSThilina Rathnayake      call ceedbasisdestroy(basisu,err)
132686a4271fSThilina Rathnayake      call ceedbasisdestroy(basisx,err)
132786a4271fSThilina Rathnayake      call ceedqfunctiondestroy(qf_setup,err)
132886a4271fSThilina Rathnayake      call ceedqfunctiondestroy(qf_diffusion,err)
132986a4271fSThilina Rathnayake      call ceedoperatordestroy(op_setup,err)
133086a4271fSThilina Rathnayake      call ceedoperatordestroy(op_diffusion,err)
133186a4271fSThilina Rathnayake      call ceeddestroy(ceed,err)
133286a4271fSThilina Rathnayake
133386a4271fSThilina Rathnayake      return
133486a4271fSThilina Rathnayake      end
133586a4271fSThilina RathnayakeC-----------------------------------------------------------------------
133686a4271fSThilina Rathnayake      subroutine cggos(u1,r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,
133786a4271fSThilina Rathnayake     $  vec_ap1,maxit,bpname)
133886a4271fSThilina RathnayakeC     Scalar conjugate gradient iteration for solution of uncoupled
133986a4271fSThilina RathnayakeC     Helmholtz equations
134086a4271fSThilina RathnayakeC     Input: r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,vec_ap1,bpname
134186a4271fSThilina RathnayakeC     Output: u1,maxit
134286a4271fSThilina Rathnayake      include 'SIZE'
134386a4271fSThilina Rathnayake      include 'TOTAL'
134486a4271fSThilina Rathnayake      include 'DOMAIN'
134586a4271fSThilina Rathnayake      include 'FDMH1'
134686a4271fSThilina Rathnayake      character*3 bpname
134786a4271fSThilina Rathnayake
134886a4271fSThilina RathnayakeC     INPUT:  rhs1 - rhs
134986a4271fSThilina RathnayakeC             h1   - exact solution
135086a4271fSThilina Rathnayake
135186a4271fSThilina Rathnayake      parameter (lt=lx1*ly1*lz1*lelt)
135286a4271fSThilina Rathnayake      parameter (ld=lxd*lyd*lzd*lelt)
135386a4271fSThilina Rathnayake      real*8 u1(lt),r1(lt),h1(lt),h2(lt)
135486a4271fSThilina Rathnayake      real*8 rmult(1),binv(1)
135586a4271fSThilina Rathnayake      integer ceed,ceed_op,vec_ap1,vec_p1
135686a4271fSThilina Rathnayake      common /scrcg/ dpc(lt),p1(lt),z1(lt)
135786a4271fSThilina Rathnayake      common /scrca/ wv(4),wk(4),rpp1(4),rpp2(4),alph(4),beta(4),pap(4)
135886a4271fSThilina Rathnayake
135986a4271fSThilina Rathnayake      real*8 ap1(lt)
136086a4271fSThilina Rathnayake      equivalence (ap1,z1)
136186a4271fSThilina Rathnayake
136286a4271fSThilina Rathnayake      vol   = volfld(ifield)
136386a4271fSThilina Rathnayake      nel   = nelfld(ifield)
136486a4271fSThilina Rathnayake      nxyz  = lx1*ly1*lz1
136586a4271fSThilina Rathnayake      n     = nxyz*nel
136686a4271fSThilina Rathnayake      nx    = nx1-1                  ! Polynomial order (just for i/o)
136786a4271fSThilina Rathnayake
136886a4271fSThilina Rathnayake      tol=tin
136986a4271fSThilina Rathnayake
137086a4271fSThilina Rathnayake      if(bpname.ne.'bp1') then
137186a4271fSThilina Rathnayake        call setprecn_bp3(dpc,h1,h2) ! Set up diagional pre-conidtioner
137286a4271fSThilina Rathnayake      else
137386a4271fSThilina Rathnayake        call setprecn_bp1(dpc,h1,h2) ! Set up diagional pre-conidtioner
137486a4271fSThilina Rathnayake      endif
137586a4271fSThilina Rathnayake
137686a4271fSThilina Rathnayake      call rzero         (u1,n)      ! Initialize solution
137786a4271fSThilina Rathnayake
137886a4271fSThilina Rathnayake      wv(1)=0
137986a4271fSThilina Rathnayake      do i=1,n
138086a4271fSThilina Rathnayake         s=rmult(i)                  !      -1
138186a4271fSThilina Rathnayake         p1(i)=dpc(i)*r1(i)          ! p = M  r      T
138286a4271fSThilina Rathnayake         wv(1)=wv(1)+s*p1(i)*r1(i)   !              r p
138386a4271fSThilina Rathnayake      enddo
138486a4271fSThilina Rathnayake      call gop(wv(1),wk,'+  ',1)
138586a4271fSThilina Rathnayake      rpp1(1) = wv  (1)
138686a4271fSThilina Rathnayake
138786a4271fSThilina Rathnayake      do 1000 iter=1,maxit
138886a4271fSThilina Rathnayake         call axhm1_ceed (pap,ap1,p1,h1,h2,ceed,ceed_op,
138986a4271fSThilina Rathnayake     $     vec_ap1,vec_p1)
139086a4271fSThilina Rathnayake         call dssum    (ap1,nx1,ny1,nz1)
139186a4271fSThilina Rathnayake         if (bpname.ne.'bp1') call xmask1(ap1,h2,nel)
139286a4271fSThilina Rathnayake
139386a4271fSThilina Rathnayake         call gop      (pap,wk,'+  ',1)
139486a4271fSThilina Rathnayake         alph(1) = rpp1(1)/pap(1)
139586a4271fSThilina Rathnayake
139686a4271fSThilina Rathnayake         do i=1,n
139786a4271fSThilina Rathnayake            u1(i)=u1(i)+alph(1)* p1(i)
139886a4271fSThilina Rathnayake            r1(i)=r1(i)-alph(1)*ap1(i)
139986a4271fSThilina Rathnayake         enddo
140086a4271fSThilina Rathnayake
140186a4271fSThilina RathnayakeC        tolerance check here
140286a4271fSThilina Rathnayake         call rzero(wv,2)
140386a4271fSThilina Rathnayake         do i=1,n
140486a4271fSThilina Rathnayake            wv(1)=wv(1)+r1(i)*r1(i)            ! L2 error estimate
140586a4271fSThilina Rathnayake            z1(i)=dpc(i)*r1(i)                 ! z = M  r
140686a4271fSThilina Rathnayake            wv(2)=wv(2)+rmult(i)*z1(i)*r1(i)   ! r z
140786a4271fSThilina Rathnayake         enddo
140886a4271fSThilina Rathnayake         call gop(wv,wk,'+  ',2)
140986a4271fSThilina Rathnayake
141086a4271fSThilina RathnayakeC        if (nio.eq.0) write(6,1) ifield,istep,iter,nx,(wv(k),k=1,1)
141186a4271fSThilina Rathnayake  1     format(i2,i9,i5,i4,1p1e12.4,' cggos')
141286a4271fSThilina Rathnayake
141386a4271fSThilina Rathnayake         enorm=sqrt(wv(1))
141486a4271fSThilina Rathnayake         if (enorm.lt.tol) then
141586a4271fSThilina Rathnayake            ifin = iter
141686a4271fSThilina Rathnayake            if (nio.eq.0) write(6,3000) istep,ifin,enorm,tol
141786a4271fSThilina Rathnayake            goto 9999
141886a4271fSThilina Rathnayake         endif
141986a4271fSThilina RathnayakeC        if (nio.eq.0) write(6,2) iter,enorm,alph(1),pap(1),'alpha'
142086a4271fSThilina Rathnayake 2      format(i5,1p3e12.4,2x,a5)
142186a4271fSThilina Rathnayake
142286a4271fSThilina Rathnayake         rpp2(1)=rpp1(1)
142386a4271fSThilina Rathnayake         rpp1(1)=wv  (2)
142486a4271fSThilina Rathnayake         beta1  =rpp1(1)/rpp2(1)
142586a4271fSThilina Rathnayake         do i=1,n
142686a4271fSThilina Rathnayake            p1(i)=z1(i) + beta1*p1(i)
142786a4271fSThilina Rathnayake         enddo
142886a4271fSThilina Rathnayake
142986a4271fSThilina Rathnayake 1000 continue
143086a4271fSThilina Rathnayake
143186a4271fSThilina Rathnayake      rbnorm=sqrt(wv(1))
143286a4271fSThilina Rathnayake      if (nio.eq.0) write (6,3001) istep,iter,rbnorm,tol
143386a4271fSThilina Rathnayake      iter = iter-1
143486a4271fSThilina Rathnayake
143586a4271fSThilina Rathnayake 9999 continue
143686a4271fSThilina Rathnayake
143786a4271fSThilina Rathnayake      maxit=iter
143886a4271fSThilina Rathnayake
143986a4271fSThilina Rathnayake 3000 format(i12,1x,'cggo scalar:',i6,1p5e13.4)
144086a4271fSThilina Rathnayake 3001 format(2i6,' Unconverged cggo scalar: rbnorm =',1p2e13.6)
144186a4271fSThilina Rathnayake
144286a4271fSThilina Rathnayake      return
144386a4271fSThilina Rathnayake      end
144486a4271fSThilina RathnayakeC-----------------------------------------------------------------------
144586a4271fSThilina Rathnayake      subroutine axhm1_ceed(pap,ap1,p1,h1,h2,ceed,ceed_op,
144686a4271fSThilina Rathnayake     $  vec_ap1,vec_p1)
144786a4271fSThilina RathnayakeC     Vector conjugate gradient matvec for solution of uncoupled
144886a4271fSThilina RathnayakeC     Helmholtz equations
144986a4271fSThilina RathnayakeC     Input: pap,p1,h1,h2,bpname,ceed,ceed_op,vec_ap1,vec_p1
145086a4271fSThilina RathnayakeC     Output: ap1
145186a4271fSThilina Rathnayake      include 'SIZE'
145286a4271fSThilina Rathnayake      include 'TOTAL'
1453*ec3da8bcSJed Brown      include 'ceed/fortran.h'
145486a4271fSThilina Rathnayake
145586a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2))
145686a4271fSThilina Rathnayake      real*8       gf(lg,lx,lelt)             ! Equivalence new gf() data
145786a4271fSThilina Rathnayake      equivalence (gf,g1m1)                   ! layout to g1m1...g6m1
145886a4271fSThilina Rathnayake
145986a4271fSThilina Rathnayake      real*8   pap(3)
146086a4271fSThilina Rathnayake      real*8   ap1(lx,lelt)
146186a4271fSThilina Rathnayake      real*8    p1(lx,lelt)
146286a4271fSThilina Rathnayake      real*8    h1(lx,lelt),h2(lx,lelt)
146386a4271fSThilina Rathnayake      integer ceed,ceed_op,vec_ap1,vec_p1,err
146486a4271fSThilina Rathnayake      integer i,e
146586a4271fSThilina Rathnayake      integer*8 offset
146686a4271fSThilina Rathnayake
146786a4271fSThilina Rathnayake      offset=0
146886a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_p1,ceed_mem_host,ceed_use_pointer,
146986a4271fSThilina Rathnayake     $  p1,offset,err)
147086a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_ap1,ceed_mem_host,ceed_use_pointer,
147186a4271fSThilina Rathnayake     $  ap1,offset,err)
147286a4271fSThilina Rathnayake
147386a4271fSThilina Rathnayake      call ceedoperatorapply(ceed_op,vec_p1,vec_ap1,
147486a4271fSThilina Rathnayake     $  ceed_request_immediate,err)
147586a4271fSThilina Rathnayake
14766a6c615bSJeremy L Thompson      call ceedvectortakearray(vec_p1,ceed_mem_host,0,offset,err)
14776a6c615bSJeremy L Thompson      call ceedvectortakearray(vec_ap1,ceed_mem_host,0,offset,err)
147886a4271fSThilina Rathnayake
147986a4271fSThilina Rathnayake      pap(1)=0.
148086a4271fSThilina Rathnayake
148186a4271fSThilina Rathnayake      do e=1,nelt
148286a4271fSThilina Rathnayake         do i=1,lx
148386a4271fSThilina Rathnayake           pap(1)=pap(1)+ap1(i,e)*p1(i,e)
148486a4271fSThilina Rathnayake         enddo
148586a4271fSThilina Rathnayake      enddo
148686a4271fSThilina Rathnayake
148786a4271fSThilina Rathnayake      return
148886a4271fSThilina Rathnayake      end
148986a4271fSThilina RathnayakeC-----------------------------------------------------------------------
149086a4271fSThilina Rathnayake      subroutine ax_e_bp1(w,u,g,h1,h2,b,ju,us,ut)
149186a4271fSThilina RathnayakeC     Local matrix-vector for solution of BP3 (stiffness matrix)
149286a4271fSThilina RathnayakeC     Input: u,g,h1,h2,b,ju,us,ut   Output: w
149386a4271fSThilina Rathnayake      include 'SIZE'
149486a4271fSThilina Rathnayake      include 'TOTAL'
149586a4271fSThilina Rathnayake
149686a4271fSThilina Rathnayake      parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2))
149786a4271fSThilina Rathnayake      real*8 w(lxyz),u(lxyz),g(lg,lxyz),h1(lxyz),h2(lxyz),b(lxyz)
149886a4271fSThilina Rathnayake      real*8 ju(lxyz),us(lxyz),ut(lxyz)
149986a4271fSThilina Rathnayake
150086a4271fSThilina Rathnayake      nxq = nx1+1 ! Number of quadrature points
150186a4271fSThilina Rathnayake
150286a4271fSThilina Rathnayake      lxyzq = nxq**ldim
150386a4271fSThilina Rathnayake
150486a4271fSThilina Rathnayake      call intp_rstd (ju,u,lx1,nxq,if3d,0) ! 0 --> Fwd interpolation
150586a4271fSThilina Rathnayake      do i=1,lxyzq
150686a4271fSThilina Rathnayake         ju(i)=ju(i)*h2(i) !! h2 must be on the fine grid, w/ quad wts
150786a4271fSThilina Rathnayake      enddo
150886a4271fSThilina Rathnayake      call intp_rstd (w,ju,lx1,nxq,if3d,1) ! 1 --> ju-->u
150986a4271fSThilina Rathnayake
151086a4271fSThilina Rathnayake      return
151186a4271fSThilina Rathnayake      end
151286a4271fSThilina RathnayakeC-----------------------------------------------------------------------
151386a4271fSThilina Rathnayake      subroutine axhm1_bp1(pap,ap1,p1,h1,h2)
151486a4271fSThilina RathnayakeC     Vector conjugate gradient matvec for solution of BP1 (mass matrix)
151586a4271fSThilina RathnayakeC     Input: pap,p1,h1,h2           Output: ap1
151686a4271fSThilina Rathnayake      include 'SIZE'
151786a4271fSThilina Rathnayake      include 'TOTAL'
151886a4271fSThilina Rathnayake
151986a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2))
152086a4271fSThilina Rathnayake      real*8         gf(lg,lx,lelt)             ! Equivalence new gf() data
152186a4271fSThilina Rathnayake      equivalence (gf,g1m1)                     ! layout to g1m1...g6m1
152286a4271fSThilina Rathnayake
152386a4271fSThilina Rathnayake      real*8 pap(3)
152486a4271fSThilina Rathnayake      real*8 ap1(lx,lelt)
152586a4271fSThilina Rathnayake      real*8  p1(lx,lelt)
152686a4271fSThilina Rathnayake      real*8  h1(lx,lelt),h2(lx,lelt)
152786a4271fSThilina Rathnayake
152886a4271fSThilina Rathnayake      real*8 ur(lx),us(lx),ut(lx)
152986a4271fSThilina Rathnayake      common /ctmp1/ ur,us,ut
153086a4271fSThilina Rathnayake
153186a4271fSThilina Rathnayake      integer e
153286a4271fSThilina Rathnayake
153386a4271fSThilina Rathnayake      pap(1)=0.
153486a4271fSThilina Rathnayake
153586a4271fSThilina Rathnayake      k=1
153686a4271fSThilina Rathnayake      nxq = nx1+1
153786a4271fSThilina Rathnayake
153886a4271fSThilina Rathnayake      do e=1,nelt
153986a4271fSThilina Rathnayake
154086a4271fSThilina Rathnayake         call ax_e_bp1(ap1(1,e),p1(1,e),gf(1,1,e),h1(1,e),h2(k,1)
154186a4271fSThilina Rathnayake     $                                          ,bm1(1,1,1,e),ur,us,ut)
154286a4271fSThilina Rathnayake         do i=1,lx
154386a4271fSThilina Rathnayake           pap(1)=pap(1)+ap1(i,e)*p1(i,e)
154486a4271fSThilina Rathnayake         enddo
154586a4271fSThilina Rathnayake         k=k+nxq*nxq*nxq
154686a4271fSThilina Rathnayake
154786a4271fSThilina Rathnayake      enddo
154886a4271fSThilina Rathnayake
154986a4271fSThilina Rathnayake      return
155086a4271fSThilina Rathnayake      end
155186a4271fSThilina RathnayakeC-----------------------------------------------------------------------
155286a4271fSThilina Rathnayake      subroutine ax_e_bp3(w,u,g,ur,us,ut,wk)
155386a4271fSThilina RathnayakeC     Local matrix-vector for solution of BP3 (stiffness matrix)
155486a4271fSThilina RathnayakeC     Input: u,g,ur,us,ut,wk        Output: w
155586a4271fSThilina Rathnayake      include 'SIZE'
155686a4271fSThilina Rathnayake      include 'TOTAL'
155786a4271fSThilina Rathnayake
155886a4271fSThilina Rathnayake      parameter (lzq=lx1+1,lxyz=lx1*lx1*lx1,lxyzq=lzq*lzq*lzq)
155986a4271fSThilina Rathnayake
156086a4271fSThilina Rathnayake      common /ctmp0/ tmp(lxyzq)
156186a4271fSThilina Rathnayake      common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq)
156286a4271fSThilina Rathnayake
156386a4271fSThilina Rathnayake      real*8 ur(lxyzq),us(lxyzq),ut(lxyzq),wk(lxyzq)
156486a4271fSThilina Rathnayake      real*8 w(lxyz),u(lxyz),g(2*ldim,lxyzq)
156586a4271fSThilina Rathnayake
156686a4271fSThilina Rathnayake      n = lzq-1
156786a4271fSThilina Rathnayake
156886a4271fSThilina Rathnayake      call intp_rstd  (wk,u,lx1,lzq,if3d,0) ! 0 --> Fwd interpolation
156986a4271fSThilina Rathnayake      call loc_grad3  (ur,us,ut,wk,n,dxmq,dxtmq)
157086a4271fSThilina Rathnayake
157186a4271fSThilina Rathnayake      do i=1,lxyzq
157286a4271fSThilina Rathnayake         wr = g(1,i)*ur(i) + g(2,i)*us(i) + g(3,i)*ut(i)
157386a4271fSThilina Rathnayake         ws = g(2,i)*ur(i) + g(4,i)*us(i) + g(5,i)*ut(i)
157486a4271fSThilina Rathnayake         wt = g(3,i)*ur(i) + g(5,i)*us(i) + g(6,i)*ut(i)
157586a4271fSThilina Rathnayake         ur(i) = wr
157686a4271fSThilina Rathnayake         us(i) = ws
157786a4271fSThilina Rathnayake         ut(i) = wt
157886a4271fSThilina Rathnayake      enddo
157986a4271fSThilina Rathnayake
158086a4271fSThilina Rathnayake      call loc_grad3t (wk,ur,us,ut,n,dxmq,dxtmq,tmp)
158186a4271fSThilina Rathnayake      call intp_rstd  (w,wk,lx1,lzq,if3d,1) ! 1 --> ju-->u
158286a4271fSThilina Rathnayake
158386a4271fSThilina Rathnayake      return
158486a4271fSThilina Rathnayake      end
158586a4271fSThilina RathnayakeC-----------------------------------------------------------------------
158686a4271fSThilina Rathnayake      subroutine axhm1_bp3(pap,ap1,p1,h1,h2)
158786a4271fSThilina RathnayakeC     Vector conjugate gradient matvec for solution of BP3 (stiffness matrix)
158886a4271fSThilina RathnayakeC     Input: pap,p1,h1,h2           Output: ap1
158986a4271fSThilina Rathnayake      include 'SIZE'
159086a4271fSThilina Rathnayake      include 'TOTAL'
159186a4271fSThilina Rathnayake
159286a4271fSThilina Rathnayake      parameter (lzq=lx1+1)
159386a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
159486a4271fSThilina Rathnayake      common /bpgfactors/ gf(lg,lq,lelt),bmq(lq,lelt),w3mq(lq)
159586a4271fSThilina Rathnayake
159686a4271fSThilina Rathnayake      real*8 pap(3)
159786a4271fSThilina Rathnayake      real*8 ap1(lx,lelt)
159886a4271fSThilina Rathnayake      real*8  p1(lx,lelt)
159986a4271fSThilina Rathnayake      real*8  h1(lx,lelt),h2(lx,lelt)
160086a4271fSThilina Rathnayake
160186a4271fSThilina Rathnayake      common /ctmp1/ ur,us,ut,wk
160286a4271fSThilina Rathnayake      real*8 ur(lq),us(lq),ut(lq),wk(lq)
160386a4271fSThilina Rathnayake
160486a4271fSThilina Rathnayake      integer e
160586a4271fSThilina Rathnayake
160686a4271fSThilina Rathnayake      pap(1)=0.
160786a4271fSThilina Rathnayake
160886a4271fSThilina Rathnayake      do e=1,nelt
160986a4271fSThilina Rathnayake
161086a4271fSThilina Rathnayake         call ax_e_bp3(ap1(1,e),p1(1,e),gf(1,1,e),ur,us,ut,wk)
161186a4271fSThilina Rathnayake         do i=1,lx
161286a4271fSThilina Rathnayake           pap(1)=pap(1)+p1(i,e)*ap1(i,e)
161386a4271fSThilina Rathnayake         enddo
161486a4271fSThilina Rathnayake
161586a4271fSThilina Rathnayake      enddo
161686a4271fSThilina Rathnayake
161786a4271fSThilina Rathnayake      return
161886a4271fSThilina Rathnayake      end
161986a4271fSThilina RathnayakeC-----------------------------------------------------------------------
162086a4271fSThilina Rathnayake      subroutine axhm1(pap,ap1,p1,h1,h2,bpname)
162186a4271fSThilina RathnayakeC     Vector conjugate gradient matvec for solution of uncoupled
162286a4271fSThilina RathnayakeC     Helmholtz equations
162386a4271fSThilina RathnayakeC     Input: pap,p1,h1,h2,bpname    Output: ap1
162486a4271fSThilina Rathnayake      include 'SIZE'
162586a4271fSThilina Rathnayake      include 'TOTAL'
162686a4271fSThilina Rathnayake
162786a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1)
162886a4271fSThilina Rathnayake
162986a4271fSThilina Rathnayake      real*8 pap(3),ap1(lx,lelt),p1(lx,lelt)
163086a4271fSThilina Rathnayake      real*8 h1(lx,lelt),h2(lx,lelt)
163186a4271fSThilina Rathnayake
163286a4271fSThilina Rathnayake      character*3 bpname
163386a4271fSThilina Rathnayake
163486a4271fSThilina Rathnayake      if (bpname.eq.'bp1') then
163586a4271fSThilina Rathnayake         call axhm1_bp1(pap,ap1,p1,h1,h2)
163686a4271fSThilina Rathnayake
163786a4271fSThilina Rathnayake      elseif (bpname.eq.'bp3') then
163886a4271fSThilina Rathnayake         call axhm1_bp3(pap,ap1,p1,h1,h2)
163986a4271fSThilina Rathnayake
164086a4271fSThilina Rathnayake      else
164186a4271fSThilina Rathnayake         write(6,*) bpname,' axhm1 bpname error'
164286a4271fSThilina Rathnayake         stop
164386a4271fSThilina Rathnayake
164486a4271fSThilina Rathnayake      endif
164586a4271fSThilina Rathnayake
164686a4271fSThilina Rathnayake      return
164786a4271fSThilina Rathnayake      end
164886a4271fSThilina RathnayakeC-----------------------------------------------------------------------
164986a4271fSThilina Rathnayake      subroutine get_bp(bp)
165086a4271fSThilina RathnayakeC     Get BP to run
165186a4271fSThilina RathnayakeC     Input:                        Output: bp
165286a4271fSThilina Rathnayake      integer i,bp
165386a4271fSThilina Rathnayake      character*64 bpval
165486a4271fSThilina Rathnayake
165586a4271fSThilina Rathnayake      bp=0
165686a4271fSThilina Rathnayake      if(iargc().ge.1) then
165786a4271fSThilina Rathnayake        call getarg(1,bpval)
165886a4271fSThilina Rathnayake      endif
165986a4271fSThilina Rathnayake      if(bpval.eq."bp1") then
166086a4271fSThilina Rathnayake        bp=1
166186a4271fSThilina Rathnayake      elseif(bpval.eq."bp3") then
166286a4271fSThilina Rathnayake        bp=3
166386a4271fSThilina Rathnayake      endif
166486a4271fSThilina Rathnayake
166586a4271fSThilina Rathnayake      return
166686a4271fSThilina Rathnayake      end
166786a4271fSThilina RathnayakeC-----------------------------------------------------------------------
166886a4271fSThilina Rathnayake      subroutine get_spec(spec)
166986a4271fSThilina RathnayakeC     Get CEED backend specification
167086a4271fSThilina RathnayakeC     Input:                        Output: spec
167186a4271fSThilina Rathnayake      integer i
167286a4271fSThilina Rathnayake      character*64 spec
167386a4271fSThilina Rathnayake
167486a4271fSThilina Rathnayake      spec = '/cpu/self'
167586a4271fSThilina Rathnayake      if(iargc().ge.2) then
167686a4271fSThilina Rathnayake        call getarg(2,spec)
167786a4271fSThilina Rathnayake      endif
167886a4271fSThilina Rathnayake
167986a4271fSThilina Rathnayake      return
168086a4271fSThilina Rathnayake      end
168186a4271fSThilina RathnayakeC-----------------------------------------------------------------------
168286a4271fSThilina Rathnayake      subroutine get_test(test)
168386a4271fSThilina RathnayakeC     Get test mode flag
168486a4271fSThilina RathnayakeC     Input:                        Output: test
168586a4271fSThilina Rathnayake      integer i,test
168686a4271fSThilina Rathnayake      character*64 testval
168786a4271fSThilina Rathnayake
168886a4271fSThilina Rathnayake      test=0
168986a4271fSThilina Rathnayake      if(iargc().ge.3) then
169086a4271fSThilina Rathnayake        call getarg(3,testval)
169186a4271fSThilina Rathnayake      endif
169286a4271fSThilina Rathnayake      if(testval.eq."test") then
169386a4271fSThilina Rathnayake        test=1
169486a4271fSThilina Rathnayake      endif
169586a4271fSThilina Rathnayake
169686a4271fSThilina Rathnayake      return
169786a4271fSThilina Rathnayake      end
169886a4271fSThilina RathnayakeC-----------------------------------------------------------------------
169961dbc9d2Sjeremylt
1700