xref: /libCEED/examples/nek/bps/bps.usr (revision ee07ded2da68b589c1255126ba09102d164f666f)
186a4271fSThilina RathnayakeC Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
286a4271fSThilina RathnayakeC the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
386a4271fSThilina RathnayakeC reserved. See files LICENSE and NOTICE for details.
486a4271fSThilina RathnayakeC
586a4271fSThilina RathnayakeC This file is part of CEED, a collection of benchmarks, miniapps, software
686a4271fSThilina RathnayakeC libraries and APIs for efficient high-order finite element and spectral
786a4271fSThilina RathnayakeC element discretizations for exascale applications. For more information and
886a4271fSThilina RathnayakeC source code availability see http://github.com/ceed.
986a4271fSThilina RathnayakeC
1086a4271fSThilina RathnayakeC The CEED research is supported by the Exascale Computing Project (17-SC-20-SC)
1186a4271fSThilina RathnayakeC a collaborative effort of two U.S. Department of Energy organizations (Office
1286a4271fSThilina RathnayakeC of Science and the National Nuclear Security Administration) responsible for
1386a4271fSThilina RathnayakeC the planning and preparation of a capable exascale ecosystem, including
1486a4271fSThilina RathnayakeC software, applications, hardware, advanced system engineering and early
1586a4271fSThilina RathnayakeC testbed platforms, in support of the nation's exascale computing imperative.
1686a4271fSThilina Rathnayake
1786a4271fSThilina RathnayakeC> @file
1886a4271fSThilina RathnayakeC> Mass and diffusion operators examples using Nek5000
1986a4271fSThilina RathnayakeC TESTARGS -c {ceed_resource} -e bp1 -n 1 -b 4 -test
2086a4271fSThilina Rathnayake
2186a4271fSThilina RathnayakeC-----------------------------------------------------------------------
2286a4271fSThilina Rathnayake      subroutine masssetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
2386a4271fSThilina Rathnayake     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
2486a4271fSThilina Rathnayake     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
2586a4271fSThilina RathnayakeC     Set up mass operator
2686a4271fSThilina RathnayakeC     Input: u1,u2,u3,q             Output: v1,v2,ierr
2786a4271fSThilina Rathnayake      integer q,ierr
2886a4271fSThilina Rathnayake      real*8 ctx(1)
2986a4271fSThilina Rathnayake      real*8 u1(3*q)
3086a4271fSThilina Rathnayake      real*8 u2(9*q)
3186a4271fSThilina Rathnayake      real*8 u3(q)
3286a4271fSThilina Rathnayake      real*8 v1(q)
3386a4271fSThilina Rathnayake      real*8 v2(q)
3486a4271fSThilina Rathnayake      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
3586a4271fSThilina Rathnayake      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
3686a4271fSThilina Rathnayake      real*8 jacmq
3786a4271fSThilina Rathnayake
38*ee07ded2SValeria BarraC     Quadrature Point Loop
3986a4271fSThilina Rathnayake      do i=1,q
4086a4271fSThilina Rathnayake        a11=u2(i+q*0)
4186a4271fSThilina Rathnayake        a12=u2(i+q*3)
4286a4271fSThilina Rathnayake        a13=u2(i+q*6)
4386a4271fSThilina Rathnayake
4486a4271fSThilina Rathnayake        a21=u2(i+q*1)
4586a4271fSThilina Rathnayake        a22=u2(i+q*4)
4686a4271fSThilina Rathnayake        a23=u2(i+q*7)
4786a4271fSThilina Rathnayake
4886a4271fSThilina Rathnayake        a31=u2(i+q*2)
4986a4271fSThilina Rathnayake        a32=u2(i+q*5)
5086a4271fSThilina Rathnayake        a33=u2(i+q*8)
5186a4271fSThilina Rathnayake
5286a4271fSThilina Rathnayake        g11 = (a22*a33-a23*a32)
5386a4271fSThilina Rathnayake        g12 = (a13*a32-a33*a12)
5486a4271fSThilina Rathnayake        g13 = (a12*a23-a22*a13)
5586a4271fSThilina Rathnayake
5686a4271fSThilina Rathnayake        g21 = (a23*a31-a21*a33)
5786a4271fSThilina Rathnayake        g22 = (a11*a33-a31*a13)
5886a4271fSThilina Rathnayake        g23 = (a13*a21-a23*a11)
5986a4271fSThilina Rathnayake
6086a4271fSThilina Rathnayake        g31 = (a21*a32-a22*a31)
6186a4271fSThilina Rathnayake        g32 = (a12*a31-a32*a11)
6286a4271fSThilina Rathnayake        g33 = (a11*a22-a21*a12)
6386a4271fSThilina Rathnayake
6486a4271fSThilina Rathnayake        jacmq = a11*g11+a21*g12+a31*g13
6586a4271fSThilina Rathnayake
6686a4271fSThilina RathnayakeC       Rho
6786a4271fSThilina Rathnayake        v1(i)=u3(i)*jacmq
6886a4271fSThilina Rathnayake
6986a4271fSThilina RathnayakeC       RHS
7086a4271fSThilina Rathnayake        v2(i)=u3(i)*jacmq
7186a4271fSThilina Rathnayake     $             *dsqrt(u1(i+q*0)*u1(i+q*0)
7286a4271fSThilina Rathnayake     $                   +u1(i+q*1)*u1(i+q*1)
7386a4271fSThilina Rathnayake     $                   +u1(i+q*2)*u1(i+q*2))
7486a4271fSThilina Rathnayake      enddo
7586a4271fSThilina Rathnayake
7686a4271fSThilina Rathnayake      ierr=0
7786a4271fSThilina Rathnayake      end
7886a4271fSThilina RathnayakeC-----------------------------------------------------------------------
7986a4271fSThilina Rathnayake      subroutine massf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
8086a4271fSThilina Rathnayake     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
8186a4271fSThilina Rathnayake     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
8286a4271fSThilina RathnayakeC     Apply mass operator
8386a4271fSThilina RathnayakeC     Input: u1,u2,q                Output: v1,ierr
8486a4271fSThilina Rathnayake      integer q,ierr
8586a4271fSThilina Rathnayake      real*8 ctx(1)
8686a4271fSThilina Rathnayake      real*8 u1(q)
8786a4271fSThilina Rathnayake      real*8 u2(q)
8886a4271fSThilina Rathnayake      real*8 v1(q)
8986a4271fSThilina Rathnayake
90*ee07ded2SValeria BarraC     Quadrature Point Loop
9186a4271fSThilina Rathnayake      do i=1,q
9286a4271fSThilina Rathnayake        v1(i)=u2(i)*u1(i)
9386a4271fSThilina Rathnayake      enddo
9486a4271fSThilina Rathnayake
9586a4271fSThilina Rathnayake      ierr=0
9686a4271fSThilina Rathnayake      end
9786a4271fSThilina RathnayakeC-----------------------------------------------------------------------
9886a4271fSThilina Rathnayake      subroutine diffsetupf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
9986a4271fSThilina Rathnayake     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
10086a4271fSThilina Rathnayake     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
10186a4271fSThilina RathnayakeC     Set up diffusion operator
10286a4271fSThilina RathnayakeC     Input: u1,u2,u3,q             Output: v1,v2,ierr
10386a4271fSThilina Rathnayake      integer q,ierr
10486a4271fSThilina Rathnayake      real*8 ctx(1)
10586a4271fSThilina Rathnayake      real*8 u1(3*q)
10686a4271fSThilina Rathnayake      real*8 u2(9*q)
10786a4271fSThilina Rathnayake      real*8 u3(q)
10886a4271fSThilina Rathnayake      real*8 v1(6*q)
10986a4271fSThilina Rathnayake      real*8 v2(q)
11086a4271fSThilina Rathnayake      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
11186a4271fSThilina Rathnayake      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
11286a4271fSThilina Rathnayake      real*8 jacmq,scl
11386a4271fSThilina Rathnayake      real*8 c(3),k(3)
11486a4271fSThilina Rathnayake
115*ee07ded2SValeria BarraC     Quadrature Point Loop
11686a4271fSThilina Rathnayake      do i=1,q
11786a4271fSThilina Rathnayake        pi = 3.14159265358979323846
11886a4271fSThilina Rathnayake
11986a4271fSThilina Rathnayake        c(1)=0.
12086a4271fSThilina Rathnayake        c(2)=1.
12186a4271fSThilina Rathnayake        c(3)=2.
12286a4271fSThilina Rathnayake        k(1)=1.
12386a4271fSThilina Rathnayake        k(2)=2.
12486a4271fSThilina Rathnayake        k(3)=3.
12586a4271fSThilina Rathnayake
12686a4271fSThilina Rathnayake        a11=u2(i+q*0)
12786a4271fSThilina Rathnayake        a12=u2(i+q*3)
12886a4271fSThilina Rathnayake        a13=u2(i+q*6)
12986a4271fSThilina Rathnayake
13086a4271fSThilina Rathnayake        a21=u2(i+q*1)
13186a4271fSThilina Rathnayake        a22=u2(i+q*4)
13286a4271fSThilina Rathnayake        a23=u2(i+q*7)
13386a4271fSThilina Rathnayake
13486a4271fSThilina Rathnayake        a31=u2(i+q*2)
13586a4271fSThilina Rathnayake        a32=u2(i+q*5)
13686a4271fSThilina Rathnayake        a33=u2(i+q*8)
13786a4271fSThilina Rathnayake
13886a4271fSThilina Rathnayake        g11 = (a22*a33-a23*a32)
13986a4271fSThilina Rathnayake        g12 = (a13*a32-a33*a12)
14086a4271fSThilina Rathnayake        g13 = (a12*a23-a22*a13)
14186a4271fSThilina Rathnayake
14286a4271fSThilina Rathnayake        g21 = (a23*a31-a21*a33)
14386a4271fSThilina Rathnayake        g22 = (a11*a33-a31*a13)
14486a4271fSThilina Rathnayake        g23 = (a13*a21-a23*a11)
14586a4271fSThilina Rathnayake
14686a4271fSThilina Rathnayake        g31 = (a21*a32-a22*a31)
14786a4271fSThilina Rathnayake        g32 = (a12*a31-a32*a11)
14886a4271fSThilina Rathnayake        g33 = (a11*a22-a21*a12)
14986a4271fSThilina Rathnayake
15086a4271fSThilina Rathnayake        jacmq = a11*g11+a21*g12+a31*g13
15186a4271fSThilina Rathnayake
15286a4271fSThilina Rathnayake        scl = u3(i)/jacmq
15386a4271fSThilina Rathnayake
15486a4271fSThilina RathnayakeC       Geometric factors
15586a4271fSThilina Rathnayake        v1(i+0*q) = scl*(g11*g11+g12*g12+g13*g13) ! Grr
15686a4271fSThilina Rathnayake        v1(i+1*q) = scl*(g11*g21+g12*g22+g13*g23) ! Grs
15786a4271fSThilina Rathnayake        v1(i+2*q) = scl*(g11*g31+g12*g32+g13*g33) ! Grt
15886a4271fSThilina Rathnayake        v1(i+3*q) = scl*(g21*g21+g22*g22+g23*g23) ! Gss
15986a4271fSThilina Rathnayake        v1(i+4*q) = scl*(g21*g31+g22*g32+g23*g33) ! Gst
16086a4271fSThilina Rathnayake        v1(i+5*q) = scl*(g31*g31+g32*g32+g33*g33) ! Gtt
16186a4271fSThilina Rathnayake
16286a4271fSThilina RathnayakeC       RHS
16386a4271fSThilina Rathnayake        v2(i) = u3(i)*jacmq*pi*pi
16486a4271fSThilina Rathnayake     $            *dsin(pi*(c(1)+k(1)*u1(i+0*q)))
16586a4271fSThilina Rathnayake     $            *dsin(pi*(c(2)+k(2)*u1(i+1*q)))
16686a4271fSThilina Rathnayake     $            *dsin(pi*(c(3)+k(3)*u1(i+2*q)))
16786a4271fSThilina Rathnayake     $            *(k(1)*k(1)+k(2)*k(2)+k(3)*k(3))
16886a4271fSThilina Rathnayake
16986a4271fSThilina Rathnayake      enddo
17086a4271fSThilina Rathnayake
17186a4271fSThilina Rathnayake      ierr=0
17286a4271fSThilina Rathnayake      end
17386a4271fSThilina RathnayakeC-----------------------------------------------------------------------
17486a4271fSThilina Rathnayake      subroutine diffusionf(ctx,q,u1,u2,u3,u4,u5,u6,u7,
17586a4271fSThilina Rathnayake     $  u8,u9,u10,u11,u12,u13,u14,u15,u16,v1,v2,v3,v4,v5,v6,v7,v8,
17686a4271fSThilina Rathnayake     $  v9,v10,v11,v12,v13,v14,v15,v16,ierr)
17786a4271fSThilina RathnayakeC     Apply diffusion operator
17886a4271fSThilina RathnayakeC     Input: u1,u2,q                Output: v1,ierr
17986a4271fSThilina Rathnayake      integer q,ierr
18086a4271fSThilina Rathnayake      real*8 ctx(1)
18186a4271fSThilina Rathnayake      real*8 u1(3*q)
18286a4271fSThilina Rathnayake      real*8 u2(6*q)
18386a4271fSThilina Rathnayake      real*8 v1(3*q)
18486a4271fSThilina Rathnayake
185*ee07ded2SValeria BarraC     Quadrature Point Loop
18686a4271fSThilina Rathnayake      do i=1,q
18786a4271fSThilina Rathnayake        v1(i+0*q)=
18886a4271fSThilina Rathnayake     $     u2(i+0*q)*u1(i)+u2(i+1*q)*u1(i+q)+u2(i+2*q)*u1(i+2*q)
18986a4271fSThilina Rathnayake        v1(i+1*q)=
19086a4271fSThilina Rathnayake     $     u2(i+1*q)*u1(i)+u2(i+3*q)*u1(i+q)+u2(i+4*q)*u1(i+2*q)
19186a4271fSThilina Rathnayake        v1(i+2*q)=
19286a4271fSThilina Rathnayake     $     u2(i+2*q)*u1(i)+u2(i+4*q)*u1(i+q)+u2(i+5*q)*u1(i+2*q)
19386a4271fSThilina Rathnayake      enddo
19486a4271fSThilina Rathnayake
19586a4271fSThilina Rathnayake      ierr=0
19686a4271fSThilina Rathnayake      end
19786a4271fSThilina RathnayakeC-----------------------------------------------------------------------
19886a4271fSThilina Rathnayake      subroutine set_h2_as_rhoJac_GL(h2,bmq,nxq)
19986a4271fSThilina RathnayakeC     Set h2 as rhoJac
20086a4271fSThilina RathnayakeC     Input: bmq,nxq                Output: h2
20186a4271fSThilina Rathnayake      include 'SIZE'
20286a4271fSThilina Rathnayake      real*8 h2(1),bmq(1)
20386a4271fSThilina Rathnayake
20486a4271fSThilina Rathnayake      common /ctmp77/ wd(lxd),zd(lxd)
20586a4271fSThilina Rathnayake      integer e,i,L
20686a4271fSThilina Rathnayake
20786a4271fSThilina Rathnayake      call zwgl(zd,wd,nxq)  ! nxq = number of points
20886a4271fSThilina Rathnayake
20986a4271fSThilina Rathnayake      q = 1.0               ! Later, this can be a function of position...
21086a4271fSThilina Rathnayake
21186a4271fSThilina Rathnayake      L = 0
21286a4271fSThilina Rathnayake      do e=1,lelt
21386a4271fSThilina Rathnayake      do i=1,nxq**ldim
21486a4271fSThilina Rathnayake         L=L+1
21586a4271fSThilina Rathnayake         h2(L) = q*bmq(L)
21686a4271fSThilina Rathnayake      enddo
21786a4271fSThilina Rathnayake      enddo
21886a4271fSThilina Rathnayake
21986a4271fSThilina Rathnayake      return
22086a4271fSThilina Rathnayake      end
22186a4271fSThilina RathnayakeC-----------------------------------------------------------------------
22286a4271fSThilina Rathnayake      subroutine dist_fld_h1(e)
22386a4271fSThilina RathnayakeC     Set distance initial condition for BP1
22486a4271fSThilina RathnayakeC     Input:                        Output: e
22586a4271fSThilina Rathnayake      include 'SIZE'
22686a4271fSThilina Rathnayake      include 'TOTAL'
22786a4271fSThilina Rathnayake      real*8 x,y,z
22886a4271fSThilina Rathnayake      real*8 e(1)
22986a4271fSThilina Rathnayake
23086a4271fSThilina Rathnayake      n=lx1*ly1*lz1*nelt
23186a4271fSThilina Rathnayake
23286a4271fSThilina Rathnayake      do i=1,n
23386a4271fSThilina Rathnayake        x=xm1(i,1,1,1)
23486a4271fSThilina Rathnayake        y=ym1(i,1,1,1)
23586a4271fSThilina Rathnayake        z=zm1(i,1,1,1)
23686a4271fSThilina Rathnayake
23786a4271fSThilina Rathnayake        e(i) = dsqrt(x*x+y*y+z*z)
23886a4271fSThilina Rathnayake      enddo
23986a4271fSThilina Rathnayake
24086a4271fSThilina Rathnayake      call dsavg(e)  ! This is requisite for random fields
24186a4271fSThilina Rathnayake
24286a4271fSThilina Rathnayake      return
24386a4271fSThilina Rathnayake      end
24486a4271fSThilina RathnayakeC-----------------------------------------------------------------------
24586a4271fSThilina Rathnayake      subroutine sin_fld_h1(e)
24686a4271fSThilina RathnayakeC     Set sine initial condition for BP3
24786a4271fSThilina RathnayakeC     Input:                        Output: e
24886a4271fSThilina Rathnayake      include 'SIZE'
24986a4271fSThilina Rathnayake      include 'TOTAL'
25086a4271fSThilina Rathnayake      real*8 x,y,z
25186a4271fSThilina Rathnayake      real*8 e(1)
25286a4271fSThilina Rathnayake      real*8 c(3),k(3)
25386a4271fSThilina Rathnayake
25486a4271fSThilina Rathnayake      n=lx1*ly1*lz1*nelt
25586a4271fSThilina Rathnayake      pi = 3.14159265358979323846
25686a4271fSThilina Rathnayake
25786a4271fSThilina Rathnayake      c(1)=0.
25886a4271fSThilina Rathnayake      c(2)=1.
25986a4271fSThilina Rathnayake      c(3)=2.
26086a4271fSThilina Rathnayake      k(1)=1.
26186a4271fSThilina Rathnayake      k(2)=2.
26286a4271fSThilina Rathnayake      k(3)=3.
26386a4271fSThilina Rathnayake
26486a4271fSThilina Rathnayake      do i=1,n
26586a4271fSThilina Rathnayake        x=xm1(i,1,1,1)
26686a4271fSThilina Rathnayake        y=ym1(i,1,1,1)
26786a4271fSThilina Rathnayake        z=zm1(i,1,1,1)
26886a4271fSThilina Rathnayake
26986a4271fSThilina Rathnayake        e(i) = dsin(pi*(c(1)+k(1)*x))
27086a4271fSThilina Rathnayake     &        *dsin(pi*(c(2)+k(2)*y))
27186a4271fSThilina Rathnayake     &        *dsin(pi*(c(3)+k(3)*z))
27286a4271fSThilina Rathnayake
27386a4271fSThilina Rathnayake      enddo
27486a4271fSThilina Rathnayake
27586a4271fSThilina Rathnayake      call dsavg(e)  ! This is requisite for random fields
27686a4271fSThilina Rathnayake
27786a4271fSThilina Rathnayake      return
27886a4271fSThilina Rathnayake      end
27986a4271fSThilina RathnayakeC-----------------------------------------------------------------------
28086a4271fSThilina Rathnayake      subroutine uservp(ix,iy,iz,eg) ! set variable properties
28186a4271fSThilina Rathnayake      include 'SIZE'
28286a4271fSThilina Rathnayake      include 'TOTAL'
28386a4271fSThilina Rathnayake      include 'NEKUSE'
28486a4271fSThilina Rathnayake      integer e,f,eg
28586a4271fSThilina RathnayakeC     e = gllel(eg)
28686a4271fSThilina Rathnayake
28786a4271fSThilina Rathnayake      udiff  = 0.0
28886a4271fSThilina Rathnayake      utrans = 0.0
28986a4271fSThilina Rathnayake
29086a4271fSThilina Rathnayake      return
29186a4271fSThilina Rathnayake      end
29286a4271fSThilina RathnayakeC-----------------------------------------------------------------------
29386a4271fSThilina Rathnayake      subroutine userf(ix,iy,iz,eg) ! set acceleration term
29486a4271fSThilina RathnayakeC
29586a4271fSThilina RathnayakeC     Note: this is an acceleration term, NOT a force!
29686a4271fSThilina RathnayakeC     Thus, ffx will subsequently be multiplied by rho(x,t).
29786a4271fSThilina RathnayakeC
29886a4271fSThilina Rathnayake      include 'SIZE'
29986a4271fSThilina Rathnayake      include 'TOTAL'
30086a4271fSThilina Rathnayake      include 'NEKUSE'
30186a4271fSThilina Rathnayake      integer e,f,eg
30286a4271fSThilina RathnayakeC     e = gllel(eg)
30386a4271fSThilina Rathnayake
30486a4271fSThilina Rathnayake      ffx = 0.0
30586a4271fSThilina Rathnayake      ffy = 0.0
30686a4271fSThilina Rathnayake      ffz = 0.0
30786a4271fSThilina Rathnayake
30886a4271fSThilina Rathnayake      return
30986a4271fSThilina Rathnayake      end
31086a4271fSThilina RathnayakeC-----------------------------------------------------------------------
31186a4271fSThilina Rathnayake      subroutine userq(i,j,k,eg) ! set source term
31286a4271fSThilina Rathnayake      include 'SIZE'
31386a4271fSThilina Rathnayake      include 'TOTAL'
31486a4271fSThilina Rathnayake      include 'NEKUSE'
31586a4271fSThilina Rathnayake      integer e,f,eg
31686a4271fSThilina Rathnayake      e = gllel(eg)
31786a4271fSThilina Rathnayake
31886a4271fSThilina Rathnayake      qvol   = 0
31986a4271fSThilina Rathnayake
32086a4271fSThilina Rathnayake      return
32186a4271fSThilina Rathnayake      end
32286a4271fSThilina RathnayakeC-----------------------------------------------------------------------
32386a4271fSThilina Rathnayake      subroutine userbc(ix,iy,iz,f,eg) ! set up boundary conditions
32486a4271fSThilina RathnayakeC     NOTE ::: This subroutine MAY NOT be called by every process
32586a4271fSThilina Rathnayake      include 'SIZE'
32686a4271fSThilina Rathnayake      include 'TOTAL'
32786a4271fSThilina Rathnayake      include 'NEKUSE'
32886a4271fSThilina Rathnayake      integer e,f,eg
32986a4271fSThilina Rathnayake
33086a4271fSThilina Rathnayake      ux   = 0.0
33186a4271fSThilina Rathnayake      uy   = 0.0
33286a4271fSThilina Rathnayake      uz   = 0.0
33386a4271fSThilina Rathnayake      temp = 0.0
33486a4271fSThilina Rathnayake
33586a4271fSThilina Rathnayake      return
33686a4271fSThilina Rathnayake      end
33786a4271fSThilina RathnayakeC-----------------------------------------------------------------------
33886a4271fSThilina Rathnayake      subroutine useric(ix,iy,iz,eg) ! set up initial conditions
33986a4271fSThilina Rathnayake      include 'SIZE'
34086a4271fSThilina Rathnayake      include 'TOTAL'
34186a4271fSThilina Rathnayake      include 'NEKUSE'
34286a4271fSThilina Rathnayake      integer e,f,eg
34386a4271fSThilina Rathnayake
34486a4271fSThilina Rathnayake      ux   = 0.0
34586a4271fSThilina Rathnayake      uy   = 0.0
34686a4271fSThilina Rathnayake      uz   = 0.0
34786a4271fSThilina Rathnayake      temp = 0.0
34886a4271fSThilina Rathnayake
34986a4271fSThilina Rathnayake      return
35086a4271fSThilina Rathnayake      end
35186a4271fSThilina RathnayakeC-----------------------------------------------------------------------
35286a4271fSThilina Rathnayake      subroutine usrdat   ! This routine to modify element vertices
35386a4271fSThilina Rathnayake      include 'SIZE'
35486a4271fSThilina Rathnayake      include 'TOTAL'
35586a4271fSThilina Rathnayake
35686a4271fSThilina Rathnayake      return
35786a4271fSThilina Rathnayake      end
35886a4271fSThilina RathnayakeC-----------------------------------------------------------------------
35986a4271fSThilina Rathnayake      subroutine usrdat2  ! This routine to modify mesh coordinates
36086a4271fSThilina Rathnayake      include 'SIZE'
36186a4271fSThilina Rathnayake      include 'TOTAL'
36286a4271fSThilina Rathnayake
36386a4271fSThilina Rathnayake      x0 = 0
36486a4271fSThilina Rathnayake      x1 = 1
36586a4271fSThilina Rathnayake      call rescale_x(xm1,x0,x1)
36686a4271fSThilina Rathnayake      call rescale_x(ym1,x0,x1)
36786a4271fSThilina Rathnayake      call rescale_x(zm1,x0,x1)
36886a4271fSThilina Rathnayake
36986a4271fSThilina Rathnayake      param(59)=1  ! Force Nek to use the "deformed element" formulation
37086a4271fSThilina Rathnayake
37186a4271fSThilina Rathnayake      return
37286a4271fSThilina Rathnayake      end
37386a4271fSThilina RathnayakeC-----------------------------------------------------------------------
37486a4271fSThilina Rathnayake      subroutine usrdat3
37586a4271fSThilina Rathnayake      include 'SIZE'
37686a4271fSThilina Rathnayake      include 'TOTAL'
37786a4271fSThilina Rathnayake
37886a4271fSThilina Rathnayake      return
37986a4271fSThilina Rathnayake      end
38086a4271fSThilina RathnayakeC-----------------------------------------------------------------------
38186a4271fSThilina Rathnayake      subroutine xmask1   (r1,h2,nel)
38286a4271fSThilina RathnayakeC     Apply zero Dirichlet boundary conditions
38386a4271fSThilina RathnayakeC     Input: h2,nel                 Output: r1
38486a4271fSThilina Rathnayake      include 'SIZE'
38586a4271fSThilina Rathnayake      include 'TOTAL'
38686a4271fSThilina Rathnayake      real*8 r1(1),h2(1)
38786a4271fSThilina Rathnayake
38886a4271fSThilina Rathnayake      n=nx1*ny1*nz1*nel
38986a4271fSThilina Rathnayake      do i=1,n
39086a4271fSThilina Rathnayake         r1(i)=r1(i)*h2(i)
39186a4271fSThilina Rathnayake      enddo
39286a4271fSThilina Rathnayake
39386a4271fSThilina Rathnayake      return
39486a4271fSThilina Rathnayake      end
39586a4271fSThilina RathnayakeC-----------------------------------------------------------------------
39686a4271fSThilina Rathnayake      function glrdif(x,y,n)
39786a4271fSThilina RathnayakeC     Compute Linfty norm of (x-y)
39886a4271fSThilina RathnayakeC     Input: x,y                    Output: n
39986a4271fSThilina Rathnayake      real*8 x(n),y(n)
40086a4271fSThilina Rathnayake
40186a4271fSThilina Rathnayake      dmx=0
40286a4271fSThilina Rathnayake      xmx=0
40386a4271fSThilina Rathnayake      ymx=0
40486a4271fSThilina Rathnayake
40586a4271fSThilina Rathnayake      do i=1,n
40686a4271fSThilina Rathnayake         diff=abs(x(i)-y(i))
40786a4271fSThilina Rathnayake         dmx =max(dmx,diff)
40886a4271fSThilina Rathnayake         xmx =max(xmx,x(i))
40986a4271fSThilina Rathnayake         ymx =max(ymx,y(i))
41086a4271fSThilina Rathnayake      enddo
41186a4271fSThilina Rathnayake
41286a4271fSThilina Rathnayake      xmx = max(xmx,ymx)
41386a4271fSThilina Rathnayake      dmx = glmax(dmx,1) ! max across processors
41486a4271fSThilina Rathnayake      xmx = glmax(xmx,1)
41586a4271fSThilina Rathnayake
41686a4271fSThilina Rathnayake      if (xmx.gt.0) then
41786a4271fSThilina Rathnayake         glrdif = dmx/xmx
41886a4271fSThilina Rathnayake      else
41986a4271fSThilina Rathnayake         glrdif = -dmx   ! Negative indicates something strange
42086a4271fSThilina Rathnayake      endif
42186a4271fSThilina Rathnayake
42286a4271fSThilina Rathnayake      return
42386a4271fSThilina Rathnayake      end
42486a4271fSThilina RathnayakeC-----------------------------------------------------------------------
42586a4271fSThilina Rathnayake      subroutine loc_grad3(ur,us,ut,u,N,D,Dt)
42686a4271fSThilina RathnayakeC     3D transpose of local gradient
42786a4271fSThilina RathnayakeC     Input: u,N,D,Dt               Output: ur,us,ut
42886a4271fSThilina Rathnayake      real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N)
42986a4271fSThilina Rathnayake      real*8 u (0:N,0:N,0:N)
43086a4271fSThilina Rathnayake      real*8 D (0:N,0:N),Dt(0:N,0:N)
43186a4271fSThilina Rathnayake
43286a4271fSThilina Rathnayake      m1 = N+1
43386a4271fSThilina Rathnayake      m2 = m1*m1
43486a4271fSThilina Rathnayake
43586a4271fSThilina Rathnayake      call mxm(D ,m1,u(0,0,0),m1,ur,m2)
43686a4271fSThilina Rathnayake      do k=0,N
43786a4271fSThilina Rathnayake         call mxm(u(0,0,k),m1,Dt,m1,us(0,0,k),m1)
43886a4271fSThilina Rathnayake      enddo
43986a4271fSThilina Rathnayake      call mxm(u(0,0,0),m2,Dt,m1,ut,m1)
44086a4271fSThilina Rathnayake
44186a4271fSThilina Rathnayake      return
44286a4271fSThilina Rathnayake      end
44386a4271fSThilina Rathnayakec-----------------------------------------------------------------------
44486a4271fSThilina Rathnayake      subroutine loc_grad3t(u,ur,us,ut,N,D,Dt,w)
44586a4271fSThilina RathnayakeC     3D transpose of local gradient
44686a4271fSThilina RathnayakeC     Input: ur,us,ut,N,D,Dt        Output: u
44786a4271fSThilina Rathnayake       real*8 u (0:N,0:N,0:N)
44886a4271fSThilina Rathnayake       real*8 ur(0:N,0:N,0:N),us(0:N,0:N,0:N),ut(0:N,0:N,0:N)
44986a4271fSThilina Rathnayake       real*8 D (0:N,0:N),Dt(0:N,0:N)
45086a4271fSThilina Rathnayake       real*8 w (0:N,0:N,0:N)
45186a4271fSThilina Rathnayake
45286a4271fSThilina Rathnayake       m1 = N+1
45386a4271fSThilina Rathnayake       m2 = m1*m1
45486a4271fSThilina Rathnayake       m3 = m1*m1*m1
45586a4271fSThilina Rathnayake
45686a4271fSThilina Rathnayake       call mxm(Dt,m1,ur,m1,u(0,0,0),m2)
45786a4271fSThilina Rathnayake       do k=0,N
45886a4271fSThilina Rathnayake          call mxm(us(0,0,k),m1,D ,m1,w(0,0,k),m1)
45986a4271fSThilina Rathnayake       enddo
46086a4271fSThilina Rathnayake       call add2(u(0,0,0),w,m3)
46186a4271fSThilina Rathnayake       call mxm(ut,m2,D ,m1,w,m1)
46286a4271fSThilina Rathnayake       call add2(u(0,0,0),w,m3)
46386a4271fSThilina Rathnayake
46486a4271fSThilina Rathnayake      return
46586a4271fSThilina Rathnayake      end
46686a4271fSThilina RathnayakeC-----------------------------------------------------------------------
46786a4271fSThilina Rathnayake      subroutine geodatq(gf,bmq,w3mq,nxq)
46886a4271fSThilina RathnayakeC     Routine to generate elemental geometric matrices on mesh 1
46986a4271fSThilina RathnayakeC     (Gauss-Legendre Lobatto mesh).
47086a4271fSThilina Rathnayake      include 'SIZE'
47186a4271fSThilina Rathnayake      include 'TOTAL'
47286a4271fSThilina Rathnayake
47386a4271fSThilina Rathnayake      parameter (lg=3+3*(ldim-2),lzq=lx1+1,lxyd=lzq**ldim)
47486a4271fSThilina Rathnayake
47586a4271fSThilina Rathnayake      real*8 gf(lg,nxq**ldim,lelt),bmq(nxq**ldim,lelt),w3mq(nxq,nxq,nxq)
47686a4271fSThilina Rathnayake
47786a4271fSThilina Rathnayake      common /ctmp1/ xr(lxyd),xs(lxyd),xt(lxyd)
47886a4271fSThilina Rathnayake      common /sxrns/ yr(lxyd),ys(lxyd),yt(lxyd)
47986a4271fSThilina Rathnayake     $ ,             zr(lxyd),zs(lxyd),zt(lxyd)
48086a4271fSThilina Rathnayake
48186a4271fSThilina Rathnayake      common /ctmp77/ wd(lxd),zd(lxd)
48286a4271fSThilina Rathnayake      common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq)
48386a4271fSThilina Rathnayake
48486a4271fSThilina Rathnayake      integer e
48586a4271fSThilina Rathnayake      real*8 tmp(lxyd)
48686a4271fSThilina Rathnayake      real*8 a11,a12,a13,a21,a22,a23,a31,a32,a33
48786a4271fSThilina Rathnayake      real*8 g11,g12,g13,g21,g22,g23,g31,g32,g33
48886a4271fSThilina Rathnayake      real*8 jacmq
48986a4271fSThilina Rathnayake
49086a4271fSThilina Rathnayake      if (nxq.gt.lzq) call exitti('ABORT: recompile with lzq=$',nxq)
49186a4271fSThilina Rathnayake
49286a4271fSThilina Rathnayake      call zwgl    (zd,wd,lzq)                            ! nxq = number of points
49386a4271fSThilina Rathnayake      call gen_dgl (dxmq,dxtmq,lzq,lzq,tmp)
49486a4271fSThilina Rathnayake
49586a4271fSThilina Rathnayake      do k=1,nxq
49686a4271fSThilina Rathnayake      do j=1,nxq
49786a4271fSThilina Rathnayake      do i=1,nxq
49886a4271fSThilina Rathnayake         w3mq(i,j,k) = wd(i)*wd(j)*wd(k)
49986a4271fSThilina Rathnayake      enddo
50086a4271fSThilina Rathnayake      enddo
50186a4271fSThilina Rathnayake      enddo
50286a4271fSThilina Rathnayake
50386a4271fSThilina Rathnayake      nxyzq = nxq**ldim
50486a4271fSThilina Rathnayake      nxqm1 = lzq-1
50586a4271fSThilina Rathnayake
50686a4271fSThilina Rathnayake      do e=1,nelt
50786a4271fSThilina Rathnayake         call intp_rstd (tmp,xm1(1,1,1,e),lx1,lzq,if3d,0) ! 0-->Fwd interpolation
50886a4271fSThilina Rathnayake         call loc_grad3 (xr,xs,xt,tmp,nxqm1,dxmq,dxtmq)
50986a4271fSThilina Rathnayake
51086a4271fSThilina Rathnayake         call intp_rstd (tmp,ym1(1,1,1,e),lx1,lzq,if3d,0)
51186a4271fSThilina Rathnayake         call loc_grad3 (yr,ys,yt,tmp,nxqm1,dxmq,dxtmq)
51286a4271fSThilina Rathnayake
51386a4271fSThilina Rathnayake         call intp_rstd (tmp,zm1(1,1,1,e),lx1,lzq,if3d,0)
51486a4271fSThilina Rathnayake         call loc_grad3 (zr,zs,zt,tmp,nxqm1,dxmq,dxtmq)
51586a4271fSThilina Rathnayake
51686a4271fSThilina Rathnayake         do i=1,nxyzq
51786a4271fSThilina Rathnayake            a11 = xr(i)
51886a4271fSThilina Rathnayake            a12 = xs(i)
51986a4271fSThilina Rathnayake            a13 = xt(i)
52086a4271fSThilina Rathnayake
52186a4271fSThilina Rathnayake            a21 = yr(i)
52286a4271fSThilina Rathnayake            a22 = ys(i)
52386a4271fSThilina Rathnayake            a23 = yt(i)
52486a4271fSThilina Rathnayake
52586a4271fSThilina Rathnayake            a31 = zr(i)
52686a4271fSThilina Rathnayake            a32 = zs(i)
52786a4271fSThilina Rathnayake            a33 = zt(i)
52886a4271fSThilina Rathnayake
52986a4271fSThilina Rathnayake            g11 = (a22*a33-a23*a32)
53086a4271fSThilina Rathnayake            g12 = (a13*a32-a33*a12)
53186a4271fSThilina Rathnayake            g13 = (a12*a23-a22*a13)
53286a4271fSThilina Rathnayake
53386a4271fSThilina Rathnayake            g21 = (a23*a31-a21*a33)
53486a4271fSThilina Rathnayake            g22 = (a11*a33-a31*a13)
53586a4271fSThilina Rathnayake            g23 = (a13*a21-a23*a11)
53686a4271fSThilina Rathnayake
53786a4271fSThilina Rathnayake            g31 = (a21*a32-a22*a31)
53886a4271fSThilina Rathnayake            g32 = (a12*a31-a32*a11)
53986a4271fSThilina Rathnayake            g33 = (a11*a22-a21*a12)
54086a4271fSThilina Rathnayake
54186a4271fSThilina Rathnayake            jacmq = a11*g11+a21*g12+a31*g13
54286a4271fSThilina Rathnayake
54386a4271fSThilina Rathnayake            bmq(i,e)  = w3mq(i,1,1)*jacmq
54486a4271fSThilina Rathnayake            scale     = w3mq(i,1,1)/jacmq
54586a4271fSThilina Rathnayake
54686a4271fSThilina Rathnayake            gf(1,i,e) = scale*(g11*g11+g12*g12+g13*g13) ! Grr
54786a4271fSThilina Rathnayake            gf(2,i,e) = scale*(g11*g21+g12*g22+g13*g23) ! Grs
54886a4271fSThilina Rathnayake            gf(3,i,e) = scale*(g11*g31+g12*g32+g13*g33) ! Grt
54986a4271fSThilina Rathnayake            gf(4,i,e) = scale*(g21*g21+g22*g22+g23*g23) ! Gss
55086a4271fSThilina Rathnayake            gf(5,i,e) = scale*(g21*g31+g22*g32+g23*g33) ! Gst
55186a4271fSThilina Rathnayake            gf(6,i,e) = scale*(g31*g31+g32*g32+g33*g33) ! Gtt
55286a4271fSThilina Rathnayake
55386a4271fSThilina Rathnayake         enddo
55486a4271fSThilina Rathnayake      enddo
55586a4271fSThilina Rathnayake
55686a4271fSThilina Rathnayake      return
55786a4271fSThilina Rathnayake      end
55886a4271fSThilina RathnayakeC-----------------------------------------------------------------------
55986a4271fSThilina Rathnayake      subroutine setprecn_bp1 (d,h1,h2)
56086a4271fSThilina RathnayakeC     Generate diagonal preconditioner for Helmholtz operator
56186a4271fSThilina RathnayakeC     Input: h1,h2                  Output: d
56286a4271fSThilina Rathnayake      include 'SIZE'
56386a4271fSThilina Rathnayake      include 'TOTAL'
56486a4271fSThilina Rathnayake
56586a4271fSThilina Rathnayake      parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2))
56686a4271fSThilina Rathnayake
56786a4271fSThilina Rathnayake      real*8    d(lx1,ly1,lz1,lelt),h1(lxyz,lelt),h2(lxyz,lelt)
56886a4271fSThilina Rathnayake      integer e
56986a4271fSThilina Rathnayake
57086a4271fSThilina Rathnayake      real*8         gf(lg,lx1,ly1,lz1,lelt) ! Equivalence new gf() data
57186a4271fSThilina Rathnayake      equivalence (gf,g1m1)                  ! layout to g1m1...g6m1
57286a4271fSThilina Rathnayake
57386a4271fSThilina Rathnayake      real*8 ysm1(ly1)
57486a4271fSThilina Rathnayake
57586a4271fSThilina Rathnayake      nel   = nelfld(ifield)
57686a4271fSThilina Rathnayake      n     = nel*lx1*ly1*lz1
57786a4271fSThilina Rathnayake      nxyz  = lx1*ly1*lz1
57886a4271fSThilina Rathnayake
57986a4271fSThilina Rathnayake      call copy    (d,bm1,n)   ! Mass matrix preconditioning full mass matrix
58086a4271fSThilina Rathnayake      call dssum   (d,nx1,ny1,nz1)
58186a4271fSThilina Rathnayake      call invcol1 (d,n)
58286a4271fSThilina Rathnayake      return
58386a4271fSThilina Rathnayake
58486a4271fSThilina Rathnayake      call dsset(lx1,ly1,lz1)
58586a4271fSThilina Rathnayake
58686a4271fSThilina Rathnayake      do 1000 e=1,nel
58786a4271fSThilina Rathnayake
58886a4271fSThilina Rathnayake        call rzero(d(1,1,1,e),nxyz)
58986a4271fSThilina Rathnayake
59086a4271fSThilina Rathnayake        do 320 iz=1,lz1
59186a4271fSThilina Rathnayake         do 320 iy=1,ly1
59286a4271fSThilina Rathnayake         do 320 ix=1,lx1
59386a4271fSThilina Rathnayake         do 320 iq=1,lx1
59486a4271fSThilina Rathnayake           d(ix,iy,iz,e) = d(ix,iy,iz,e)
59586a4271fSThilina Rathnayake     $                   + gf(1,iq,iy,iz,e) * dxm1(iq,ix)**2
59686a4271fSThilina Rathnayake     $                   + gf(2,ix,iq,iz,e) * dxm1(iq,iy)**2
59786a4271fSThilina Rathnayake     $                   + gf(3,ix,iy,iq,e) * dxm1(iq,iz)**2
59886a4271fSThilina Rathnayake  320    continue
59986a4271fSThilina RathnayakeC
60086a4271fSThilina RathnayakeC        Add cross terms if element is deformed.
60186a4271fSThilina RathnayakeC
60286a4271fSThilina Rathnayake         if (lxyz.gt.0) then
60386a4271fSThilina Rathnayake
60486a4271fSThilina Rathnayake           do i2=1,ly1,ly1-1
60586a4271fSThilina Rathnayake           do i1=1,lx1,lx1-1
60686a4271fSThilina Rathnayake              d(1,i1,i2,e) = d(1,i1,i2,e)
60786a4271fSThilina Rathnayake     $            + gf(4,1,i1,i2,e) * dxtm1(1,1)*dytm1(i1,i1)
60886a4271fSThilina Rathnayake     $            + gf(5,1,i1,i2,e) * dxtm1(1,1)*dztm1(i2,i2)
60986a4271fSThilina Rathnayake              d(lx1,i1,i2,e) = d(lx1,i1,i2,e)
61086a4271fSThilina Rathnayake     $            + gf(4,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dytm1(i1,i1)
61186a4271fSThilina Rathnayake     $            + gf(5,lx1,i1,i2,e) * dxtm1(lx1,lx1)*dztm1(i2,i2)
61286a4271fSThilina Rathnayake              d(i1,1,i2,e) = d(i1,1,i2,e)
61386a4271fSThilina Rathnayake     $            + gf(4,i1,1,i2,e) * dytm1(1,1)*dxtm1(i1,i1)
61486a4271fSThilina Rathnayake     $            + gf(6,i1,1,i2,e) * dytm1(1,1)*dztm1(i2,i2)
61586a4271fSThilina Rathnayake              d(i1,ly1,i2,e) = d(i1,ly1,i2,e)
61686a4271fSThilina Rathnayake     $            + gf(4,i1,ly1,i2,e) * dytm1(ly1,ly1)*dxtm1(i1,i1)
61786a4271fSThilina Rathnayake     $            + gf(6,i1,ly1,i2,e) * dytm1(ly1,ly1)*dztm1(i2,i2)
61886a4271fSThilina Rathnayake              d(i1,i2,1,e) = d(i1,i2,1,e)
61986a4271fSThilina Rathnayake     $            + gf(5,i1,i2,1,e) * dztm1(1,1)*dxtm1(i1,i1)
62086a4271fSThilina Rathnayake     $            + gf(6,i1,i2,1,e) * dztm1(1,1)*dytm1(i2,i2)
62186a4271fSThilina Rathnayake              d(i1,i2,lz1,e) = d(i1,i2,lz1,e)
62286a4271fSThilina Rathnayake     $            + gf(5,i1,i2,lz1,e) * dztm1(lz1,lz1)*dxtm1(i1,i1)
62386a4271fSThilina Rathnayake     $            + gf(6,i1,i2,lz1,e) * dztm1(lz1,lz1)*dytm1(i2,i2)
62486a4271fSThilina Rathnayake
62586a4271fSThilina Rathnayake           enddo
62686a4271fSThilina Rathnayake           enddo
62786a4271fSThilina Rathnayake         endif
62886a4271fSThilina Rathnayake
62986a4271fSThilina Rathnayake        do i=1,lxyz
63086a4271fSThilina Rathnayake           d(i,1,1,e)=d(i,1,1,e)*h1(i,e)+h2(i,e)*bm1(i,1,1,e)
63186a4271fSThilina Rathnayake        enddo
63286a4271fSThilina Rathnayake
63386a4271fSThilina Rathnayake 1000 continue ! element loop
63486a4271fSThilina Rathnayake
63586a4271fSThilina RathnayakeC     If axisymmetric, add a diagonal term in the radial direction (ISD=2)
63686a4271fSThilina Rathnayake
63786a4271fSThilina Rathnayake      if (ifaxis.and.(isd.eq.2)) then
63886a4271fSThilina Rathnayake         do 1200 e=1,nel
63986a4271fSThilina Rathnayake            if (ifrzer(e)) call mxm(ym1(1,1,1,e),lx1,datm1,ly1,ysm1,1)
64086a4271fSThilina Rathnayake            k=0
64186a4271fSThilina Rathnayake            do 1190 j=1,ly1
64286a4271fSThilina Rathnayake            do 1190 i=1,lx1
64386a4271fSThilina Rathnayake               k=k+1
64486a4271fSThilina Rathnayake               if (ym1(i,j,1,e).ne.0.) then
64586a4271fSThilina Rathnayake                  term1 = bm1(i,j,1,e)/ym1(i,j,1,e)**2
64686a4271fSThilina Rathnayake                  if (ifrzer(e)) then
64786a4271fSThilina Rathnayake                     term2 =  wxm1(i)*wam1(1)*dam1(1,j)
64886a4271fSThilina Rathnayake     $                       *jacm1(i,1,1,e)/ysm1(i)
64986a4271fSThilina Rathnayake                  else
65086a4271fSThilina Rathnayake                     term2 = 0.
65186a4271fSThilina Rathnayake                  endif
65286a4271fSThilina Rathnayake                  d(i,j,1,e) = d(i,j,1,e) + h1(k,e)*(term1+term2)
65386a4271fSThilina Rathnayake               endif
65486a4271fSThilina Rathnayake 1190       continue
65586a4271fSThilina Rathnayake 1200    continue
65686a4271fSThilina Rathnayake
65786a4271fSThilina Rathnayake      endif
65886a4271fSThilina Rathnayake      call dssum   (d,nx1,ny1,nz1)
65986a4271fSThilina Rathnayake      call invcol1 (d,n)
66086a4271fSThilina Rathnayake
66186a4271fSThilina Rathnayake      if (nio.eq.0) write(6,1) n,d(1,1,1,1),h1(1,1),h2(1,1),bm1(1,1,1,1)
66286a4271fSThilina Rathnayake   1  format(i9,1p4e12.4,' diag prec')
66386a4271fSThilina Rathnayake
66486a4271fSThilina Rathnayake      return
66586a4271fSThilina Rathnayake      end
66686a4271fSThilina RathnayakeC-----------------------------------------------------------------------
66786a4271fSThilina Rathnayake      subroutine setprecn_bp3 (d,h1,h2)
66886a4271fSThilina RathnayakeC     Generate dummy diagonal preconditioner for Helmholtz operator
66986a4271fSThilina RathnayakeC     Input: h1,h2                  Output: d
67086a4271fSThilina Rathnayake      include 'SIZE'
67186a4271fSThilina Rathnayake      include 'TOTAL'
67286a4271fSThilina Rathnayake
67386a4271fSThilina Rathnayake      parameter (n=lx1*ly1*lz1*lelt)
67486a4271fSThilina Rathnayake      real*8 d(n),h1(n),h2(n)
67586a4271fSThilina Rathnayake
67686a4271fSThilina Rathnayake      call rone (d,n)
67786a4271fSThilina Rathnayake
67886a4271fSThilina Rathnayake      return
67986a4271fSThilina Rathnayake      end
68086a4271fSThilina RathnayakeC-----------------------------------------------------------------------
68186a4271fSThilina Rathnayake      subroutine userchk
68286a4271fSThilina Rathnayake      include 'SIZE'
68386a4271fSThilina Rathnayake      include 'TOTAL'
68486a4271fSThilina Rathnayake
68586a4271fSThilina Rathnayake      integer bp
68686a4271fSThilina Rathnayake
68786a4271fSThilina Rathnayake      call get_bp(bp)
68886a4271fSThilina Rathnayake
68986a4271fSThilina Rathnayake      if (bp==1) then
69086a4271fSThilina Rathnayake        if (istep.gt.0) call bp1
69186a4271fSThilina Rathnayake      elseif (bp==3) then
69286a4271fSThilina Rathnayake        if (istep.gt.0) call bp3
69386a4271fSThilina Rathnayake      else
69486a4271fSThilina Rathnayake        write(6,*) "INVALID BP SPECIFICED"
69586a4271fSThilina Rathnayake      endif
69686a4271fSThilina Rathnayake
69786a4271fSThilina Rathnayake      return
69886a4271fSThilina Rathnayake      end
69986a4271fSThilina RathnayakeC-----------------------------------------------------------------------
70086a4271fSThilina Rathnayake      subroutine bp1
70186a4271fSThilina RathnayakeC     Solution to BP1 using libCEED
70286a4271fSThilina Rathnayake      include 'SIZE'
70386a4271fSThilina Rathnayake      include 'TOTAL'
70486a4271fSThilina Rathnayake      include 'CTIMER'  ! ifsync
70586a4271fSThilina Rathnayake      include 'FDMH1'
70686a4271fSThilina Rathnayake      include 'ceedf.h'
70786a4271fSThilina Rathnayake
70886a4271fSThilina Rathnayake      parameter (lzq=lx1+1)
70986a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
71086a4271fSThilina Rathnayake      common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq)
71186a4271fSThilina Rathnayake
71286a4271fSThilina Rathnayake      parameter (lt=lx1*ly1*lz1*lelt)
71386a4271fSThilina Rathnayake      parameter (ld=lxd*lyd*lzd*lelt)
71486a4271fSThilina Rathnayake      common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt)
71586a4271fSThilina Rathnayake      common /vcrny/ e1(lt)
71686a4271fSThilina Rathnayake      common /vcrvh/ h1(lt),h2(lx*lelt),pap(3)
71786a4271fSThilina Rathnayake      real*8 coords(ldim*lx*lelt)
71886a4271fSThilina Rathnayake
71986a4271fSThilina Rathnayake      logical ifh3
720e2b2c771Svaleria      integer*8 nnode
72186a4271fSThilina Rathnayake      integer ceed,err,test
72286a4271fSThilina Rathnayake      character*64 spec
72386a4271fSThilina Rathnayake
72434d77899SValeria Barra      integer p,q,ncompx,ncompu,enode,lnode
72586a4271fSThilina Rathnayake      integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs
72686a4271fSThilina Rathnayake      integer erstrctu,erstrctx,erstrctw
72786a4271fSThilina Rathnayake      integer basisu,basisx
72886a4271fSThilina Rathnayake      integer qf_mass,qf_setup
72986a4271fSThilina Rathnayake      integer op_mass,op_setup
73086a4271fSThilina Rathnayake      real*8  x,y,z
73186a4271fSThilina Rathnayake      integer*8 offset
73286a4271fSThilina Rathnayake
73386a4271fSThilina Rathnayake      external massf,masssetupf
73486a4271fSThilina Rathnayake
73586a4271fSThilina Rathnayake      ifield = 1
73686a4271fSThilina Rathnayake      nxq    = nx1+1
73786a4271fSThilina Rathnayake      n = nx1*ny1*nz1*nelt
73886a4271fSThilina Rathnayake
73986a4271fSThilina Rathnayake      ifsync = .false.
74086a4271fSThilina Rathnayake
74186a4271fSThilina RathnayakeC     Set up coordinates
74286a4271fSThilina Rathnayake      ii=0
74386a4271fSThilina Rathnayake      do j=0,nelt-1
74486a4271fSThilina Rathnayake      do i=1,lx
74586a4271fSThilina Rathnayake        ii=ii+1
74686a4271fSThilina Rathnayake        x = xm1(ii,1,1,1)
74786a4271fSThilina Rathnayake        y = ym1(ii,1,1,1)
74886a4271fSThilina Rathnayake        z = zm1(ii,1,1,1)
74986a4271fSThilina Rathnayake        coords(i+0*lx+3*j*lx)=x
75086a4271fSThilina Rathnayake        coords(i+1*lx+3*j*lx)=y
75186a4271fSThilina Rathnayake        coords(i+2*lx+3*j*lx)=z
75286a4271fSThilina Rathnayake      enddo
75386a4271fSThilina Rathnayake      enddo
75486a4271fSThilina Rathnayake
75586a4271fSThilina RathnayakeC     Init ceed library
75686a4271fSThilina Rathnayake      call get_spec(spec)
75786a4271fSThilina Rathnayake      call ceedinit(trim(spec)//char(0),ceed,err)
75886a4271fSThilina Rathnayake
75986a4271fSThilina Rathnayake      call get_test(test)
76086a4271fSThilina Rathnayake
76186a4271fSThilina RathnayakeC     Set up Nek geometry data
76286a4271fSThilina Rathnayake      call geodatq       (gf,bmq,w3mq,nxq)         ! Set up gf() arrays
76386a4271fSThilina Rathnayake      call set_h2_as_rhoJac_GL(h2,bmq,nxq)
76486a4271fSThilina Rathnayake
76586a4271fSThilina RathnayakeC     Set up true soln
76686a4271fSThilina Rathnayake      call dist_fld_h1   (e1)
76786a4271fSThilina Rathnayake      call copy          (h1,e1,n)                 ! Save exact soln in h1
76886a4271fSThilina Rathnayake
76986a4271fSThilina RathnayakeC     Set up solver parameters
77086a4271fSThilina Rathnayake      tol       = 1e-10
77186a4271fSThilina Rathnayake      param(22) = tol
77286a4271fSThilina Rathnayake      maxit     = 100
77386a4271fSThilina Rathnayake
77486a4271fSThilina Rathnayake      call nekgsync()
77586a4271fSThilina Rathnayake
77686a4271fSThilina RathnayakeC     Create ceed basis for mesh and computation
77786a4271fSThilina Rathnayake      p=nx1
77886a4271fSThilina Rathnayake      q=p+1
77934d77899SValeria Barra      ncompu=1
78034d77899SValeria Barra      ncompx=ldim
78186a4271fSThilina Rathnayake      call ceedbasiscreatetensorh1lagrange(ceed,ndim,ndim,p,q,
78286a4271fSThilina Rathnayake     $  ceed_gauss,basisx,err)
78334d77899SValeria Barra      call ceedbasiscreatetensorh1lagrange(ceed,ndim,ncompu,p,q,
78486a4271fSThilina Rathnayake     $  ceed_gauss,basisu,err)
78586a4271fSThilina Rathnayake
78686a4271fSThilina RathnayakeC     Create ceed element restrictions for mesh and computation
787e2b2c771Svaleria      enode=p**ldim
78834d77899SValeria Barra      lnode=enode*nelt*ncompu
789e2b2c771Svaleria      call ceedelemrestrictioncreateidentity(ceed,nelt,enode,lnode,
79086a4271fSThilina Rathnayake     $  ldim,erstrctx,err)
791e2b2c771Svaleria      call ceedelemrestrictioncreateidentity(ceed,nelt,enode,lnode,
79234d77899SValeria Barra     $  ncompu,erstrctu,err)
79386a4271fSThilina Rathnayake      call ceedelemrestrictioncreateidentity(ceed,nelt,q**ldim,
79486a4271fSThilina Rathnayake     $  nelt*q**ldim,1,erstrctw,err)
79586a4271fSThilina Rathnayake
79686a4271fSThilina RathnayakeC     Create ceed vectors
797e2b2c771Svaleria      call ceedvectorcreate(ceed,lnode,vec_p1,err)
798e2b2c771Svaleria      call ceedvectorcreate(ceed,lnode,vec_ap1,err)
799e2b2c771Svaleria      call ceedvectorcreate(ceed,lnode,vec_rhs,err)
80086a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err)
80186a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,nelt*q**ldim,vec_qdata,err)
80286a4271fSThilina Rathnayake
80386a4271fSThilina Rathnayake      offset=0
80486a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_coords,ceed_mem_host,
80586a4271fSThilina Rathnayake     $  ceed_use_pointer,coords,offset,err)
80686a4271fSThilina Rathnayake
80786a4271fSThilina RathnayakeC     Create ceed qfunctions for masssetupf and massf
80886a4271fSThilina Rathnayake      call ceedqfunctioncreateinterior(ceed,1,masssetupf,
80986a4271fSThilina Rathnayake     $  __FILE__
81086a4271fSThilina Rathnayake     $  //':masssetupf',qf_setup,err)
81134d77899SValeria Barra      call ceedqfunctionaddinput(qf_setup,'x',ncompx,
81286a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
81334d77899SValeria Barra      call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim,
81486a4271fSThilina Rathnayake     $  ceed_eval_grad,err)
81534d77899SValeria Barra      call ceedqfunctionaddinput(qf_setup,'weight',ncompu,
81686a4271fSThilina Rathnayake     $  ceed_eval_weight,err)
81734d77899SValeria Barra      call ceedqfunctionaddoutput(qf_setup,'rho',ncompu,
81886a4271fSThilina Rathnayake     $  ceed_eval_none,err)
81934d77899SValeria Barra      call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu,
82086a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
82186a4271fSThilina Rathnayake
82286a4271fSThilina Rathnayake      call ceedqfunctioncreateinterior(ceed,1,massf,
82386a4271fSThilina Rathnayake     $  __FILE__
82486a4271fSThilina Rathnayake     $  //':massf',qf_mass,err)
82534d77899SValeria Barra      call ceedqfunctionaddinput(qf_mass,'u',ncompu,
82686a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
82734d77899SValeria Barra      call ceedqfunctionaddinput(qf_mass,'rho',ncompu,
82886a4271fSThilina Rathnayake     $  ceed_eval_none,err)
82934d77899SValeria Barra      call ceedqfunctionaddoutput(qf_mass,'v',ncompu,
83086a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
83186a4271fSThilina Rathnayake
83286a4271fSThilina RathnayakeC     Create ceed operators
83386a4271fSThilina Rathnayake      call ceedoperatorcreate(ceed,qf_setup,
83486a4271fSThilina Rathnayake     $  ceed_null,ceed_null,op_setup,err)
83586a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'x',erstrctx,
83686a4271fSThilina Rathnayake     $  ceed_notranspose,basisx,ceed_vector_active,err)
83786a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'dx',erstrctx,
83886a4271fSThilina Rathnayake     $  ceed_notranspose,basisx,ceed_vector_active,err)
83986a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'weight',erstrctx,
84086a4271fSThilina Rathnayake     $  ceed_notranspose,basisx,ceed_vector_none,err)
84186a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'rho',erstrctw,
84286a4271fSThilina Rathnayake     $  ceed_notranspose,ceed_basis_collocated,
84386a4271fSThilina Rathnayake     $  ceed_vector_active,err)
84486a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'rhs',erstrctu,
84586a4271fSThilina Rathnayake     $  ceed_notranspose,basisu,vec_rhs,err)
84686a4271fSThilina Rathnayake
84786a4271fSThilina Rathnayake      call ceedoperatorcreate(ceed,qf_mass,
84886a4271fSThilina Rathnayake     $  ceed_null,ceed_null,op_mass,err)
84986a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_mass,'u',erstrctu,
85086a4271fSThilina Rathnayake     $  ceed_notranspose,basisu,ceed_vector_active,err)
85186a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_mass,'rho',erstrctw,
85286a4271fSThilina Rathnayake     $  ceed_notranspose,ceed_basis_collocated,
85386a4271fSThilina Rathnayake     $  vec_qdata,err)
85486a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_mass,'v',erstrctu,
85586a4271fSThilina Rathnayake     $  ceed_notranspose,basisu,ceed_vector_active,err)
85686a4271fSThilina Rathnayake
85786a4271fSThilina RathnayakeC     Compute setup data
85886a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_rhs,ceed_mem_host,
85986a4271fSThilina Rathnayake     $  ceed_use_pointer,r1,offset,err)
86086a4271fSThilina Rathnayake      call ceedoperatorapply(op_setup,vec_coords,vec_qdata,
86186a4271fSThilina Rathnayake     $  ceed_request_immediate,err)
86286a4271fSThilina Rathnayake
86386a4271fSThilina RathnayakeC     Set up true RHS
86486a4271fSThilina Rathnayake      call dssum         (r1,nx1,ny1,nz1)          ! r1
86586a4271fSThilina Rathnayake
86686a4271fSThilina RathnayakeC     Set up algebraic RHS with libCEED
86786a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_p1,ceed_mem_host,
86886a4271fSThilina Rathnayake     $  ceed_use_pointer,h1,offset,err)
86986a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_ap1,ceed_mem_host,
87086a4271fSThilina Rathnayake     $  ceed_use_pointer,r2,offset,err)
87186a4271fSThilina Rathnayake      call ceedoperatorapply(op_mass,vec_p1,vec_ap1,
87286a4271fSThilina Rathnayake     $  ceed_request_immediate,err)                ! r2 = A_ceed*h1
87386a4271fSThilina Rathnayake      call dssum         (r2,nx1,ny1,nz1)
87486a4271fSThilina Rathnayake
87586a4271fSThilina RathnayakeC     Set up algebraic RHS with Nek5000
87686a4271fSThilina Rathnayake      call axhm1         (pap,r3,h1,h1,h2,'bp1')   ! r3 = A_nek5k*h1
87786a4271fSThilina Rathnayake      call dssum         (r3,nx1,ny1,nz1)
87886a4271fSThilina Rathnayake
87986a4271fSThilina Rathnayake      call nekgsync()
88086a4271fSThilina Rathnayake
88186a4271fSThilina RathnayakeC     Solve true RHS
88286a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "libCEED true RHS"
88386a4271fSThilina Rathnayake      tstart = dnekclock()
88486a4271fSThilina Rathnayake      call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_mass,
88586a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp1')
88686a4271fSThilina Rathnayake      tstop  = dnekclock()
88786a4271fSThilina Rathnayake
88886a4271fSThilina RathnayakeC     Output
88986a4271fSThilina Rathnayake      telaps = (tstop-tstart)
89086a4271fSThilina Rathnayake      maxits = maxit
89186a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
89286a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
89386a4271fSThilina Rathnayake
89486a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
89586a4271fSThilina Rathnayake        if (maxit>=100) then
89686a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
89786a4271fSThilina Rathnayake        endif
89886a4271fSThilina Rathnayake        if (dabs(er1)>5e-3) then
89986a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
90086a4271fSThilina Rathnayake        endif
90186a4271fSThilina Rathnayake      endif
90286a4271fSThilina Rathnayake
90386a4271fSThilina Rathnayake      nx     = nx1-1
904e2b2c771Svaleria      nnode  = nelgt            ! nnodes
905e2b2c771Svaleria      nnode  = nnode*(nx**ldim) ! nnodes
906e2b2c771Svaleria      nppp   = nnode/np         ! nnodes/proc
90786a4271fSThilina Rathnayake
90805939c60Sjeremylt      dofps  = nnode/telaps     ! DOF/sec - scalar form
90986a4271fSThilina Rathnayake      titers = telaps/maxits    ! time per iteration
91086a4271fSThilina Rathnayake      tppp_s = titers/nppp      ! time per iteraton per local point
91186a4271fSThilina Rathnayake
91286a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
91305939c60Sjeremylt     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
91486a4271fSThilina Rathnayake
91586a4271fSThilina RathnayakeC     Solve libCEED algebraic RHS
91686a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "libCEED algebraic RHS"
91786a4271fSThilina Rathnayake      maxit = 100
91886a4271fSThilina Rathnayake      tstart = dnekclock()
91986a4271fSThilina Rathnayake      call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_mass,
92086a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp1')
92186a4271fSThilina Rathnayake      tstop  = dnekclock()
92286a4271fSThilina Rathnayake
92386a4271fSThilina RathnayakeC     Output
92486a4271fSThilina Rathnayake      telaps = (tstop-tstart)
92586a4271fSThilina Rathnayake      maxits = maxit
92686a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
92786a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
92886a4271fSThilina Rathnayake
92986a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
93086a4271fSThilina Rathnayake        if (maxit>=100) then
93186a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
93286a4271fSThilina Rathnayake        endif
93386a4271fSThilina Rathnayake        if (dabs(er1)>1e-5) then
93486a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
93586a4271fSThilina Rathnayake        endif
93686a4271fSThilina Rathnayake      endif
93786a4271fSThilina Rathnayake
93886a4271fSThilina Rathnayake      nx      = nx1-1
939e2b2c771Svaleria      nnode   = nelgt            ! nnodes
940e2b2c771Svaleria      nnode   = nnode*(nx**ldim) ! nnodes
941e2b2c771Svaleria      nppp    = nnode/np         ! nnodes/proc
94286a4271fSThilina Rathnayake
94305939c60Sjeremylt      dofps   = nnode/telaps     ! DOF/sec - scalar form
94486a4271fSThilina Rathnayake      titers  = telaps/maxits    ! time per iteration
94586a4271fSThilina Rathnayake      tppp_s  = titers/nppp      ! time per iteraton per local point
94686a4271fSThilina Rathnayake
94786a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
94805939c60Sjeremylt     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
94986a4271fSThilina Rathnayake
95086a4271fSThilina RathnayakeC     Solve Nek5000 algebraic RHS
95186a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS"
95286a4271fSThilina Rathnayake      maxit = 100
95386a4271fSThilina Rathnayake      tstart = dnekclock()
95486a4271fSThilina Rathnayake      call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_mass,
95586a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp1')
95686a4271fSThilina Rathnayake      tstop  = dnekclock()
95786a4271fSThilina Rathnayake
95886a4271fSThilina RathnayakeC     Output
95986a4271fSThilina Rathnayake      telaps = (tstop-tstart)
96086a4271fSThilina Rathnayake      maxits = maxit
96186a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
96286a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
96386a4271fSThilina Rathnayake
96486a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
96586a4271fSThilina Rathnayake        if (maxit>=100) then
96686a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
96786a4271fSThilina Rathnayake        endif
96886a4271fSThilina Rathnayake        if (dabs(er1)>1e-5) then
96986a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
97086a4271fSThilina Rathnayake        endif
97186a4271fSThilina Rathnayake      endif
97286a4271fSThilina Rathnayake
97386a4271fSThilina Rathnayake      nx      = nx1-1
974e2b2c771Svaleria      nnode   = nelgt            ! nnodes
975e2b2c771Svaleria      nnode   = nnode*(nx**ldim) ! nnodes
976e2b2c771Svaleria      nppp    = nnode/np         ! nnodes/proc
97786a4271fSThilina Rathnayake
97805939c60Sjeremylt      dofps   = nnode/telaps     ! DOF/sec - scalar form
97986a4271fSThilina Rathnayake      titers  = telaps/maxits    ! time per iteration
98086a4271fSThilina Rathnayake      tppp_s  = titers/nppp      ! time per iteraton per local point
98186a4271fSThilina Rathnayake
98286a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
98305939c60Sjeremylt     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
98486a4271fSThilina Rathnayake
98586a4271fSThilina Rathnayake    1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5)
98686a4271fSThilina Rathnayake    3 format(i3,i9,e12.4,1x,a8,i9)
98786a4271fSThilina Rathnayake
98886a4271fSThilina RathnayakeC     Destroy ceed handles
98986a4271fSThilina Rathnayake      call ceedvectordestroy(vec_p1,err)
99086a4271fSThilina Rathnayake      call ceedvectordestroy(vec_ap1,err)
99186a4271fSThilina Rathnayake      call ceedvectordestroy(vec_rhs,err)
99286a4271fSThilina Rathnayake      call ceedvectordestroy(vec_qdata,err)
99386a4271fSThilina Rathnayake      call ceedvectordestroy(vec_coords,err)
99486a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctu,err)
99586a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctx,err)
99686a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctw,err)
99786a4271fSThilina Rathnayake      call ceedbasisdestroy(basisu,err)
99886a4271fSThilina Rathnayake      call ceedbasisdestroy(basisx,err)
99986a4271fSThilina Rathnayake      call ceedqfunctiondestroy(qf_setup,err)
100086a4271fSThilina Rathnayake      call ceedqfunctiondestroy(qf_mass,err)
100186a4271fSThilina Rathnayake      call ceedoperatordestroy(op_setup,err)
100286a4271fSThilina Rathnayake      call ceedoperatordestroy(op_mass,err)
100386a4271fSThilina Rathnayake      call ceeddestroy(ceed,err)
100486a4271fSThilina Rathnayake
100586a4271fSThilina Rathnayake      return
100686a4271fSThilina Rathnayake      end
100786a4271fSThilina RathnayakeC-----------------------------------------------------------------------
100886a4271fSThilina Rathnayake      subroutine bp3
100986a4271fSThilina RathnayakeC     Solution to BP3 using libCEED
101086a4271fSThilina Rathnayake      include 'SIZE'
101186a4271fSThilina Rathnayake      include 'TOTAL'
101286a4271fSThilina Rathnayake      include 'CTIMER'  ! ifsync
101386a4271fSThilina Rathnayake      include 'FDMH1'
101486a4271fSThilina Rathnayake      include 'ceedf.h'
101586a4271fSThilina Rathnayake
101686a4271fSThilina Rathnayake      parameter (lzq=lx1+1)
101786a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
101886a4271fSThilina Rathnayake      common /bpgfactors/ gf(lg*lq,lelt),bmq(lq,lelt),w3mq(lq)
101986a4271fSThilina Rathnayake
102086a4271fSThilina Rathnayake      parameter (lt=lx1*ly1*lz1*lelt)
102186a4271fSThilina Rathnayake      parameter (ld=lxd*lyd*lzd*lelt)
102286a4271fSThilina Rathnayake      common /vcrns/ u1(lt),r1(lt),r2(lt),r3(lt)
102386a4271fSThilina Rathnayake      common /vcrny/ e1(lt)
102486a4271fSThilina Rathnayake      common /vcrvh/ h1(lt),h2(ld),pap(3)
102586a4271fSThilina Rathnayake      real*8 coords(ldim*lx*lelt)
102686a4271fSThilina Rathnayake
102786a4271fSThilina Rathnayake      logical ifh3
1028e2b2c771Svaleria      integer*8 nnode
102986a4271fSThilina Rathnayake      integer ceed,err,test
103086a4271fSThilina Rathnayake      character*64 spec
103186a4271fSThilina Rathnayake
103234d77899SValeria Barra      integer p,q,ncompx,ncompu,enode,lnode
103386a4271fSThilina Rathnayake      integer vec_p1,vec_ap1,vec_qdata,vec_coords,vec_rhs
103486a4271fSThilina Rathnayake      integer erstrctu,erstrctx,erstrctw
103586a4271fSThilina Rathnayake      integer basisu,basisx
103686a4271fSThilina Rathnayake      integer qf_diffusion,qf_setup
103786a4271fSThilina Rathnayake      integer op_diffusion,op_setup
103886a4271fSThilina Rathnayake      integer ii,i,ngeo
103986a4271fSThilina Rathnayake      real*8  x,y,z
104086a4271fSThilina Rathnayake      integer*8 offset
104186a4271fSThilina Rathnayake
104286a4271fSThilina Rathnayake      external diffusionf,diffsetupf
104386a4271fSThilina Rathnayake
104486a4271fSThilina Rathnayake      ifield = 1
104586a4271fSThilina Rathnayake      nxq    = nx1+1
104686a4271fSThilina Rathnayake      n = nx1*ny1*nz1*nelt
104786a4271fSThilina Rathnayake
104886a4271fSThilina Rathnayake      ifsync = .false.
104986a4271fSThilina Rathnayake
105086a4271fSThilina RathnayakeC     Set up coordinates and mask
105186a4271fSThilina Rathnayake      ii=0
105286a4271fSThilina Rathnayake      do j=0,nelt-1
105386a4271fSThilina Rathnayake      do i=1,lx
105486a4271fSThilina Rathnayake        ii=ii+1
105586a4271fSThilina Rathnayake        x = xm1(ii,1,1,1)
105686a4271fSThilina Rathnayake        y = ym1(ii,1,1,1)
105786a4271fSThilina Rathnayake        z = zm1(ii,1,1,1)
105886a4271fSThilina Rathnayake        coords(i+0*lx+3*j*lx)=x
105986a4271fSThilina Rathnayake        coords(i+1*lx+3*j*lx)=y
106086a4271fSThilina Rathnayake        coords(i+2*lx+3*j*lx)=z
106186a4271fSThilina Rathnayake        if ( x.eq.0.or.x.eq.1
106286a4271fSThilina Rathnayake     $   .or.y.eq.0.or.y.eq.1
106386a4271fSThilina Rathnayake     $   .or.z.eq.0.or.z.eq.1 ) then
106486a4271fSThilina Rathnayake          h2(ii)=0.
106586a4271fSThilina Rathnayake        else
106686a4271fSThilina Rathnayake          h2(ii)=1.
106786a4271fSThilina Rathnayake        endif
106886a4271fSThilina Rathnayake      enddo
106986a4271fSThilina Rathnayake      enddo
107086a4271fSThilina Rathnayake
107186a4271fSThilina RathnayakeC     Init ceed library
107286a4271fSThilina Rathnayake      call get_spec(spec)
107386a4271fSThilina Rathnayake      call ceedinit(trim(spec)//char(0),ceed,err)
107486a4271fSThilina Rathnayake
107586a4271fSThilina Rathnayake      call get_test(test)
107686a4271fSThilina Rathnayake
107786a4271fSThilina RathnayakeC     Set up Nek geometry data
107886a4271fSThilina Rathnayake      call geodatq       (gf,bmq,w3mq,nxq)         ! Set up gf() arrays
107986a4271fSThilina Rathnayake
108086a4271fSThilina RathnayakeC     Set up true soln
108186a4271fSThilina Rathnayake      call sin_fld_h1    (e1)
108286a4271fSThilina Rathnayake      call xmask1        (e1,h2,nelt)
108386a4271fSThilina Rathnayake      call copy          (h1,e1,n)                 ! Save exact soln in h1
108486a4271fSThilina Rathnayake
108586a4271fSThilina RathnayakeC     Set up solver parameters
108686a4271fSThilina Rathnayake      tol       = 1e-10
108786a4271fSThilina Rathnayake      param(22) = tol
108886a4271fSThilina Rathnayake      maxit     = 100
108986a4271fSThilina Rathnayake
109086a4271fSThilina Rathnayake      call nekgsync()
109186a4271fSThilina Rathnayake
109286a4271fSThilina RathnayakeC     Create ceed basis for mesh and computation
109386a4271fSThilina Rathnayake      p=nx1
109486a4271fSThilina Rathnayake      q=p+1
109534d77899SValeria Barra      ncompu=1
109634d77899SValeria Barra      ncompx=ldim
109734d77899SValeria Barra      call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompx,p,q,
109886a4271fSThilina Rathnayake     $  ceed_gauss,basisx,err)
109934d77899SValeria Barra      call ceedbasiscreatetensorh1lagrange(ceed,ldim,ncompu,p,q,
110086a4271fSThilina Rathnayake     $  ceed_gauss,basisu,err)
110186a4271fSThilina Rathnayake
110286a4271fSThilina RathnayakeC     Create ceed element restrictions for mesh and computation
1103e2b2c771Svaleria      enode=p**ldim
110434d77899SValeria Barra      lnode=enode*nelt*ncompu
110586a4271fSThilina Rathnayake      ngeo=(ldim*(ldim+1))/2
1106e2b2c771Svaleria      call ceedelemrestrictioncreateidentity(ceed,nelt,enode,lnode,
110786a4271fSThilina Rathnayake     $  ldim,erstrctx,err)
1108e2b2c771Svaleria      call ceedelemrestrictioncreateidentity(ceed,nelt,enode,lnode,
110934d77899SValeria Barra     $  ncompu,erstrctu,err)
111086a4271fSThilina Rathnayake      call ceedelemrestrictioncreateidentity(ceed,nelt,q**ldim,
111186a4271fSThilina Rathnayake     $  nelt*q**ldim,ngeo,erstrctw,err)
111286a4271fSThilina Rathnayake
111386a4271fSThilina RathnayakeC     Create ceed vectors
1114e2b2c771Svaleria      call ceedvectorcreate(ceed,lnode,vec_p1,err)
1115e2b2c771Svaleria      call ceedvectorcreate(ceed,lnode,vec_ap1,err)
1116e2b2c771Svaleria      call ceedvectorcreate(ceed,lnode,vec_rhs,err)
111786a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,ldim*lx*nelt,vec_coords,err)
111886a4271fSThilina Rathnayake      call ceedvectorcreate(ceed,nelt*ngeo*q**ldim,vec_qdata,err)
111986a4271fSThilina Rathnayake
112086a4271fSThilina Rathnayake      offset=0
112186a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_coords,ceed_mem_host,
112286a4271fSThilina Rathnayake     $  ceed_use_pointer,coords,offset,err)
112386a4271fSThilina Rathnayake
112486a4271fSThilina RathnayakeC     Create ceed qfunctions for diffsetupf and diffusionf
112586a4271fSThilina Rathnayake      call ceedqfunctioncreateinterior(ceed,1,diffsetupf,
112686a4271fSThilina Rathnayake     $  __FILE__
112786a4271fSThilina Rathnayake     $  //':diffsetupf'//char(0),qf_setup,err)
112834d77899SValeria Barra      call ceedqfunctionaddinput(qf_setup,'x',ncompx,
112986a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
113034d77899SValeria Barra      call ceedqfunctionaddinput(qf_setup,'dx',ncompx*ldim,
113186a4271fSThilina Rathnayake     $  ceed_eval_grad,err)
113234d77899SValeria Barra      call ceedqfunctionaddinput(qf_setup,'weight',ncompu,
113386a4271fSThilina Rathnayake     $  ceed_eval_weight,err)
113486a4271fSThilina Rathnayake      call ceedqfunctionaddoutput(qf_setup,'rho',ngeo,
113586a4271fSThilina Rathnayake     $  ceed_eval_none,err)
113634d77899SValeria Barra      call ceedqfunctionaddoutput(qf_setup,'rhs',ncompu,
113786a4271fSThilina Rathnayake     $  ceed_eval_interp,err)
113886a4271fSThilina Rathnayake
113986a4271fSThilina Rathnayake      call ceedqfunctioncreateinterior(ceed,1,diffusionf,
114086a4271fSThilina Rathnayake     $  __FILE__
114186a4271fSThilina Rathnayake     $  //':diffusionf'//char(0),qf_diffusion,err)
114234d77899SValeria Barra      call ceedqfunctionaddinput(qf_diffusion,'u',ncompu*ldim,
114386a4271fSThilina Rathnayake     $  ceed_eval_grad,err)
114486a4271fSThilina Rathnayake      call ceedqfunctionaddinput(qf_diffusion,'rho',ngeo,
114586a4271fSThilina Rathnayake     $  ceed_eval_none,err)
114634d77899SValeria Barra      call ceedqfunctionaddoutput(qf_diffusion,'v',ncompu*ldim,
114786a4271fSThilina Rathnayake     $  ceed_eval_grad,err)
114886a4271fSThilina Rathnayake
114986a4271fSThilina RathnayakeC     Create ceed operators
115086a4271fSThilina Rathnayake      call ceedoperatorcreate(ceed,qf_setup,
115186a4271fSThilina Rathnayake     $  ceed_null,ceed_null,op_setup,err)
115286a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'x',erstrctx,
115386a4271fSThilina Rathnayake     $  ceed_notranspose,basisx,ceed_vector_active,err)
115486a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'dx',erstrctx,
115586a4271fSThilina Rathnayake     $  ceed_notranspose,basisx,ceed_vector_active,err)
115686a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'weight',erstrctx,
115786a4271fSThilina Rathnayake     $  ceed_notranspose,basisx,ceed_vector_none,err)
115886a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'rho',erstrctw,
115986a4271fSThilina Rathnayake     $  ceed_notranspose,ceed_basis_collocated,
116086a4271fSThilina Rathnayake     $  ceed_vector_active,err)
116186a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_setup,'rhs',erstrctu,
116286a4271fSThilina Rathnayake     $  ceed_notranspose,basisu,vec_rhs,err)
116386a4271fSThilina Rathnayake
116486a4271fSThilina Rathnayake      call ceedoperatorcreate(ceed,qf_diffusion,
116586a4271fSThilina Rathnayake     $  ceed_null,ceed_null,op_diffusion,err)
116686a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_diffusion,'u',erstrctu,
116786a4271fSThilina Rathnayake     $  ceed_notranspose,basisu,ceed_vector_active,err)
116886a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_diffusion,'rho',erstrctw,
116986a4271fSThilina Rathnayake     $  ceed_notranspose,ceed_basis_collocated,
117086a4271fSThilina Rathnayake     $  vec_qdata,err)
117186a4271fSThilina Rathnayake      call ceedoperatorsetfield(op_diffusion,'v',erstrctu,
117286a4271fSThilina Rathnayake     $  ceed_notranspose,basisu,ceed_vector_active,err)
117386a4271fSThilina Rathnayake
117486a4271fSThilina RathnayakeC     Compute setup data
117586a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_rhs,ceed_mem_host,
117686a4271fSThilina Rathnayake     $  ceed_use_pointer,r1,offset,err)
117786a4271fSThilina Rathnayake      call ceedoperatorapply(op_setup,vec_coords,vec_qdata,
117886a4271fSThilina Rathnayake     $  ceed_request_immediate,err)
117986a4271fSThilina Rathnayake
118086a4271fSThilina RathnayakeC     Set up true RHS
118186a4271fSThilina Rathnayake      call dssum         (r1,nx1,ny1,nz1)          ! r1
118286a4271fSThilina Rathnayake      call xmask1        (r1,h2,nelt)
118386a4271fSThilina Rathnayake
118486a4271fSThilina RathnayakeC     Set up algebraic RHS with libCEED
118586a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_p1,ceed_mem_host,
118686a4271fSThilina Rathnayake     $  ceed_use_pointer,h1,offset,err)
118786a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_ap1,ceed_mem_host,
118886a4271fSThilina Rathnayake     $  ceed_use_pointer,r2,offset,err)
118986a4271fSThilina Rathnayake      call ceedoperatorapply(op_diffusion,vec_p1,vec_ap1,
119086a4271fSThilina Rathnayake     $  ceed_request_immediate,err)                ! r2 = A_ceed*h1
119186a4271fSThilina Rathnayake      call dssum         (r2,nx1,ny1,nz1)
119286a4271fSThilina Rathnayake      call xmask1        (r2,h2,nelt)
119386a4271fSThilina Rathnayake
119486a4271fSThilina RathnayakeC     Set up algebraic RHS with Nek5000
119586a4271fSThilina Rathnayake      call axhm1         (pap,r3,h1,h1,h2,'bp3')   ! r3 = A_nek5k*h1
119686a4271fSThilina Rathnayake      call dssum         (r3,nx1,ny1,nz1)
119786a4271fSThilina Rathnayake      call xmask1        (r3,h2,nelt)
119886a4271fSThilina Rathnayake
119986a4271fSThilina Rathnayake      call nekgsync()
120086a4271fSThilina Rathnayake
120186a4271fSThilina RathnayakeC     Solve true RHS
120286a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "libCEED true RHS"
120386a4271fSThilina Rathnayake      tstart = dnekclock()
120486a4271fSThilina Rathnayake      call cggos(u1,r1,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
120586a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp3')
120686a4271fSThilina Rathnayake      tstop  = dnekclock()
120786a4271fSThilina Rathnayake
120886a4271fSThilina RathnayakeC     Output
120986a4271fSThilina Rathnayake      telaps = (tstop-tstart)
121086a4271fSThilina Rathnayake      maxits = maxit
121186a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
121286a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
121386a4271fSThilina Rathnayake
121486a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
121586a4271fSThilina Rathnayake        if (maxit>=100) then
121686a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
121786a4271fSThilina Rathnayake        endif
121886a4271fSThilina Rathnayake        if (dabs(er1)>1e-3) then
121986a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
122086a4271fSThilina Rathnayake        endif
122186a4271fSThilina Rathnayake      endif
122286a4271fSThilina Rathnayake
122386a4271fSThilina Rathnayake      nx      = nx1-1
1224e2b2c771Svaleria      nnode   = nelgt            ! nnodes
1225e2b2c771Svaleria      nnode   = nnode*(nx**ldim) ! nnodes
1226e2b2c771Svaleria      nppp    = nnode/np         ! nnodes/proc
122786a4271fSThilina Rathnayake
122805939c60Sjeremylt      dofps   = nnode/telaps     ! DOF/sec - scalar form
122986a4271fSThilina Rathnayake      titers  = telaps/maxits    ! time per iteration
123086a4271fSThilina Rathnayake      tppp_s  = titers/nppp      ! time per iteraton per local point
123186a4271fSThilina Rathnayake
123286a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
123305939c60Sjeremylt     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
123486a4271fSThilina Rathnayake
123586a4271fSThilina RathnayakeC     Solve libCEED algebraic RHS
123686a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "libCEED algebraic RHS"
123786a4271fSThilina Rathnayake      maxit = 100
123886a4271fSThilina Rathnayake      tstart = dnekclock()
123986a4271fSThilina Rathnayake      call cggos(u1,r2,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
124086a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp3')
124186a4271fSThilina Rathnayake      tstop  = dnekclock()
124286a4271fSThilina Rathnayake
124386a4271fSThilina RathnayakeC     Output
124486a4271fSThilina Rathnayake      telaps = (tstop-tstart)
124586a4271fSThilina Rathnayake      maxits = maxit
124686a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
124786a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
124886a4271fSThilina Rathnayake
124986a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
125086a4271fSThilina Rathnayake        if (maxit>=100) then
125186a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
125286a4271fSThilina Rathnayake        endif
125386a4271fSThilina Rathnayake        if (dabs(er1)>1e-9) then
125486a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
125586a4271fSThilina Rathnayake        endif
125686a4271fSThilina Rathnayake      endif
125786a4271fSThilina Rathnayake
125886a4271fSThilina Rathnayake      nx      = nx1-1
1259e2b2c771Svaleria      nnode   = nelgt            ! nnodes
1260e2b2c771Svaleria      nnode   = nnode*(nx**ldim) ! nnodes
1261e2b2c771Svaleria      nppp    = nnode/np         ! nnodes/proc
126286a4271fSThilina Rathnayake
126305939c60Sjeremylt      dofps   = nnode/telaps     ! DOF/sec - scalar form
126486a4271fSThilina Rathnayake      titers  = telaps/maxits    ! time per iteration
126586a4271fSThilina Rathnayake      tppp_s  = titers/nppp      ! time per iteraton per local point
126686a4271fSThilina Rathnayake
126786a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
126805939c60Sjeremylt     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,dofps,titers,tppp_s
126986a4271fSThilina Rathnayake
127086a4271fSThilina RathnayakeC     Solve Nek5000 algebraic RHS
127186a4271fSThilina Rathnayake      if (nid.eq.0) write (6,*) "Nek5000 algebraic RHS"
127286a4271fSThilina Rathnayake      maxit = 100
127386a4271fSThilina Rathnayake      tstart = dnekclock()
127486a4271fSThilina Rathnayake      call cggos(u1,r3,h1,h2,vmult,binvm1,tol,ceed,op_diffusion,
127586a4271fSThilina Rathnayake     $  vec_p1,vec_ap1,maxit,'bp3')
127686a4271fSThilina Rathnayake      tstop  = dnekclock()
127786a4271fSThilina Rathnayake
127886a4271fSThilina RathnayakeC     Output
127986a4271fSThilina Rathnayake      telaps = (tstop-tstart)
128086a4271fSThilina Rathnayake      maxits = maxit
128186a4271fSThilina Rathnayake      er1 = glrdif(u1,e1,n)
128286a4271fSThilina Rathnayake      if (nid.eq.0) write(6,3) lx1,nelgv,er1,' error ',maxit
128386a4271fSThilina Rathnayake
128486a4271fSThilina Rathnayake      if (test.eq.1.and.nid.eq.0) then
128586a4271fSThilina Rathnayake        if (maxit>=100) then
128686a4271fSThilina Rathnayake          write(6,*) "UNCONVERGED CG"
128786a4271fSThilina Rathnayake        endif
128886a4271fSThilina Rathnayake        if (dabs(er1)>1e-9) then
128986a4271fSThilina Rathnayake          write(6,*) "ERROR IS TOO LARGE"
129086a4271fSThilina Rathnayake        endif
129186a4271fSThilina Rathnayake      endif
129286a4271fSThilina Rathnayake
129386a4271fSThilina Rathnayake      nx     = nx1-1
1294e2b2c771Svaleria      nnode   = nelgt           ! nnodes
1295e2b2c771Svaleria      nnode   = nnode*(nx**ldim) ! nnodes
1296e2b2c771Svaleria      nppp   = nnode/np         ! nnodes/proc
129786a4271fSThilina Rathnayake
1298e2b2c771Svaleria      nodepss = nnode/telaps     ! DOF/sec - scalar form
129986a4271fSThilina Rathnayake      titers = telaps/maxits   ! time per iteration
130086a4271fSThilina Rathnayake      tppp_s = titers/nppp     ! time per iteraton per local point
130186a4271fSThilina Rathnayake
130286a4271fSThilina Rathnayake      if (nid.eq.0) write(6,1) 'case scalar:'
1303e2b2c771Svaleria     $ ,np,nx,nelt,nelgt,nnode,nppp,maxits,telaps,nodepss,titers,tppp_s
130486a4271fSThilina Rathnayake
130586a4271fSThilina Rathnayake    1 format(a12,i7,i3,i7,i10,i14,i10,i4,1p4e13.5)
130686a4271fSThilina Rathnayake    3 format(i3,i9,e12.4,1x,a8,i9)
130786a4271fSThilina Rathnayake
130886a4271fSThilina RathnayakeC     Destroy ceed handles
130986a4271fSThilina Rathnayake      call ceedvectordestroy(vec_p1,err)
131086a4271fSThilina Rathnayake      call ceedvectordestroy(vec_ap1,err)
131186a4271fSThilina Rathnayake      call ceedvectordestroy(vec_rhs,err)
131286a4271fSThilina Rathnayake      call ceedvectordestroy(vec_qdata,err)
131386a4271fSThilina Rathnayake      call ceedvectordestroy(vec_coords,err)
131486a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctu,err)
131586a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctx,err)
131686a4271fSThilina Rathnayake      call ceedelemrestrictiondestroy(erstrctw,err)
131786a4271fSThilina Rathnayake      call ceedbasisdestroy(basisu,err)
131886a4271fSThilina Rathnayake      call ceedbasisdestroy(basisx,err)
131986a4271fSThilina Rathnayake      call ceedqfunctiondestroy(qf_setup,err)
132086a4271fSThilina Rathnayake      call ceedqfunctiondestroy(qf_diffusion,err)
132186a4271fSThilina Rathnayake      call ceedoperatordestroy(op_setup,err)
132286a4271fSThilina Rathnayake      call ceedoperatordestroy(op_diffusion,err)
132386a4271fSThilina Rathnayake      call ceeddestroy(ceed,err)
132486a4271fSThilina Rathnayake
132586a4271fSThilina Rathnayake      return
132686a4271fSThilina Rathnayake      end
132786a4271fSThilina RathnayakeC-----------------------------------------------------------------------
132886a4271fSThilina Rathnayake      subroutine cggos(u1,r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,
132986a4271fSThilina Rathnayake     $  vec_ap1,maxit,bpname)
133086a4271fSThilina RathnayakeC     Scalar conjugate gradient iteration for solution of uncoupled
133186a4271fSThilina RathnayakeC     Helmholtz equations
133286a4271fSThilina RathnayakeC     Input: r1,h1,h2,rmult,binv,tin,ceed,ceed_op,vec_p1,vec_ap1,bpname
133386a4271fSThilina RathnayakeC     Output: u1,maxit
133486a4271fSThilina Rathnayake      include 'SIZE'
133586a4271fSThilina Rathnayake      include 'TOTAL'
133686a4271fSThilina Rathnayake      include 'DOMAIN'
133786a4271fSThilina Rathnayake      include 'FDMH1'
133886a4271fSThilina Rathnayake      character*3 bpname
133986a4271fSThilina Rathnayake
134086a4271fSThilina RathnayakeC     INPUT:  rhs1 - rhs
134186a4271fSThilina RathnayakeC             h1   - exact solution
134286a4271fSThilina Rathnayake
134386a4271fSThilina Rathnayake      parameter (lt=lx1*ly1*lz1*lelt)
134486a4271fSThilina Rathnayake      parameter (ld=lxd*lyd*lzd*lelt)
134586a4271fSThilina Rathnayake      real*8 u1(lt),r1(lt),h1(lt),h2(lt)
134686a4271fSThilina Rathnayake      real*8 rmult(1),binv(1)
134786a4271fSThilina Rathnayake      integer ceed,ceed_op,vec_ap1,vec_p1
134886a4271fSThilina Rathnayake      common /scrcg/ dpc(lt),p1(lt),z1(lt)
134986a4271fSThilina Rathnayake      common /scrca/ wv(4),wk(4),rpp1(4),rpp2(4),alph(4),beta(4),pap(4)
135086a4271fSThilina Rathnayake
135186a4271fSThilina Rathnayake      real*8 ap1(lt)
135286a4271fSThilina Rathnayake      equivalence (ap1,z1)
135386a4271fSThilina Rathnayake
135486a4271fSThilina Rathnayake      vol   = volfld(ifield)
135586a4271fSThilina Rathnayake      nel   = nelfld(ifield)
135686a4271fSThilina Rathnayake      nxyz  = lx1*ly1*lz1
135786a4271fSThilina Rathnayake      n     = nxyz*nel
135886a4271fSThilina Rathnayake      nx    = nx1-1                  ! Polynomial order (just for i/o)
135986a4271fSThilina Rathnayake
136086a4271fSThilina Rathnayake      tol=tin
136186a4271fSThilina Rathnayake
136286a4271fSThilina Rathnayake      if(bpname.ne.'bp1') then
136386a4271fSThilina Rathnayake        call setprecn_bp3(dpc,h1,h2) ! Set up diagional pre-conidtioner
136486a4271fSThilina Rathnayake      else
136586a4271fSThilina Rathnayake        call setprecn_bp1(dpc,h1,h2) ! Set up diagional pre-conidtioner
136686a4271fSThilina Rathnayake      endif
136786a4271fSThilina Rathnayake
136886a4271fSThilina Rathnayake      call rzero         (u1,n)      ! Initialize solution
136986a4271fSThilina Rathnayake
137086a4271fSThilina Rathnayake      wv(1)=0
137186a4271fSThilina Rathnayake      do i=1,n
137286a4271fSThilina Rathnayake         s=rmult(i)                  !      -1
137386a4271fSThilina Rathnayake         p1(i)=dpc(i)*r1(i)          ! p = M  r      T
137486a4271fSThilina Rathnayake         wv(1)=wv(1)+s*p1(i)*r1(i)   !              r p
137586a4271fSThilina Rathnayake      enddo
137686a4271fSThilina Rathnayake      call gop(wv(1),wk,'+  ',1)
137786a4271fSThilina Rathnayake      rpp1(1) = wv  (1)
137886a4271fSThilina Rathnayake
137986a4271fSThilina Rathnayake      do 1000 iter=1,maxit
138086a4271fSThilina Rathnayake         call axhm1_ceed (pap,ap1,p1,h1,h2,ceed,ceed_op,
138186a4271fSThilina Rathnayake     $     vec_ap1,vec_p1)
138286a4271fSThilina Rathnayake         call dssum    (ap1,nx1,ny1,nz1)
138386a4271fSThilina Rathnayake         if (bpname.ne.'bp1') call xmask1(ap1,h2,nel)
138486a4271fSThilina Rathnayake
138586a4271fSThilina Rathnayake         call gop      (pap,wk,'+  ',1)
138686a4271fSThilina Rathnayake         alph(1) = rpp1(1)/pap(1)
138786a4271fSThilina Rathnayake
138886a4271fSThilina Rathnayake         do i=1,n
138986a4271fSThilina Rathnayake            u1(i)=u1(i)+alph(1)* p1(i)
139086a4271fSThilina Rathnayake            r1(i)=r1(i)-alph(1)*ap1(i)
139186a4271fSThilina Rathnayake         enddo
139286a4271fSThilina Rathnayake
139386a4271fSThilina RathnayakeC        tolerance check here
139486a4271fSThilina Rathnayake         call rzero(wv,2)
139586a4271fSThilina Rathnayake         do i=1,n
139686a4271fSThilina Rathnayake            wv(1)=wv(1)+r1(i)*r1(i)            ! L2 error estimate
139786a4271fSThilina Rathnayake            z1(i)=dpc(i)*r1(i)                 ! z = M  r
139886a4271fSThilina Rathnayake            wv(2)=wv(2)+rmult(i)*z1(i)*r1(i)   ! r z
139986a4271fSThilina Rathnayake         enddo
140086a4271fSThilina Rathnayake         call gop(wv,wk,'+  ',2)
140186a4271fSThilina Rathnayake
140286a4271fSThilina RathnayakeC        if (nio.eq.0) write(6,1) ifield,istep,iter,nx,(wv(k),k=1,1)
140386a4271fSThilina Rathnayake  1     format(i2,i9,i5,i4,1p1e12.4,' cggos')
140486a4271fSThilina Rathnayake
140586a4271fSThilina Rathnayake         enorm=sqrt(wv(1))
140686a4271fSThilina Rathnayake         if (enorm.lt.tol) then
140786a4271fSThilina Rathnayake            ifin = iter
140886a4271fSThilina Rathnayake            if (nio.eq.0) write(6,3000) istep,ifin,enorm,tol
140986a4271fSThilina Rathnayake            goto 9999
141086a4271fSThilina Rathnayake         endif
141186a4271fSThilina RathnayakeC        if (nio.eq.0) write(6,2) iter,enorm,alph(1),pap(1),'alpha'
141286a4271fSThilina Rathnayake 2      format(i5,1p3e12.4,2x,a5)
141386a4271fSThilina Rathnayake
141486a4271fSThilina Rathnayake         rpp2(1)=rpp1(1)
141586a4271fSThilina Rathnayake         rpp1(1)=wv  (2)
141686a4271fSThilina Rathnayake         beta1  =rpp1(1)/rpp2(1)
141786a4271fSThilina Rathnayake         do i=1,n
141886a4271fSThilina Rathnayake            p1(i)=z1(i) + beta1*p1(i)
141986a4271fSThilina Rathnayake         enddo
142086a4271fSThilina Rathnayake
142186a4271fSThilina Rathnayake 1000 continue
142286a4271fSThilina Rathnayake
142386a4271fSThilina Rathnayake      rbnorm=sqrt(wv(1))
142486a4271fSThilina Rathnayake      if (nio.eq.0) write (6,3001) istep,iter,rbnorm,tol
142586a4271fSThilina Rathnayake      iter = iter-1
142686a4271fSThilina Rathnayake
142786a4271fSThilina Rathnayake 9999 continue
142886a4271fSThilina Rathnayake
142986a4271fSThilina Rathnayake      maxit=iter
143086a4271fSThilina Rathnayake
143186a4271fSThilina Rathnayake 3000 format(i12,1x,'cggo scalar:',i6,1p5e13.4)
143286a4271fSThilina Rathnayake 3001 format(2i6,' Unconverged cggo scalar: rbnorm =',1p2e13.6)
143386a4271fSThilina Rathnayake
143486a4271fSThilina Rathnayake      return
143586a4271fSThilina Rathnayake      end
143686a4271fSThilina RathnayakeC-----------------------------------------------------------------------
143786a4271fSThilina Rathnayake      subroutine axhm1_ceed(pap,ap1,p1,h1,h2,ceed,ceed_op,
143886a4271fSThilina Rathnayake     $  vec_ap1,vec_p1)
143986a4271fSThilina RathnayakeC     Vector conjugate gradient matvec for solution of uncoupled
144086a4271fSThilina RathnayakeC     Helmholtz equations
144186a4271fSThilina RathnayakeC     Input: pap,p1,h1,h2,bpname,ceed,ceed_op,vec_ap1,vec_p1
144286a4271fSThilina RathnayakeC     Output: ap1
144386a4271fSThilina Rathnayake      include 'SIZE'
144486a4271fSThilina Rathnayake      include 'TOTAL'
144586a4271fSThilina Rathnayake      include 'ceedf.h'
144686a4271fSThilina Rathnayake
144786a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2))
144886a4271fSThilina Rathnayake      real*8       gf(lg,lx,lelt)             ! Equivalence new gf() data
144986a4271fSThilina Rathnayake      equivalence (gf,g1m1)                   ! layout to g1m1...g6m1
145086a4271fSThilina Rathnayake
145186a4271fSThilina Rathnayake      real*8   pap(3)
145286a4271fSThilina Rathnayake      real*8   ap1(lx,lelt)
145386a4271fSThilina Rathnayake      real*8    p1(lx,lelt)
145486a4271fSThilina Rathnayake      real*8    h1(lx,lelt),h2(lx,lelt)
145586a4271fSThilina Rathnayake      integer ceed,ceed_op,vec_ap1,vec_p1,err
145686a4271fSThilina Rathnayake      integer i,e
145786a4271fSThilina Rathnayake      integer*8 offset
145886a4271fSThilina Rathnayake
145986a4271fSThilina Rathnayake      offset=0
146086a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_p1,ceed_mem_host,ceed_use_pointer,
146186a4271fSThilina Rathnayake     $  p1,offset,err)
146286a4271fSThilina Rathnayake      call ceedvectorsetarray(vec_ap1,ceed_mem_host,ceed_use_pointer,
146386a4271fSThilina Rathnayake     $  ap1,offset,err)
146486a4271fSThilina Rathnayake
146586a4271fSThilina Rathnayake      call ceedoperatorapply(ceed_op,vec_p1,vec_ap1,
146686a4271fSThilina Rathnayake     $  ceed_request_immediate,err)
146786a4271fSThilina Rathnayake
146886a4271fSThilina Rathnayake      call ceedvectorsyncarray(vec_ap1,ceed_mem_host,err)
146986a4271fSThilina Rathnayake
147086a4271fSThilina Rathnayake      pap(1)=0.
147186a4271fSThilina Rathnayake
147286a4271fSThilina Rathnayake      do e=1,nelt
147386a4271fSThilina Rathnayake         do i=1,lx
147486a4271fSThilina Rathnayake           pap(1)=pap(1)+ap1(i,e)*p1(i,e)
147586a4271fSThilina Rathnayake         enddo
147686a4271fSThilina Rathnayake      enddo
147786a4271fSThilina Rathnayake
147886a4271fSThilina Rathnayake      return
147986a4271fSThilina Rathnayake      end
148086a4271fSThilina RathnayakeC-----------------------------------------------------------------------
148186a4271fSThilina Rathnayake      subroutine ax_e_bp1(w,u,g,h1,h2,b,ju,us,ut)
148286a4271fSThilina RathnayakeC     Local matrix-vector for solution of BP3 (stiffness matrix)
148386a4271fSThilina RathnayakeC     Input: u,g,h1,h2,b,ju,us,ut   Output: w
148486a4271fSThilina Rathnayake      include 'SIZE'
148586a4271fSThilina Rathnayake      include 'TOTAL'
148686a4271fSThilina Rathnayake
148786a4271fSThilina Rathnayake      parameter (lxyz=lx1*ly1*lz1,lg=3+3*(ldim-2))
148886a4271fSThilina Rathnayake      real*8 w(lxyz),u(lxyz),g(lg,lxyz),h1(lxyz),h2(lxyz),b(lxyz)
148986a4271fSThilina Rathnayake      real*8 ju(lxyz),us(lxyz),ut(lxyz)
149086a4271fSThilina Rathnayake
149186a4271fSThilina Rathnayake      nxq = nx1+1 ! Number of quadrature points
149286a4271fSThilina Rathnayake
149386a4271fSThilina Rathnayake      lxyzq = nxq**ldim
149486a4271fSThilina Rathnayake
149586a4271fSThilina Rathnayake      call intp_rstd (ju,u,lx1,nxq,if3d,0) ! 0 --> Fwd interpolation
149686a4271fSThilina Rathnayake      do i=1,lxyzq
149786a4271fSThilina Rathnayake         ju(i)=ju(i)*h2(i) !! h2 must be on the fine grid, w/ quad wts
149886a4271fSThilina Rathnayake      enddo
149986a4271fSThilina Rathnayake      call intp_rstd (w,ju,lx1,nxq,if3d,1) ! 1 --> ju-->u
150086a4271fSThilina Rathnayake
150186a4271fSThilina Rathnayake      return
150286a4271fSThilina Rathnayake      end
150386a4271fSThilina RathnayakeC-----------------------------------------------------------------------
150486a4271fSThilina Rathnayake      subroutine axhm1_bp1(pap,ap1,p1,h1,h2)
150586a4271fSThilina RathnayakeC     Vector conjugate gradient matvec for solution of BP1 (mass matrix)
150686a4271fSThilina RathnayakeC     Input: pap,p1,h1,h2           Output: ap1
150786a4271fSThilina Rathnayake      include 'SIZE'
150886a4271fSThilina Rathnayake      include 'TOTAL'
150986a4271fSThilina Rathnayake
151086a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2))
151186a4271fSThilina Rathnayake      real*8         gf(lg,lx,lelt)             ! Equivalence new gf() data
151286a4271fSThilina Rathnayake      equivalence (gf,g1m1)                     ! layout to g1m1...g6m1
151386a4271fSThilina Rathnayake
151486a4271fSThilina Rathnayake      real*8 pap(3)
151586a4271fSThilina Rathnayake      real*8 ap1(lx,lelt)
151686a4271fSThilina Rathnayake      real*8  p1(lx,lelt)
151786a4271fSThilina Rathnayake      real*8  h1(lx,lelt),h2(lx,lelt)
151886a4271fSThilina Rathnayake
151986a4271fSThilina Rathnayake      real*8 ur(lx),us(lx),ut(lx)
152086a4271fSThilina Rathnayake      common /ctmp1/ ur,us,ut
152186a4271fSThilina Rathnayake
152286a4271fSThilina Rathnayake      integer e
152386a4271fSThilina Rathnayake
152486a4271fSThilina Rathnayake      pap(1)=0.
152586a4271fSThilina Rathnayake
152686a4271fSThilina Rathnayake      k=1
152786a4271fSThilina Rathnayake      nxq = nx1+1
152886a4271fSThilina Rathnayake
152986a4271fSThilina Rathnayake      do e=1,nelt
153086a4271fSThilina Rathnayake
153186a4271fSThilina Rathnayake         call ax_e_bp1(ap1(1,e),p1(1,e),gf(1,1,e),h1(1,e),h2(k,1)
153286a4271fSThilina Rathnayake     $                                          ,bm1(1,1,1,e),ur,us,ut)
153386a4271fSThilina Rathnayake         do i=1,lx
153486a4271fSThilina Rathnayake           pap(1)=pap(1)+ap1(i,e)*p1(i,e)
153586a4271fSThilina Rathnayake         enddo
153686a4271fSThilina Rathnayake         k=k+nxq*nxq*nxq
153786a4271fSThilina Rathnayake
153886a4271fSThilina Rathnayake      enddo
153986a4271fSThilina Rathnayake
154086a4271fSThilina Rathnayake      return
154186a4271fSThilina Rathnayake      end
154286a4271fSThilina RathnayakeC-----------------------------------------------------------------------
154386a4271fSThilina Rathnayake      subroutine ax_e_bp3(w,u,g,ur,us,ut,wk)
154486a4271fSThilina RathnayakeC     Local matrix-vector for solution of BP3 (stiffness matrix)
154586a4271fSThilina RathnayakeC     Input: u,g,ur,us,ut,wk        Output: w
154686a4271fSThilina Rathnayake      include 'SIZE'
154786a4271fSThilina Rathnayake      include 'TOTAL'
154886a4271fSThilina Rathnayake
154986a4271fSThilina Rathnayake      parameter (lzq=lx1+1,lxyz=lx1*lx1*lx1,lxyzq=lzq*lzq*lzq)
155086a4271fSThilina Rathnayake
155186a4271fSThilina Rathnayake      common /ctmp0/ tmp(lxyzq)
155286a4271fSThilina Rathnayake      common /dxmfine/ dxmq(lzq,lzq),dxtmq(lzq,lzq)
155386a4271fSThilina Rathnayake
155486a4271fSThilina Rathnayake      real*8 ur(lxyzq),us(lxyzq),ut(lxyzq),wk(lxyzq)
155586a4271fSThilina Rathnayake      real*8 w(lxyz),u(lxyz),g(2*ldim,lxyzq)
155686a4271fSThilina Rathnayake
155786a4271fSThilina Rathnayake      n = lzq-1
155886a4271fSThilina Rathnayake
155986a4271fSThilina Rathnayake      call intp_rstd  (wk,u,lx1,lzq,if3d,0) ! 0 --> Fwd interpolation
156086a4271fSThilina Rathnayake      call loc_grad3  (ur,us,ut,wk,n,dxmq,dxtmq)
156186a4271fSThilina Rathnayake
156286a4271fSThilina Rathnayake      do i=1,lxyzq
156386a4271fSThilina Rathnayake         wr = g(1,i)*ur(i) + g(2,i)*us(i) + g(3,i)*ut(i)
156486a4271fSThilina Rathnayake         ws = g(2,i)*ur(i) + g(4,i)*us(i) + g(5,i)*ut(i)
156586a4271fSThilina Rathnayake         wt = g(3,i)*ur(i) + g(5,i)*us(i) + g(6,i)*ut(i)
156686a4271fSThilina Rathnayake         ur(i) = wr
156786a4271fSThilina Rathnayake         us(i) = ws
156886a4271fSThilina Rathnayake         ut(i) = wt
156986a4271fSThilina Rathnayake      enddo
157086a4271fSThilina Rathnayake
157186a4271fSThilina Rathnayake      call loc_grad3t (wk,ur,us,ut,n,dxmq,dxtmq,tmp)
157286a4271fSThilina Rathnayake      call intp_rstd  (w,wk,lx1,lzq,if3d,1) ! 1 --> ju-->u
157386a4271fSThilina Rathnayake
157486a4271fSThilina Rathnayake      return
157586a4271fSThilina Rathnayake      end
157686a4271fSThilina RathnayakeC-----------------------------------------------------------------------
157786a4271fSThilina Rathnayake      subroutine axhm1_bp3(pap,ap1,p1,h1,h2)
157886a4271fSThilina RathnayakeC     Vector conjugate gradient matvec for solution of BP3 (stiffness matrix)
157986a4271fSThilina RathnayakeC     Input: pap,p1,h1,h2           Output: ap1
158086a4271fSThilina Rathnayake      include 'SIZE'
158186a4271fSThilina Rathnayake      include 'TOTAL'
158286a4271fSThilina Rathnayake
158386a4271fSThilina Rathnayake      parameter (lzq=lx1+1)
158486a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1,lg=3+3*(ldim-2),lq=lzq**ldim)
158586a4271fSThilina Rathnayake      common /bpgfactors/ gf(lg,lq,lelt),bmq(lq,lelt),w3mq(lq)
158686a4271fSThilina Rathnayake
158786a4271fSThilina Rathnayake      real*8 pap(3)
158886a4271fSThilina Rathnayake      real*8 ap1(lx,lelt)
158986a4271fSThilina Rathnayake      real*8  p1(lx,lelt)
159086a4271fSThilina Rathnayake      real*8  h1(lx,lelt),h2(lx,lelt)
159186a4271fSThilina Rathnayake
159286a4271fSThilina Rathnayake      common /ctmp1/ ur,us,ut,wk
159386a4271fSThilina Rathnayake      real*8 ur(lq),us(lq),ut(lq),wk(lq)
159486a4271fSThilina Rathnayake
159586a4271fSThilina Rathnayake      integer e
159686a4271fSThilina Rathnayake
159786a4271fSThilina Rathnayake      pap(1)=0.
159886a4271fSThilina Rathnayake
159986a4271fSThilina Rathnayake      do e=1,nelt
160086a4271fSThilina Rathnayake
160186a4271fSThilina Rathnayake         call ax_e_bp3(ap1(1,e),p1(1,e),gf(1,1,e),ur,us,ut,wk)
160286a4271fSThilina Rathnayake         do i=1,lx
160386a4271fSThilina Rathnayake           pap(1)=pap(1)+p1(i,e)*ap1(i,e)
160486a4271fSThilina Rathnayake         enddo
160586a4271fSThilina Rathnayake
160686a4271fSThilina Rathnayake      enddo
160786a4271fSThilina Rathnayake
160886a4271fSThilina Rathnayake      return
160986a4271fSThilina Rathnayake      end
161086a4271fSThilina RathnayakeC-----------------------------------------------------------------------
161186a4271fSThilina Rathnayake      subroutine axhm1(pap,ap1,p1,h1,h2,bpname)
161286a4271fSThilina RathnayakeC     Vector conjugate gradient matvec for solution of uncoupled
161386a4271fSThilina RathnayakeC     Helmholtz equations
161486a4271fSThilina RathnayakeC     Input: pap,p1,h1,h2,bpname    Output: ap1
161586a4271fSThilina Rathnayake      include 'SIZE'
161686a4271fSThilina Rathnayake      include 'TOTAL'
161786a4271fSThilina Rathnayake
161886a4271fSThilina Rathnayake      parameter (lx=lx1*ly1*lz1)
161986a4271fSThilina Rathnayake
162086a4271fSThilina Rathnayake      real*8 pap(3),ap1(lx,lelt),p1(lx,lelt)
162186a4271fSThilina Rathnayake      real*8 h1(lx,lelt),h2(lx,lelt)
162286a4271fSThilina Rathnayake
162386a4271fSThilina Rathnayake      character*3 bpname
162486a4271fSThilina Rathnayake
162586a4271fSThilina Rathnayake      if (bpname.eq.'bp1') then
162686a4271fSThilina Rathnayake         call axhm1_bp1(pap,ap1,p1,h1,h2)
162786a4271fSThilina Rathnayake
162886a4271fSThilina Rathnayake      elseif (bpname.eq.'bp3') then
162986a4271fSThilina Rathnayake         call axhm1_bp3(pap,ap1,p1,h1,h2)
163086a4271fSThilina Rathnayake
163186a4271fSThilina Rathnayake      else
163286a4271fSThilina Rathnayake         write(6,*) bpname,' axhm1 bpname error'
163386a4271fSThilina Rathnayake         stop
163486a4271fSThilina Rathnayake
163586a4271fSThilina Rathnayake      endif
163686a4271fSThilina Rathnayake
163786a4271fSThilina Rathnayake      return
163886a4271fSThilina Rathnayake      end
163986a4271fSThilina RathnayakeC-----------------------------------------------------------------------
164086a4271fSThilina Rathnayake      subroutine get_bp(bp)
164186a4271fSThilina RathnayakeC     Get BP to run
164286a4271fSThilina RathnayakeC     Input:                        Output: bp
164386a4271fSThilina Rathnayake      integer i,bp
164486a4271fSThilina Rathnayake      character*64 bpval
164586a4271fSThilina Rathnayake
164686a4271fSThilina Rathnayake      bp=0
164786a4271fSThilina Rathnayake      if(iargc().ge.1) then
164886a4271fSThilina Rathnayake        call getarg(1,bpval)
164986a4271fSThilina Rathnayake      endif
165086a4271fSThilina Rathnayake      if(bpval.eq."bp1") then
165186a4271fSThilina Rathnayake        bp=1
165286a4271fSThilina Rathnayake      elseif(bpval.eq."bp3") then
165386a4271fSThilina Rathnayake        bp=3
165486a4271fSThilina Rathnayake      endif
165586a4271fSThilina Rathnayake
165686a4271fSThilina Rathnayake      return
165786a4271fSThilina Rathnayake      end
165886a4271fSThilina RathnayakeC-----------------------------------------------------------------------
165986a4271fSThilina Rathnayake      subroutine get_spec(spec)
166086a4271fSThilina RathnayakeC     Get CEED backend specification
166186a4271fSThilina RathnayakeC     Input:                        Output: spec
166286a4271fSThilina Rathnayake      integer i
166386a4271fSThilina Rathnayake      character*64 spec
166486a4271fSThilina Rathnayake
166586a4271fSThilina Rathnayake      spec = '/cpu/self'
166686a4271fSThilina Rathnayake      if(iargc().ge.2) then
166786a4271fSThilina Rathnayake        call getarg(2,spec)
166886a4271fSThilina Rathnayake      endif
166986a4271fSThilina Rathnayake
167086a4271fSThilina Rathnayake      return
167186a4271fSThilina Rathnayake      end
167286a4271fSThilina RathnayakeC-----------------------------------------------------------------------
167386a4271fSThilina Rathnayake      subroutine get_test(test)
167486a4271fSThilina RathnayakeC     Get test mode flag
167586a4271fSThilina RathnayakeC     Input:                        Output: test
167686a4271fSThilina Rathnayake      integer i,test
167786a4271fSThilina Rathnayake      character*64 testval
167886a4271fSThilina Rathnayake
167986a4271fSThilina Rathnayake      test=0
168086a4271fSThilina Rathnayake      if(iargc().ge.3) then
168186a4271fSThilina Rathnayake        call getarg(3,testval)
168286a4271fSThilina Rathnayake      endif
168386a4271fSThilina Rathnayake      if(testval.eq."test") then
168486a4271fSThilina Rathnayake        test=1
168586a4271fSThilina Rathnayake      endif
168686a4271fSThilina Rathnayake
168786a4271fSThilina Rathnayake      return
168886a4271fSThilina Rathnayake      end
168986a4271fSThilina RathnayakeC-----------------------------------------------------------------------
1690