xref: /phasta/phSolver/compressible/solmfg.f (revision 779e4b51fc2aad517e324269f1248fd2a51a661a)
1        subroutine SolMFG (y,         ac,        yold,      acold,
2     &			   x,
3     &                     iBC,       BC,        res,
4     &                     BDiag,     HBrg,      eBrg,
5     &                     yBrg,      Rcos,      Rsin,      iper,
6     &                     ilwork,    shp,       shgl,
7     &                     shpb,      shglb,     Dy, rerr)
8c
9c----------------------------------------------------------------------
10c
11c  This is the preconditioned matrix-free GMRES driver routine.
12c
13c input:
14c  y      (nshg,ndof)           : Y-variables at n+alpha_v
15c  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
16c  yold   (nshg,ndof)           : Y-variables at beginning of step
17c  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
18c  x      (numnp,nsd)            : node coordinates
19c  iBC    (nshg)                : BC codes
20c  BC     (nshg,ndofBC)         : BC constraint parameters
21c  HBrg   (Kspace+1,Kspace)      : Hessenberg matrix (LHS matrix)
22c  eBrg   (Kspace+1)             : RHS      of Hessenberg minim. problem
23c  yBrg   (Kspace)               : solution of Hessenberg minim. problem
24c  Rcos   (Kspace)               : Rotational cosine of QR algorithm
25c  Rsin   (Kspace)               : Rotational sine   of QR algorithm
26c  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
27c  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
28c
29c output:
30c  res    (nshg,ndof)           : preconditioned residual
31c  BDiag  (nshg,ndof,ndof)      : block-diagonal preconditioner
32c
33c
34c Zdenek Johan,  Winter 1991.  (Fortran 90)
35c----------------------------------------------------------------------
36c
37        include "common.h"
38        include "mpif.h"
39        include "auxmpi.h"
40c
41        dimension y(nshg,ndof),             ac(nshg,ndof),
42     &            yold(nshg,ndof),          acold(nshg,ndof),
43     &            x(numnp,nsd),
44     &            iBC(nshg),                BC(nshg,ndofBC),
45     &            res(nshg,nflow),
46     &            BDiag(nshg,nflow,nflow),
47     &            HBrg(Kspace+1,Kspace),    eBrg(Kspace+1),
48     &            yBrg(Kspace),             Rcos(Kspace),
49     &            Rsin(Kspace),             ilwork(nlwork),
50     &            iper(nshg)
51c
52        dimension Dy(nshg,nflow),            rmes(nshg,nflow),
53     &            ypre(nshg,nflow),          temp(nshg,nflow),
54     &            uBrg(nshg,nflow,Kspace+1),  ytmp(nshg,nflow)
55
56c
57        dimension shp(MAXTOP,maxsh,MAXQPT),
58     &            shgl(MAXTOP,nsd,maxsh,MAXQPT),
59     &            shpb(MAXTOP,maxsh,MAXQPT),
60     &            shglb(MAXTOP,nsd,maxsh,MAXQPT)
61c
62c.... *******************>> Element Data Formation <<******************
63c
64c
65c.... set the parameters for flux and surface tension calculations
66c
67        idflx = zero
68        if(idiff >= 1)  idflx= idflx + (nflow-1) * nsd
69        if (isurf == 1) idflx=idflx + nsd
70c
71        call ElmMFG (y,             ac,            x,
72     &               shp,           shgl,
73     &               iBC,           BC,
74     &               shpb,          shglb,
75     &               res,           rmes,
76     &               BDiag,         iper,          ilwork,
77     &               rerr)
78c
79c.... **********************>> Matrix-Free GMRES <<********************
80c
81c
82c.... start the timer
83c
84        call timer ('Solver  ')
85c
86c.... ------------------------> Initialization <-----------------------
87c
88
89c
90c.... LU decompose the block diagonals
91c
92        if (iprec .ne. 0)
93     &  call i3LU (BDiag, res,  'LU_Fact ')
94
95c
96c.... block diagonal precondition residual
97c
98        call i3LU (BDiag, res,  'forward ')
99c
100c  This is a feature that allows one to take an extra pass just to
101c  find the residual at the end of the last solve.
102c
103c$$$        if(iter.ge.(press*nitr) ) then
104c$$$          iKs=0
105c$$$          lGMRES=0
106c$$$          goto 3002
107c$$$        endif
108
109c
110c.... block diagonal precondition modified residual
111c
112        call i3LU (BDiag, rmes, 'forward ')
113
114c
115c.... copy res in uBrg(1)
116c
117        uBrg(:,:,1) = res
118c
119c.... initialize Dy
120c
121        Dy = zero
122c
123c.... block diagonal precondition y-variables
124c
125        ypre(:,:) = y(:,1:nflow) ! ypre is the pre-conditioned,
126                                 ! unperturbed, base vector
127c
128        call yshuffle(ypre,'new2old ')
129c
130        call i3LU (BDiag, ypre,   'product ')
131c
132c  since we will never use ypre in the "new" form again, leave it
133c  shuffled
134c
135c        call yshuffle(ypre, 'old2new ')
136c
137c.... calculate norm of residual
138c
139        temp  = res**2
140
141c        call tnanq(temp,5,"res**2  ")
142
143        call sumgat (temp, nflow, summed, ilwork)
144        unorm = sqrt(summed)
145c
146c.... flop count
147c
148!        flops = flops + ndof*nshg+nshg
149c
150c.... check if GMRES iterations are required
151c
152        iKs    = 0
153        lGMRES = 0
154c
155c.... if we are down to machine precision, don't bother solving
156c
157        if (unorm .lt. 100.*epsM**2) goto 3000
158c
159c.... set up tolerance of the Hessenberg's problem
160c
161        epsnrm = etol * unorm
162c
163c.... compute the finite difference interval
164c
165        if ((iter .eq. 1) .and. (mod(istep,20) .eq. 0)) then
166           call itrFDI (ypre,            y,            ac,
167     &                  x,               rmes,
168     &                  res,             BDiag,         iBC,
169     &                  BC,              iper,
170     &                  ilwork,          shp,           shgl,
171     &                  shpb,            shglb)
172        endif
173        ires=2
174c
175c.... ------------------------>  GMRES Loop  <-------------------------
176c
177c.... loop through GMRES cycles
178c
179        do 2000 mGMRES = 1, nGMRES
180        lGMRES = mGMRES - 1
181c
182        if (lGMRES .gt. 0) then
183c
184c.... calculate  R - A u
185c
186          call Au2MFG (ypre,        y,        ac,
187     &                 x,           rmes,
188     &                 res,         Dy,          temp,
189     &                 BDiag,       iBC,         BC,
190     &                 iper,        ilwork,
191     &                 shp,         shgl,
192     &                 shpb,        shglb)
193
194c
195          uBrg(:,:,1) = temp
196c
197c.... calculate the norm
198c
199          temp  = temp**2
200          call sumgat (temp, ndof, summed, ilwork)
201          unorm = sqrt(summed)
202
203c
204c.... flop count
205c
206!          flops = flops + ndof*nshg+nshg
207c
208        endif
209c
210c.... set up RHS of the Hessenberg's problem
211c
212        call clear (eBrg, Kspace+1)
213        eBrg(1) = unorm
214c
215c.... normalize the first Krylov vector
216c
217        uBrg(:,:,1) = uBrg(:,:,1) / unorm
218c
219c.... loop through GMRES iterations
220c
221        do 1000 iK = 1, Kspace
222        iKs = iK
223c
224c.... Au product
225c
226        temp = uBrg(:,:,iKs)
227c
228        call Au1MFG (ypre,        y,           ac,
229     &               x,           rmes,
230     &               res,         temp,        BDiag,
231     &               iBC,         BC,
232     &               iper,        ilwork,
233     &               shp,         shgl,
234     &               shpb,        shglb)
235c
236        uBrg(:,:,iKs+1) = temp   ! u_{i+1}= J u_i  In Johan Thesis p 15c
237c$$$c
238c$$$c.... debug
239c$$$c
240c$$$           do i=1,nshg
241c$$$              write(78,'(5(f14.7))')(uBrg(i,j,iKs+1),j=1,5)
242c$$$           enddo
243c$$$           stop
244c
245c.... orthogonalize and get the norm
246c
247        do jK = 1, iKs+1
248c
249          if (jK .eq. 1) then
250c
251            temp = uBrg(:,:,iKs+1) * uBrg(:,:,1)  ! {u_{i+1}*u_1} vector
252            call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1)
253c
254          else
255c
256c project off jK-1 vector
257c
258            uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) - beta * uBrg(:,:,jK-1)
259c
260            temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector
261            call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j)
262c
263          endif
264c
265          HBrg(jK,iKs) = beta   ! put this in the Hessenberg Matrix
266c
267        enddo
268c
269!        flops = flops + (3*iKs+1)*nflow*nshg+(iKs+1)*nshg
270c
271c  the last inner product was with what was left of the vector (after
272c  projecting off all of the previous vectors
273c
274        unorm           = sqrt(beta)
275        HBrg(iKs+1,iKs) = unorm   ! this fills the 1 sub diagonal band
276c
277c.... normalize the Krylov vector
278c
279        uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm  ! normalize the next Krylov
280c vector
281c
282c.... construct and reduce the Hessenberg Matrix
283c  since there is only one subdiagonal we can use a Givens rotation to
284c  rotate off each subdiagonal AS IT IS FORMED.   We do this because it
285c  allows us to check progress of solution and quit when satisfied.  Note
286c  that all future K vects will put a subdiagonal in the next column so
287c  there is no penalty to work ahead as  the rotation for the next vector
288c   will be unaffected by this rotation.
289
290c
291c   H Y = E ========>   R_i H Y = R_i E
292c
293        do jK = 1, iKs-1
294          tmp            =  Rcos(jK) * HBrg(jK,  iKs) +
295     &                      Rsin(jK) * HBrg(jK+1,iKs)
296          HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK,  iKs) +
297     &                      Rcos(jK) * HBrg(jK+1,iKs)
298          HBrg(jK,  iKs) =  tmp
299        enddo
300c
301        tmp             = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2)
302        Rcos(iKs)       = HBrg(iKs,  iKs) / tmp
303        Rsin(iKs)       = HBrg(iKs+1,iKs) / tmp
304        HBrg(iKs,  iKs) = tmp
305        HBrg(iKs+1,iKs) = zero
306c
307c.... rotate eBrg    R_i E
308c
309        tmp         =  Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1)
310        eBrg(iKs+1) = -Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1)
311        eBrg(iKs)   =  tmp
312c
313c.... check for convergence
314c
315        ntotGM = ntotGM + 1
316        echeck=abs(eBrg(iKs+1))
317        if (echeck .le. epsnrm.and. iKs .ge. minIters) exit
318c
319c.... end of GMRES iteration loop
320c
3211000    continue
322c
323c.... ------------------------->   Solution   <------------------------
324c
325c.... if converged or end of Krylov space
326c
327c.... solve for yBrg
328c
329        do jK = iKs, 1, -1
330          yBrg(jK) = eBrg(jK) / HBrg(jK,jK)
331          do lK = 1, jK-1
332            eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK)
333          enddo
334        enddo
335c
336c.... update Dy
337c
338        do jK = 1, iKs
339          Dy = Dy + yBrg(jK) * uBrg(:,:,jK)
340        enddo
341c
342c.... flop count
343c
344!        flops = flops + (3*iKs+1)*nflow*nshg
345c
346c.... check for convergence
347c
348        if (abs(eBrg(iKs+1)) .le. epsnrm) exit
349c
350c.... end of mGMRES loop
351c
3522000    continue
353c
354c.... ------------------------>   Converged   <------------------------
355c
356c.... if converged
357c
3583000    continue
359
360c
361c.... back precondition the results
362c
363        call i3LU (BDiag, Dy, 'backward')
364c
365c.... output the statistics
366c
367              call rstat (res, ilwork)
368c
369c ... reset ires to 3 again (asires changed ires to 2)
370c
371              ires = 3
372c
373c.... stop the timer
374c
3753002    continue  ! no solve just res.
376        call timer ('Back    ')
377c
378c.... end
379c
380        return
381        end
382