xref: /phasta/phSolver/compressible/solgmr.f (revision d06028c1ed09954bf598b0145fe1ac8aecabad11)
1        subroutine SolGMRe (y,         ac,        yold,      acold,
2     &			   x,         iBC,       BC,        EGmass,
3     &                     res,       BDiag,     HBrg,      eBrg,
4     &                     yBrg,      Rcos,      Rsin,      iper,
5     &                     ilwork,    shp,       shgl,      shpb,
6     &                     shglb,     Dy, rerr)
7c
8c----------------------------------------------------------------------
9c
10c  This is the preconditioned GMRES driver routine.
11c
12c input:
13c  y      (nshg,ndof)           : Y-variables at n+alpha_v
14c  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
15c  yold   (nshg,ndof)           : Y-variables at beginning of step
16c  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
17c  x      (numnp,nsd)            : node coordinates
18c  iBC    (nshg)                : BC codes
19c  BC     (nshg,ndofBC)         : BC constraint parameters
20c  HBrg   (Kspace+1,Kspace)      : Hessenberg matrix (LHS matrix)
21c  eBrg   (Kspace+1)             : RHS      of Hessenberg minim. problem
22c  yBrg   (Kspace)               : solution of Hessenberg minim. problem
23c  Rcos   (Kspace)               : Rotational cosine of QR algorithm
24c  Rsin   (Kspace)               : Rotational sine   of QR algorithm
25c  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
26c  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
27c
28c output:
29c  res    (nshg,nflow)           : preconditioned residual
30c  BDiag  (nshg,nflow,nflow)      : block-diagonal preconditioner
31c
32c
33c Zdenek Johan,  Winter 1991.  (Fortran 90)
34c----------------------------------------------------------------------
35c
36        use pointer_data
37
38        include "common.h"
39        include "mpif.h"
40        include "auxmpi.h"
41c
42        dimension y(nshg,ndof),             ac(nshg,ndof),
43     &            yold(nshg,ndof),          acold(nshg,ndof),
44     &            x(numnp,nsd),
45     &            iBC(nshg),                BC(nshg,ndofBC),
46     &            res(nshg,nflow),
47     &            BDiag(nshg,nflow,nflow),
48     &            HBrg(Kspace+1,*),         eBrg(*),
49     &            yBrg(*),                  Rcos(*),
50     &            Rsin(*),                  ilwork(nlwork),
51     &            iper(nshg),               EGmass(numel,nedof,nedof)!,
52ctoomuchmem     &            Binv(numel,nedof,nedof)
53c
54        dimension Dy(nshg,nflow),            rmes(nshg,nflow),
55     &            temp(nshg,nflow),
56     &            uBrg(nshg,nflow,Kspace+1)
57c
58        dimension shp(MAXTOP,maxsh,MAXQPT),
59     &            shgl(MAXTOP,nsd,maxsh,MAXQPT),
60     &            shpb(MAXTOP,maxsh,MAXQPT),
61     &            shglb(MAXTOP,nsd,maxsh,MAXQPT)
62      real*8    rerr(nshg,10)
63c
64c.... *******************>> Element Data Formation <<******************
65c
66c
67c.... set the parameters for flux and surface tension calculations
68c
69c
70        idflx = 0
71        if(idiff >= 1)  idflx= idflx + (nflow-1) * nsd
72        if (isurf == 1) idflx=idflx + nsd
73c
74c.... form the LHS matrices, the residual vector, and the block
75c     diagonal preconditioner
76c
77        call ElmGMRe(y,             ac,            x,
78     &               shp,           shgl,          iBC,
79     &               BC,            shpb,
80     &               shglb,         res,
81     &               rmes,          BDiag,         iper,
82     &               ilwork,        EGmass,        rerr )
83      rmes=res  ! saving the b vector (residual)
84c
85c.... **********************>>    EBE - GMRES    <<********************
86c
87        call timer ('Solver  ')
88c
89c.... ------------------------> Initialization <-----------------------
90c
91c
92c.... LU decompose the block diagonals
93c
94        if (iprec .ne. 0)
95     &  call i3LU (BDiag, res,  'LU_Fact ')
96
97c
98c.... block diagonal precondition residual
99c
100        call i3LU (BDiag, res,  'forward ')
101c
102c.... initialize Dy
103c
104        Dy = zero
105c
106c.... Pre-precondition the LHS mass matrix and set up the element
107c     by element preconditioners
108c
109ctoomuchmemory note that Binv is demoted from huge array to just one
110c        real*8 in i3pre because it takes too much memory
111
112        call i3pre (BDiag,    Binv,   EGmass,  ilwork)
113c
114c.... left EBE precondition the residual
115c
116ctoomuchmem        call i3Pcond (Binv,  res,  ilwork,   'L_Pcond ')
117c
118c.... copy res in uBrg(1)
119c
120        uBrg(:,:,1) = res
121c
122c.... calculate norm of residual
123c
124        temp  = res**2
125
126        call sumgat (temp, nflow, summed, ilwork)
127        unorm = sqrt(summed)
128c
129c.... check if GMRES iterations are required
130c
131        iKs    = 0
132        lGMRES = 0
133c
134c.... if we are down to machine precision, don't bother solving
135c
136        if (unorm .lt. 100.*epsM**2) goto 3000
137c
138c.... set up tolerance of the Hessenberg's problem
139c
140        epsnrm = etol * unorm
141c
142c.... ------------------------>  GMRES Loop  <-------------------------
143c
144c.... loop through GMRES cycles
145c
146        do 2000 mGMRES = 1, nGMRES
147        lGMRES = mGMRES - 1
148c
149        if (lGMRES .gt. 0) then
150c
151c.... if GMRES restarts are necessary, calculate  R - A x
152c
153c
154c.... right precondition Dy
155c
156           temp = Dy
157
158ctoomuchmem           call i3Pcond (Binv,  temp,  ilwork,  'R_Pcond ')
159c
160c.... perform the A x product
161c
162           call Au1GMR (EGmass,  temp,  ilwork, iBC,iper)
163c
164c.... periodic nodes have to assemble results to their partners
165c
166           call bc3per (iBC,  temp,  iper, ilwork, nflow)
167c
168c.... left preconditioning
169c
170ctoomuchmem           call i3Pcond (Binv,  temp,  ilwork,  'L_Pcond ')
171c
172c.... subtract A x from residual and calculate the norm
173c
174           temp = res - temp
175           uBrg(:,:,1) = temp
176c
177c.... calculate the norm
178c
179           temp  = temp**2
180           call sumgat (temp, nflow, summed, ilwork)
181           unorm = sqrt(summed)
182c
183c.... flop count
184c
185      !      flops = flops + nflow*nshg+nshg
186c
187        endif
188c
189c.... set up RHS of the Hessenberg's problem
190c
191        call clear (eBrg, Kspace+1)
192        eBrg(1) = unorm
193c
194c.... normalize the first Krylov vector
195c
196        uBrg(:,:,1) = uBrg(:,:,1) / unorm
197c
198c.... loop through GMRES iterations
199c
200        do 1000 iK = 1, Kspace
201           iKs = iK
202
203           uBrg(:,:,iKs+1) = uBrg(:,:,iKs)
204c
205c.... right EBE precondition the LHS ( u_{i+1} <-- inverse(U) u_i )
206c
207ctoomuchmem           call i3Pcond (Binv,  uBrg(:,:,iKs+1), ilwork,  'R_Pcond ')
208c
209c.... Au product  ( u_{i+1} <-- EGmass u_{i+1} )
210c
211           call Au1GMR ( EGmass, uBrg(:,:,iKs+1),  ilwork, iBC,iper)
212c
213c.... periodic nodes have to assemble results to their partners
214c
215           call bc3per (iBC,  uBrg(:,:,iKs+1),  iper, ilwork, nflow)
216
217c
218c.... left EBE precondition the LHS ( u_{i+1} <-- inverse(L) u_{i+1} )
219c
220ctoomuchmem           call i3Pcond (Binv,  uBrg(:,:,iKs+1), ilwork, 'L_Pcond ')
221c
222c.... orthogonalize and get the norm
223c
224          do jK = 1, iKs+1
225c
226            if (jK .eq. 1) then
227c
228              temp = uBrg(:,:,iKs+1) * uBrg(:,:,1)  ! {u_{i+1}*u_1} vector
229              call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1)
230c
231            else
232c
233c project off jK-1 vector
234c
235              uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) - beta * uBrg(:,:,jK-1)
236c
237              temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector
238              call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j)
239c
240            endif
241c
242            HBrg(jK,iKs) = beta   ! put this in the Hessenberg Matrix
243c
244        enddo
245c
246   !      flops = flops + (3*iKs+1)*nflow*numnp+(iKs+1)*numnp
247c
248c  the last inner product was with what was left of the vector (after
249c  projecting off all of the previous vectors
250c
251        unorm           = sqrt(beta)
252        HBrg(iKs+1,iKs) = unorm   ! this fills the 1 sub diagonal band
253c
254c.... normalize the Krylov vector
255c
256        uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm  ! normalize the next Krylov
257c vector
258c
259c.... construct and reduce the Hessenberg Matrix
260c  since there is only one subdiagonal we can use a Givens rotation to
261c  rotate off each subdiagonal AS IT IS FORMED.   We do this because it
262c  allows us to check progress of solution and quit when satisfied.  Note
263c  that all future K vects will put a subdiagonal in the next column so
264c  there is no penalty to work ahead as  the rotation for the next vector
265c  will be unaffected by this rotation.
266
267c
268c     H Y = E ========>   R_i H Y = R_i E
269c
270           do jK = 1, iKs-1
271              tmp            =  Rcos(jK) * HBrg(jK,  iKs) +
272     &                          Rsin(jK) * HBrg(jK+1,iKs)
273              HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK,  iKs) +
274     &                          Rcos(jK) * HBrg(jK+1,iKs)
275              HBrg(jK,  iKs) =  tmp
276           enddo
277c
278           tmp             = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2)
279           Rcos(iKs)       = HBrg(iKs,  iKs) / tmp
280           Rsin(iKs)       = HBrg(iKs+1,iKs) / tmp
281           HBrg(iKs,  iKs) = tmp
282           HBrg(iKs+1,iKs) = zero
283c
284c.... rotate eBrg    R_i E
285c
286           tmp         = Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1)
287           eBrg(iKs+1) =-Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1)
288           eBrg(iKs)   = tmp
289c
290c.... check for convergence
291c
292           ntotGM = ntotGM + 1
293           echeck=abs(eBrg(iKs+1))
294           if (echeck .le. epsnrm) exit
295c
296c.... end of GMRES iteration loop
297c
298 1000   continue
299c
300c.... ------------------------->   Solution   <------------------------
301c
302c.... if converged or end of Krylov space
303c
304c.... solve for yBrg
305c
306        do jK = iKs, 1, -1
307           yBrg(jK) = eBrg(jK) / HBrg(jK,jK)
308           do lK = 1, jK-1
309              eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK)
310           enddo
311        enddo
312c
313c.... update Dy
314c
315        do jK = 1, iKs
316           Dy = Dy + yBrg(jK) * uBrg(:,:,jK)
317        enddo
318c
319c.... flop count
320c
321   !      flops = flops + (3*iKs+1)*nflow*nshg
322c
323c.... check for convergence
324c
325
326        echeck=abs(eBrg(iKs+1))
327        if (echeck .le. epsnrm) exit
328        if(myrank.eq.master) write(*,*)'solver tolerance %satisfaction',
329     &  (one-echeck/unorm)/(one-etol)*100
330c
331c.... end of mGMRES loop
332c
333 2000 continue
334c
335c.... ------------------------>   Converged   <------------------------
336c
337c.... if converged
338c
339 3000 continue
340c
341c.... back EBE precondition the results
342c
343ctoomuchmem      call i3Pcond (Binv,  Dy, ilwork, 'R_Pcond ')
344c
345c.... back block-diagonal precondition the results
346c
347      call i3LU (BDiag, Dy, 'backward')
348c
349c
350c.... output the statistics
351c
352              call rstat (res, ilwork,rmes)
353c
354c.... stop the timer
355c
356 3002 continue                  ! no solve just res.
357      call timer ('Back    ')
358c
359c.... end
360c
361      return
362      end
363
364
365
366
367
368      subroutine SolGMRs(y,         ac,        yold,      acold,
369     &			 x,         iBC,       BC,
370     &                   col,       row,       lhsk,
371     &                   res,       BDiag,     HBrg,      eBrg,
372     &                   yBrg,      Rcos,      Rsin,      iper,
373     &                   ilwork,    shp,       shgl,      shpb,
374     &                   shglb,     Dy,        rerr)
375c
376c----------------------------------------------------------------------
377c
378c  This is the preconditioned GMRES driver routine.
379c
380c input:
381c  y      (nshg,ndof)           : Y-variables at n+alpha_v
382c  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
383c  yold   (nshg,ndof)           : Y-variables at beginning of step
384c  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
385c  x      (numnp,nsd)            : node coordinates
386c  iBC    (nshg)                : BC codes
387c  BC     (nshg,ndofBC)         : BC constraint parameters
388c  HBrg   (Kspace+1,Kspace)      : Hessenberg matrix (LHS matrix)
389c  eBrg   (Kspace+1)             : RHS      of Hessenberg minim. problem
390c  yBrg   (Kspace)               : solution of Hessenberg minim. problem
391c  Rcos   (Kspace)               : Rotational cosine of QR algorithm
392c  Rsin   (Kspace)               : Rotational sine   of QR algorithm
393c  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
394c  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
395c
396c output:
397c  res    (nshg,nflow)           : preconditioned residual
398c  BDiag  (nshg,nflow,nflow)      : block-diagonal preconditioner
399c
400c
401c Zdenek Johan,  Winter 1991.  (Fortran 90)
402c----------------------------------------------------------------------
403c
404      use pointer_data
405
406      include "common.h"
407      include "mpif.h"
408      include "auxmpi.h"
409c
410      integer col(nshg+1), row(nnz*nshg)
411      real*8 lhsK(nflow*nflow,nnz_tot)
412
413
414      dimension y(nshg,ndof),             ac(nshg,ndof),
415     &          yold(nshg,ndof),          acold(nshg,ndof),
416     &          x(numnp,nsd),
417     &          iBC(nshg),                BC(nshg,ndofBC),
418     &          res(nshg,nflow),
419     &          BDiag(nshg,nflow,nflow),
420     &          HBrg(Kspace+1,Kspace),    eBrg(Kspace+1),
421     &          yBrg(Kspace),             Rcos(Kspace),
422     &          Rsin(Kspace),             ilwork(nlwork),
423     &          iper(nshg)
424c
425      dimension Dy(nshg,nflow),            rmes(nshg,nflow),
426     &          temp(nshg,nflow),
427     &          uBrg(nshg,nflow,Kspace+1)
428c
429      dimension shp(MAXTOP,maxsh,MAXQPT),
430     &          shgl(MAXTOP,nsd,maxsh,MAXQPT),
431     &          shpb(MAXTOP,maxsh,MAXQPT),
432     &          shglb(MAXTOP,nsd,maxsh,MAXQPT)
433      real*8    rerr(nshg,10)
434c
435c
436c.... *******************>> Element Data Formation <<******************
437c
438c
439c.... set the parameters for flux and surface tension calculations
440c
441c
442      idflx = 0
443      if(idiff >= 1)  idflx= idflx + (nflow-1) * nsd
444      if (isurf == 1) idflx=idflx + nsd
445c
446c.... form the LHS matrices, the residual vector, and the block
447c     diagonal preconditioner
448c
449      call ElmGMRs(y,             ac,            x,
450     &             shp,           shgl,          iBC,
451     &             BC,            shpb,
452     &             shglb,         res,
453     &             rmes,          BDiag,         iper,
454     &             ilwork,        lhsK,          col,
455     &             row,           rerr )
456      rmes=res  ! saving the b vector (residual)
457c
458
459	call tnanq(res,5, 'res_egmr')
460	call tnanq(BDiag,25, 'bdg_egmr')
461c
462c.... **********************>>    EBE - GMRES    <<********************
463c
464      call timer ('Solver  ')
465c
466c.... ------------------------> Initialization <-----------------------
467c
468c
469c.... LU decompose the block diagonals
470c
471      if (iprec .ne. 0) then
472         call i3LU (BDiag, res,  'LU_Fact ')
473         if (numpe > 1) then
474            call commu (BDiag  , ilwork, nflow*nflow  , 'out')
475         endif
476      endif
477c
478c.... block diagonal precondition residual
479c
480      call i3LU (BDiag, res,  'forward ')
481!  from this point forward b is btilde (Preconditioned residual)
482c
483c Check the residual for divering trend
484c
485	call rstatCheck(res,ilwork,y,ac)
486c
487c.... initialize Dy
488c
489      Dy = zero
490c
491c.... Pre-precondition the LHS mass matrix and set up the sparse
492c     preconditioners
493c
494
495      if(lhs.eq.1) call Spsi3pre (BDiag,    lhsK,  col, row)
496c
497c.... copy res in uBrg(1)
498c
499      uBrg(:,:,1) = res
500c
501c.... calculate norm of residual
502c
503      temp  = res**2
504
505      call sumgat (temp, nflow, summed, ilwork)
506      unorm = sqrt(summed)
507c
508c.... check if GMRES iterations are required
509c
510      iKs    = 0
511      lGMRESs = 0
512c
513c.... if we are down to machine precision, don't bother solving
514c
515      if (unorm .lt. 100.*epsM**2) goto 3000
516c
517c.... set up tolerance of the Hessenberg's problem
518c
519      epsnrm = etol * unorm
520c
521c.... ------------------------>  GMRES Loop  <-------------------------
522c
523c.... loop through GMRES cycles
524c
525      do 2000 mGMRES = 1, nGMRES
526         lGMRESs = mGMRES - 1
527c
528         if (lGMRES .gt. 0) then
529c
530c.... if GMRES restarts are necessary, calculate  R - A x
531c
532c
533c.... right precondition Dy
534c
535            temp = Dy
536
537c
538c.... perform the A x product
539c
540            call SparseAp (iper,ilwork,iBC, col, row, lhsK,  temp)
541c           call tnanq(temp,5, 'q_spAPrs')
542
543c
544c.... periodic nodes have to assemble results to their partners
545c
546            call bc3per (iBC,  temp,  iper, ilwork, nflow)
547c           call tnanq(temp,5, 'q_BCprs')
548c
549c.... subtract A x from residual and calculate the norm
550c
551            temp = res - temp
552            uBrg(:,:,1) = temp
553c
554c.... calculate the norm
555c
556            temp  = temp**2
557            call sumgat (temp, nflow, summed, ilwork)
558            unorm = sqrt(summed)
559c
560c.... flop count
561c
562       !      flops = flops + nflow*nshg+nshg
563c
564         endif
565c
566c.... set up RHS of the Hessenberg's problem
567c
568         call clear (eBrg, Kspace+1)
569         eBrg(1) = unorm
570c
571c.... normalize the first Krylov vector
572c
573         uBrg(:,:,1) = uBrg(:,:,1) / unorm
574c
575c.... loop through GMRES iterations
576c
577         do 1000 iK = 1, Kspace
578            iKs = iK
579
580            uBrg(:,:,iKs+1) = uBrg(:,:,iKs)
581c
582c.... Au product  ( u_{i+1} <-- EGmass u_{i+1} )
583c
584            call SparseAp (iper, ilwork, iBC,
585     &                     col,  row,    lhsK,
586     &                     uBrg(:,:,iKs+1) )
587c           call tnanq(uBrg(:,:,iKS+1),5, 'q_spAP')
588
589c
590c.... periodic nodes have to assemble results to their partners
591c
592            call bc3per (iBC,  uBrg(:,:,iKs+1),  iper, ilwork, nflow)
593c           call tnanq(uBrg(:,:,iKS+1),5, 'q_bc')
594
595c
596c.... orthogonalize and get the norm
597c
598            do jK = 1, iKs+1
599c
600               if (jK .eq. 1) then
601c
602                  temp = uBrg(:,:,iKs+1) * uBrg(:,:,1) ! {u_{i+1}*u_1} vector
603                  call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1)
604c
605               else
606c
607c project off jK-1 vector
608c
609                  uBrg(:,:,iKs+1)=uBrg(:,:,iKs+1)-beta * uBrg(:,:,jK-1)
610c
611                  temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector
612                  call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j)
613c
614               endif
615c
616               HBrg(jK,iKs) = beta ! put this in the Hessenberg Matrix
617c
618            enddo
619c
620       !      flops = flops + (3*iKs+1)*nflow*numnp+(iKs+1)*numnp
621c
622c  the last inner product was with what was left of the vector (after
623c  projecting off all of the previous vectors
624c
625        if(beta.le.0) write(*,*) 'beta in solgmr non-positive'
626            unorm           = sqrt(beta)
627            HBrg(iKs+1,iKs) = unorm ! this fills the 1 sub diagonal band
628c
629c.... normalize the Krylov vector
630c
631            uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm ! normalize the next Krylov
632c vector
633c
634c.... construct and reduce the Hessenberg Matrix
635c  since there is only one subdiagonal we can use a Givens rotation to
636c  rotate off each subdiagonal AS IT IS FORMED.   We do this because it
637c  allows us to check progress of solution and quit when satisfied.  Note
638c  that all future K vects will put a subdiagonal in the next column so
639c  there is no penalty to work ahead as  the rotation for the next vector
640c  will be unaffected by this rotation.
641
642c
643c     H Y = E ========>   R_i H Y = R_i E
644c
645            do jK = 1, iKs-1
646               tmp            =  Rcos(jK) * HBrg(jK,  iKs) +
647     &                           Rsin(jK) * HBrg(jK+1,iKs)
648               HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK,  iKs) +
649     &                           Rcos(jK) * HBrg(jK+1,iKs)
650               HBrg(jK,  iKs) =  tmp
651            enddo
652c
653            tmp            = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2)
654            Rcos(iKs)      = HBrg(iKs,  iKs) / tmp
655            Rsin(iKs)      = HBrg(iKs+1,iKs) / tmp
656            HBrg(iKs,  iKs)= tmp
657            HBrg(iKs+1,iKs)= zero
658c
659c.... rotate eBrg    R_i E
660c
661            tmp        = Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1)
662            eBrg(iKs+1)=-Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1)
663            eBrg(iKs)  = tmp
664c
665c.... check for convergence
666c
667            ntotGM = ntotGM + 1
668            echeck=abs(eBrg(iKs+1))
669            if (echeck .le. epsnrm.and. iKs .ge. minIters) exit
670c
671c.... end of GMRES iteration loop
672c
673 1000    continue
674c
675c.... ------------------------->   Solution   <------------------------
676c
677c.... if converged or end of Krylov space
678c
679c.... solve for yBrg
680c
681         do jK = iKs, 1, -1
682            yBrg(jK) = eBrg(jK) / HBrg(jK,jK)
683            do lK = 1, jK-1
684               eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK)
685            enddo
686         enddo
687c
688c.... update Dy
689c
690         do jK = 1, iKs
691            Dy = Dy + yBrg(jK) * uBrg(:,:,jK)
692         enddo
693c
694c.... flop count
695c
696    !      flops = flops + (3*iKs+1)*nflow*nshg
697c
698c.... check for convergence
699c
700        echeck=abs(eBrg(iKs+1))
701        if (echeck .le. epsnrm) exit
702!        if(myrank.eq.master) write(*,*)'solver tolerance %satisfaction',
703!     &  (one-echeck*etol/epsnrm)/(one-etol)*100
704
705c
706c.... end of mGMRES loop
707c
708 2000 continue
709c
710c.... ------------------------>   Converged   <------------------------
711c
712c.... if converged
713c
714 3000 continue
715
716c
717c
718c.... back block-diagonal precondition the results
719c
720      call i3LU (BDiag, Dy, 'backward')
721c
722c
723c.... output the statistics
724c
725      call rstat (res, ilwork,rmes)
726
727      if(myrank.eq.master) then
728        if (echeck .le. epsnrm) then
729            write(*,*)
730        else
731            write(*,*)'solver tolerance %satisfaction',
732     &  (one-echeck*etol/epsnrm)/(one-etol)*100
733        endif
734      endif
735c
736c.... stop the timer
737c
738 3002 continue                  ! no solve just res.
739      call timer ('Back    ')
740c
741c.... end
742c
743      return
744      end
745
746        subroutine SolGMRSclr(y,       ac,      yold,
747     &                        acold,   EGmasst,
748     &                        x,       elDw,
749     &                        iBC,      BC,
750     &                        rest,     HBrg,     eBrg,
751     &                        yBrg,     Rcos,     Rsin,    iper,
752     &                        ilwork,
753     &                        shp,      shgl,
754     &                        shpb,     shglb,  Dyt)
755c
756c----------------------------------------------------------------------
757c
758c  This is the preconditioned GMRES driver routine.
759c
760c input:
761c  y      (nshg,ndof)           : Y-variables at n+alpha_f
762c  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
763c  yold   (nshg,ndof)           : Y-variables at beginning of step
764c  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
765c  x      (numnp,nsd)            : node coordinates
766c  iBC    (nshg)                : BC codes
767c  BC     (nshg,ndofBC)         : BC constraint parameters
768c  iper   (nshg)                : periodic nodal information
769c
770c output:
771c  y      (nshg,ndof)           : Y-variables at n+alpha_f
772c  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
773c  HBrg   (Kspace+1,Kspace)      : Hessenberg matrix (LHS matrix)
774c  eBrg   (Kspace+1)             : RHS      of Hessenberg minim. problem
775c  yBrg   (Kspace)               : solution of Hessenberg minim. problem
776c  Rcos   (Kspace)               : Rotational cosine of QR algorithm
777c  Rsin   (Kspace)               : Rotational sine   of QR algorithm
778c output:
779c  rest   (numnp)           : preconditioned residual
780c
781c
782c Zdenek Johan,  Winter 1991.  (Fortran 90)
783c----------------------------------------------------------------------
784c
785        use pointer_data
786
787        include "common.h"
788        include "mpif.h"
789        include "auxmpi.h"
790c
791        dimension y(nshg,ndof),      ac(nshg,ndof),
792     &            yold(nshg,ndof),   acold(nshg,ndof),
793     &            x(numnp,nsd),
794     &            iBC(nshg),         BC(nshg,ndofBC),
795     &            rest(nshg),
796     &            Diag(nshg),
797     &            HBrg(Kspace+1,*),  eBrg(*),
798     &            yBrg(*),           Rcos(*),
799     &            Rsin(*),           ilwork(nlwork),
800     &            iper(nshg),        EGmasst(numel,nshape,nshape)
801c
802        dimension Dyt(nshg),         rmest(nshg),
803     &            tempt(nshg),       Dinv(nshg),
804     &            uBrgt(nshg,Kspace+1)
805c
806        dimension shp(MAXTOP,maxsh,MAXQPT),
807     &            shgl(MAXTOP,nsd,maxsh,MAXQPT),
808     &            shpb(MAXTOP,maxsh,MAXQPT),
809     &            shglb(MAXTOP,nsd,maxsh,MAXQPT)
810        real*8    elDw(numel)
811c
812c.... *******************>> Element Data Formation <<******************
813c
814c
815c.... form the LHS matrices, the residual vector, and the block
816c     diagonal preconditioner
817c
818        call ElmGMRSclr(y,             ac,
819     &                  x,             elDw,      shp,        shgl,
820     &                  iBC,           BC,
821     &                  shpb,          shglb,
822     &                  rest,
823     &                  rmest,         Diag,       iper,
824     &                  ilwork,        EGmasst)
825c
826c.... **********************>>    EBE - GMRES    <<********************
827c
828        call timer ('Solver  ')
829c
830c.... ------------------------> Initialization <-----------------------
831c
832c
833      id = isclr+5
834c.... initialize Dy
835c
836        Dyt = zero
837c
838c.... Right preconditioner
839c
840        call i3preSclr(Diag, Dinv, EGmassT, ilwork)
841c
842c Check the residual for divering trend
843c
844
845        call rstatCheckSclr(rest,ilwork,y,ac)
846
847c
848c.... copy rest in uBrgt(1)
849c
850        uBrgt(:,1) = rest
851c
852c.... calculate norm of residual
853c
854        tempt  = rest**2
855
856        call sumgat (tempt, 1, summed, ilwork)
857        unorm = sqrt(summed)
858c
859c.... check if GMRES iterations are required
860c
861        iKss    = 0
862        lGMRESt = 0
863c
864c.... if we are down to machine precision, don't bother solving
865c
866        if (unorm .lt. 100.*epsM**2) goto 3000
867c
868c.... set up tolerance of the Hessenberg's problem
869c
870        epsnrm = etol * unorm
871c
872c.... ------------------------>  GMRES Loop  <-------------------------
873c
874c.... loop through GMRES cycles
875c
876        do 2000 mGMRES = 1, nGMRES
877        lGMRESt = mGMRES - 1
878c
879        if (lGMRESt .gt. 0) then
880c
881c.... if GMRES restarts are necessary, calculate  R - A x
882c
883c
884
885c.... perform the A x product
886c
887           call Au1GMRSclr (EGmasst,  tempt,  ilwork, iper)
888c
889c.... periodic nodes have to assemble results to their partners
890c
891c          call bc3perSclr (iBC,  tempt,  iper)
892c
893c.... subtract A x from residual and calculate the norm
894c
895           tempt = rest - tempt
896           uBrgt(:,1) = tempt
897c
898c.... calculate the norm
899c
900           tempt  = tempt**2
901           call sumgat (tempt, 1, summed, ilwork)
902           unorm = sqrt(summed)
903c
904c.... flop count
905c
906      !      flops = flops + ndof*numnp+numnp
907c
908        endif
909c
910c.... set up RHS of the Hessenberg's problem
911c
912        call clear (eBrg, Kspace+1)
913        call clear (HBrg, Kspace+1)
914        eBrg(1) = unorm
915c
916c.... normalize the first Krylov vector
917c
918        uBrgt(:,1) = uBrgt(:,1) / unorm
919c
920c.... loop through GMRES iterations
921c
922        do 1000 iK = 1, Kspace
923           iKss = iK
924
925           uBrgt(:,iKss+1) = uBrgt(:,iKss)
926
927c.... Au product  ( u_{i+1} <-- EGmass u_{i+1} )
928c
929           call Au1GMRSclr ( EGmasst, uBrgt(:,iKss+1),  ilwork, iper )
930
931c
932c.... periodic nodes have to assemble results to their partners
933c
934           call bc3perSclr (iBC,  uBrgt(:,iKss+1),  iper)
935c
936c.... orthogonalize and get the norm
937c
938          do jK = 1, iKss+1
939c
940            if (jK .eq. 1) then
941c
942              tempt = uBrgt(:,iKss+1) * uBrgt(:,1)  ! {u_{i+1}*u_1} vector
943              call sumgat (tempt, 1, beta, ilwork) ! sum vector=(u_{i+1},u_1)
944c
945            else
946c
947c project off jK-1 vector
948c
949          uBrgt(:,iKss+1) = uBrgt(:,iKss+1) - beta * uBrgt(:,jK-1)
950c
951              tempt = uBrgt(:,iKss+1) * uBrgt(:,jK) !{u_{i+1}*u_j} vector
952              call sumgat (tempt, 1, beta, ilwork) ! sum vector=(u_{i+1},u_j)
953c
954            endif
955c
956            HBrg(jK,iKss) = beta   ! put this in the Hessenberg Matrix
957c
958        enddo
959c
960   !      flops = flops + (3*iKss+1)*ndof*numnp+(iKss+1)*numnp
961c
962c  the last inner product was with what was left of the vector (after
963c  projecting off all of the previous vectors
964c
965        unorm           = sqrt(beta)
966        HBrg(iKss+1,iKss) = unorm   ! this fills the 1 sub diagonal band
967c
968c.... normalize the Krylov vector
969c
970        uBrgt(:,iKss+1) = uBrgt(:,iKss+1) / unorm  ! normalize the next Krylov
971c vector
972c
973c.... construct and reduce the Hessenberg Matrix
974c  since there is only one subdiagonal we can use a Givens rotation to
975c  rotate off each subdiagonal AS IT IS FORMED.   We do this because it
976c  allows us to check progress of solution and quit when satisfied.  Note
977c  that all future K vects will put a subdiagonal in the next column so
978c  there is no penalty to work ahead as  the rotation for the next vector
979c  will be unaffected by this rotation.
980
981c
982c     H Y = E ========>   R_i H Y = R_i E
983c
984           do jK = 1, iKss-1
985              tmp            =  Rcos(jK) * HBrg(jK,  iKss) +
986     &                          Rsin(jK) * HBrg(jK+1,iKss)
987              HBrg(jK+1,iKss) = -Rsin(jK) * HBrg(jK,  iKss) +
988     &                          Rcos(jK) * HBrg(jK+1,iKss)
989              HBrg(jK,  iKss) =  tmp
990           enddo
991c
992           tmp        = sqrt(HBrg(iKss,iKss)**2 + HBrg(iKss+1,iKss)**2)
993           Rcos(iKss) = HBrg(iKss,  iKss) / tmp
994           Rsin(iKss) = HBrg(iKss+1,iKss) / tmp
995           HBrg(iKss,  iKss) = tmp
996           HBrg(iKss+1,iKss) = zero
997c
998c.... rotate eBrg    R_i E
999c
1000           tmp         = Rcos(iKss)*eBrg(iKss) + Rsin(iKss)*eBrg(iKss+1)
1001           eBrg(iKss+1)=-Rsin(iKss)*eBrg(iKss) + Rcos(iKss)*eBrg(iKss+1)
1002           eBrg(iKss)  = tmp
1003c
1004c.... check for convergence
1005c
1006           ercheck=eBrg(iKss+1)
1007           ntotGMs = ntotGMs + 1
1008           if (abs(eBrg(iKss+1)) .le. epsnrm) exit
1009c
1010c.... end of GMRES iteration loop
1011c
1012 1000   continue
1013c
1014c.... ------------------------->   Solution   <------------------------
1015c
1016c.... if converged or end of Krylov space
1017c
1018c.... solve for yBrg
1019c
1020        do jK = iKss, 1, -1
1021           yBrg(jK) = eBrg(jK) / HBrg(jK,jK)
1022           do lK = 1, jK-1
1023              eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK)
1024           enddo
1025        enddo
1026c
1027c.... update Dy
1028c
1029        do jK = 1, iKss
1030           Dyt = Dyt + yBrg(jK) * uBrgt(:,jK)
1031        enddo
1032c
1033c.... flop count
1034c
1035   !      flops = flops + (3*iKss+1)*ndof*numnp
1036c
1037c.... check for convergence
1038c
1039        if (abs(eBrg(iKss+1)) .le. epsnrm) exit
1040c
1041c.... end of mGMRES loop
1042c
1043 2000 continue
1044c
1045c.... ------------------------>   Converged   <------------------------
1046c
1047c.... if converged
1048c
1049 3000 continue
1050c
1051c.... back precondition the result
1052c
1053      Dyt(:) = Dyt(:) * Dinv(:)
1054c
1055c.... output the statistics
1056c
1057      call rstatSclr(rest, ilwork)
1058c.... stop the timer
1059c
1060 3002 continue                  ! no solve just res.
1061      call timer ('Back    ')
1062c
1063c.... end
1064c
1065      return
1066      end
1067
1068
1069