1 subroutine SolGMRe (y, ac, yold, acold, 2 & x, iBC, BC, EGmass, 3 & res, BDiag, HBrg, eBrg, 4 & yBrg, Rcos, Rsin, iper, 5 & ilwork, shp, shgl, shpb, 6 & shglb, Dy, rerr) 7c 8c---------------------------------------------------------------------- 9c 10c This is the preconditioned GMRES driver routine. 11c 12c input: 13c y (nshg,ndof) : Y-variables at n+alpha_v 14c ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 15c yold (nshg,ndof) : Y-variables at beginning of step 16c acold (nshg,ndof) : Primvar. accel. variable at begng step 17c x (numnp,nsd) : node coordinates 18c iBC (nshg) : BC codes 19c BC (nshg,ndofBC) : BC constraint parameters 20c HBrg (Kspace+1,Kspace) : Hessenberg matrix (LHS matrix) 21c eBrg (Kspace+1) : RHS of Hessenberg minim. problem 22c yBrg (Kspace) : solution of Hessenberg minim. problem 23c Rcos (Kspace) : Rotational cosine of QR algorithm 24c Rsin (Kspace) : Rotational sine of QR algorithm 25c shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 26c shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 27c 28c output: 29c res (nshg,nflow) : preconditioned residual 30c BDiag (nshg,nflow,nflow) : block-diagonal preconditioner 31c 32c 33c Zdenek Johan, Winter 1991. (Fortran 90) 34c---------------------------------------------------------------------- 35c 36 use pointer_data 37 38 include "common.h" 39 include "mpif.h" 40 include "auxmpi.h" 41c 42 dimension y(nshg,ndof), ac(nshg,ndof), 43 & yold(nshg,ndof), acold(nshg,ndof), 44 & x(numnp,nsd), 45 & iBC(nshg), BC(nshg,ndofBC), 46 & res(nshg,nflow), 47 & BDiag(nshg,nflow,nflow), 48 & HBrg(Kspace+1,*), eBrg(*), 49 & yBrg(*), Rcos(*), 50 & Rsin(*), ilwork(nlwork), 51 & iper(nshg), EGmass(numel,nedof,nedof)!, 52ctoomuchmem & Binv(numel,nedof,nedof) 53c 54 dimension Dy(nshg,nflow), rmes(nshg,nflow), 55 & temp(nshg,nflow), 56 & uBrg(nshg,nflow,Kspace+1) 57c 58 dimension shp(MAXTOP,maxsh,MAXQPT), 59 & shgl(MAXTOP,nsd,maxsh,MAXQPT), 60 & shpb(MAXTOP,maxsh,MAXQPT), 61 & shglb(MAXTOP,nsd,maxsh,MAXQPT) 62 real*8 rerr(nshg,10) 63c 64c.... *******************>> Element Data Formation <<****************** 65c 66c 67c.... set the parameters for flux and surface tension calculations 68c 69c 70 idflx = 0 71 if(idiff >= 1) idflx= idflx + (nflow-1) * nsd 72 if (isurf == 1) idflx=idflx + nsd 73c 74c.... form the LHS matrices, the residual vector, and the block 75c diagonal preconditioner 76c 77 call ElmGMRe(y, ac, x, 78 & shp, shgl, iBC, 79 & BC, shpb, 80 & shglb, res, 81 & rmes, BDiag, iper, 82 & ilwork, EGmass, rerr ) 83 rmes=res ! saving the b vector (residual) 84c 85c.... **********************>> EBE - GMRES <<******************** 86c 87 call timer ('Solver ') 88c 89c.... ------------------------> Initialization <----------------------- 90c 91c 92c.... LU decompose the block diagonals 93c 94 if (iprec .ne. 0) 95 & call i3LU (BDiag, res, 'LU_Fact ') 96 97c 98c.... block diagonal precondition residual 99c 100 call i3LU (BDiag, res, 'forward ') 101c 102c.... initialize Dy 103c 104 Dy = zero 105c 106c.... Pre-precondition the LHS mass matrix and set up the element 107c by element preconditioners 108c 109ctoomuchmemory note that Binv is demoted from huge array to just one 110c real*8 in i3pre because it takes too much memory 111 112 call i3pre (BDiag, Binv, EGmass, ilwork) 113c 114c.... left EBE precondition the residual 115c 116ctoomuchmem call i3Pcond (Binv, res, ilwork, 'L_Pcond ') 117c 118c.... copy res in uBrg(1) 119c 120 uBrg(:,:,1) = res 121c 122c.... calculate norm of residual 123c 124 temp = res**2 125 126 call sumgat (temp, nflow, summed, ilwork) 127 unorm = sqrt(summed) 128c 129c.... check if GMRES iterations are required 130c 131 iKs = 0 132 lGMRES = 0 133c 134c.... if we are down to machine precision, don't bother solving 135c 136 if (unorm .lt. 100.*epsM**2) goto 3000 137c 138c.... set up tolerance of the Hessenberg's problem 139c 140 epsnrm = etol * unorm 141c 142c.... ------------------------> GMRES Loop <------------------------- 143c 144c.... loop through GMRES cycles 145c 146 do 2000 mGMRES = 1, nGMRES 147 lGMRES = mGMRES - 1 148c 149 if (lGMRES .gt. 0) then 150c 151c.... if GMRES restarts are necessary, calculate R - A x 152c 153c 154c.... right precondition Dy 155c 156 temp = Dy 157 158ctoomuchmem call i3Pcond (Binv, temp, ilwork, 'R_Pcond ') 159c 160c.... perform the A x product 161c 162 call Au1GMR (EGmass, temp, ilwork, iBC,iper) 163c 164c.... periodic nodes have to assemble results to their partners 165c 166 call bc3per (iBC, temp, iper, ilwork, nflow) 167c 168c.... left preconditioning 169c 170ctoomuchmem call i3Pcond (Binv, temp, ilwork, 'L_Pcond ') 171c 172c.... subtract A x from residual and calculate the norm 173c 174 temp = res - temp 175 uBrg(:,:,1) = temp 176c 177c.... calculate the norm 178c 179 temp = temp**2 180 call sumgat (temp, nflow, summed, ilwork) 181 unorm = sqrt(summed) 182c 183c.... flop count 184c 185 ! flops = flops + nflow*nshg+nshg 186c 187 endif 188c 189c.... set up RHS of the Hessenberg's problem 190c 191 call clear (eBrg, Kspace+1) 192 eBrg(1) = unorm 193c 194c.... normalize the first Krylov vector 195c 196 uBrg(:,:,1) = uBrg(:,:,1) / unorm 197c 198c.... loop through GMRES iterations 199c 200 do 1000 iK = 1, Kspace 201 iKs = iK 202 203 uBrg(:,:,iKs+1) = uBrg(:,:,iKs) 204c 205c.... right EBE precondition the LHS ( u_{i+1} <-- inverse(U) u_i ) 206c 207ctoomuchmem call i3Pcond (Binv, uBrg(:,:,iKs+1), ilwork, 'R_Pcond ') 208c 209c.... Au product ( u_{i+1} <-- EGmass u_{i+1} ) 210c 211 call Au1GMR ( EGmass, uBrg(:,:,iKs+1), ilwork, iBC,iper) 212c 213c.... periodic nodes have to assemble results to their partners 214c 215 call bc3per (iBC, uBrg(:,:,iKs+1), iper, ilwork, nflow) 216 217c 218c.... left EBE precondition the LHS ( u_{i+1} <-- inverse(L) u_{i+1} ) 219c 220ctoomuchmem call i3Pcond (Binv, uBrg(:,:,iKs+1), ilwork, 'L_Pcond ') 221c 222c.... orthogonalize and get the norm 223c 224 do jK = 1, iKs+1 225c 226 if (jK .eq. 1) then 227c 228 temp = uBrg(:,:,iKs+1) * uBrg(:,:,1) ! {u_{i+1}*u_1} vector 229 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1) 230c 231 else 232c 233c project off jK-1 vector 234c 235 uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) - beta * uBrg(:,:,jK-1) 236c 237 temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector 238 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j) 239c 240 endif 241c 242 HBrg(jK,iKs) = beta ! put this in the Hessenberg Matrix 243c 244 enddo 245c 246 ! flops = flops + (3*iKs+1)*nflow*numnp+(iKs+1)*numnp 247c 248c the last inner product was with what was left of the vector (after 249c projecting off all of the previous vectors 250c 251 unorm = sqrt(beta) 252 HBrg(iKs+1,iKs) = unorm ! this fills the 1 sub diagonal band 253c 254c.... normalize the Krylov vector 255c 256 uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm ! normalize the next Krylov 257c vector 258c 259c.... construct and reduce the Hessenberg Matrix 260c since there is only one subdiagonal we can use a Givens rotation to 261c rotate off each subdiagonal AS IT IS FORMED. We do this because it 262c allows us to check progress of solution and quit when satisfied. Note 263c that all future K vects will put a subdiagonal in the next column so 264c there is no penalty to work ahead as the rotation for the next vector 265c will be unaffected by this rotation. 266 267c 268c H Y = E ========> R_i H Y = R_i E 269c 270 do jK = 1, iKs-1 271 tmp = Rcos(jK) * HBrg(jK, iKs) + 272 & Rsin(jK) * HBrg(jK+1,iKs) 273 HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK, iKs) + 274 & Rcos(jK) * HBrg(jK+1,iKs) 275 HBrg(jK, iKs) = tmp 276 enddo 277c 278 tmp = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2) 279 Rcos(iKs) = HBrg(iKs, iKs) / tmp 280 Rsin(iKs) = HBrg(iKs+1,iKs) / tmp 281 HBrg(iKs, iKs) = tmp 282 HBrg(iKs+1,iKs) = zero 283c 284c.... rotate eBrg R_i E 285c 286 tmp = Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1) 287 eBrg(iKs+1) =-Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1) 288 eBrg(iKs) = tmp 289c 290c.... check for convergence 291c 292 ntotGM = ntotGM + 1 293 echeck=abs(eBrg(iKs+1)) 294 if (echeck .le. epsnrm) exit 295c 296c.... end of GMRES iteration loop 297c 298 1000 continue 299c 300c.... -------------------------> Solution <------------------------ 301c 302c.... if converged or end of Krylov space 303c 304c.... solve for yBrg 305c 306 do jK = iKs, 1, -1 307 yBrg(jK) = eBrg(jK) / HBrg(jK,jK) 308 do lK = 1, jK-1 309 eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK) 310 enddo 311 enddo 312c 313c.... update Dy 314c 315 do jK = 1, iKs 316 Dy = Dy + yBrg(jK) * uBrg(:,:,jK) 317 enddo 318c 319c.... flop count 320c 321 ! flops = flops + (3*iKs+1)*nflow*nshg 322c 323c.... check for convergence 324c 325 326 echeck=abs(eBrg(iKs+1)) 327 if (echeck .le. epsnrm) exit 328 if(myrank.eq.master) write(*,*)'solver tolerance %satisfaction', 329 & (one-echeck/unorm)/(one-etol)*100 330c 331c.... end of mGMRES loop 332c 333 2000 continue 334c 335c.... ------------------------> Converged <------------------------ 336c 337c.... if converged 338c 339 3000 continue 340c 341c.... back EBE precondition the results 342c 343ctoomuchmem call i3Pcond (Binv, Dy, ilwork, 'R_Pcond ') 344c 345c.... back block-diagonal precondition the results 346c 347 call i3LU (BDiag, Dy, 'backward') 348c 349c 350c.... output the statistics 351c 352 call rstat (res, ilwork,rmes) 353c 354c.... stop the timer 355c 356 3002 continue ! no solve just res. 357 call timer ('Back ') 358c 359c.... end 360c 361 return 362 end 363 364 365 366 367 368 subroutine SolGMRs(y, ac, yold, acold, 369 & x, iBC, BC, 370 & col, row, lhsk, 371 & res, BDiag, HBrg, eBrg, 372 & yBrg, Rcos, Rsin, iper, 373 & ilwork, shp, shgl, shpb, 374 & shglb, Dy, rerr) 375c 376c---------------------------------------------------------------------- 377c 378c This is the preconditioned GMRES driver routine. 379c 380c input: 381c y (nshg,ndof) : Y-variables at n+alpha_v 382c ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 383c yold (nshg,ndof) : Y-variables at beginning of step 384c acold (nshg,ndof) : Primvar. accel. variable at begng step 385c x (numnp,nsd) : node coordinates 386c iBC (nshg) : BC codes 387c BC (nshg,ndofBC) : BC constraint parameters 388c HBrg (Kspace+1,Kspace) : Hessenberg matrix (LHS matrix) 389c eBrg (Kspace+1) : RHS of Hessenberg minim. problem 390c yBrg (Kspace) : solution of Hessenberg minim. problem 391c Rcos (Kspace) : Rotational cosine of QR algorithm 392c Rsin (Kspace) : Rotational sine of QR algorithm 393c shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 394c shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 395c 396c output: 397c res (nshg,nflow) : preconditioned residual 398c BDiag (nshg,nflow,nflow) : block-diagonal preconditioner 399c 400c 401c Zdenek Johan, Winter 1991. (Fortran 90) 402c---------------------------------------------------------------------- 403c 404 use pointer_data 405 406 include "common.h" 407 include "mpif.h" 408 include "auxmpi.h" 409c 410 integer col(nshg+1), row(nnz*nshg) 411 real*8 lhsK(nflow*nflow,nnz_tot) 412 413 414 dimension y(nshg,ndof), ac(nshg,ndof), 415 & yold(nshg,ndof), acold(nshg,ndof), 416 & x(numnp,nsd), 417 & iBC(nshg), BC(nshg,ndofBC), 418 & res(nshg,nflow), 419 & BDiag(nshg,nflow,nflow), 420 & HBrg(Kspace+1,Kspace), eBrg(Kspace+1), 421 & yBrg(Kspace), Rcos(Kspace), 422 & Rsin(Kspace), ilwork(nlwork), 423 & iper(nshg) 424c 425 dimension Dy(nshg,nflow), rmes(nshg,nflow), 426 & temp(nshg,nflow), 427 & uBrg(nshg,nflow,Kspace+1) 428c 429 dimension shp(MAXTOP,maxsh,MAXQPT), 430 & shgl(MAXTOP,nsd,maxsh,MAXQPT), 431 & shpb(MAXTOP,maxsh,MAXQPT), 432 & shglb(MAXTOP,nsd,maxsh,MAXQPT) 433 real*8 rerr(nshg,10) 434c 435c 436c.... *******************>> Element Data Formation <<****************** 437c 438c 439c.... set the parameters for flux and surface tension calculations 440c 441c 442 idflx = 0 443 if(idiff >= 1) idflx= idflx + (nflow-1) * nsd 444 if (isurf == 1) idflx=idflx + nsd 445c 446c.... form the LHS matrices, the residual vector, and the block 447c diagonal preconditioner 448c 449 call ElmGMRs(y, ac, x, 450 & shp, shgl, iBC, 451 & BC, shpb, 452 & shglb, res, 453 & rmes, BDiag, iper, 454 & ilwork, lhsK, col, 455 & row, rerr ) 456 rmes=res ! saving the b vector (residual) 457c 458 459 call tnanq(res,5, 'res_egmr') 460 call tnanq(BDiag,25, 'bdg_egmr') 461c 462c.... **********************>> EBE - GMRES <<******************** 463c 464 call timer ('Solver ') 465c 466c.... ------------------------> Initialization <----------------------- 467c 468c 469c.... LU decompose the block diagonals 470c 471 if (iprec .ne. 0) then 472 call i3LU (BDiag, res, 'LU_Fact ') 473 if (numpe > 1) then 474 call commu (BDiag , ilwork, nflow*nflow , 'out') 475 endif 476 endif 477c 478c.... block diagonal precondition residual 479c 480 call i3LU (BDiag, res, 'forward ') 481! from this point forward b is btilde (Preconditioned residual) 482c 483c Check the residual for divering trend 484c 485 call rstatCheck(res,ilwork,y,ac) 486c 487c.... initialize Dy 488c 489 Dy = zero 490c 491c.... Pre-precondition the LHS mass matrix and set up the sparse 492c preconditioners 493c 494 495 if(lhs.eq.1) call Spsi3pre (BDiag, lhsK, col, row) 496c 497c.... copy res in uBrg(1) 498c 499 uBrg(:,:,1) = res 500c 501c.... calculate norm of residual 502c 503 temp = res**2 504 505 call sumgat (temp, nflow, summed, ilwork) 506 unorm = sqrt(summed) 507c 508c.... check if GMRES iterations are required 509c 510 iKs = 0 511 lGMRESs = 0 512c 513c.... if we are down to machine precision, don't bother solving 514c 515 if (unorm .lt. 100.*epsM**2) goto 3000 516c 517c.... set up tolerance of the Hessenberg's problem 518c 519 epsnrm = etol * unorm 520c 521c.... ------------------------> GMRES Loop <------------------------- 522c 523c.... loop through GMRES cycles 524c 525 do 2000 mGMRES = 1, nGMRES 526 lGMRESs = mGMRES - 1 527c 528 if (lGMRES .gt. 0) then 529c 530c.... if GMRES restarts are necessary, calculate R - A x 531c 532c 533c.... right precondition Dy 534c 535 temp = Dy 536 537c 538c.... perform the A x product 539c 540 call SparseAp (iper,ilwork,iBC, col, row, lhsK, temp) 541c call tnanq(temp,5, 'q_spAPrs') 542 543c 544c.... periodic nodes have to assemble results to their partners 545c 546 call bc3per (iBC, temp, iper, ilwork, nflow) 547c call tnanq(temp,5, 'q_BCprs') 548c 549c.... subtract A x from residual and calculate the norm 550c 551 temp = res - temp 552 uBrg(:,:,1) = temp 553c 554c.... calculate the norm 555c 556 temp = temp**2 557 call sumgat (temp, nflow, summed, ilwork) 558 unorm = sqrt(summed) 559c 560c.... flop count 561c 562 ! flops = flops + nflow*nshg+nshg 563c 564 endif 565c 566c.... set up RHS of the Hessenberg's problem 567c 568 call clear (eBrg, Kspace+1) 569 eBrg(1) = unorm 570c 571c.... normalize the first Krylov vector 572c 573 uBrg(:,:,1) = uBrg(:,:,1) / unorm 574c 575c.... loop through GMRES iterations 576c 577 do 1000 iK = 1, Kspace 578 iKs = iK 579 580 uBrg(:,:,iKs+1) = uBrg(:,:,iKs) 581c 582c.... Au product ( u_{i+1} <-- EGmass u_{i+1} ) 583c 584 call SparseAp (iper, ilwork, iBC, 585 & col, row, lhsK, 586 & uBrg(:,:,iKs+1) ) 587c call tnanq(uBrg(:,:,iKS+1),5, 'q_spAP') 588 589c 590c.... periodic nodes have to assemble results to their partners 591c 592 call bc3per (iBC, uBrg(:,:,iKs+1), iper, ilwork, nflow) 593c call tnanq(uBrg(:,:,iKS+1),5, 'q_bc') 594 595c 596c.... orthogonalize and get the norm 597c 598 do jK = 1, iKs+1 599c 600 if (jK .eq. 1) then 601c 602 temp = uBrg(:,:,iKs+1) * uBrg(:,:,1) ! {u_{i+1}*u_1} vector 603 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1) 604c 605 else 606c 607c project off jK-1 vector 608c 609 uBrg(:,:,iKs+1)=uBrg(:,:,iKs+1)-beta * uBrg(:,:,jK-1) 610c 611 temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector 612 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j) 613c 614 endif 615c 616 HBrg(jK,iKs) = beta ! put this in the Hessenberg Matrix 617c 618 enddo 619c 620 ! flops = flops + (3*iKs+1)*nflow*numnp+(iKs+1)*numnp 621c 622c the last inner product was with what was left of the vector (after 623c projecting off all of the previous vectors 624c 625 if(beta.le.0) write(*,*) 'beta in solgmr non-positive' 626 unorm = sqrt(beta) 627 HBrg(iKs+1,iKs) = unorm ! this fills the 1 sub diagonal band 628c 629c.... normalize the Krylov vector 630c 631 uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm ! normalize the next Krylov 632c vector 633c 634c.... construct and reduce the Hessenberg Matrix 635c since there is only one subdiagonal we can use a Givens rotation to 636c rotate off each subdiagonal AS IT IS FORMED. We do this because it 637c allows us to check progress of solution and quit when satisfied. Note 638c that all future K vects will put a subdiagonal in the next column so 639c there is no penalty to work ahead as the rotation for the next vector 640c will be unaffected by this rotation. 641 642c 643c H Y = E ========> R_i H Y = R_i E 644c 645 do jK = 1, iKs-1 646 tmp = Rcos(jK) * HBrg(jK, iKs) + 647 & Rsin(jK) * HBrg(jK+1,iKs) 648 HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK, iKs) + 649 & Rcos(jK) * HBrg(jK+1,iKs) 650 HBrg(jK, iKs) = tmp 651 enddo 652c 653 tmp = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2) 654 Rcos(iKs) = HBrg(iKs, iKs) / tmp 655 Rsin(iKs) = HBrg(iKs+1,iKs) / tmp 656 HBrg(iKs, iKs)= tmp 657 HBrg(iKs+1,iKs)= zero 658c 659c.... rotate eBrg R_i E 660c 661 tmp = Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1) 662 eBrg(iKs+1)=-Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1) 663 eBrg(iKs) = tmp 664c 665c.... check for convergence 666c 667 ntotGM = ntotGM + 1 668 echeck=abs(eBrg(iKs+1)) 669 if (echeck .le. epsnrm.and. iKs .ge. minIters) exit 670c 671c.... end of GMRES iteration loop 672c 673 1000 continue 674c 675c.... -------------------------> Solution <------------------------ 676c 677c.... if converged or end of Krylov space 678c 679c.... solve for yBrg 680c 681 do jK = iKs, 1, -1 682 yBrg(jK) = eBrg(jK) / HBrg(jK,jK) 683 do lK = 1, jK-1 684 eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK) 685 enddo 686 enddo 687c 688c.... update Dy 689c 690 do jK = 1, iKs 691 Dy = Dy + yBrg(jK) * uBrg(:,:,jK) 692 enddo 693c 694c.... flop count 695c 696 ! flops = flops + (3*iKs+1)*nflow*nshg 697c 698c.... check for convergence 699c 700 echeck=abs(eBrg(iKs+1)) 701 if (echeck .le. epsnrm) exit 702! if(myrank.eq.master) write(*,*)'solver tolerance %satisfaction', 703! & (one-echeck*etol/epsnrm)/(one-etol)*100 704 705c 706c.... end of mGMRES loop 707c 708 2000 continue 709c 710c.... ------------------------> Converged <------------------------ 711c 712c.... if converged 713c 714 3000 continue 715 716c 717c 718c.... back block-diagonal precondition the results 719c 720 call i3LU (BDiag, Dy, 'backward') 721c 722c 723c.... output the statistics 724c 725 call rstat (res, ilwork,rmes) 726 727 if(myrank.eq.master) then 728 if (echeck .le. epsnrm) then 729 write(*,*) 730 else 731 write(*,*)'solver tolerance %satisfaction', 732 & (one-echeck*etol/epsnrm)/(one-etol)*100 733 endif 734 endif 735c 736c.... stop the timer 737c 738 3002 continue ! no solve just res. 739 call timer ('Back ') 740c 741c.... end 742c 743 return 744 end 745 746 subroutine SolGMRSclr(y, ac, yold, 747 & acold, EGmasst, 748 & x, elDw, 749 & iBC, BC, 750 & rest, HBrg, eBrg, 751 & yBrg, Rcos, Rsin, iper, 752 & ilwork, 753 & shp, shgl, 754 & shpb, shglb, Dyt) 755c 756c---------------------------------------------------------------------- 757c 758c This is the preconditioned GMRES driver routine. 759c 760c input: 761c y (nshg,ndof) : Y-variables at n+alpha_f 762c ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 763c yold (nshg,ndof) : Y-variables at beginning of step 764c acold (nshg,ndof) : Primvar. accel. variable at begng step 765c x (numnp,nsd) : node coordinates 766c iBC (nshg) : BC codes 767c BC (nshg,ndofBC) : BC constraint parameters 768c iper (nshg) : periodic nodal information 769c 770c output: 771c y (nshg,ndof) : Y-variables at n+alpha_f 772c ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 773c HBrg (Kspace+1,Kspace) : Hessenberg matrix (LHS matrix) 774c eBrg (Kspace+1) : RHS of Hessenberg minim. problem 775c yBrg (Kspace) : solution of Hessenberg minim. problem 776c Rcos (Kspace) : Rotational cosine of QR algorithm 777c Rsin (Kspace) : Rotational sine of QR algorithm 778c output: 779c rest (numnp) : preconditioned residual 780c 781c 782c Zdenek Johan, Winter 1991. (Fortran 90) 783c---------------------------------------------------------------------- 784c 785 use pointer_data 786 787 include "common.h" 788 include "mpif.h" 789 include "auxmpi.h" 790c 791 dimension y(nshg,ndof), ac(nshg,ndof), 792 & yold(nshg,ndof), acold(nshg,ndof), 793 & x(numnp,nsd), 794 & iBC(nshg), BC(nshg,ndofBC), 795 & rest(nshg), 796 & Diag(nshg), 797 & HBrg(Kspace+1,*), eBrg(*), 798 & yBrg(*), Rcos(*), 799 & Rsin(*), ilwork(nlwork), 800 & iper(nshg), EGmasst(numel,nshape,nshape) 801c 802 dimension Dyt(nshg), rmest(nshg), 803 & tempt(nshg), Dinv(nshg), 804 & uBrgt(nshg,Kspace+1) 805c 806 dimension shp(MAXTOP,maxsh,MAXQPT), 807 & shgl(MAXTOP,nsd,maxsh,MAXQPT), 808 & shpb(MAXTOP,maxsh,MAXQPT), 809 & shglb(MAXTOP,nsd,maxsh,MAXQPT) 810 real*8 elDw(numel) 811c 812c.... *******************>> Element Data Formation <<****************** 813c 814c 815c.... form the LHS matrices, the residual vector, and the block 816c diagonal preconditioner 817c 818 call ElmGMRSclr(y, ac, 819 & x, elDw, shp, shgl, 820 & iBC, BC, 821 & shpb, shglb, 822 & rest, 823 & rmest, Diag, iper, 824 & ilwork, EGmasst) 825c 826c.... **********************>> EBE - GMRES <<******************** 827c 828 call timer ('Solver ') 829c 830c.... ------------------------> Initialization <----------------------- 831c 832c 833 id = isclr+5 834c.... initialize Dy 835c 836 Dyt = zero 837c 838c.... Right preconditioner 839c 840 call i3preSclr(Diag, Dinv, EGmassT, ilwork) 841c 842c Check the residual for divering trend 843c 844 845 call rstatCheckSclr(rest,ilwork,y,ac) 846 847c 848c.... copy rest in uBrgt(1) 849c 850 uBrgt(:,1) = rest 851c 852c.... calculate norm of residual 853c 854 tempt = rest**2 855 856 call sumgat (tempt, 1, summed, ilwork) 857 unorm = sqrt(summed) 858c 859c.... check if GMRES iterations are required 860c 861 iKss = 0 862 lGMRESt = 0 863c 864c.... if we are down to machine precision, don't bother solving 865c 866 if (unorm .lt. 100.*epsM**2) goto 3000 867c 868c.... set up tolerance of the Hessenberg's problem 869c 870 epsnrm = etol * unorm 871c 872c.... ------------------------> GMRES Loop <------------------------- 873c 874c.... loop through GMRES cycles 875c 876 do 2000 mGMRES = 1, nGMRES 877 lGMRESt = mGMRES - 1 878c 879 if (lGMRESt .gt. 0) then 880c 881c.... if GMRES restarts are necessary, calculate R - A x 882c 883c 884 885c.... perform the A x product 886c 887 call Au1GMRSclr (EGmasst, tempt, ilwork, iper) 888c 889c.... periodic nodes have to assemble results to their partners 890c 891c call bc3perSclr (iBC, tempt, iper) 892c 893c.... subtract A x from residual and calculate the norm 894c 895 tempt = rest - tempt 896 uBrgt(:,1) = tempt 897c 898c.... calculate the norm 899c 900 tempt = tempt**2 901 call sumgat (tempt, 1, summed, ilwork) 902 unorm = sqrt(summed) 903c 904c.... flop count 905c 906 ! flops = flops + ndof*numnp+numnp 907c 908 endif 909c 910c.... set up RHS of the Hessenberg's problem 911c 912 call clear (eBrg, Kspace+1) 913 call clear (HBrg, Kspace+1) 914 eBrg(1) = unorm 915c 916c.... normalize the first Krylov vector 917c 918 uBrgt(:,1) = uBrgt(:,1) / unorm 919c 920c.... loop through GMRES iterations 921c 922 do 1000 iK = 1, Kspace 923 iKss = iK 924 925 uBrgt(:,iKss+1) = uBrgt(:,iKss) 926 927c.... Au product ( u_{i+1} <-- EGmass u_{i+1} ) 928c 929 call Au1GMRSclr ( EGmasst, uBrgt(:,iKss+1), ilwork, iper ) 930 931c 932c.... periodic nodes have to assemble results to their partners 933c 934 call bc3perSclr (iBC, uBrgt(:,iKss+1), iper) 935c 936c.... orthogonalize and get the norm 937c 938 do jK = 1, iKss+1 939c 940 if (jK .eq. 1) then 941c 942 tempt = uBrgt(:,iKss+1) * uBrgt(:,1) ! {u_{i+1}*u_1} vector 943 call sumgat (tempt, 1, beta, ilwork) ! sum vector=(u_{i+1},u_1) 944c 945 else 946c 947c project off jK-1 vector 948c 949 uBrgt(:,iKss+1) = uBrgt(:,iKss+1) - beta * uBrgt(:,jK-1) 950c 951 tempt = uBrgt(:,iKss+1) * uBrgt(:,jK) !{u_{i+1}*u_j} vector 952 call sumgat (tempt, 1, beta, ilwork) ! sum vector=(u_{i+1},u_j) 953c 954 endif 955c 956 HBrg(jK,iKss) = beta ! put this in the Hessenberg Matrix 957c 958 enddo 959c 960 ! flops = flops + (3*iKss+1)*ndof*numnp+(iKss+1)*numnp 961c 962c the last inner product was with what was left of the vector (after 963c projecting off all of the previous vectors 964c 965 unorm = sqrt(beta) 966 HBrg(iKss+1,iKss) = unorm ! this fills the 1 sub diagonal band 967c 968c.... normalize the Krylov vector 969c 970 uBrgt(:,iKss+1) = uBrgt(:,iKss+1) / unorm ! normalize the next Krylov 971c vector 972c 973c.... construct and reduce the Hessenberg Matrix 974c since there is only one subdiagonal we can use a Givens rotation to 975c rotate off each subdiagonal AS IT IS FORMED. We do this because it 976c allows us to check progress of solution and quit when satisfied. Note 977c that all future K vects will put a subdiagonal in the next column so 978c there is no penalty to work ahead as the rotation for the next vector 979c will be unaffected by this rotation. 980 981c 982c H Y = E ========> R_i H Y = R_i E 983c 984 do jK = 1, iKss-1 985 tmp = Rcos(jK) * HBrg(jK, iKss) + 986 & Rsin(jK) * HBrg(jK+1,iKss) 987 HBrg(jK+1,iKss) = -Rsin(jK) * HBrg(jK, iKss) + 988 & Rcos(jK) * HBrg(jK+1,iKss) 989 HBrg(jK, iKss) = tmp 990 enddo 991c 992 tmp = sqrt(HBrg(iKss,iKss)**2 + HBrg(iKss+1,iKss)**2) 993 Rcos(iKss) = HBrg(iKss, iKss) / tmp 994 Rsin(iKss) = HBrg(iKss+1,iKss) / tmp 995 HBrg(iKss, iKss) = tmp 996 HBrg(iKss+1,iKss) = zero 997c 998c.... rotate eBrg R_i E 999c 1000 tmp = Rcos(iKss)*eBrg(iKss) + Rsin(iKss)*eBrg(iKss+1) 1001 eBrg(iKss+1)=-Rsin(iKss)*eBrg(iKss) + Rcos(iKss)*eBrg(iKss+1) 1002 eBrg(iKss) = tmp 1003c 1004c.... check for convergence 1005c 1006 ercheck=eBrg(iKss+1) 1007 ntotGMs = ntotGMs + 1 1008 if (abs(eBrg(iKss+1)) .le. epsnrm) exit 1009c 1010c.... end of GMRES iteration loop 1011c 1012 1000 continue 1013c 1014c.... -------------------------> Solution <------------------------ 1015c 1016c.... if converged or end of Krylov space 1017c 1018c.... solve for yBrg 1019c 1020 do jK = iKss, 1, -1 1021 yBrg(jK) = eBrg(jK) / HBrg(jK,jK) 1022 do lK = 1, jK-1 1023 eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK) 1024 enddo 1025 enddo 1026c 1027c.... update Dy 1028c 1029 do jK = 1, iKss 1030 Dyt = Dyt + yBrg(jK) * uBrgt(:,jK) 1031 enddo 1032c 1033c.... flop count 1034c 1035 ! flops = flops + (3*iKss+1)*ndof*numnp 1036c 1037c.... check for convergence 1038c 1039 if (abs(eBrg(iKss+1)) .le. epsnrm) exit 1040c 1041c.... end of mGMRES loop 1042c 1043 2000 continue 1044c 1045c.... ------------------------> Converged <------------------------ 1046c 1047c.... if converged 1048c 1049 3000 continue 1050c 1051c.... back precondition the result 1052c 1053 Dyt(:) = Dyt(:) * Dinv(:) 1054c 1055c.... output the statistics 1056c 1057 call rstatSclr(rest, ilwork) 1058c.... stop the timer 1059c 1060 3002 continue ! no solve just res. 1061 call timer ('Back ') 1062c 1063c.... end 1064c 1065 return 1066 end 1067 1068 1069