1 subroutine SolMFG (y, ac, yold, acold, 2 & x, 3 & iBC, BC, res, 4 & BDiag, HBrg, eBrg, 5 & yBrg, Rcos, Rsin, iper, 6 & ilwork, shp, shgl, 7 & shpb, shglb, Dy, rerr) 8c 9c---------------------------------------------------------------------- 10c 11c This is the preconditioned matrix-free GMRES driver routine. 12c 13c input: 14c y (nshg,ndof) : Y-variables at n+alpha_v 15c ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 16c yold (nshg,ndof) : Y-variables at beginning of step 17c acold (nshg,ndof) : Primvar. accel. variable at begng step 18c x (numnp,nsd) : node coordinates 19c iBC (nshg) : BC codes 20c BC (nshg,ndofBC) : BC constraint parameters 21c HBrg (Kspace+1,Kspace) : Hessenberg matrix (LHS matrix) 22c eBrg (Kspace+1) : RHS of Hessenberg minim. problem 23c yBrg (Kspace) : solution of Hessenberg minim. problem 24c Rcos (Kspace) : Rotational cosine of QR algorithm 25c Rsin (Kspace) : Rotational sine of QR algorithm 26c shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 27c shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 28c 29c output: 30c res (nshg,ndof) : preconditioned residual 31c BDiag (nshg,ndof,ndof) : block-diagonal preconditioner 32c 33c 34c Zdenek Johan, Winter 1991. (Fortran 90) 35c---------------------------------------------------------------------- 36c 37 include "common.h" 38 include "mpif.h" 39 include "auxmpi.h" 40c 41 dimension y(nshg,ndof), ac(nshg,ndof), 42 & yold(nshg,ndof), acold(nshg,ndof), 43 & x(numnp,nsd), 44 & iBC(nshg), BC(nshg,ndofBC), 45 & res(nshg,nflow), 46 & BDiag(nshg,nflow,nflow), 47 & HBrg(Kspace+1,Kspace), eBrg(Kspace+1), 48 & yBrg(Kspace), Rcos(Kspace), 49 & Rsin(Kspace), ilwork(nlwork), 50 & iper(nshg) 51c 52 dimension Dy(nshg,nflow), rmes(nshg,nflow), 53 & ypre(nshg,nflow), temp(nshg,nflow), 54 & uBrg(nshg,nflow,Kspace+1), ytmp(nshg,nflow) 55 56c 57 dimension shp(MAXTOP,maxsh,MAXQPT), 58 & shgl(MAXTOP,nsd,maxsh,MAXQPT), 59 & shpb(MAXTOP,maxsh,MAXQPT), 60 & shglb(MAXTOP,nsd,maxsh,MAXQPT) 61c 62c.... *******************>> Element Data Formation <<****************** 63c 64c 65c.... set the parameters for flux and surface tension calculations 66c 67 idflx = zero 68 if(idiff >= 1) idflx= idflx + (nflow-1) * nsd 69 if (isurf == 1) idflx=idflx + nsd 70c 71 call ElmMFG (y, ac, x, 72 & shp, shgl, 73 & iBC, BC, 74 & shpb, shglb, 75 & res, rmes, 76 & BDiag, iper, ilwork, 77 & rerr) 78c 79c.... **********************>> Matrix-Free GMRES <<******************** 80c 81c 82c.... start the timer 83c 84 call timer ('Solver ') 85c 86c.... ------------------------> Initialization <----------------------- 87c 88 89c 90c.... LU decompose the block diagonals 91c 92 if (iprec .ne. 0) 93 & call i3LU (BDiag, res, 'LU_Fact ') 94 95c 96c.... block diagonal precondition residual 97c 98 call i3LU (BDiag, res, 'forward ') 99c 100c This is a feature that allows one to take an extra pass just to 101c find the residual at the end of the last solve. 102c 103c$$$ if(iter.ge.(press*nitr) ) then 104c$$$ iKs=0 105c$$$ lGMRES=0 106c$$$ goto 3002 107c$$$ endif 108 109c 110c.... block diagonal precondition modified residual 111c 112 call i3LU (BDiag, rmes, 'forward ') 113 114c 115c.... copy res in uBrg(1) 116c 117 uBrg(:,:,1) = res 118c 119c.... initialize Dy 120c 121 Dy = zero 122c 123c.... block diagonal precondition y-variables 124c 125 ypre(:,:) = y(:,1:nflow) ! ypre is the pre-conditioned, 126 ! unperturbed, base vector 127c 128 call yshuffle(ypre,'new2old ') 129c 130 call i3LU (BDiag, ypre, 'product ') 131c 132c since we will never use ypre in the "new" form again, leave it 133c shuffled 134c 135c call yshuffle(ypre, 'old2new ') 136c 137c.... calculate norm of residual 138c 139 temp = res**2 140 141c call tnanq(temp,5,"res**2 ") 142 143 call sumgat (temp, nflow, summed, ilwork) 144 unorm = sqrt(summed) 145c 146c.... flop count 147c 148! flops = flops + ndof*nshg+nshg 149c 150c.... check if GMRES iterations are required 151c 152 iKs = 0 153 lGMRES = 0 154c 155c.... if we are down to machine precision, don't bother solving 156c 157 if (unorm .lt. 100.*epsM**2) goto 3000 158c 159c.... set up tolerance of the Hessenberg's problem 160c 161 epsnrm = etol * unorm 162c 163c.... compute the finite difference interval 164c 165 if ((iter .eq. 1) .and. (mod(istep,20) .eq. 0)) then 166 call itrFDI (ypre, y, ac, 167 & x, rmes, 168 & res, BDiag, iBC, 169 & BC, iper, 170 & ilwork, shp, shgl, 171 & shpb, shglb) 172 endif 173 ires=2 174c 175c.... ------------------------> GMRES Loop <------------------------- 176c 177c.... loop through GMRES cycles 178c 179 do 2000 mGMRES = 1, nGMRES 180 lGMRES = mGMRES - 1 181c 182 if (lGMRES .gt. 0) then 183c 184c.... calculate R - A u 185c 186 call Au2MFG (ypre, y, ac, 187 & x, rmes, 188 & res, Dy, temp, 189 & BDiag, iBC, BC, 190 & iper, ilwork, 191 & shp, shgl, 192 & shpb, shglb) 193 194c 195 uBrg(:,:,1) = temp 196c 197c.... calculate the norm 198c 199 temp = temp**2 200 call sumgat (temp, ndof, summed, ilwork) 201 unorm = sqrt(summed) 202 203c 204c.... flop count 205c 206! flops = flops + ndof*nshg+nshg 207c 208 endif 209c 210c.... set up RHS of the Hessenberg's problem 211c 212 call clear (eBrg, Kspace+1) 213 eBrg(1) = unorm 214c 215c.... normalize the first Krylov vector 216c 217 uBrg(:,:,1) = uBrg(:,:,1) / unorm 218c 219c.... loop through GMRES iterations 220c 221 do 1000 iK = 1, Kspace 222 iKs = iK 223c 224c.... Au product 225c 226 temp = uBrg(:,:,iKs) 227c 228 call Au1MFG (ypre, y, ac, 229 & x, rmes, 230 & res, temp, BDiag, 231 & iBC, BC, 232 & iper, ilwork, 233 & shp, shgl, 234 & shpb, shglb) 235c 236 uBrg(:,:,iKs+1) = temp ! u_{i+1}= J u_i In Johan Thesis p 15c 237c$$$c 238c$$$c.... debug 239c$$$c 240c$$$ do i=1,nshg 241c$$$ write(78,'(5(f14.7))')(uBrg(i,j,iKs+1),j=1,5) 242c$$$ enddo 243c$$$ stop 244c 245c.... orthogonalize and get the norm 246c 247 do jK = 1, iKs+1 248c 249 if (jK .eq. 1) then 250c 251 temp = uBrg(:,:,iKs+1) * uBrg(:,:,1) ! {u_{i+1}*u_1} vector 252 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1) 253c 254 else 255c 256c project off jK-1 vector 257c 258 uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) - beta * uBrg(:,:,jK-1) 259c 260 temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector 261 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j) 262c 263 endif 264c 265 HBrg(jK,iKs) = beta ! put this in the Hessenberg Matrix 266c 267 enddo 268c 269! flops = flops + (3*iKs+1)*nflow*nshg+(iKs+1)*nshg 270c 271c the last inner product was with what was left of the vector (after 272c projecting off all of the previous vectors 273c 274 unorm = sqrt(beta) 275 HBrg(iKs+1,iKs) = unorm ! this fills the 1 sub diagonal band 276c 277c.... normalize the Krylov vector 278c 279 uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm ! normalize the next Krylov 280c vector 281c 282c.... construct and reduce the Hessenberg Matrix 283c since there is only one subdiagonal we can use a Givens rotation to 284c rotate off each subdiagonal AS IT IS FORMED. We do this because it 285c allows us to check progress of solution and quit when satisfied. Note 286c that all future K vects will put a subdiagonal in the next column so 287c there is no penalty to work ahead as the rotation for the next vector 288c will be unaffected by this rotation. 289 290c 291c H Y = E ========> R_i H Y = R_i E 292c 293 do jK = 1, iKs-1 294 tmp = Rcos(jK) * HBrg(jK, iKs) + 295 & Rsin(jK) * HBrg(jK+1,iKs) 296 HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK, iKs) + 297 & Rcos(jK) * HBrg(jK+1,iKs) 298 HBrg(jK, iKs) = tmp 299 enddo 300c 301 tmp = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2) 302 Rcos(iKs) = HBrg(iKs, iKs) / tmp 303 Rsin(iKs) = HBrg(iKs+1,iKs) / tmp 304 HBrg(iKs, iKs) = tmp 305 HBrg(iKs+1,iKs) = zero 306c 307c.... rotate eBrg R_i E 308c 309 tmp = Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1) 310 eBrg(iKs+1) = -Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1) 311 eBrg(iKs) = tmp 312c 313c.... check for convergence 314c 315 ntotGM = ntotGM + 1 316 echeck=abs(eBrg(iKs+1)) 317 if (echeck .le. epsnrm.and. iKs .ge. minIters) exit 318c 319c.... end of GMRES iteration loop 320c 3211000 continue 322c 323c.... -------------------------> Solution <------------------------ 324c 325c.... if converged or end of Krylov space 326c 327c.... solve for yBrg 328c 329 do jK = iKs, 1, -1 330 yBrg(jK) = eBrg(jK) / HBrg(jK,jK) 331 do lK = 1, jK-1 332 eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK) 333 enddo 334 enddo 335c 336c.... update Dy 337c 338 do jK = 1, iKs 339 Dy = Dy + yBrg(jK) * uBrg(:,:,jK) 340 enddo 341c 342c.... flop count 343c 344! flops = flops + (3*iKs+1)*nflow*nshg 345c 346c.... check for convergence 347c 348 if (abs(eBrg(iKs+1)) .le. epsnrm) exit 349c 350c.... end of mGMRES loop 351c 3522000 continue 353c 354c.... ------------------------> Converged <------------------------ 355c 356c.... if converged 357c 3583000 continue 359 360c 361c.... back precondition the results 362c 363 call i3LU (BDiag, Dy, 'backward') 364c 365c.... output the statistics 366c 367 call rstat (res, ilwork) 368c 369c ... reset ires to 3 again (asires changed ires to 2) 370c 371 ires = 3 372c 373c.... stop the timer 374c 3753002 continue ! no solve just res. 376 call timer ('Back ') 377c 378c.... end 379c 380 return 381 end 382