1 2c--------------------------------------------------------------------------- 3 4 subroutine SUPGstress (y, ac, x, qres, ien, xmudmi, 5 & cdelsq, shgl, shp, Qwtf, shglo, shpo, stress, diss, vol) 6 7 use stats 8 use rlssave ! Use the resolved Leonard stresses at the nodes. 9 10 include "common.h" 11 12 dimension y(nshg,5), ac(nshg,5), 13 & x(numnp,nsd), ien(npro,nshl), 14 & shp(nshl,ngauss), shpfun(npro,nshl), 15 & shgl(nsd,nshl,ngauss), shg(npro,nshl,nsd), 16 & shglo(nsd,nshl,ngauss), shpo(nshl,ngauss), 17 & Qwtf(ngaussf), acl(npro,nshl,ndof), 18 & yl(npro,nshl,ndof), xl(npro,nenl,nsd) 19 dimension stress(nshg,9), stressl(npro,9), 20 & stressli(npro,9), 21 & dxdxi(npro,nsd,nsd), dxidx(npro,nsd,nsd), 22 & WdetJ(npro), rho(npro), 23 & tmp(npro), aci(npro,nsd), 24 & pres(npro), u1(npro), 25 & u2(npro), u3(npro) 26 dimension qres(nshg,nsd*nsd), ql(npro,nshl,nsd*nsd), 27 & g1yi(npro,ndof), g2yi(npro,ndof), 28 & g3yi(npro,ndof), divqi(npro,3), 29 & src(npro,nsd), Temp(npro), 30 & xx(npro,nsd), 31 & rlsl(npro,nshl,6), rlsli(npro,6), 32 & rLui(npro,3), 33 & tauC(npro), tauM(npro), 34 & tauBar(npro), uBar(npro,nsd) 35 dimension Sij(npro,6), Snorm(npro), 36 & Snorm2(npro), cdelsq(nshg), 37 & xmudmi(npro,ngauss), xmudmif(npro,ngauss), 38 & dissi(npro,3), dissl(npro,3), 39 & voli(npro), voll(npro), 40 & vol(nshg), diss(nshg,3), 41 & rmu(npro) 42 43 real*8 omega(3), divu(npro) 44 45c.... Note that the xmudmi passed in here is 46c.... evaluated at quadrature points of the flow. xmudmif will 47c... be evaluated at the ngaussf quad. pts. 48 49 xmudmif = zero 50 51c.... Debuggin 52 53c xmudmi = zero 54 55c.... Localization 56 57 call localy(y, yl, ien, ndofl, 'gather ') 58 call localy(ac, acl, ien, ndofl, 'gather ') 59 call localx(x, xl, ien, nsd, 'gather ') 60 61 if (idiff==1 .or. idiff==3) then ! global reconstruction of qdiff 62 call local (qres, ql, ien, nsd*nsd, 'gather ') 63 endif 64 65 if ( idiff==2 .and. ires .eq. 1 ) then 66 call e3ql (yl, shpo, shglo, 67 & xl, ql, xmudmi, 68 & sgn) 69 endif 70 71 if( (iLES.gt.10).and.(iLES.lt.20)) then ! bardina 72 call local (rls, rlsl, ien, 6, 'gather ') 73 else 74 rlsl = zero 75 endif 76 77c... Now that everything is localized, begin loop over ngaussf quad. pts. 78 79 80 stressl = zero 81 dissl = zero 82 voll = zero 83 84 do intp = 1, ngaussf 85 86c 87c.... -------------> Primitive variables at int. point <-------------- 88c 89 pres = zero 90 u1 = zero 91 u2 = zero 92 u3 = zero 93c 94 do n = 1, nshl 95 pres(:) = pres(:) + shp(n,intp) * yl(:,n,1) 96 u1(:) = u1(:) + shp(n,intp) * yl(:,n,2) 97 u2(:) = u2(:) + shp(n,intp) * yl(:,n,3) 98 u3(:) = u3(:) + shp(n,intp) * yl(:,n,4) 99 enddo 100 101c 102c.... -----------------------> accel. at int. point <---------------------- 103c 104 aci = zero 105 do n = 1, nshl 106 aci(:,1) = aci(:,1) + shp(n,intp) * acl(:,n,2) 107 aci(:,2) = aci(:,2) + shp(n,intp) * acl(:,n,3) 108 aci(:,3) = aci(:,3) + shp(n,intp) * acl(:,n,4) 109 enddo 110 111 112c 113c.... ---------------> Element Metrics at int. point <------------- 114c 115c.... compute the deformation gradient 116c 117 dxdxi = zero 118c 119 do n = 1, nenl 120 dxdxi(:,1,1) = dxdxi(:,1,1) + xl(:,n,1) * shgl(1,n,intp) 121 dxdxi(:,1,2) = dxdxi(:,1,2) + xl(:,n,1) * shgl(2,n,intp) 122 dxdxi(:,1,3) = dxdxi(:,1,3) + xl(:,n,1) * shgl(3,n,intp) 123 dxdxi(:,2,1) = dxdxi(:,2,1) + xl(:,n,2) * shgl(1,n,intp) 124 dxdxi(:,2,2) = dxdxi(:,2,2) + xl(:,n,2) * shgl(2,n,intp) 125 dxdxi(:,2,3) = dxdxi(:,2,3) + xl(:,n,2) * shgl(3,n,intp) 126 dxdxi(:,3,1) = dxdxi(:,3,1) + xl(:,n,3) * shgl(1,n,intp) 127 dxdxi(:,3,2) = dxdxi(:,3,2) + xl(:,n,3) * shgl(2,n,intp) 128 dxdxi(:,3,3) = dxdxi(:,3,3) + xl(:,n,3) * shgl(3,n,intp) 129 enddo 130c 131c.... compute the inverse of deformation gradient 132c 133 dxidx(:,1,1) = dxdxi(:,2,2) * dxdxi(:,3,3) 134 & - dxdxi(:,3,2) * dxdxi(:,2,3) 135 dxidx(:,1,2) = dxdxi(:,3,2) * dxdxi(:,1,3) 136 & - dxdxi(:,1,2) * dxdxi(:,3,3) 137 dxidx(:,1,3) = dxdxi(:,1,2) * dxdxi(:,2,3) 138 & - dxdxi(:,1,3) * dxdxi(:,2,2) 139 tmp = one / ( dxidx(:,1,1) * dxdxi(:,1,1) 140 & + dxidx(:,1,2) * dxdxi(:,2,1) 141 & + dxidx(:,1,3) * dxdxi(:,3,1) ) 142 dxidx(:,1,1) = dxidx(:,1,1) * tmp 143 dxidx(:,1,2) = dxidx(:,1,2) * tmp 144 dxidx(:,1,3) = dxidx(:,1,3) * tmp 145 dxidx(:,2,1) = (dxdxi(:,2,3) * dxdxi(:,3,1) 146 & - dxdxi(:,2,1) * dxdxi(:,3,3)) * tmp 147 dxidx(:,2,2) = (dxdxi(:,1,1) * dxdxi(:,3,3) 148 & - dxdxi(:,3,1) * dxdxi(:,1,3)) * tmp 149 dxidx(:,2,3) = (dxdxi(:,2,1) * dxdxi(:,1,3) 150 & - dxdxi(:,1,1) * dxdxi(:,2,3)) * tmp 151 dxidx(:,3,1) = (dxdxi(:,2,1) * dxdxi(:,3,2) 152 & - dxdxi(:,2,2) * dxdxi(:,3,1)) * tmp 153 dxidx(:,3,2) = (dxdxi(:,3,1) * dxdxi(:,1,2) 154 & - dxdxi(:,1,1) * dxdxi(:,3,2)) * tmp 155 dxidx(:,3,3) = (dxdxi(:,1,1) * dxdxi(:,2,2) 156 & - dxdxi(:,1,2) * dxdxi(:,2,1)) * tmp 157c 158 159 wght=Qwtf(intp) 160 WdetJ = wght / tmp 161 162c Obtain the global gradient of the shape functions at current qpt. 163 164 do n = 1,nshl 165 shg(:,n,1) = (shgl(1,n,intp) * dxidx(:,1,1) 166 & + shgl(2,n,intp) * dxidx(:,2,1) 167 & + shgl(3,n,intp) * dxidx(:,3,1)) 168 shg(:,n,2) = (shgl(1,n,intp) * dxidx(:,1,2) 169 & + shgl(2,n,intp) * dxidx(:,2,2) 170 & + shgl(3,n,intp) * dxidx(:,3,2)) 171 shg(:,n,3) = (shgl(1,n,intp) * dxidx(:,1,3) 172 & + shgl(2,n,intp) * dxidx(:,2,3) 173 & + shgl(3,n,intp) * dxidx(:,3,3)) 174 enddo 175 176c 177c.... compute the global gradient of u and P 178c 179c 180 g1yi = zero 181 g2yi = zero 182 g3yi = zero 183 do n = 1, nshl 184 g1yi(:,1) = g1yi(:,1) + shg(:,n,1) * yl(:,n,1) 185 g1yi(:,2) = g1yi(:,2) + shg(:,n,1) * yl(:,n,2) 186 g1yi(:,3) = g1yi(:,3) + shg(:,n,1) * yl(:,n,3) 187 g1yi(:,4) = g1yi(:,4) + shg(:,n,1) * yl(:,n,4) 188c 189 g2yi(:,1) = g2yi(:,1) + shg(:,n,2) * yl(:,n,1) 190 g2yi(:,2) = g2yi(:,2) + shg(:,n,2) * yl(:,n,2) 191 g2yi(:,3) = g2yi(:,3) + shg(:,n,2) * yl(:,n,3) 192 g2yi(:,4) = g2yi(:,4) + shg(:,n,2) * yl(:,n,4) 193c 194 g3yi(:,1) = g3yi(:,1) + shg(:,n,3) * yl(:,n,1) 195 g3yi(:,2) = g3yi(:,2) + shg(:,n,3) * yl(:,n,2) 196 g3yi(:,3) = g3yi(:,3) + shg(:,n,3) * yl(:,n,3) 197 g3yi(:,4) = g3yi(:,4) + shg(:,n,3) * yl(:,n,4) 198 enddo 199 200c.... Let us build the Sij tensor and its norms 201 202 Sij(:,1) = g1yi(:,2) 203 Sij(:,2) = g2yi(:,3) 204 Sij(:,3) = g3yi(:,4) 205 Sij(:,4) = (g2yi(:,2)+g1yi(:,3))*pt5 206 Sij(:,5) = (g3yi(:,2)+g1yi(:,4))*pt5 207 Sij(:,6) = (g3yi(:,3)+g2yi(:,4))*pt5 208 209 Snorm(:) = Sij(:,1)**2 + Sij(:,2)**2 + Sij(:,3)**2 210 & + two*(Sij(:,4)**2 + Sij(:,5)**2 + Sij(:,6)**2) 211 212 Snorm2(:) = sqrt( two*(Sij(:,1)**2 + Sij(:,2)**2 + Sij(:,3)**2) 213 & + four*(Sij(:,4)**2 + Sij(:,5)**2 + Sij(:,6)**2) ) 214 215c... Let us build xmudmif at current quad pt. a la scatnu.f 216 217 do n = 1,nshl 218 xmudmif(:,intp) = xmudmif(:,intp) + 219 & cdelsq(ien(:,n)) * Snorm2(:)*shp(n,intp) 220 enddo 221 222 rmu=datmat(1,2,1) 223 xmudmif(:,intp)=min(xmudmif(:,intp),1000.0*rmu(:)) ! 224c don't let it get larger than 1000 mu 225 xmudmif(:,intp)=max(xmudmif(:,intp), zero) ! don't let (xmudmi) < 0 226 227c.... Debugging 228 229c xmudmif(:,intp) = rmu(:) 230 231 232c 233c.... get necessary fluid properties (including the updated viscosity) 234c 235 do i = 1, npro 236 do n = 1, nshl 237 shpfun(i,n) = shp(n,intp) 238 enddo 239 enddo 240 241 call getdiff(yl, shpfun, xmudmif,xl, rmu, rho) 242 243 244 divqi = zero 245 if ( idiff >= 1 ) then 246c 247c.... compute divergence of diffusive flux vector, qi,i 248c 249 do n=1, nshl 250 divqi(:,1) = divqi(:,1) + shg(:,n,1)*ql(:,n,1 ) 251 & + shg(:,n,2)*ql(:,n,4 ) 252 & + shg(:,n,3)*ql(:,n,7 ) 253 254 divqi(:,2) = divqi(:,2) + shg(:,n,1)*ql(:,n,2 ) 255 & + shg(:,n,2)*ql(:,n,5 ) 256 & + shg(:,n,3)*ql(:,n,8) 257 258 divqi(:,3) = divqi(:,3) + shg(:,n,1)*ql(:,n,3 ) 259 & + shg(:,n,2)*ql(:,n,6 ) 260 & + shg(:,n,3)*ql(:,n,9 ) 261 262 enddo 263 264 endif ! diffusive flux computation 265 266c 267c.... take care of the body force term here 268c 269 src = zero 270 if(matflg(5,1) .ge. 1) then 271c 272 bfx = datmat(1,5,1) ! Boussinesq, g*alfap 273 bfy = datmat(2,5,1) 274 bfz = datmat(3,5,1) 275 276 select case ( matflg(5,1) ) 277 case ( 1 ) ! standard linear body force 278 src(:,1) = bfx 279 src(:,2) = bfy 280 src(:,3) = bfz 281 case ( 2 ) ! boussinesq body force 282 Temp = zero 283 do n = 1, nshl 284 Temp = Temp + shp(n,intp) * yl(:,n,5) 285 enddo 286 Tref = datmat(2,2,1) 287 src(:,1) = bfx * (Temp(:)-Tref) 288 src(:,2) = bfy * (Temp(:)-Tref) 289 src(:,3) = bfz * (Temp(:)-Tref) 290 case ( 3 ) ! user specified f(x,y,z) 291 xx = zero 292 do n = 1,nenl 293 xx(:,1) = xx(:,1) + shp(n,intp) * xl(:,n,1) 294 xx(:,2) = xx(:,2) + shp(n,intp) * xl(:,n,2) 295 xx(:,3) = xx(:,3) + shp(n,intp) * xl(:,n,3) 296 enddo 297 298 call e3source(xx, src) 299 end select 300 301 endif 302c 303c.... -------------------> Coriolis force <----------------- 304c 305 omag=datmat(3,5,1) ! frame rotation rate 306 if(omag.ne.0) then 307c 308c.... unit vector of axis of rotation currently selecting the i,j,k 309c 310 e1 = one/sqrt(3.0d0) 311 e2 = e1 312 e3 = e1 313 314 omega(1)=omag*e1 315 omega(2)=omag*e2 316 omega(3)=omag*e3 317 318 if(matflg(5,1) .ne. 3) then ! we need to calculate the int pt. coords 319 xx = zero 320 do n = 1,nenl 321 xx(:,1) = xx(:,1) + shp(n,intp) * xl(:,n,1) 322 xx(:,2) = xx(:,2) + shp(n,intp) * xl(:,n,2) 323 xx(:,3) = xx(:,3) + shp(n,intp) * xl(:,n,3) 324 enddo 325 326 endif 327c 328c note that we calculate f as if it contains the usual source 329c plus the Coriolis and the centrifugal forces taken to the rhs (sign change) 330c as long as we are doing SUPG with no accounting for these terms in the 331c LHS this is the only change (which will find its way to the RHS momentum 332c equation (both Galerkin and SUPG parts)). 333c 334c uncomment later if you want rotation always about z axis 335c orig_src - om x om x r - two om x u 336c 337c$$$ src(:,1)=src(:,1)+omega(3)*omega(3)*xx(:,1)+two*omega(3)*u2 338c$$$ src(:,2)=src(:,2)+omega(3)*omega(3)*xx(:,2)-two*omega(3)*u1 339c 340c more general for testing 341c 342 src(:,1)=src(:,1) 343 & -(omega(2)*(omega(1)*xx(:,2)-omega(2)*xx(:,1)) 344 & -omega(3)*(omega(3)*xx(:,1)-omega(1)*xx(:,3))) 345 & -two*(omega(2)*u3-omega(3)*u2) 346 src(:,2)=src(:,2) 347 & -(omega(3)*(omega(2)*xx(:,3)-omega(3)*xx(:,2)) 348 & -omega(1)*(omega(1)*xx(:,2)-omega(2)*xx(:,1))) 349 & -two*(omega(3)*u1-omega(1)*u3) 350 src(:,3)=src(:,3) 351 & -(omega(1)*(omega(3)*xx(:,1)-omega(1)*xx(:,3)) 352 & -omega(2)*(omega(2)*xx(:,3)-omega(3)*xx(:,2))) 353 & -two*(omega(1)*u2-omega(2)*u1) 354 endif 355c 356c.... -------------------> momentum residual <----------------- 357c 358 rLui(:,1) =(aci(:,1) + u1 * g1yi(:,2) 359 & + u2 * g2yi(:,2) 360 & + u3 * g3yi(:,2) - src(:,1) ) * rho 361 & + g1yi(:,1) 362 & - divqi(:,1) 363 rLui(:,2) =(aci(:,2) + u1 * g1yi(:,3) 364 & + u2 * g2yi(:,3) 365 & + u3 * g3yi(:,3) - src(:,2) ) * rho 366 & + g2yi(:,1) 367 & - divqi(:,2) 368 rLui(:,3) =(aci(:,3) + u1 * g1yi(:,4) 369 & + u2 * g2yi(:,4) 370 & + u3 * g3yi(:,4) - src(:,3) ) * rho 371 & + g3yi(:,1) 372 & - divqi(:,3) 373 if(iconvflow.eq.1) then 374 divu(:) = (g1yi(:,2) + g2yi(:,3) + g3yi(:,4))*rho 375 rLui(:,1)=rlui(:,1)+u1*divu 376 rLui(:,2)=rlui(:,2)+u2*divu 377 rLui(:,3)=rlui(:,3)+u3*divu 378 endif 379 380c 381c.... compute the stabilization terms 382c 383 call e3stab (rho, u1, u2, 384 & u3, dxidx, rLui, 385 & rmu, tauC, tauM, 386 & tauBar, uBar ) 387c 388c... Compute the SUPG stress at the current quad point multiplied 389c... by the quadrature point weight. 390c 391 stressli(:,1) = u1(:)*rLui(:,1) 392 stressli(:,2) = u1(:)*rLui(:,2) 393 stressli(:,3) = u1(:)*rLui(:,3) 394 stressli(:,4) = u2(:)*rLui(:,1) 395 stressli(:,5) = u2(:)*rLui(:,2) 396 stressli(:,6) = u2(:)*rLui(:,3) 397 stressli(:,7) = u3(:)*rLui(:,1) 398 stressli(:,8) = u3(:)*rLui(:,2) 399 stressli(:,9) = u3(:)*rLui(:,3) 400 401 if (iconvflow .eq. 1) then 402 stressli(:,1) = stressli(:,1) + u1(:)*rLui(:,1) 403 stressli(:,2) = stressli(:,2) + u2(:)*rLui(:,1) 404 stressli(:,3) = stressli(:,3) + u3(:)*rLui(:,1) 405 stressli(:,4) = stressli(:,4) + u1(:)*rLui(:,2) 406 stressli(:,5) = stressli(:,5) + u2(:)*rLui(:,2) 407 stressli(:,6) = stressli(:,6) + u3(:)*rLui(:,2) 408 stressli(:,7) = stressli(:,7) + u1(:)*rLui(:,3) 409 stressli(:,8) = stressli(:,8) + u2(:)*rLui(:,3) 410 stressli(:,9) = stressli(:,9) + u3(:)*rLui(:,3) 411 endif 412 413c.... Debugging 414 415c stressli = two 416c tauM = one 417 418c.... Multiply ui*Luj times tauM and times WdetJ 419 420 do l = 1, 9 421 do k = 1, npro 422 stressli(k,l) = stressli(k,l)*WdetJ(k)*tauM(k) 423 enddo 424 enddo 425 426c.... Obtain the SUPG energy dissipation (tau_{ij} S_{ij}) at the 427c.... current qpt. 428 429 dissi(:,1) = stressli(:,1)*Sij(:,1) + stressli(:,5)*Sij(:,2) 430 & + stressli(:,9)*Sij(:,3) + stressli(:,4)*Sij(:,4) 431 & + stressli(:,7)*Sij(:,5) + stressli(:,8)*Sij(:,6) 432 & + stressli(:,2)*Sij(:,4) + stressli(:,3)*Sij(:,5) 433 & + stressli(:,6)*Sij(:,6) 434 435c.... Obtain the eddy viscosity dissipation multiplied by WdetJ 436 437 dissi(:,2) = xmudmif(:,intp)*Snorm(:)*rho(:)*WdetJ(:) 438 dissi(:,3) = rmu(:)*Snorm(:)*rho(:)*WdetJ(:) ! Total dissipation 439c from molec. and eddy 440 441c.... Debugging 442 443c dissi(:,1) = two*WdetJ(:) 444c dissi(:,2) = two*WdetJ(:) 445c dissi(:,3) = two*WdetJ(:) 446 447c..... Volume of element 448 449 voli = WdetJ ! Volume of element patch 450c 451c.... For debugging purposes let us keep track of rLui 452 453c rLui(:,1) = rLui(:,1)*WdetJ(:) 454c rLui(:,1) = rLui(:,2)*WdetJ(:) 455c rLui(:,1) = rLui(:,3)*WdetJ(:) 456 457c.... Acumulate integration point contributions for each each element 458 459 do l = 1, 9 460 stressl(:,l) = stressl(:,l) + stressli(:,l) 461 enddo 462 463 do l = 1, 3 464 dissl(:,l) = dissl(:,l) + dissi(:,l) 465 enddo 466 467 voll = voll + voli 468 469 enddo ! End loop over quadrature points. 470 471 do j = 1,nshl 472 do nel = 1,npro 473 stress(ien(nel,j),:) = stress(ien(nel,j),:) + stressl(nel,:) 474 enddo 475 enddo 476 477 do j = 1,nshl 478 do nel = 1,npro 479 diss(ien(nel,j),:) = diss(ien(nel,j),:) + dissl(nel,:) 480 enddo 481 enddo 482 483 do j = 1,nshl 484 do nel = 1,npro 485 vol(ien(nel,j)) = vol(ien(nel,j)) + voll(nel) 486 enddo 487 enddo 488 489 return 490 end 491 subroutine cpjdmcnoi (y, shgl, shp, 492 & iper, ilwork, x, 493 & rowp, colm, 494 & iBC, BC) 495 496 use pointer_data 497 498 use lhsGkeep ! This module stores the mass (Gram) matrix. 499 500 use quadfilt ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf), 501c shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf). 502c Shpf and shglf are the shape funciotns and their 503c gradient evaluated using the quadrature rule desired 504c for computing the dmod. Qwt contains the weights of the 505c quad. points. 506 507 include "common.h" 508 include "mpif.h" 509 include "auxmpi.h" 510 511c 512 dimension fres(nshg,24), fwr(nshg), 513 & strnrm(nshg), cdelsq(nshg), 514 & xnum(nshg), xden(nshg), 515 & xmij(nshg,6), xlij(nshg,6), 516 & xnude(nfath,2), xnuder(nfath,2), 517 & strl(numel,ngauss), 518 & y(nshg,5), yold(nshg,5), 519 & iper(nshg), 520 & ilwork(nlwork), 521 & x(numnp,3), 522 & shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT), 523 & pfres(nshg,22), ifath(nshg), 524 & nsons(nshg), iBC(nshg), 525 & BC(nshg,ndofBC), xnutf(nfath), 526 & xnut(nshg) 527 528 integer rowp(nshg*nnz), colm(nshg+1) 529 530 real*8, allocatable, dimension(:,:,:) :: em 531 532 533 denom=max(1.0d0*(lstep),one) 534 if(dtavei.lt.0) then 535 wcur=one/denom 536 else 537 wcur=dtavei 538 endif 539 whist=1.0-wcur 540 541 if (istep .eq. 0) then 542 lhsG = zero 543 endif 544 545 fres = zero 546 yold(:,1)=y(:,4) 547 yold(:,2:4)=y(:,1:3) 548c 549c hack in an interesting velocity field (uncomment to test dmod) 550c 551c do i = 1, nshg ! No periodicity for testing 552c iper(i) = i 553c enddo 554c yold(:,5) = 1.0 555c yold(:,2) = 3.0d0 556c yold(:,2) = 2.0*x(:,1) - 3*x(:,2) 557c yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2) 558c yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3) 559c yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable 560c suitable for the 561 562 563 intrul=intg(1,itseq) 564 intind=intpt(intrul) 565 566 do iblk = 1,nelblk 567 lcsyst = lcblk(3,iblk) 568 iel = lcblk(1,iblk) !Element number where this block begins 569 npro = lcblk(1,iblk+1) - iel 570 lelCat = lcblk(2,iblk) 571 nenl = lcblk(5,iblk) 572 nshl = lcblk(10,iblk) 573 inum = iel + npro - 1 574 575 ngauss = nint(lcsyst) 576 ngaussf = nintf(lcsyst) 577 578 call hfilterBB (yold, x, mien(iblk)%p, fres, 579 & shglf(lcsyst,:,1:nshl,:), 580 & shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf)) 581 582 583 if ( istep.eq.0 ) then 584 585 allocate ( em(npro,nshl,nshl) ) 586 587 call getgram2 (x, mien(iblk)%p, 588 & shgl(lcsyst,:,1:nshl,:), shp(lcsyst,1:nshl,:), 589 & shglf(lcsyst,:,1:nshl,:), shpf(lcsyst,1:nshl,:), em, 590 & Qwtf(lcsyst,1:ngaussf)) 591 592 call fillsparseSclr (mien(iblk)%p, 593 & em, lhsG, 594 & rowp, colm) 595 596 597 deallocate ( em ) 598 599 endif 600 601 enddo ! End loop over element blocks 602c 603 604c write(*,*)'Im here' 605 606 if(numpe>1) call commu (fres, ilwork, 24, 'in ') 607c 608c account for periodicity in filtered variables 609c 610 do j = 1,nshg 611 i = iper(j) 612 if (i .ne. j) then 613 fres(i,:) = fres(i,:) + fres(j,:) 614 endif 615 enddo 616 617 do j = 1,nshg 618 i = iper(j) 619 if (i .ne. j) then 620 fres(j,:) = zero 621 endif 622 enddo 623 624c Need to zero off-processor slaves as well. 625 626 if (numpe.gt.1) then 627 628 numtask = ilwork(1) 629 itkbeg = 1 630 631c zero the nodes that are "solved" on the other processors 632 633 do itask = 1, numtask 634 635 iacc = ilwork (itkbeg + 2) 636 numseg = ilwork (itkbeg + 4) 637 638 if (iacc .eq. 0) then 639 do is = 1,numseg 640 isgbeg = ilwork (itkbeg + 3 + 2*is) 641 lenseg = ilwork (itkbeg + 4 + 2*is) 642 isgend = isgbeg + lenseg - 1 643 fres(isgbeg:isgend,:) = zero 644 enddo 645 endif 646 647 itkbeg = itkbeg + 4 + 2*numseg 648 649 enddo 650 651 endif 652 653c... At this point fres has the right hand side vector (b) and lhsG has 654c... the Gram matrix (M_{AB}) (in sparse storage). Now we need to solve 655c... Ax = b using the conjugate gradient method to finish off the 656c... L2-projection. 657 658 659 do i = 1, 21 660 call sparseCG (fres(:,i), pfres(:,i), lhsG, 661 & rowp, colm, iper, ilwork, 662 & iBC, BC) 663 enddo 664 665 666 write(*,*)'Done with least-squares projection' 667 668 do i = 1, 21 669 fres(:,i) = pfres(:,i) 670 enddo 671 672 fres(:,22) = one 673 674 do iblk = 1,nelblk 675 lcsyst = lcblk(3,iblk) 676 iel = lcblk(1,iblk) !Element number where this block begins 677 npro = lcblk(1,iblk+1) - iel 678 lelCat = lcblk(2,iblk) 679 nenl = lcblk(5,iblk) 680 nshl = lcblk(10,iblk) 681 inum = iel + npro - 1 682 683 ngauss = nint(lcsyst) 684 685 call getstrl (yold, x, mien(iblk)%p, 686 & strl(iel:inum,:), shgl(lcsyst,:,1:nshl,:), 687 & shp(lcsyst,1:nshl,:)) 688 689 enddo 690 691 692 strnrm = sqrt( 693 & two * (fres(:,10)**2 + fres(:,11)**2 + fres(:,12)**2) 694 & + four * ( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) ) 695 696 fwr = fwr1 * fres(:,22) * strnrm 697 698 xmij(:,1) = -fwr 699 & * fres(:,10) + fres(:,16) 700 xmij(:,2) = -fwr 701 & * fres(:,11) + fres(:,17) 702 xmij(:,3) = -fwr 703 & * fres(:,12) + fres(:,18) 704 705 xmij(:,4) = -fwr * fres(:,13) + fres(:,19) 706 xmij(:,5) = -fwr * fres(:,14) + fres(:,20) 707 xmij(:,6) = -fwr * fres(:,15) + fres(:,21) 708 709 fres(:,22) = one / fres(:,22) 710 711 xlij(:,1) = fres(:,4) - fres(:,1) * fres(:,1) * fres(:,22) 712 xlij(:,2) = fres(:,5) - fres(:,2) * fres(:,2) * fres(:,22) 713 xlij(:,3) = fres(:,6) - fres(:,3) * fres(:,3) * fres(:,22) 714 xlij(:,4) = fres(:,7) - fres(:,1) * fres(:,2) * fres(:,22) 715 xlij(:,5) = fres(:,8) - fres(:,1) * fres(:,3) * fres(:,22) 716 xlij(:,6) = fres(:,9) - fres(:,2) * fres(:,3) * fres(:,22) 717 718 xnum = xlij(:,1) * xmij(:,1) + xlij(:,2) * xmij(:,2) 719 & + xlij(:,3) * xmij(:,3) 720 & + two * (xlij(:,4) * xmij(:,4) + xlij(:,5) * xmij(:,5) 721 & + xlij(:,6) * xmij(:,6)) 722 xden = xmij(:,1) * xmij(:,1) + xmij(:,2) * xmij(:,2) 723 & + xmij(:,3) * xmij(:,3) 724 & + two * (xmij(:,4) * xmij(:,4) + xmij(:,5) * xmij(:,5) 725 & + xmij(:,6) * xmij(:,6)) 726 xden = two * xden 727 728c 729c don't account for periodic nodes twice 730c 731 do j = 1,numnp 732 i = iper(j) 733 if (i .ne. j) then 734 xden(j) = zero 735 xnum(j) = zero 736 endif 737 enddo 738 739 if(numpe.gt.1) then 740c 741c.... nodes treated on another processor are eliminated 742c 743 numtask = ilwork(1) 744 itkbeg = 1 745 746 do itask = 1, numtask 747 748 iacc = ilwork (itkbeg + 2) 749 numseg = ilwork (itkbeg + 4) 750 751 if (iacc .eq. 0) then 752 do is = 1,numseg 753 isgbeg = ilwork (itkbeg + 3 + 2*is) 754 lenseg = ilwork (itkbeg + 4 + 2*is) 755 isgend = isgbeg + lenseg - 1 756 xnum(isgbeg:isgend) = zero 757 xden(isgbeg:isgend) = zero 758 enddo 759 endif 760 761 itkbeg = itkbeg + 4 + 2*numseg 762 763 enddo 764 765c if (myrank.eq.0)then 766c do i = 1, numnp 767c write(253,*)xnum(i),xden(i),myrank 768c enddo 769c endif 770c if (myrank.eq.1)then 771c do i = 1, numnp 772c write(254,*)xnum(i),xden(i),myrank 773c enddo 774c endif 775 776c xnuml = sum(xnum) 777c xdenl = sum(xden) 778 779 xnuml = zero 780 xdenl = zero 781 do i = 1, numnp 782 xnuml = xnuml + xnum(i) 783 xdenl = xdenl + xden(i) 784 enddo 785 786c write(*,*)xnuml,xdenl,myrank 787 788 call drvAllreducesclr ( xnuml, xnumt ) 789 call drvAllreducesclr ( xdenl, xdent ) 790cd 791 else 792 793c xnumt = sum(xnum) 794c xdent = sum(xden) 795 xnumt = zero 796 xdent = zero 797 do i = 1, numnp 798 xnumt = xnumt + xnum(i) 799 xdent = xdent + xden(i) 800 enddo 801 802 803 endif 804 805 scalar = xnumt / (xdent + 1.d-09) 806 xnut = scalar 807 808 809 if (myrank .eq. 0)then 810 write(*,*) 'xnut=', xnut(100) 811 endif 812c do i = 1, numnp 813c write(*,*)xnumt/xdent,myrank 814c enddo 815c 816 do iblk = 1,nelblk 817 lcsyst = lcblk(3,iblk) 818 iel = lcblk(1,iblk) 819 npro = lcblk(1,iblk+1) - iel 820 lelCat = lcblk(2,iblk) 821 inum = iel + npro - 1 822 823 ngauss = nint(lcsyst) 824 825 call scatnu (mien(iblk)%p, strl(iel:inum,:), 826 & mxmudmi(iblk)%p,cdelsq,shp(lcsyst,1:nshl,:)) 827 enddo 828c $$$$$$$$$$$$$$$$$$$$$$$$$$$ 829c$$$ tmp1 = MINVAL(xmudmi) 830c$$$ tmp2 = MAXVAL(xmudmi) 831c$$$ if(numpe>1) then 832c$$$ call MPI_REDUCE (tmp1, tmp3, 1, MPI_DOUBLE_PRECISION, 833c$$$ & MPI_MIN, master, MPI_COMM_WORLD, ierr) 834c$$$ call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION, 835c$$$ & MPI_MAX, master, MPI_COMM_WORLD, ierr) 836c$$$ tmp1=tmp3 837c$$$ tmp2=tmp4 838c$$$ endif 839c$$$ if (myrank .EQ. master) then 840c$$$ write(35,*) lstep,tmp1,tmp2 841c$$$ call flush(35) 842c$$$ endif 843c $$$$$$$$$$$$$$$$$$$$$$$$$$$ 844 845c 846c if flag set, write a restart file with info (reuse xmij's memory) 847c 848 if(irs.eq.11) then 849 lstep=999 850 xmij(:,1)=xnum(:) 851 xmij(:,2)=xden(:) 852 xmij(:,3)=cdelsq(:) 853 xmij(:,5)=xlij(:,4) !leave M_{12} in 4 and put L_{12} here 854 call restar('out ',xmij,xlij) !also dump all of L_{ij} in ac 855 stop 856 endif 857c 858c local clipping moved to scatnu with the creation of mxmudmi pointers 859c 860c$$$ rmu=datmat(1,2,1) 861c$$$ xmudmi=min(xmudmi,1000.0*rmu) !don't let it get larger than 1000 mu 862c$$$ xmudmi=max(xmudmi, -rmu) ! don't let (xmudmi + mu) < 0 863c stop !uncomment to test dmod 864c 865 866 867c write out the nodal values of xnut (estimate since we don't calc strain 868c there and must use the filtered strain). 869c 870 871 872 873 return 874 end 875 876c----------------------------------------------------- 877 878 subroutine getgram (x, ien, shgl, shp, em, Qwtf) 879 880 include "common.h" 881 882 dimension x(numnp,nsd), xl(npro,nenl,nsd) 883 dimension ien(npro,nshl), 884 & shgl(nsd,nshl,ngauss), shp(nshl,ngauss), 885 & em(npro,nshl,nshl), Qwtf(ngaussf) 886 887 call localx(x, xl, ien, nsd, 'gather ') 888 889 call cmass(shp,shgl,xl,em) 890 891 892 return 893 894 end 895 896c---------------------------------------------------------------------- 897 898 899 subroutine getgram2 (x, ien, shgl, shp, shglf, shpf, em, Qwtf) 900 901 include "common.h" 902 903 dimension x(numnp,nsd), xl(npro,nenl,nsd) 904 dimension ien(npro,nshl), 905 & shgl(nsd,nshl,ngauss), shp(nshl,ngauss), 906 & shglf(nsd,nshl,ngauss), shpf(nshl,ngauss), 907 & em(npro,nshl,nshl), Qwtf(ngaussf) 908 909 910 call localx(x, xl, ien, nsd, 'gather ') 911 912 call cmassl(shp,shgl,shpf,shglf,xl,em,Qwtf) 913 914 915 return 916 917 end 918 919c----------------------------------------------------------------------- 920 921 subroutine getgram3 (x, ien, shgl, shp, shglf, shpf, em, Qwtf) 922 923 include "common.h" 924 925 dimension x(numnp,nsd), xl(npro,nenl,nsd) 926 dimension ien(npro,nshl), 927 & shgl(nsd,nshl,ngauss), shp(nshl,ngauss), 928 & shglf(nsd,nshl,ngauss), shpf(nshl,ngauss), 929 & em(npro,nshl,nshl), Qwtf(ngaussf) 930 931 932 call localx(x, xl, ien, nsd, 'gather ') 933 934 call cmasstl(shp,shgl,shpf,shglf,xl,em,Qwtf) 935 936 937 return 938 939 end 940 subroutine cdelBHsq (y, shgl, shp, 941 & iper, ilwork, 942 & nsons, ifath, x, cdelsq1) 943 944 use pointer_data 945 946 use quadfilt ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf), 947c shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf). 948c Shpf and shglf are the shape funciotns and their 949c gradient evaluated using the quadrature rule desired 950c for computing the dmod. Qwtf contains the weights of the 951c quad. points. 952 953 include "common.h" 954 include "mpif.h" 955 include "auxmpi.h" 956 957c 958 dimension fres(nshg,33), fwr(nshg), 959 & strnrm(nshg), cdelsq1(nfath), 960 & xnum(nshg), xden(nshg), 961 & xmij(nshg,6), xlij(nshg,6), 962 & xnude(nfath,2), xnuder(nfath,2), 963 & nsons(nshg), 964 & strl(numel,ngauss), 965 & y(nshg,5), yold(nshg,5), 966 & ifath(nshg), iper(nshg), 967 & ilwork(nlwork), 968 & x(numnp,3), 969 & shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT), 970 & xnutf(nfath), 971 & hfres(nshg,16) 972 973c 974 975 fres = zero 976 hfres = zero 977 978 yold(:,1)=y(:,4) 979 yold(:,2:4)=y(:,1:3) 980 981c 982c hack in an interesting velocity field (uncomment to test dmod) 983c 984c yold(:,5) = 1.0 ! Debugging 985c yold(:,2) = 2.0*x(:,1) - 3.0*x(:,2) 986c yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2) 987c yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3) 988c yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable 989c suitable for the 990 991 do iblk = 1,nelblk 992 lcsyst = lcblk(3,iblk) 993 iel = lcblk(1,iblk) !Element number where this block begins 994 npro = lcblk(1,iblk+1) - iel 995 lelCat = lcblk(2,iblk) 996 nenl = lcblk(5,iblk) 997 nshl = lcblk(10,iblk) 998 inum = iel + npro - 1 999 1000 ngauss = nint(lcsyst) 1001 ngaussf = nintf(lcsyst) 1002 1003c call hfilterB (yold, x, mien(iblk)%p, hfres, 1004c & shglf(lcsyst,:,1:nshl,:), 1005c & shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf)) 1006 1007 call hfilterC (yold, x, mien(iblk)%p, hfres, 1008 & shglf(lcsyst,:,1:nshl,:), 1009 & shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf)) 1010 1011 enddo 1012 1013 if(numpe>1) call commu (hfres, ilwork, 16, 'in ') 1014c 1015c... account for periodicity in filtered variables 1016c 1017 do j = 1,nshg ! Add on-processor slave contribution to masters 1018 i = iper(j) 1019 if (i .ne. j) then 1020 hfres(i,:) = hfres(i,:) + hfres(j,:) 1021 endif 1022 enddo 1023 do j = 1,nshg ! Set on-processor slaves to be the same as masters 1024 i = iper(j) 1025 if (i .ne. j) then 1026 hfres(j,:) = hfres(i,:) 1027 endif 1028 enddo 1029 1030c... Set off-processor slaves to be the same as their masters 1031 1032 if(numpe>1) call commu (hfres, ilwork, 16, 'out') 1033 1034 1035 hfres(:,16) = one / hfres(:,16) ! one/(volume of hat filter kernel) 1036 1037 do j = 1, 15 1038 hfres(:,j) = hfres(:,j) * hfres(:,16) 1039 enddo 1040 1041c... For debugging 1042 1043c hfres(:,1) = 2.0*x(:,1) - 3.0*x(:,2) 1044c hfres(:,2) = 3.0*x(:,1) + 4.0*x(:,2) 1045c hfres(:,3) = 4.0*x(:,1) + x(:,2) + x(:,3) 1046 1047c... Done w/ h-filtering. Begin 2h-filtering. 1048 1049 do iblk = 1,nelblk 1050 lcsyst = lcblk(3,iblk) 1051 iel = lcblk(1,iblk) !Element number where this block begins 1052 npro = lcblk(1,iblk+1) - iel 1053 lelCat = lcblk(2,iblk) 1054 nenl = lcblk(5,iblk) 1055 nshl = lcblk(10,iblk) 1056 inum = iel + npro - 1 1057 1058 ngauss = nint(lcsyst) 1059 ngaussf = nintf(lcsyst) 1060 1061 call twohfilterB (yold, x, strl(iel:inum,:), mien(iblk)%p, 1062 & fres, hfres, shgl(lcsyst,:,1:nshl,:), 1063 & shp(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf)) 1064 1065 enddo 1066c 1067 1068 1069 if(numpe>1) call commu (fres, ilwork, 33, 'in ') 1070c 1071c account for periodicity in filtered variables 1072c 1073 do j = 1,nshg 1074 i = iper(j) 1075 if (i .ne. j) then 1076 fres(i,:) = fres(i,:) + fres(j,:) 1077 endif 1078 enddo 1079 1080 do j = 1,nshg 1081 i = iper(j) 1082 if (i .ne. j) then 1083 fres(j,:) = fres(i,:) 1084 endif 1085 enddo 1086 1087 if(numpe>1)then 1088 call commu (fres, ilwork, 33, 'out') 1089 endif 1090 1091 fres(:,22) = one / fres(:,22) 1092 do j = 1,21 1093 fres(:,j) = fres(:,j) * fres(:,22) 1094 enddo 1095 do j = 23,33 1096 fres(:,j) = fres(:,j) * fres(:,22) 1097 enddo 1098 1099 1100 do iblk = 1,nelblk 1101 lcsyst = lcblk(3,iblk) 1102 iel = lcblk(1,iblk) !Element number where this block begins 1103 npro = lcblk(1,iblk+1) - iel 1104 lelCat = lcblk(2,iblk) 1105 nenl = lcblk(5,iblk) 1106 nshl = lcblk(10,iblk) 1107 inum = iel + npro - 1 1108 1109 ngauss = nint(lcsyst) 1110 1111 call getstrl (yold, x, mien(iblk)%p, 1112 & strl(iel:inum,:), shgl(lcsyst,:,1:nshl,:), 1113 & shp(lcsyst,1:nshl,:)) 1114 1115 enddo 1116 1117c 1118c... Obtain the hat-tilde strain rate norm at the nodes 1119c 1120 1121 strnrm = sqrt( 1122 & two * (fres(:,10)**2 + fres(:,11)**2 + fres(:,12)**2) 1123 & + four * ( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) ) 1124 1125 fwr = fwr1 * strnrm 1126 1127 xmij(:,1) = -fwr 1128 & * fres(:,10) + fres(:,16) 1129 xmij(:,2) = -fwr 1130 & * fres(:,11) + fres(:,17) 1131 xmij(:,3) = -fwr 1132 & * fres(:,12) + fres(:,18) 1133 1134 xmij(:,4) = -fwr * fres(:,13) + fres(:,19) 1135 xmij(:,5) = -fwr * fres(:,14) + fres(:,20) 1136 xmij(:,6) = -fwr * fres(:,15) + fres(:,21) 1137 1138 1139 xlij(:,1) = fres(:,4) - fres(:,1) * fres(:,1) 1140 xlij(:,2) = fres(:,5) - fres(:,2) * fres(:,2) 1141 xlij(:,3) = fres(:,6) - fres(:,3) * fres(:,3) 1142 xlij(:,4) = fres(:,7) - fres(:,1) * fres(:,2) 1143 xlij(:,5) = fres(:,8) - fres(:,1) * fres(:,3) 1144 xlij(:,6) = fres(:,9) - fres(:,2) * fres(:,3) 1145 1146 xnum = xlij(:,1) * xmij(:,1) + xlij(:,2) * xmij(:,2) 1147 & + xlij(:,3) * xmij(:,3) 1148 & + two * (xlij(:,4) * xmij(:,4) + xlij(:,5) * xmij(:,5) 1149 & + xlij(:,6) * xmij(:,6)) 1150 xden = xmij(:,1) * xmij(:,1) + xmij(:,2) * xmij(:,2) 1151 & + xmij(:,3) * xmij(:,3) 1152 & + two * (xmij(:,4) * xmij(:,4) + xmij(:,5) * xmij(:,5) 1153 & + xmij(:,6) * xmij(:,6)) 1154 xden = two * xden 1155 1156c zero on processor periodic nodes so that they will not be added twice 1157 do j = 1,numnp 1158 i = iper(j) 1159 if (i .ne. j) then 1160 xnum(j) = zero 1161 xden(j) = zero 1162 endif 1163 enddo 1164 1165 if (numpe.gt.1) then 1166 1167 numtask = ilwork(1) 1168 itkbeg = 1 1169 1170c zero the nodes that are "solved" on the other processors 1171 do itask = 1, numtask 1172 1173 iacc = ilwork (itkbeg + 2) 1174 numseg = ilwork (itkbeg + 4) 1175 1176 if (iacc .eq. 0) then 1177 do is = 1,numseg 1178 isgbeg = ilwork (itkbeg + 3 + 2*is) 1179 lenseg = ilwork (itkbeg + 4 + 2*is) 1180 isgend = isgbeg + lenseg - 1 1181 xnum(isgbeg:isgend) = zero 1182 xden(isgbeg:isgend) = zero 1183 enddo 1184 endif 1185 1186 itkbeg = itkbeg + 4 + 2*numseg 1187 1188 enddo 1189 1190 endif 1191c 1192c Description of arrays. Each processor has an array of length equal 1193c to the total number of fathers times 2 xnude(nfathers,2). One to collect 1194c the numerator and one to collect the denominator. There is also an array 1195c of length nshg on each processor which tells the father number of each 1196c on processor node, ifath(nnshg). Finally, there is an arry of length 1197c nfathers to tell the total (on all processors combined) number of sons 1198c for each father. 1199c 1200c Now loop over nodes and accumlate the numerator and the denominator 1201c to the father nodes. Only on processor addition at this point. 1202c Note that serrogate fathers are collect some for the case where some 1203c sons are on another processor 1204c 1205 xnude = zero 1206 do i = 1,nshg 1207 xnude(ifath(i),1) = xnude(ifath(i),1) + xnum(i) 1208 xnude(ifath(i),2) = xnude(ifath(i),2) + xden(i) 1209 enddo 1210 1211c 1212c Now the true fathers and serrogates combine results and update 1213c each other. 1214c 1215 if(numpe .gt. 1)then 1216 call drvAllreduce(xnude, xnuder,2*nfath) 1217c 1218c xnude is the sum of the sons for each father on this processor 1219c 1220c xnuder is the sum of the sons for each father on all processor combined 1221c (the same as if we had not partitioned the mesh for each processor) 1222c 1223c For each father we have precomputed the number of sons (including 1224c the sons off processor). 1225c 1226c Now divide by number of sons to get the average (not really necessary 1227c for dynamic model since ratio will cancel nsons at each father) 1228c 1229c xnuder(:,1) = xnuder(:,1) ! / nsons(:) 1230c xnuder(:,2) = xnuder(:,2) ! / nsons(:) 1231c 1232c the next line is c \Delta^2 1233c 1234 xnuder(:,1) = xnuder(:,1) / (xnuder(:,2) + 1.d-09) 1235 do i = 1,nfath 1236 cdelsq1(i) = xnuder(i,1) 1237 enddo 1238 else 1239c 1240c the next line is c \Delta^2, not nu_T but we want to save the 1241c memory 1242c 1243 xnude(:,1) = xnude(:,1) / (xnude(:,2) + 1.d-09) 1244 do i = 1,nfath 1245 cdelsq1(i) = xnude(i,1) 1246 enddo 1247 endif 1248 1249 if (myrank .eq. master) then 1250 if (numpe .gt. 1) then 1251 do i = 1, nfath 1252 write(22,*)i, xnuder(i,1) 1253 enddo 1254 else 1255 do i = 1, nfath 1256 write(22,*)i, xnude(i,1) 1257 enddo 1258 endif 1259 endif 1260 call flush(22) 1261 1262 do i = 1, nfath 1263 if (cdelsq1(i) .lt. zero) then 1264 cdelsq1(i) = zero 1265 endif 1266 enddo 1267 1268 return 1269 end 1270 subroutine SUPGdis (y, ac, shgl, 1271 & shp, iper, ilwork, 1272 & nsons, ifath, x, 1273 & iBC, BC, stabdis, xavegt) 1274 1275 1276 use stats ! 1277 use pointer_data ! brings in the pointers for the blocked arrays 1278 use local_mass 1279 use rlssave ! Use the resolved Leonard stresses at the nodes. 1280 use quadfilt ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf), 1281c shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf). 1282c Shpf and shglf are the shape funciotns and their 1283c gradient evaluated using the quadrature rule desired 1284c for computing the dmod. Qwt contains the weights of the 1285c quad. points. 1286 1287 1288 1289 include "common.h" 1290 include "mpif.h" 1291 include "auxmpi.h" 1292 1293 1294 dimension y(nshg,ndof), ac(nshg,ndof), 1295 & yold(nshg,ndof), 1296 & ifath(nshg), nsons(nshg), 1297 & iper(nshg), ilwork(nlwork), 1298 & shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT), 1299 & x(numnp,3), 1300 & qres(nshg,nsd*nsd), rmass(nshg), 1301 & iBC(nshg), BC(nshg,ndofBC), 1302 & cdelsq(nshg), vol(nshg), 1303 & stress(nshg,9), diss(nshg,3), 1304 & xave(nshg,12), xaveg(nfath,12), 1305 & xavegr(nfath,12), stabdis(nfath), 1306 & dmodc(nfath), strl(numel,ngauss), 1307 & xavegt(nfath,12) 1308 1309 character*5 cname 1310 character*30 fname 1311 1312 yold(:,1)=y(:,4) 1313 yold(:,2:4)=y(:,1:3) 1314 1315c 1316c hack in an interesting velocity field (uncomment to test dmod) 1317c 1318c yold(:,5) = 1.0 ! Debugging 1319c yold(:,2) = 2.0*x(:,1) - 3.0*x(:,2) 1320c yold(:,2) = 2.0 1321c yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2) 1322c yold(:,3) = 3.0 1323c yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3) 1324c yold(:,4) = 4.0 1325c yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable 1326c suitable for the 1327 1328c.... First let us obtain cdelsq at each node in the domain. 1329c.... We use numNden which lives in the quadfilt module. 1330 1331 if ( (istep .eq. 0) ) then 1332 fname = 'dmodc.dat' // cname (myrank+1) 1333 open (99,file=fname,form='unformatted',status='unknown') 1334 read(99) dmodc 1335 close(99) 1336 cdelsq(:) = dmodc(ifath(:)) 1337 else 1338 cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09) 1339 endif 1340 1341c if (myrank .eq. master) then 1342c do i = 1, nfath 1343c write(*,*)'dmod=', dmodc(i) 1344c enddo 1345c endif 1346 1347 if ( istep .eq. (nstep(1)-1) ) then 1348 dmodc(ifath(:)) = cdelsq(:) 1349 fname = 'dmodc.dat' // cname (myrank+1) 1350 open (99,file=fname,form='unformatted', status='replace') 1351 write(99) dmodc 1352 close(99) 1353c if (myrank .eq. master) then 1354c do i = 1, nfath 1355c write(*,*)'dmod=', dmodc(i) 1356c enddo 1357c endif 1358 1359 endif 1360 1361c if (istep .eq. 0) 1362 do iblk = 1,nelblk 1363 lcsyst = lcblk(3,iblk) 1364 iel = lcblk(1,iblk) !Element number where this block begins 1365 npro = lcblk(1,iblk+1) - iel 1366 lelCat = lcblk(2,iblk) 1367 nenl = lcblk(5,iblk) 1368 nshl = lcblk(10,iblk) 1369 inum = iel + npro - 1 1370 1371 ngauss = nint(lcsyst) 1372 1373 call getstrl (yold, x, mien(iblk)%p, 1374 & strl(iel:inum,:), shgl(lcsyst,:,1:nshl,:), 1375 & shp(lcsyst,1:nshl,:)) 1376 1377 enddo 1378 1379 do iblk = 1,nelblk 1380 lcsyst = lcblk(3,iblk) 1381 iel = lcblk(1,iblk) 1382 npro = lcblk(1,iblk+1) - iel 1383 lelCat = lcblk(2,iblk) 1384 inum = iel + npro - 1 1385 1386 ngauss = nint(lcsyst) 1387 1388 call scatnu (mien(iblk)%p, strl(iel:inum,:), 1389 & mxmudmi(iblk)%p,cdelsq,shp(lcsyst,1:nshl,:)) 1390 enddo 1391c endif 1392 1393 1394 1395 if (idiff==1 .or. idiff==3) then ! global reconstruction of qdiff 1396c 1397c loop over element blocks for the global reconstruction 1398c of the diffusive flux vector, q, and lumped mass matrix, rmass 1399c 1400 qres = zero 1401 rmass = zero 1402 1403 do iblk = 1, nelblk 1404 iel = lcblk(1,iblk) 1405 lelCat = lcblk(2,iblk) 1406 lcsyst = lcblk(3,iblk) 1407 iorder = lcblk(4,iblk) 1408 nenl = lcblk(5,iblk) ! no. of vertices per element 1409 nshl = lcblk(10,iblk) 1410 mattyp = lcblk(7,iblk) 1411 ndofl = lcblk(8,iblk) 1412 nsymdl = lcblk(9,iblk) 1413 npro = lcblk(1,iblk+1) - iel 1414 ngauss = nint(lcsyst) 1415c 1416c.... compute and assemble diffusive flux vector residual, qres, 1417c and lumped mass matrix, rmass 1418 1419 call AsIq (y, x, 1420 & shp(lcsyst,1:nshl,:), 1421 & shgl(lcsyst,:,1:nshl,:), 1422 & mien(iblk)%p, mxmudmi(iblk)%p, 1423 & qres, rmass ) 1424 enddo 1425 1426c 1427c.... form the diffusive flux approximation 1428c 1429 call qpbc( rmass, qres, iBC, BC, iper, ilwork ) 1430c 1431 endif 1432 1433 1434c.... form the SUPG stresses well as dissipation due to eddy viscosity, 1435c... and SUPG stabilization. 1436 1437 1438 stress = zero 1439 vol = zero 1440 diss = zero 1441 1442 do iblk = 1,nelblk 1443 lcsyst = lcblk(3,iblk) 1444 iel = lcblk(1,iblk) !Element number where this block begins 1445 npro = lcblk(1,iblk+1) - iel 1446 lelCat = lcblk(2,iblk) 1447 nenl = lcblk(5,iblk) 1448 nshl = lcblk(10,iblk) 1449 inum = iel + npro - 1 1450 1451 ngauss = nint(lcsyst) 1452 ngaussf = nintf(lcsyst) 1453 1454 call SUPGstress (y, ac, x, qres, mien(iblk)%p, mxmudmi(iblk)%p, 1455 & cdelsq, shglf(lcsyst,:,1:nshl,:), 1456 & shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf), 1457 & shgl(lcsyst,:,1:nshl,:), shp(lcsyst,1:nshl,:), 1458 & stress, diss, vol) 1459 1460 enddo 1461 1462 if(numpe>1) call commu (stress, ilwork, 9, 'in ') 1463 if(numpe>1) call commu (diss, ilwork, 3, 'in ') 1464 if(numpe>1) call commu (vol, ilwork, 1, 'in ') 1465 1466c 1467c account for periodicity 1468c 1469 do j = 1,nshg 1470 i = iper(j) 1471 if (i .ne. j) then 1472 stress(i,:) = stress(i,:) + stress(j,:) 1473 diss(i,:) = diss(i,:) + diss(j,:) 1474 vol(i) = vol(i) + vol(j) 1475 endif 1476 enddo 1477 1478 do j = 1,nshg 1479 i = iper(j) 1480 if (i .ne. j) then 1481 stress(j,:) = stress(i,:) 1482 diss(j,:) = diss(i,:) 1483 vol(j) = vol(i) 1484 endif 1485 enddo 1486 1487 if(numpe>1) call commu (stress, ilwork, 9, 'out ') 1488 if(numpe>1) call commu (diss, ilwork, 3, 'out ') 1489 if(numpe>1) call commu (vol, ilwork, 1, 'out ') 1490 1491 vol = one / vol 1492 do i = 1, 9 1493 stress(:,i) = stress(:,i)*vol(:) 1494 enddo 1495 do i = 1, 3 1496 diss(:,i) = diss(:,i)*vol(:) 1497 enddo 1498 1499c---------- > Begin averaging dissipations and SUPG stress <-------------- 1500 1501 do i = 1, 9 1502 xave(:,i) = stress(:,i) 1503 enddo 1504 xave(:,10) = diss(:,1) 1505 xave(:,11) = diss(:,2) 1506 xave(:,12) = diss(:,3) 1507 1508c zero on processor periodic nodes so that they will not be added twice 1509 do j = 1,numnp 1510 i = iper(j) 1511 if (i .ne. j) then 1512 xave(j,:) = zero 1513 endif 1514 enddo 1515 1516 if (numpe.gt.1) then 1517 1518 numtask = ilwork(1) 1519 itkbeg = 1 1520 1521c zero the nodes that are "solved" on the other processors 1522 do itask = 1, numtask 1523 1524 iacc = ilwork (itkbeg + 2) 1525 numseg = ilwork (itkbeg + 4) 1526 1527 if (iacc .eq. 0) then 1528 do is = 1,numseg 1529 isgbeg = ilwork (itkbeg + 3 + 2*is) 1530 lenseg = ilwork (itkbeg + 4 + 2*is) 1531 isgend = isgbeg + lenseg - 1 1532 xave(isgbeg:isgend,:) = zero 1533 enddo 1534 endif 1535 1536 itkbeg = itkbeg + 4 + 2*numseg 1537 1538 enddo 1539 1540 endif 1541c 1542 1543 xaveg = zero 1544 do i = 1,nshg 1545 xaveg(ifath(i),:) = xaveg(ifath(i),:) + xave(i,:) 1546 enddo 1547 1548 if(numpe .gt. 1)then 1549 call drvAllreduce(xaveg, xavegr,12*nfath) 1550 1551 do m = 1, 12 1552 xavegr(:,m) = xavegr(:,m)/nsons(:) 1553 enddo 1554 1555c if (myrank .eq. master) then 1556c write(*,*)'diss=', xavegt(14,11), xavegr(14,11) 1557c endif 1558 1559 do m = 1, 12 1560 xavegt(:,m) = xavegt(:,m) + xavegr(:,m) 1561 enddo 1562 1563 stabdis(:) = xavegr(:,10) 1564 1565 else 1566 1567 do m = 1, 12 1568 xaveg(:,m) = xaveg(:,m)/nsons(:) 1569 enddo 1570 1571 do m = 1, 12 1572 xavegt(:,m) = xavegt(:,m) + xaveg(:,m) 1573 enddo 1574 1575 stabdis(:) = xaveg(:,10) 1576 1577 endif 1578 1579c if (myrank .eq. master) then 1580c write(*,*)'diss=', xavegt(14,11), xavegr(14,11) 1581c endif 1582 1583 if ( istep .eq. (nstep(1)-1) ) then 1584 if ( myrank .eq. master) then 1585 1586 do i = 1, nfath 1587c write(376,*)xavegt(i,1),xavegt(i,2),xavegt(i,3) 1588c write(377,*)xavegt(i,4),xavegt(i,5),xavegt(i,6) 1589c write(378,*)xavegt(i,7),xavegt(i,8),xavegt(i,9) 1590 write(380,*)xavegt(i,10),xavegt(i,11),xavegt(i,12) 1591 enddo 1592 1593c call flush(376) 1594c call flush(377) 1595c call flush(378) 1596 call flush(380) 1597 1598 endif 1599 endif 1600 1601 1602 return 1603 1604 end 1605 subroutine dmcSUPG(y, ac, shgl, 1606 & shp, iper, ilwork, 1607 & nsons, ifath, x, 1608 & iBC, BC, rowp, colm, 1609 & xavegt, stabdis) 1610 1611 use lhsGkeep ! This module stores the mass (Gram) matrix. 1612 use stats ! 1613 use pointer_data ! brings in the pointers for the blocked arrays 1614 use local_mass 1615 use rlssave ! Use the resolved Leonard stresses at the nodes. 1616 use quadfilt ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf), 1617c shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf). 1618c Shpf and shglf are the shape funciotns and their 1619c gradient evaluated using the quadrature rule desired 1620c for computing the dmod. Qwt contains the weights of the 1621c quad. points. 1622 1623 1624 1625 include "common.h" 1626 include "mpif.h" 1627 include "auxmpi.h" 1628 1629 1630 dimension y(nshg,ndof), ac(nshg,ndof), 1631 & ifath(nshg), nsons(nshg), 1632 & iper(nshg), ilwork(nlwork), 1633 & shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT), 1634 & x(numnp,3), 1635 & qres(nshg,nsd*nsd), rmass(nshg), 1636 & iBC(nshg), BC(nshg,ndofBC), 1637 & cdelsq(nshg), vol(nshg), 1638 & stress(nshg,9), diss(nshg,3), 1639 & xave(nshg,12), xaveg(nfath,12), 1640 & xavegr(nfath,12), stabdis(nfath), 1641 & yold(nshg,ndof), xavegt(nfath,12), 1642 & fres(nshg,24), pfres(nshg,24), 1643 & cdel(nfath), xnume(nfath), 1644 & xdeno(nfath), strl(numel,ngauss), 1645 & rden(nshg), rnum(nshg) 1646 1647 1648 integer rowp(nshg*nnz), colm(nshg+1) 1649 1650 real*8, allocatable, dimension(:,:,:) :: em 1651 1652 real*8, allocatable, dimension(:,:) :: fakexmu 1653 1654 1655 yold(:,1)=y(:,4) 1656 yold(:,2:4)=y(:,1:3) 1657 fres = zero 1658 1659c 1660c hack in an interesting velocity field (uncomment to test dmod) 1661c 1662c yold(:,5) = 1.0 ! Debugging 1663c yold(:,2) = 2.0*x(:,1) - 3.0*x(:,2) 1664c yold(:,2) = 2.0 1665c yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2) 1666c yold(:,3) = 3.0 1667c yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3) 1668c yold(:,4) = 4.0 1669c yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable 1670c suitable for the 1671 1672 1673 intrul=intg(1,itseq) 1674 intind=intpt(intrul) 1675 1676 do iblk = 1,nelblk 1677 lcsyst = lcblk(3,iblk) 1678 iel = lcblk(1,iblk) !Element number where this block begins 1679 npro = lcblk(1,iblk+1) - iel 1680 lelCat = lcblk(2,iblk) 1681 nenl = lcblk(5,iblk) 1682 nshl = lcblk(10,iblk) 1683 inum = iel + npro - 1 1684 1685 ngauss = nint(lcsyst) 1686 ngaussf = nintf(lcsyst) 1687 1688 call resSij (yold, x, mien(iblk)%p, fres, 1689 & shglf(lcsyst,:,1:nshl,:), 1690 & shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf)) 1691 1692 if ( istep.eq.0 ) then 1693 1694 allocate ( em(npro,nshl,nshl) ) 1695 1696 call getgram2 (x, mien(iblk)%p, 1697 & shgl(lcsyst,:,1:nshl,:), shp(lcsyst,1:nshl,:), 1698 & shglf(lcsyst,:,1:nshl,:), shpf(lcsyst,1:nshl,:), em, 1699 & Qwtf(lcsyst,1:ngaussf)) 1700 1701c call getgram (x, mien(iblk)%p, 1702c & shgl(lcsyst,:,1:nshl,:), shp(lcsyst,1:nshl,:), 1703c & em, Qwtf(lcsyst,1:ngaussf)) 1704 1705 call fillsparseSclr (mien(iblk)%p, 1706 & em, lhsG, 1707 & rowp, colm) 1708 1709 1710 deallocate ( em ) 1711 1712 endif 1713 1714 enddo ! End loop over element blocks 1715c 1716 1717 if(numpe>1) call commu (fres, ilwork, 24, 'in ') 1718c 1719c account for periodicity in filtered variables 1720c 1721 do j = 1,nshg 1722 i = iper(j) 1723 if (i .ne. j) then 1724 fres(i,:) = fres(i,:) + fres(j,:) 1725 endif 1726 enddo 1727 do j = 1,nshg 1728 i = iper(j) 1729 if (i .ne. j) then 1730 fres(j,:) = fres(i,:) 1731 endif 1732 enddo 1733 1734 if(numpe>1) call commu (fres, ilwork, 24, 'out') 1735 1736 fres(:,22) = one / fres(:,22) 1737 do j = 1,21 1738 fres(:,j) = fres(:,j) * fres(:,22) 1739 enddo 1740 pfres = fres 1741 1742c---- Needed for consistent projection 1743 1744c if(numpe>1) call commu (fres, ilwork, 24, 'in ') 1745c 1746c account for periodicity in filtered variables 1747c 1748c do j = 1,nshg 1749c i = iper(j) 1750c if (i .ne. j) then 1751c fres(i,:) = fres(i,:) + fres(j,:) 1752c endif 1753c enddo 1754 1755c do j = 1,nshg 1756c i = iper(j) 1757c if (i .ne. j) then 1758c fres(j,:) = zero 1759c endif 1760c enddo 1761 1762c Need to zero off-processor slaves as well. 1763 1764c if (numpe.gt.1 .and. nsons(1).gt.1) then 1765 1766c numtask = ilwork(1) 1767c itkbeg = 1 1768 1769c zero the nodes that are "solved" on the other processors 1770 1771c do itask = 1, numtask 1772 1773c iacc = ilwork (itkbeg + 2) 1774c numseg = ilwork (itkbeg + 4) 1775 1776c if (iacc .eq. 0) then 1777c do is = 1,numseg 1778c isgbeg = ilwork (itkbeg + 3 + 2*is) 1779c lenseg = ilwork (itkbeg + 4 + 2*is) 1780c isgend = isgbeg + lenseg - 1 1781c fres(isgbeg:isgend,:) = zero 1782c enddo 1783c endif 1784 1785c itkbeg = itkbeg + 4 + 2*numseg 1786 1787c enddo 1788 1789c endif 1790 1791c... At this point fres has the right hand side vector (b) and lhsG has 1792c... the Gram matrix (M_{AB}) (in sparse storage). Now we need to solve 1793c... Ax = b using the conjugate gradient method to finish off the 1794c... L2-projection. 1795 1796 1797c do i = 16, 16 1798c call sparseCG (fres(:,i), pfres(:,i), lhsG, 1799c & rowp, colm, iper, ilwork, 1800c & iBC, BC) 1801c write(*,*) 'i=', i 1802c enddo 1803 1804 1805c write(*,*)'Done with least-squares projection' 1806 1807 1808 1809 1810 1811c.... First let us obtain cdelsq at each node in the domain. 1812c.... We use numNden which lives in the quadfilt module. 1813 1814 cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09) 1815c cdelsq(:) = zero ! Debugging 1816 1817 if (istep .eq. 0) then 1818 xavegt = zero ! For averaging dissipations and SUPG stresses 1819 endif 1820 1821 if (idiff==1 .or. idiff==3) then ! global reconstruction of qdiff 1822c 1823c loop over element blocks for the global reconstruction 1824c of the diffusive flux vector, q, and lumped mass matrix, rmass 1825c 1826 qres = zero 1827 rmass = zero 1828 1829 do iblk = 1, nelblk 1830 iel = lcblk(1,iblk) 1831 lelCat = lcblk(2,iblk) 1832 lcsyst = lcblk(3,iblk) 1833 iorder = lcblk(4,iblk) 1834 nenl = lcblk(5,iblk) ! no. of vertices per element 1835 nshl = lcblk(10,iblk) 1836 mattyp = lcblk(7,iblk) 1837 ndofl = lcblk(8,iblk) 1838 nsymdl = lcblk(9,iblk) 1839 npro = lcblk(1,iblk+1) - iel 1840 ngauss = nint(lcsyst) 1841 1842 allocate ( fakexmu(npro,ngauss) ) 1843 fakexmu = zero 1844 1845c 1846c.... compute and assemble diffusive flux vector residual, qres, 1847c and lumped mass matrix, rmass 1848 1849 call AsIq (y, x, 1850 & shp(lcsyst,1:nshl,:), 1851 & shgl(lcsyst,:,1:nshl,:), 1852 & mien(iblk)%p, mxmudmi(iblk)%p, 1853 & qres, rmass ) 1854 1855 deallocate ( fakexmu ) 1856 enddo 1857 1858c 1859c.... form the diffusive flux approximation 1860c 1861 call qpbc( rmass, qres, iBC, BC, iper, ilwork ) 1862c 1863 endif 1864 1865 1866c.... form the SUPG stresses well as dissipation due to eddy viscosity, 1867c... and SUPG stabilization. 1868 1869 1870 stress = zero 1871 vol = zero 1872 diss = zero 1873 1874 do iblk = 1,nelblk 1875 lcsyst = lcblk(3,iblk) 1876 iel = lcblk(1,iblk) !Element number where this block begins 1877 npro = lcblk(1,iblk+1) - iel 1878 lelCat = lcblk(2,iblk) 1879 nenl = lcblk(5,iblk) 1880 nshl = lcblk(10,iblk) 1881 inum = iel + npro - 1 1882 1883 ngauss = nint(lcsyst) 1884 ngaussf = nintf(lcsyst) 1885 1886 allocate ( fakexmu(npro,ngauss) ) 1887 fakexmu = zero 1888 1889 call SUPGstress (y, ac, x, qres, mien(iblk)%p, fakexmu, 1890 & cdelsq, shglf(lcsyst,:,1:nshl,:), 1891 & shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf), 1892 & shgl(lcsyst,:,1:nshl,:), shp(lcsyst,1:nshl,:), 1893 & stress, diss, vol) 1894 1895 deallocate ( fakexmu ) 1896 enddo 1897 1898 if(numpe>1) call commu (stress, ilwork, 9, 'in ') 1899 if(numpe>1) call commu (diss, ilwork, 3, 'in ') 1900 if(numpe>1) call commu (vol, ilwork, 1, 'in ') 1901 1902c 1903c account for periodicity 1904c 1905 do j = 1,nshg 1906 i = iper(j) 1907 if (i .ne. j) then 1908 stress(i,:) = stress(i,:) + stress(j,:) 1909 diss(i,:) = diss(i,:) + diss(j,:) 1910 vol(i) = vol(i) + vol(j) 1911 endif 1912 enddo 1913 1914 do j = 1,nshg 1915 i = iper(j) 1916 if (i .ne. j) then 1917 stress(j,:) = stress(i,:) 1918 diss(j,:) = diss(i,:) 1919 vol(j) = vol(i) 1920 endif 1921 enddo 1922 1923 if(numpe>1) call commu (stress, ilwork, 9, 'out ') 1924 if(numpe>1) call commu (diss, ilwork, 3, 'out ') 1925 if(numpe>1) call commu (vol, ilwork, 1, 'out ') 1926 1927 vol = one / vol 1928 do i = 1, 9 1929 stress(:,i) = stress(:,i)*vol(:) 1930 enddo 1931 do i = 1, 3 1932 diss(:,i) = diss(:,i)*vol(:) 1933 enddo 1934 1935c---------- > Begin averaging dissipations and SUPG stress <-------------- 1936 1937 do i = 1, 9 1938 xave(:,i) = stress(:,i) 1939 enddo 1940 xave(:,10) = diss(:,1) 1941 xave(:,11) = diss(:,2) 1942 xave(:,12) = pfres(:,16) 1943 1944c zero on processor periodic nodes so that they will not be added twice 1945 do j = 1,numnp 1946 i = iper(j) 1947 if (i .ne. j) then 1948 xave(j,:) = zero 1949 endif 1950 enddo 1951 1952 if (numpe.gt.1) then 1953 1954 numtask = ilwork(1) 1955 itkbeg = 1 1956 1957c zero the nodes that are "solved" on the other processors 1958 do itask = 1, numtask 1959 1960 iacc = ilwork (itkbeg + 2) 1961 numseg = ilwork (itkbeg + 4) 1962 1963 if (iacc .eq. 0) then 1964 do is = 1,numseg 1965 isgbeg = ilwork (itkbeg + 3 + 2*is) 1966 lenseg = ilwork (itkbeg + 4 + 2*is) 1967 isgend = isgbeg + lenseg - 1 1968 xave(isgbeg:isgend,:) = zero 1969 enddo 1970 endif 1971 1972 itkbeg = itkbeg + 4 + 2*numseg 1973 1974 enddo 1975 1976 endif 1977c 1978 1979 xaveg = zero 1980 do i = 1,nshg 1981 xaveg(ifath(i),:) = xaveg(ifath(i),:) + xave(i,:) 1982 enddo 1983 1984 if(numpe .gt. 1)then 1985 call drvAllreduce(xaveg, xavegr,12*nfath) 1986 1987 do m = 1, 12 1988 xavegr(:,m) = xavegr(:,m)/nsons(:) 1989 enddo 1990 1991c if (myrank .eq. master) then 1992c write(*,*)'diss=', xavegt(14,11), xavegr(14,11) 1993c endif 1994 1995 do m = 1, 12 1996 xavegt(:,m) = xavegt(:,m) + xavegr(:,m) 1997 enddo 1998 1999 else 2000 2001 do m = 1, 12 2002 xaveg(:,m) = xaveg(:,m)/nsons(:) 2003 enddo 2004 2005 do m = 1, 12 2006 xavegt(:,m) = xavegt(:,m) + xaveg(:,m) 2007 enddo 2008 2009 endif 2010 2011 if (myrank .eq. master) then 2012 write(*,*)'diss0=', xavegt(14,11), xavegr(14,11) 2013 endif 2014 2015 if ( istep .eq. (nstep(1)-1) ) then 2016 if ( myrank .eq. master) then 2017 2018 do i = 1, nfath 2019c write(376,*)xavegt(i,1),xavegt(i,2),xavegt(i,3) 2020c write(377,*)xavegt(i,4),xavegt(i,5),xavegt(i,6) 2021c write(378,*)xavegt(i,7),xavegt(i,8),xavegt(i,9) 2022 write(381,*)xavegt(i,10),xavegt(i,11),xavegt(i,12) 2023 enddo 2024 2025c call flush(376) 2026c call flush(377) 2027c call flush(378) 2028c call flush(379) 2029 call flush(381) 2030 endif 2031 endif 2032 2033 rnum(ifath(:)) = numNden(:,1) 2034 rden(ifath(:)) = numNden(:,2) 2035 2036 if (numpe .gt. 1) then 2037 do i = 1, nfath 2038 if (stabdis(i) .gt. zero) then 2039 cdel(i) = (two*xavegr(i,11)-stabdis(i))/xavegr(i,12) 2040 xnume(i) = two*xavegr(i,11)-stabdis(i) 2041 xdeno(i) = xavegr(i,12) 2042 else 2043 xnume(i) = rnum(i) 2044 xdeno(i) = rden(i) 2045 endif 2046 enddo 2047 else 2048 do i = 1, nfath 2049 if (stabdis(i) .gt. zero) then 2050 cdel(i) = (two*xaveg(i,11)-stabdis(i))/xaveg(i,12) 2051 xnume(i) = two*xaveg(i,11)-stabdis(i) 2052 xdeno(i) = xaveg(i,12) 2053 else 2054 xnume(i) = rnum(i) 2055 xdeno(i) = rden(i) 2056 endif 2057 enddo 2058 endif 2059 2060 do i = 1, nfath 2061 if (xnume(i) .lt. zero) then 2062 xnume(i) = rnum(i) 2063 xdeno(i) = rden(i) 2064 endif 2065 enddo 2066 2067 do i = 1, nshg 2068 numNden(i,1) = xnume(ifath(i)) 2069 numNden(i,2) = xdeno(ifath(i)) 2070 enddo 2071 2072 cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09) 2073 2074 do iblk = 1,nelblk 2075 lcsyst = lcblk(3,iblk) 2076 iel = lcblk(1,iblk) !Element number where this block begins 2077 npro = lcblk(1,iblk+1) - iel 2078 lelCat = lcblk(2,iblk) 2079 nenl = lcblk(5,iblk) 2080 nshl = lcblk(10,iblk) 2081 inum = iel + npro - 1 2082 2083 ngauss = nint(lcsyst) 2084 2085 call getstrl (yold, x, mien(iblk)%p, 2086 & strl(iel:inum,:), shgl(lcsyst,:,1:nshl,:), 2087 & shp(lcsyst,1:nshl,:)) 2088 2089 enddo 2090 2091 2092 do iblk = 1,nelblk 2093 lcsyst = lcblk(3,iblk) 2094 iel = lcblk(1,iblk) 2095 npro = lcblk(1,iblk+1) - iel 2096 lelCat = lcblk(2,iblk) 2097 inum = iel + npro - 1 2098 2099 ngauss = nint(lcsyst) 2100 2101 call scatnu (mien(iblk)%p, strl(iel:inum,:), 2102 & mxmudmi(iblk)%p,cdelsq,shp(lcsyst,1:nshl,:)) 2103 enddo 2104 2105 return 2106 2107 end 2108 subroutine FiltRat (y, shgl, shp, 2109 & iper, ilwork, 2110 & nsons, ifath, x, cdelsq1, fwr4, 2111 & fwr3) 2112 2113 use pointer_data 2114 2115 use quadfilt ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf), 2116c shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf). 2117c Shpf and shglf are the shape funciotns and their 2118c gradient evaluated using the quadrature rule desired 2119c for computing the dmod. Qwt contains the weights of the 2120c quad. points. 2121 2122 include "common.h" 2123 include "mpif.h" 2124 include "auxmpi.h" 2125 2126c 2127 dimension fres(nshg,24), fwr(nshg), 2128 & strnrm(nshg), cdelsq1(nfath), 2129 & xnum(nshg), xden(nshg), 2130 & xmij(nshg,6), xlij(nshg,6), 2131 & xnude(nfath,5), xnuder(nfath,5), 2132 & nsons(nshg), xfac(nshg,5), 2133 & strl(numel,ngauss), xa(nfath,3), 2134 & y(nshg,5), yold(nshg,5), 2135 & ifath(nshg), iper(nshg), 2136 & ilwork(nlwork),! xmudmi(numel,ngauss), 2137 & x(numnp,3), 2138 & shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT), 2139 & xnutf(nfath), xkap(nfath), 2140 & fwr2(nshg), fwr3(nshg), 2141 & xlamb1(nfath), xlamb2(nfath), 2142 & fwr4(nshg) 2143c 2144 2145 fres = zero 2146 yold(:,1)=y(:,4) 2147 yold(:,2:4)=y(:,1:3) 2148c 2149c 2150c hack in an interesting velocity field (uncomment to test dmod) 2151c 2152c yold(:,5) = 1.0 ! Debugging 2153c yold(:,2) = 2.0*x(:,1) - 3.0*x(:,2) 2154c yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2) 2155c yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3) 2156c yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable 2157c suitable for the 2158 2159 2160 intrul=intg(1,itseq) 2161 intind=intpt(intrul) 2162 2163 do iblk = 1,nelblk 2164 lcsyst = lcblk(3,iblk) 2165 iel = lcblk(1,iblk) !Element number where this block begins 2166 npro = lcblk(1,iblk+1) - iel 2167 lelCat = lcblk(2,iblk) 2168 nenl = lcblk(5,iblk) 2169 nshl = lcblk(10,iblk) 2170 inum = iel + npro - 1 2171 2172 ngauss = nint(lcsyst) 2173 ngaussf = nintf(lcsyst) 2174 2175 call asithf (yold, x, strl(iel:inum,:), mien(iblk)%p, fres, 2176 & shglf(lcsyst,:,1:nshl,:), 2177 & shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf)) 2178 2179 enddo 2180c 2181 2182 if(numpe>1) call commu (fres, ilwork, 24, 'in ') 2183c 2184c account for periodicity in filtered variables 2185c 2186 do j = 1,nshg 2187 i = iper(j) 2188 if (i .ne. j) then 2189 fres(i,:) = fres(i,:) + fres(j,:) 2190 endif 2191 enddo 2192 do j = 1,nshg 2193 i = iper(j) 2194 if (i .ne. j) then 2195 fres(j,:) = fres(i,:) 2196 endif 2197 enddo 2198 2199 if(numpe>1) call commu (fres, ilwork, 24, 'out') 2200 2201 fres(:,23) = one / fres(:,23) 2202 do j = 1,22 2203 fres(:,j) = fres(:,j) * fres(:,23) 2204 enddo 2205c 2206c.....at this point fres is really all of our filtered quantities 2207c at the nodes 2208c 2209 2210 xlij(:,1) = fres(:,4) - fres(:,1)*fres(:,1) 2211 xlij(:,2) = fres(:,5) - fres(:,2)*fres(:,2) 2212 xlij(:,3) = fres(:,6) - fres(:,3)*fres(:,3) 2213 xlij(:,4) = fres(:,7) - fres(:,1)*fres(:,2) 2214 xlij(:,5) = fres(:,8) - fres(:,1)*fres(:,3) 2215 xlij(:,6) = fres(:,9) - fres(:,2)*fres(:,3) 2216 2217 strnrm = sqrt( 2218 & two * (fres(:,10)**2 + fres(:,11)**2 + fres(:,12)**2) 2219 & + four * ( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) ) 2220 2221 xfac(:,1) = strnrm*strnrm*( fres(:,10)**2 + fres(:,11)**2 + 2222 & fres(:,12)**2 2223 & + two*( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) ) 2224 2225 xfac(:,2) = strnrm*( xlij(:,1)*fres(:,10) + xlij(:,2)*fres(:,11) 2226 & + xlij(:,3)*fres(:,12) + 2227 & two*(xlij(:,4)*fres(:,13) + xlij(:,5)*fres(:,14) + 2228 & xlij(:,6)*fres(:,15)) ) 2229 2230 xfac(:,3) = strnrm*( fres(:,10)*fres(:,16) + fres(:,11)*fres(:,17) 2231 & + fres(:,12)*fres(:,18) + 2232 & two*(fres(:,13)*fres(:,19) + fres(:,14)*fres(:,20) + 2233 & fres(:,15)*fres(:,21)) ) 2234 2235 xfac(:,4) = xlij(:,1)*fres(:,16) + xlij(:,2)*fres(:,17) 2236 & + xlij(:,3)*fres(:,18) + 2237 & two*(xlij(:,4)*fres(:,19) + xlij(:,5)*fres(:,20) + 2238 & xlij(:,6)*fres(:,21)) 2239 2240 xfac(:,5) = fres(:,16)*fres(:,16) + fres(:,17)*fres(:,17) 2241 & + fres(:,18)*fres(:,18) + 2242 & two*(fres(:,19)*fres(:,19) + fres(:,20)*fres(:,20) + 2243 & fres(:,21)*fres(:,21)) 2244 2245 2246c xfac(:,1) = one ! Debugging 2247c xfac(:,2) = one 2248c xfac(:,3) = two 2249c xfac(:,4) = one 2250c xfac(:,5) = one 2251 2252c zero on processor periodic nodes so that they will not be added twice 2253 2254 do j = 1, nshg 2255 i = iper(j) 2256 if (i .ne. j) then 2257 xfac(j,1) = zero 2258 xfac(j,2) = zero 2259 xfac(j,3) = zero 2260 xfac(j,4) = zero 2261 xfac(j,5) = zero 2262 endif 2263 enddo 2264 2265 if (numpe.gt.1) then 2266 2267 numtask = ilwork(1) 2268 itkbeg = 1 2269 2270c zero the nodes that are "solved" on the other processors 2271 do itask = 1, numtask 2272 2273 iacc = ilwork (itkbeg + 2) 2274 numseg = ilwork (itkbeg + 4) 2275 2276 if (iacc .eq. 0) then 2277 do is = 1,numseg 2278 isgbeg = ilwork (itkbeg + 3 + 2*is) 2279 lenseg = ilwork (itkbeg + 4 + 2*is) 2280 isgend = isgbeg + lenseg - 1 2281 xfac(isgbeg:isgend,:) = zero 2282 enddo 2283 endif 2284 2285 itkbeg = itkbeg + 4 + 2*numseg 2286 2287 enddo 2288 2289 endif 2290 2291c... Debugging 2292 2293 xatm1 = sum(xfac(:,1)) 2294 xatm2 = sum(xfac(:,2)) 2295 xatm3 = sum(xfac(:,3)) 2296 xatm4 = sum(xfac(:,4)) 2297 xatm5 = sum(xfac(:,5)) 2298 2299 2300c 2301c Description of arrays. Each processor has an array of length equal 2302c to the total number of fathers times 2 xnude(nfathers,2). One to collect 2303c the numerator and one to collect the denominator. There is also an array 2304c of length nshg on each processor which tells the father number of each 2305c on processor node, ifath(nnshg). Finally, there is an arry of length 2306c nfathers to tell the total (on all processors combined) number of sons 2307c for each father. 2308c 2309c Now loop over nodes and accumlate the numerator and the denominator 2310c to the father nodes. Only on processor addition at this point. 2311c Note that serrogate fathers are collect some for the case where some 2312c sons are on another processor 2313c 2314 xnude = zero 2315 do i = 1,nshg 2316 xnude(ifath(i),1) = xnude(ifath(i),1) + xfac(i,1) 2317 xnude(ifath(i),2) = xnude(ifath(i),2) + xfac(i,2) 2318 xnude(ifath(i),3) = xnude(ifath(i),3) + xfac(i,3) 2319 xnude(ifath(i),4) = xnude(ifath(i),4) + xfac(i,4) 2320 xnude(ifath(i),5) = xnude(ifath(i),5) + xfac(i,5) 2321 enddo 2322 2323c 2324c Now the true fathers and serrogates combine results and update 2325c each other. 2326c 2327 if(numpe .gt. 1)then 2328 call drvAllreduce(xnude, xnuder,5*nfath) 2329c 2330c xnude is the sum of the sons for each father on this processor 2331c 2332c xnuder is the sum of the sons for each father on all processor combined 2333c (the same as if we had not partitioned the mesh for each processor) 2334c 2335c For each father we have precomputed the number of sons (including 2336c the sons off processor). 2337c 2338c Now divide by number of sons to get the average (not really necessary 2339c for dynamic model since ratio will cancel nsons at each father) 2340c 2341c xnuder(:,1) = xnuder(:,1) / nsons(:) 2342c xnuder(:,2) = xnuder(:,2) / nsons(:) 2343c xnuder(:,3) = xnuder(:,3) / nsons(:) 2344c xnuder(:,4) = xnuder(:,4) / nsons(:) 2345c xnuder(:,5) = xnuder(:,5) / nsons(:) 2346c 2347c the next line are the a, b, c coefficients in the quadratic eq. 2348c 2349 2350 do i = 1,nfath 2351 xa(i,1) = two*cdelsq1(i)*xnuder(i,1) + 2352 & xnuder(i,2) 2353 xa(i,2) = four*cdelsq1(i)*xnuder(i,3) + 2354 & xnuder(i,4) 2355 xa(i,3) = two*cdelsq1(i)*xnuder(i,5) 2356 2357c xa(i,1) = xnuder(ifath(i),1) + ! Debugging 2358c & xnuder(ifath(i),2) 2359c xa(i,2) = xnuder(ifath(i),3) + 2360c & xnuder(ifath(i),4) 2361c xa(i,3) = xnuder(ifath(i),5) 2362 2363 2364 enddo 2365 else 2366 2367c xnude(:,1) = xnude(:,1) / nsons(:) 2368c xnude(:,2) = xnude(:,2) / nsons(:) 2369c xnude(:,3) = xnude(:,3) / nsons(:) 2370c xnude(:,4) = xnude(:,4) / nsons(:) 2371c xnude(:,5) = xnude(:,5) / nsons(:) 2372 2373 do i = 1,nfath 2374 xa(i,1) = two*cdelsq1(i)*xnude(i,1) + 2375 & xnude(i,2) 2376 xa(i,2) = four*cdelsq1(i)*xnude(i,3) + 2377 & xnude(i,4) 2378 xa(i,3) = two*cdelsq1(i)*xnude(i,5) 2379 2380c xa(i,1) = xnude(ifath(i),1) + ! Debugging 2381c & xnude(ifath(i),2) 2382c xa(i,2) = xnude(ifath(i),3) + 2383c & xnude(ifath(i),4) 2384c xa(i,3) = xnude(ifath(i),5) 2385 2386 enddo 2387 endif 2388 2389c... Solve a*x*x - b*x + c 2390 2391 2392 do i = 1, nfath 2393 2394 xdisc = xa(i,2)**2 - four*xa(i,1)*xa(i,3) 2395 2396 if (xdisc .lt. zero) then 2397 write(*,*) '*********Warning on filter width ratio********' 2398 xlamb1(i) = fwr1 2399 xlamb2(i) = fwr1 2400 if (xdisc .lt. -0.5d0) then 2401 write(*,*) '*********Warning on filter width ratio********' 2402 endif 2403 endif 2404 2405 if (xdisc .eq. zero) then 2406 xlamb1(i) = xa(i,2) / (two*xa(i,1)) 2407 xlamb2(i) = xa(i,2) / (two*xa(i,1)) 2408 endif 2409 2410 if (xdisc .gt. zero) then 2411 xlamb1(i)= ( xa(i,2) + sqrt( xa(i,2)**2 - four*xa(i,1)*xa(i,3) ) ) 2412 & / (two*xa(i,1)) 2413 xlamb2(i)= ( xa(i,2) - sqrt( xa(i,2)**2 - four*xa(i,1)*xa(i,3) ) ) 2414 & / (two*xa(i,1)) 2415 endif 2416 2417 enddo 2418 2419 do i = 1, nshg 2420 fwr2(i) = xlamb1(ifath(i)) 2421 fwr3(i) = xlamb2(ifath(i)) 2422 enddo 2423 2424 if (myrank .eq. master) then 2425 do i = 1, nfath 2426 write(23,*)i,xlamb1(i), xlamb2(i) 2427 enddo 2428 endif 2429 call flush(23) 2430 2431 2432 2433 do i = 1, nfath 2434 xkap(i) = cdelsq1(i) / xlamb2(i) 2435 xa(i,1) = two*xkap(i)*xnuder(i,1) 2436 xa(i,2) = four*xkap(i)*xnuder(i,3) - xnuder(i,2) 2437 xa(i,3) = two*xkap(i)*xnuder(i,5) - xnuder(i,4) 2438 2439 xlamb1(i)= ( xa(i,2) + sqrt( xa(i,2)**2 - four*xa(i,1)*xa(i,3) 2440 & ) )/ (two*xa(i,1)) 2441 xlamb2(i)= ( xa(i,2) - sqrt( xa(i,2)**2 - four*xa(i,1)*xa(i,3) 2442 & ) )/ (two*xa(i,1)) 2443 2444 enddo 2445 2446 2447 if (myrank .eq. master) then 2448 do i = 1, nfath 2449 write(255,*)i, xlamb1(i), xlamb2(i) 2450 enddo 2451 endif 2452 call flush(255) 2453 2454 fwr4(:) = xlamb1(ifath(:)) 2455 2456 return 2457 end 2458 subroutine DFWRsfdmc (y, shgl, shp, 2459 & iper, ilwork, 2460 & nsons, ifath, x, fwr2, fwr3) 2461 2462 use pointer_data 2463 2464 use quadfilt ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf), 2465c shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf). 2466c Shpf and shglf are the shape funciotns and their 2467c gradient evaluated using the quadrature rule desired 2468c for computing the dmod. Qwt contains the weights of the 2469c quad. points. 2470 2471 2472 2473 include "common.h" 2474 include "mpif.h" 2475 include "auxmpi.h" 2476 2477c 2478 dimension fres(nshg,24), fwr(nshg), 2479 & strnrm(nshg), cdelsq(nshg), 2480 & cdelsq2(nshg), 2481 & xnum(nshg), xden(nshg), 2482 & xmij(nshg,6), xlij(nshg,6), 2483 & xnude(nfath,2), xnuder(nfath,2), 2484 & ynude(nfath,6), ynuder(nfath,6), 2485 & ui(nfath,3), snorm(nfath), 2486 & uir(nfath,3), snormr(nfath), 2487 & xm(nfath,6), xl(nfath,6) 2488 dimension xl1(nfath,6), xl2(nfath,6), 2489 & xl1r(nfath,6), xl2r(nfath,6), 2490 & xmr(nfath,6), xlr(nfath,6), 2491 & nsons(nshg), 2492 & strl(numel,ngauss), 2493 & y(nshg,5), yold(nshg,5), 2494 & ifath(nshg), iper(nshg), 2495 & ilwork(nlwork),! xmudmi(numel,ngauss), 2496 & x(numnp,3) 2497 dimension shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT), 2498 & xnutf(nfath), xfac(nshg,5), 2499 & fwr2(nshg), fwr3(nshg) 2500 2501 character*10 cname 2502 character*30 fname1, fname2, fname3, fname4, fname5, fname6, 2503 & fname0 2504c 2505c 2506c setup the weights for time averaging of cdelsq (now in quadfilt module) 2507c 2508 denom=max(1.0d0*(lstep),one) 2509 if(dtavei.lt.0) then 2510 wcur=one/denom 2511 else 2512 wcur=dtavei 2513 endif 2514 whist=1.0-wcur 2515 2516 if (istep .eq. 0) then 2517 xnd = zero 2518 xmodcomp = zero 2519 xmcomp = zero 2520 xlcomp = zero 2521 xl1comp = zero 2522 xl2comp = zero 2523 ucomp = zero 2524 scomp = zero 2525 endif 2526 2527 2528 fres = zero 2529 yold(:,1)=y(:,4) 2530 yold(:,2:4)=y(:,1:3) 2531c 2532 2533c 2534c hack in an interesting velocity field (uncomment to test dmod) 2535c 2536c yold(:,5) = 1.0 ! Debugging 2537c yold(:,2) = 2.0*x(:,1) - 3.0*x(:,2) 2538c yold(:,2) = 2.0 2539c yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2) 2540c yold(:,3) = 3.0 2541c yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3) 2542c yold(:,4) = 4.0 2543c yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable 2544c suitable for the 2545 2546 2547 2548 intrul=intg(1,itseq) 2549 intind=intpt(intrul) 2550 2551 do iblk = 1,nelblk 2552 lcsyst = lcblk(3,iblk) 2553 iel = lcblk(1,iblk) !Element number where this block begins 2554 npro = lcblk(1,iblk+1) - iel 2555 lelCat = lcblk(2,iblk) 2556 nenl = lcblk(5,iblk) 2557 nshl = lcblk(10,iblk) 2558 inum = iel + npro - 1 2559 2560 ngauss = nint(lcsyst) 2561 2562 call asithf (yold, x, strl(iel:inum,:), mien(iblk)%p, fres, 2563 & shglf(lcsyst,:,1:nshl,:), 2564 & shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf)) 2565 2566 enddo 2567c 2568 2569 if (ngaussf .ne. ngauss) then 2570 do iblk = 1,nelblk 2571 lcsyst = lcblk(3,iblk) 2572 iel = lcblk(1,iblk) !Element number where this block begins 2573 npro = lcblk(1,iblk+1) - iel 2574 lelCat = lcblk(2,iblk) 2575 nenl = lcblk(5,iblk) 2576 nshl = lcblk(10,iblk) 2577 inum = iel + npro - 1 2578 2579 ngauss = nint(lcsyst) 2580 2581 call getstrl (yold, x, mien(iblk)%p, 2582 & strl(iel:inum,:), shgl(lcsyst,:,1:nshl,:), 2583 & shp(lcsyst,1:nshl,:)) 2584 2585 enddo 2586 endif 2587c 2588c 2589C must fix for abc and dynamic model 2590c if(iabc==1) !are there any axisym bc's 2591c & call rotabc(res, iBC, BC,nflow, 'in ') 2592c 2593 if(numpe>1) call commu (fres, ilwork, 24, 'in ') 2594c 2595c account for periodicity in filtered variables 2596c 2597 do j = 1,nshg 2598 i = iper(j) 2599 if (i .ne. j) then 2600 fres(i,:) = fres(i,:) + fres(j,:) 2601 endif 2602 enddo 2603 do j = 1,nshg 2604 i = iper(j) 2605 if (i .ne. j) then 2606 fres(j,:) = fres(i,:) 2607 endif 2608 enddo 2609 2610 if(numpe>1) call commu (fres, ilwork, 24, 'out') 2611 2612 fres(:,23) = one / fres(:,23) 2613 do j = 1,22 2614 fres(:,j) = fres(:,j) * fres(:,23) 2615 enddo 2616c fres(:,24) = fres(:,24) * fres(:,23) 2617c 2618c.....at this point fres is really all of our filtered quantities 2619c at the nodes 2620c 2621 2622 strnrm = sqrt( 2623 & two * (fres(:,10)**2 + fres(:,11)**2 + fres(:,12)**2) 2624 & + four * ( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) ) 2625 2626c fwr = fwr1 * fres(:,22) * strnrm 2627 fwr = fwr3 * fres(:,22) * strnrm 2628 2629 xmij(:,1) = -fwr 2630 & * fres(:,10) + fres(:,16) 2631 xmij(:,2) = -fwr 2632 & * fres(:,11) + fres(:,17) 2633 xmij(:,3) = -fwr 2634 & * fres(:,12) + fres(:,18) 2635 2636 xmij(:,4) = -fwr * fres(:,13) + fres(:,19) 2637 xmij(:,5) = -fwr * fres(:,14) + fres(:,20) 2638 xmij(:,6) = -fwr * fres(:,15) + fres(:,21) 2639 2640 fres(:,22) = one / fres(:,22) 2641 2642 xlij(:,1) = fres(:,4) - fres(:,1) * fres(:,1) * fres(:,22) 2643 xlij(:,2) = fres(:,5) - fres(:,2) * fres(:,2) * fres(:,22) 2644 xlij(:,3) = fres(:,6) - fres(:,3) * fres(:,3) * fres(:,22) 2645 xlij(:,4) = fres(:,7) - fres(:,1) * fres(:,2) * fres(:,22) 2646 xlij(:,5) = fres(:,8) - fres(:,1) * fres(:,3) * fres(:,22) 2647 xlij(:,6) = fres(:,9) - fres(:,2) * fres(:,3) * fres(:,22) 2648 2649 xnum = xlij(:,1) * xmij(:,1) + xlij(:,2) * xmij(:,2) 2650 & + xlij(:,3) * xmij(:,3) 2651 & + two * (xlij(:,4) * xmij(:,4) + xlij(:,5) * xmij(:,5) 2652 & + xlij(:,6) * xmij(:,6)) 2653 xden = xmij(:,1) * xmij(:,1) + xmij(:,2) * xmij(:,2) 2654 & + xmij(:,3) * xmij(:,3) 2655 & + two * (xmij(:,4) * xmij(:,4) + xmij(:,5) * xmij(:,5) 2656 & + xmij(:,6) * xmij(:,6)) 2657 xden = two * xden 2658 2659c... For collectection of statistics on dyn. model components 2660 2661 xfac(:,1) = strnrm*strnrm*( fres(:,10)**2 + fres(:,11)**2 + 2662 & fres(:,12)**2 2663 & + two*( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) ) 2664 2665 xfac(:,2) = strnrm*( xlij(:,1)*fres(:,10) + xlij(:,2)*fres(:,11) 2666 & + xlij(:,3)*fres(:,12) + 2667 & two*(xlij(:,4)*fres(:,13) + xlij(:,5)*fres(:,14) + 2668 & xlij(:,6)*fres(:,15)) ) 2669 2670 xfac(:,3) = strnrm*( fres(:,10)*fres(:,16) + fres(:,11)*fres(:,17) 2671 & + fres(:,12)*fres(:,18) + 2672 & two*(fres(:,13)*fres(:,19) + fres(:,14)*fres(:,20) + 2673 & fres(:,15)*fres(:,21)) ) 2674 2675 xfac(:,4) = xlij(:,1)*fres(:,16) + xlij(:,2)*fres(:,17) 2676 & + xlij(:,3)*fres(:,18) + 2677 & two*(xlij(:,4)*fres(:,19) + xlij(:,5)*fres(:,20) + 2678 & xlij(:,6)*fres(:,21)) 2679 2680 xfac(:,5) = fres(:,16)*fres(:,16) + fres(:,17)*fres(:,17) 2681 & + fres(:,18)*fres(:,18) + 2682 & two*(fres(:,19)*fres(:,19) + fres(:,20)*fres(:,20) + 2683 & fres(:,21)*fres(:,21)) 2684 2685c zero on processor periodic nodes so that they will not be added twice 2686 do j = 1,numnp 2687 i = iper(j) 2688 if (i .ne. j) then 2689 xnum(j) = zero 2690 xden(j) = zero 2691 xfac(j,:) = zero 2692 xmij(j,:) = zero 2693 xlij(j,:) = zero 2694 fres(j,:) = zero 2695 strnrm(j) = zero 2696 endif 2697 enddo 2698 2699 if (numpe.gt.1) then 2700 2701 numtask = ilwork(1) 2702 itkbeg = 1 2703 2704c zero the nodes that are "solved" on the other processors 2705 do itask = 1, numtask 2706 2707 iacc = ilwork (itkbeg + 2) 2708 numseg = ilwork (itkbeg + 4) 2709 2710 if (iacc .eq. 0) then 2711 do is = 1,numseg 2712 isgbeg = ilwork (itkbeg + 3 + 2*is) 2713 lenseg = ilwork (itkbeg + 4 + 2*is) 2714 isgend = isgbeg + lenseg - 1 2715 xnum(isgbeg:isgend) = zero 2716 xden(isgbeg:isgend) = zero 2717 strnrm(isgbeg:isgend) = zero 2718 xfac(isgbeg:isgend,:) = zero 2719 xmij(isgbeg:isgend,:) = zero 2720 xlij(isgbeg:isgend,:) = zero 2721 fres(isgbeg:isgend,:) = zero 2722 enddo 2723 endif 2724 2725 itkbeg = itkbeg + 4 + 2*numseg 2726 2727 enddo 2728 2729 endif 2730c 2731c Description of arrays. Each processor has an array of length equal 2732c to the total number of fathers times 2 xnude(nfathers,2). One to collect 2733c the numerator and one to collect the denominator. There is also an array 2734c of length nshg on each processor which tells the father number of each 2735c on processor node, ifath(nnshg). Finally, there is an arry of length 2736c nfathers to tell the total (on all processors combined) number of sons 2737c for each father. 2738c 2739c Now loop over nodes and accumlate the numerator and the denominator 2740c to the father nodes. Only on processor addition at this point. 2741c Note that serrogate fathers are collect some for the case where some 2742c sons are on another processor 2743c 2744 xnude = zero 2745 ynude = zero 2746 xm = zero 2747 xl = zero 2748 xl1 = zero 2749 xl2 = zero 2750 ui = zero 2751 snorm = zero 2752 2753 do i = 1,nshg 2754 xnude(ifath(i),1) = xnude(ifath(i),1) + xnum(i) 2755 xnude(ifath(i),2) = xnude(ifath(i),2) + xden(i) 2756 2757 ynude(ifath(i),1) = ynude(ifath(i),1) + xfac(i,1) 2758 ynude(ifath(i),2) = ynude(ifath(i),2) + xfac(i,2) 2759 ynude(ifath(i),3) = ynude(ifath(i),3) + xfac(i,3) 2760 ynude(ifath(i),4) = ynude(ifath(i),4) + xfac(i,4) 2761 ynude(ifath(i),5) = ynude(ifath(i),5) + xfac(i,5) 2762 2763 xm(ifath(i),1) = xm(ifath(i),1) + xmij(i,1) 2764 xm(ifath(i),2) = xm(ifath(i),2) + xmij(i,2) 2765 xm(ifath(i),3) = xm(ifath(i),3) + xmij(i,3) 2766 xm(ifath(i),4) = xm(ifath(i),4) + xmij(i,4) 2767 xm(ifath(i),5) = xm(ifath(i),5) + xmij(i,5) 2768 xm(ifath(i),6) = xm(ifath(i),6) + xmij(i,6) 2769 2770 xl(ifath(i),1) = xl(ifath(i),1) + xlij(i,1) 2771 xl(ifath(i),2) = xl(ifath(i),2) + xlij(i,2) 2772 xl(ifath(i),3) = xl(ifath(i),3) + xlij(i,3) 2773 xl(ifath(i),4) = xl(ifath(i),4) + xlij(i,4) 2774 xl(ifath(i),5) = xl(ifath(i),5) + xlij(i,5) 2775 xl(ifath(i),6) = xl(ifath(i),6) + xlij(i,6) 2776 2777 xl1(ifath(i),1) = xl1(ifath(i),1) + fres(i,4) 2778 xl1(ifath(i),2) = xl1(ifath(i),2) + fres(i,5) 2779 xl1(ifath(i),3) = xl1(ifath(i),3) + fres(i,6) 2780 xl1(ifath(i),4) = xl1(ifath(i),4) + fres(i,7) 2781 xl1(ifath(i),5) = xl1(ifath(i),5) + fres(i,8) 2782 xl1(ifath(i),6) = xl1(ifath(i),6) + fres(i,9) 2783 2784 xl2(ifath(i),1) = xl2(ifath(i),1) + fres(i,1)*fres(i,1) 2785 xl2(ifath(i),2) = xl2(ifath(i),2) + fres(i,2)*fres(i,2) 2786 xl2(ifath(i),3) = xl2(ifath(i),3) + fres(i,3)*fres(i,3) 2787 xl2(ifath(i),4) = xl2(ifath(i),4) + fres(i,1)*fres(i,2) 2788 xl2(ifath(i),5) = xl2(ifath(i),5) + fres(i,1)*fres(i,3) 2789 xl2(ifath(i),6) = xl2(ifath(i),6) + fres(i,2)*fres(i,3) 2790 2791 ui(ifath(i),1) = ui(ifath(i),1) + fres(i,1) 2792 ui(ifath(i),2) = ui(ifath(i),2) + fres(i,2) 2793 ui(ifath(i),3) = ui(ifath(i),3) + fres(i,3) 2794 2795 snorm(ifath(i)) = snorm(ifath(i)) + strnrm(i) 2796 2797 enddo 2798 2799c 2800c Now the true fathers and serrogates combine results and update 2801c each other. 2802c 2803 if(numpe .gt. 1)then 2804 call drvAllreduce(xnude, xnuder,2*nfath) 2805 call drvAllreduce(ynude, ynuder,6*nfath) 2806 call drvAllreduce(xm, xmr,6*nfath) 2807 call drvAllreduce(xl, xlr,6*nfath) 2808 call drvAllreduce(xl1, xl1r,6*nfath) 2809 call drvAllreduce(xl2, xl2r,6*nfath) 2810 call drvAllreduce(ui, uir,3*nfath) 2811 call drvAllreduce(snorm, snormr,nfath) 2812 2813 do i = 1, nfath 2814 ynuder(i,6) = ( ynuder(i,4) - fwr1*ynuder(i,2) ) / 2815 & ( two*ynuder(i,5) - four*fwr1*ynuder(i,3) 2816 & + two*fwr1*fwr1*ynuder(i,1) ) 2817 enddo 2818 2819 cdelsq2(:) = ynuder(ifath(:),6) ! For comparison w/ cdelsq 2820c 2821c xnude is the sum of the sons for each father on this processor 2822c 2823c xnuder is the sum of the sons for each father on all processor combined 2824c (the same as if we had not partitioned the mesh for each processor) 2825c 2826c For each father we have precomputed the number of sons (including 2827c the sons off processor). 2828c 2829c Now divide by number of sons to get the average (not really necessary 2830c for dynamic model since ratio will cancel nsons at each father) 2831c 2832 xnuder(:,1) = xnuder(:,1) / nsons(:) 2833 xnuder(:,2) = xnuder(:,2) / nsons(:) 2834 2835 do m = 1, 5 2836 ynuder(:,m) = ynuder(:,m)/nsons(:) 2837 enddo 2838 do m = 1,6 2839 xmr(:,m) = xmr(:,m)/nsons(:) 2840 xlr(:,m) = xlr(:,m)/nsons(:) 2841 xl1r(:,m) = xl1r(:,m)/nsons(:) 2842 xl2r(:,m) = xl2r(:,m)/nsons(:) 2843 enddo 2844 2845 uir(:,1) = uir(:,1)/nsons(:) 2846 uir(:,2) = uir(:,2)/nsons(:) 2847 uir(:,3) = uir(:,3)/nsons(:) 2848 2849 snormr(:) = snormr(:)/nsons(:) 2850c 2851cc the next line is c \Delta^2 2852cc 2853cc xnuder(:,1) = xnuder(:,1) / (xnuder(:,2) + 1.d-09) 2854cc do i = 1,nshg 2855cc cdelsq(i) = xnuder(ifath(i),1) 2856cc enddo 2857 2858 numNden(:,1) = whist*numNden(:,1)+wcur*xnuder(ifath(:),1) 2859 numNden(:,2) = whist*numNden(:,2)+wcur*xnuder(ifath(:),2) 2860 cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09) 2861 2862c cdelsq(:) = xnuder(ifath(:),1)/(xnuder(ifath(:),2)+1.d-09) 2863 2864 xnd(:,1) = xnd(:,1) + xnuder(:,1) 2865 xnd(:,2) = xnd(:,2) + xnuder(:,2) 2866 2867 xmodcomp(:,1) = xmodcomp(:,1)+ynuder(:,1) 2868 xmodcomp(:,2) = xmodcomp(:,2)+ynuder(:,2) 2869 xmodcomp(:,3) = xmodcomp(:,3)+ynuder(:,3) 2870 xmodcomp(:,4) = xmodcomp(:,4)+ynuder(:,4) 2871 xmodcomp(:,5) = xmodcomp(:,5)+ynuder(:,5) 2872 2873 xmcomp(:,:) = xmcomp(:,:)+xmr(:,:) 2874 xlcomp(:,:) = xlcomp(:,:)+xlr(:,:) 2875 2876 xl1comp(:,:) = xl1comp(:,:)+xl1r(:,:) 2877 xl2comp(:,:) = xl2comp(:,:)+xl2r(:,:) 2878 2879 ucomp(:,:) = ucomp(:,:)+uir(:,:) 2880 u1 = uir(32,1) 2881 scomp(:) = scomp(:)+snormr(:) 2882 2883 else 2884 2885 xnude(:,1) = xnude(:,1)/nsons(:) 2886 xnude(:,2) = xnude(:,2)/nsons(:) 2887 2888 do m = 1, 5 2889 ynude(:,m) = ynude(:,m)/nsons(:) 2890 enddo 2891 do m = 1,6 2892 xm(:,m) = xm(:,m)/nsons(:) 2893 xl(:,m) = xl(:,m)/nsons(:) 2894 xl1(:,m) = xl1(:,m)/nsons(:) 2895 xl2(:,m) = xl2(:,m)/nsons(:) 2896 enddo 2897 2898 ui(:,1) = ui(:,1)/nsons(:) 2899 ui(:,2) = ui(:,2)/nsons(:) 2900 ui(:,3) = ui(:,3)/nsons(:) 2901 2902 snorm(:) = snorm(:)/nsons(:) 2903 2904c 2905c the next line is c \Delta^2, not nu_T but we want to save the 2906c memory 2907c 2908 2909cc xnude(:,1) = xnude(:,1) / (xnude(:,2) + 1.d-09) 2910cc do i = 1,nshg 2911cc cdelsq(i) = xnude(ifath(i),1) 2912cc enddo 2913cc endif 2914 2915 do i = 1, nfath 2916 ynude(i,6) = ( ynude(i,4) - fwr1*ynude(i,2) ) / 2917 & ( two*ynude(i,5) - four*fwr1*ynude(i,3) 2918 & + fwr1*fwr1*ynude(i,1) ) 2919 enddo 2920 2921 numNden(:,1) = whist*numNden(:,1)+wcur*xnude(ifath(:),1) 2922 numNden(:,2) = whist*numNden(:,2)+wcur*xnude(ifath(:),2) 2923 2924 xnd(:,1) = xnd(:,1)+xnude(:,1) 2925 xnd(:,2) = xnd(:,2)+xnude(:,2) 2926 2927 cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09) 2928 2929c cdelsq(:) = xnude(ifath(:),1)/(xnude(ifath(:),2))!+1.d-09) 2930 2931 2932 cdelsq2(:) = ynude(ifath(:),6) ! For comparison w/ cdelsq 2933 2934 xmodcomp(:,1) = xmodcomp(:,1)+ynude(:,1) 2935 xmodcomp(:,2) = xmodcomp(:,2)+ynude(:,2) 2936 xmodcomp(:,3) = xmodcomp(:,3)+ynude(:,3) 2937 xmodcomp(:,4) = xmodcomp(:,4)+ynude(:,4) 2938 xmodcomp(:,5) = xmodcomp(:,5)+ynude(:,5) 2939 2940 xmcomp(:,:) = xmcomp(:,:)+xm(:,:) 2941 xlcomp(:,:) = xlcomp(:,:)+xl(:,:) 2942 2943 xl1comp(:,:) = xl1comp(:,:)+xl1(:,:) 2944 xl2comp(:,:) = xl2comp(:,:)+xl2(:,:) 2945 2946 ucomp(:,:) = ucomp(:,:)+ui(:,:) 2947 scomp(:) = scomp(:)+snorm(:) 2948 2949 endif 2950 2951c do i = 1, nfath 2952c xmodcomp(i,:) = xmodcomp(i,:)/nsons(i) 2953c xmcomp(i,:) = xmcomp(i,:)/nsons(i) 2954c xlcomp(i,:) = xlcomp(i,:)/nsons(i) 2955c xl2comp(i,:) = xl2comp(i,:)/nsons(i) 2956c xl1comp(i,:) = xl1comp(i,:)/nsons(i) 2957c xnd(i,:) = xnd(i,:)/nsons(i) 2958c scomp(i) = scomp(i)/nsons(i) 2959c ucomp(i,:) = ucomp(i,:)/nsons(i) 2960c enddo 2961 2962 if (myrank .eq. master) then 2963 write(*,*)'istep, nstep=', istep, nstep(1) 2964 endif 2965 2966 if ( istep .eq. (nstep(1)-1) ) then 2967 if ( myrank .eq. master) then 2968 2969 do i = 1, nfath 2970 write(365,*)xmodcomp(i,1),xmodcomp(i,2),xmodcomp(i,3), 2971 & xmodcomp(i,4),xmodcomp(i,5) 2972 2973 write(366,*)xmcomp(i,1),xmcomp(i,2),xmcomp(i,3) 2974 write(367,*)xmcomp(i,4),xmcomp(i,5),xmcomp(i,6) 2975 2976 write(368,*)xlcomp(i,1),xlcomp(i,2),xlcomp(i,3) 2977 write(369,*)xlcomp(i,4),xlcomp(i,5),xlcomp(i,6) 2978 2979 write(370,*)xl1comp(i,1),xl1comp(i,2),xl1comp(i,3) 2980 write(371,*)xl1comp(i,4),xl1comp(i,5),xl1comp(i,6) 2981 2982 write(372,*)xl2comp(i,1),xl2comp(i,2),xl2comp(i,3) 2983 write(373,*)xl2comp(i,4),xl2comp(i,5),xl2comp(i,6) 2984 2985 write(374,*)xnd(i,1),xnd(i,2),scomp(i) 2986 write(375,*)ucomp(i,1),ucomp(i,2),ucomp(i,3) 2987 enddo 2988 2989 call flush(365) 2990 call flush(366) 2991 call flush(367) 2992 call flush(368) 2993 call flush(369) 2994 call flush(370) 2995 call flush(371) 2996 call flush(372) 2997 call flush(373) 2998 call flush(374) 2999 call flush(375) 3000 3001c close(852) 3002c close(853) 3003c close(854) 3004 3005 endif 3006 endif 3007 3008 if (myrank .eq. master) then 3009 write(*,*)'uit uic=', ucomp(32,1),u1 3010 endif 3011 3012 555 format(e14.7,4(2x,e14.7)) 3013 556 format(e14.7,5(2x,e14.7)) 3014 3015 3016 3017 3018c $$$$$$$$$$$$$$$$$$$$$$$$$$$ 3019 tmp1 = MINVAL(cdelsq) 3020 tmp2 = MAXVAL(cdelsq) 3021 if(numpe>1) then 3022 call MPI_REDUCE (tmp1, tmp3, 1,MPI_DOUBLE_PRECISION, 3023 & MPI_MIN, master, MPI_COMM_WORLD, ierr) 3024 call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION, 3025 & MPI_MAX, master, MPI_COMM_WORLD, ierr) 3026 tmp1=tmp3 3027 tmp2=tmp4 3028 endif 3029 if (myrank .EQ. master) then !print CDelta^2 range 3030 write(34,*)lstep,tmp1,tmp2 3031 call flush(34) 3032 endif 3033c $$$$$$$$$$$$$$$$$$$$$$$$$$$ 3034 3035 if (myrank .eq. master) then 3036 write(*,*) 'cdelsq=', cdelsq(1),cdelsq(2) 3037 write(*,*) 'cdelsq=', cdelsq2(1),cdelsq2(2) 3038 write(22,*) lstep, cdelsq(1) 3039 call flush(22) 3040 endif 3041 3042 do iblk = 1,nelblk 3043 lcsyst = lcblk(3,iblk) 3044 iel = lcblk(1,iblk) 3045 npro = lcblk(1,iblk+1) - iel 3046 lelCat = lcblk(2,iblk) 3047 inum = iel + npro - 1 3048 3049 ngauss = nint(lcsyst) 3050 3051 call scatnu (mien(iblk)%p, strl(iel:inum,:), 3052 & mxmudmi(iblk)%p,cdelsq,shp(lcsyst,1:nshl,:)) 3053 enddo 3054c $$$$$$$$$$$$$$$$$$$$$$$$$$$ 3055c$$$ tmp1 = MINVAL(xmudmi) 3056c$$$ tmp2 = MAXVAL(xmudmi) 3057c$$$ if(numpe>1) then 3058c$$$ call MPI_REDUCE (tmp1, tmp3, 1, MPI_DOUBLE_PRECISION, 3059c$$$ & MPI_MIN, master, MPI_COMM_WORLD, ierr) 3060c$$$ call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION, 3061c$$$ & MPI_MAX, master, MPI_COMM_WORLD, ierr) 3062c$$$ tmp1=tmp3 3063c$$$ tmp2=tmp4 3064c$$$ endif 3065c$$$ if (myrank .EQ. master) then 3066c$$$ write(35,*) lstep,tmp1,tmp2 3067c$$$ call flush(35) 3068c$$$ endif 3069c $$$$$$$$$$$$$$$$$$$$$$$$$$$ 3070 3071c 3072c if flag set, write a restart file with info (reuse xmij's memory) 3073c 3074 if(irs.eq.11) then 3075 lstep=999 3076 xmij(:,1)=xnum(:) 3077 xmij(:,2)=xden(:) 3078 xmij(:,3)=cdelsq(:) 3079 xmij(:,5)=xlij(:,4) !leave M_{12} in 4 and put L_{12} here 3080 call restar('out ',xmij,xlij) !also dump all of L_{ij} in ac 3081 stop 3082 endif 3083c 3084c local clipping moved to scatnu with the creation of mxmudmi pointers 3085c 3086c$$$ rmu=datmat(1,2,1) 3087c$$$ xmudmi=min(xmudmi,1000.0*rmu) !don't let it get larger than 1000 mu 3088c$$$ xmudmi=max(xmudmi, -rmu) ! don't let (xmudmi + mu) < 0 3089c stop !uncomment to test dmod 3090c 3091 3092 3093c write out the nodal values of xnut (estimate since we don't calc strain 3094c there and must use the filtered strain). 3095c 3096 3097 if ((irs .ge. 1) .and. (mod(lstep, ntout) .eq. 0)) then 3098c 3099c collect the average strain into xnude(2) 3100c 3101 xnude(:,2) = zero 3102 do i = 1,numnp 3103 xnude(ifath(i),2) = xnude(ifath(i),2) + strnrm(i) 3104 enddo 3105 3106 if(numpe .gt. 1) then 3107 call drvAllreduce(xnude(:,2), xnuder(:,2),nfath) 3108 else 3109 xnuder=xnude 3110 endif 3111c 3112c nut= cdelsq * |S| 3113c 3114 xnutf=xnuder(:,1)*xnuder(:,2)/nsons(:) 3115c 3116c collect the x and y coords into xnude 3117c 3118 xnude = zero 3119 do i = 1,numnp 3120 xnude(ifath(i),1) = xnude(ifath(i),1) + x(i,1) 3121 xnude(ifath(i),2) = xnude(ifath(i),2) + x(i,2) 3122 enddo 3123 3124 if(numpe .gt. 1) 3125 & call drvAllreduce(xnude, xnuder,2*nfath) 3126 xnuder(:,1)=xnuder(:,1)/nsons(:) 3127 xnuder(:,2)=xnuder(:,2)/nsons(:) 3128c 3129c xnude is the sum of the sons for each father on this processor 3130c 3131 if((myrank.eq.master)) then 3132 do i=1,nfath ! cdelsq * |S| 3133 write(444,*) xnuder(i,1),xnuder(i,2),xnutf(i) 3134 enddo 3135 call flush(444) 3136 endif 3137 endif 3138 3139 return 3140 end 3141 subroutine DFWRwfdmc (y, shgl, shp, 3142 & iper, ilwork, 3143 & nsons, ifath, x, fwr2, fwr3) 3144 3145 use pointer_data 3146 3147 use quadfilt ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf), 3148c shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf). 3149c Shpf and shglf are the shape funciotns and their 3150c gradient evaluated using the quadrature rule desired 3151c for computing the dmod. Qwtf contains the weights of the 3152c quad. points. 3153 3154 include "common.h" 3155 include "mpif.h" 3156 include "auxmpi.h" 3157 3158c 3159 dimension fres(nshg,33), fwr(nshg), 3160 & strnrm(nshg), cdelsq(nshg), 3161 & cdelsq2(nshg), 3162 & xnum(nshg), xden(nshg), 3163 & xmij(nshg,6), xlij(nshg,6), 3164 & xnude(nfath,2), xnuder(nfath,2), 3165 & ynude(nfath,6), ynuder(nfath,6), 3166 & ui(nfath,3), snorm(nfath), 3167 & uir(nfath,3), snormr(nfath) 3168 dimension xm(nfath,6), xl(nfath,6), 3169 & xl1(nfath,6), xl2(nfath,6), 3170 & xl1r(nfath,6), xl2r(nfath,6), 3171 & xmr(nfath,6), xlr(nfath,6), 3172 & nsons(nshg), 3173 & strl(numel,ngauss), 3174 & y(nshg,5), yold(nshg,5), 3175 & ifath(nshg), iper(nshg), 3176 & ilwork(nlwork), 3177 & x(numnp,3), 3178 & shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT), 3179 & xnutf(nfath), 3180 & hfres(nshg,22), 3181 & xfac(nshg,5), fwr2(nshg), 3182 & fwr3(nshg) 3183 3184 real*8 u1 3185 3186 character*10 cname 3187 character*30 fname1, fname2, fname3, fname4, fname5, fname6 3188c 3189 3190c 3191c 3192c setup the weights for time averaging of cdelsq (now in quadfilt module) 3193c 3194 3195 denom=max(1.0d0*(lstep),one) 3196 if(dtavei.lt.0) then 3197 wcur=one/denom 3198 else 3199 wcur=dtavei 3200 endif 3201 whist=1.0-wcur 3202 3203 if (myrank .eq. master) then 3204 write(*,*)'istep=', istep 3205 endif 3206 3207 if (istep .eq. 0) then 3208 xnd = zero 3209 xmodcomp = zero 3210 xmcomp = zero 3211 xlcomp = zero 3212 xl1comp = zero 3213 xl2comp = zero 3214 ucomp = zero 3215 scomp = zero 3216 endif 3217 3218 3219 fres = zero 3220 hfres = zero 3221 3222 yold(:,1)=y(:,4) 3223 yold(:,2:4)=y(:,1:3) 3224 3225c 3226c hack in an interesting velocity field (uncomment to test dmod) 3227c 3228c yold(:,5) = 1.0 ! Debugging 3229c yold(:,2) = 2.0*x(:,1) - 3.0*x(:,2) 3230c yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2) 3231c yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3) 3232c yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable 3233c suitable for the 3234 3235 do iblk = 1,nelblk 3236 lcsyst = lcblk(3,iblk) 3237 iel = lcblk(1,iblk) !Element number where this block begins 3238 npro = lcblk(1,iblk+1) - iel 3239 lelCat = lcblk(2,iblk) 3240 nenl = lcblk(5,iblk) 3241 nshl = lcblk(10,iblk) 3242 inum = iel + npro - 1 3243 3244 ngauss = nint(lcsyst) 3245 ngaussf = nintf(lcsyst) 3246 3247c call hfilterBB (yold, x, mien(iblk)%p, hfres, 3248c & shglf(lcsyst,:,1:nshl,:), 3249c & shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf)) 3250 3251 call hfilterCC (yold, x, mien(iblk)%p, hfres, 3252 & shglf(lcsyst,:,1:nshl,:), 3253 & shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf)) 3254 3255 enddo 3256 3257 if(numpe>1) call commu (hfres, ilwork, 22, 'in ') 3258c 3259c... account for periodicity in filtered variables 3260c 3261 do j = 1,nshg ! Add on-processor slave contribution to masters 3262 i = iper(j) 3263 if (i .ne. j) then 3264 hfres(i,:) = hfres(i,:) + hfres(j,:) 3265 endif 3266 enddo 3267 do j = 1,nshg ! Set on-processor slaves to be the same as masters 3268 i = iper(j) 3269 if (i .ne. j) then 3270 hfres(j,:) = hfres(i,:) 3271 endif 3272 enddo 3273 3274c... Set off-processor slaves to be the same as their masters 3275 3276 if(numpe>1) call commu (hfres, ilwork, 22, 'out') 3277 3278 3279 hfres(:,16) = one / hfres(:,16) ! one/(volume filter kernel) 3280 3281 do j = 1, 15 3282 hfres(:,j) = hfres(:,j) * hfres(:,16) 3283 enddo 3284 do j = 17, 22 3285 hfres(:,j) = hfres(:,j) * hfres(:,16) 3286 enddo 3287 3288c... For debugging 3289 3290c hfres(:,1) = 2.0*x(:,1) - 3.0*x(:,2) 3291c hfres(:,2) = 3.0*x(:,1) + 4.0*x(:,2) 3292c hfres(:,3) = 4.0*x(:,1) + x(:,2) + x(:,3) 3293 3294c... Done w/ h-filtering. Begin 2h-filtering. 3295 3296 do iblk = 1,nelblk 3297 lcsyst = lcblk(3,iblk) 3298 iel = lcblk(1,iblk) !Element number where this block begins 3299 npro = lcblk(1,iblk+1) - iel 3300 lelCat = lcblk(2,iblk) 3301 nenl = lcblk(5,iblk) 3302 nshl = lcblk(10,iblk) 3303 inum = iel + npro - 1 3304 3305 ngauss = nint(lcsyst) 3306 ngaussf = nintf(lcsyst) 3307 3308 call twohfilterBB (yold, x, strl(iel:inum,:), mien(iblk)%p, 3309 & fres, hfres, shglf(lcsyst,:,1:nshl,:), 3310 & shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf)) 3311 3312 enddo 3313c 3314 3315 3316 if(numpe>1) call commu (fres, ilwork, 33, 'in ') 3317c 3318c account for periodicity in filtered variables 3319c 3320 do j = 1,nshg 3321 i = iper(j) 3322 if (i .ne. j) then 3323 fres(i,:) = fres(i,:) + fres(j,:) 3324 endif 3325 enddo 3326 3327 do j = 1,nshg 3328 i = iper(j) 3329 if (i .ne. j) then 3330 fres(j,:) = fres(i,:) 3331 endif 3332 enddo 3333 3334 if(numpe>1)then 3335 call commu (fres, ilwork, 33, 'out') 3336 endif 3337 3338 fres(:,22) = one / fres(:,22) 3339 do j = 1,21 3340 fres(:,j) = fres(:,j) * fres(:,22) 3341 enddo 3342 do j = 23,33 3343 fres(:,j) = fres(:,j) * fres(:,22) 3344 enddo 3345 3346 3347 do iblk = 1,nelblk 3348 lcsyst = lcblk(3,iblk) 3349 iel = lcblk(1,iblk) !Element number where this block begins 3350 npro = lcblk(1,iblk+1) - iel 3351 lelCat = lcblk(2,iblk) 3352 nenl = lcblk(5,iblk) 3353 nshl = lcblk(10,iblk) 3354 inum = iel + npro - 1 3355 3356 ngauss = nint(lcsyst) 3357 3358 call getstrl (yold, x, mien(iblk)%p, 3359 & strl(iel:inum,:), shgl(lcsyst,:,1:nshl,:), 3360 & shp(lcsyst,1:nshl,:)) 3361 3362 enddo 3363 3364c 3365c... Obtain the hat-tilde strain rate norm at the nodes 3366c 3367 3368 strnrm = sqrt( 3369 & two * (fres(:,10)**2 + fres(:,11)**2 + fres(:,12)**2) 3370 & + four * ( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) ) 3371 3372c fwr = fwr1 * strnrm 3373 3374 fwr = fwr1 * fwr3 * strnrm 3375 3376 xmij(:,1) = -fwr 3377 & * fres(:,10) + fres(:,16) 3378 xmij(:,2) = -fwr 3379 & * fres(:,11) + fres(:,17) 3380 xmij(:,3) = -fwr 3381 & * fres(:,12) + fres(:,18) 3382 3383 xmij(:,4) = -fwr * fres(:,13) + fres(:,19) 3384 xmij(:,5) = -fwr * fres(:,14) + fres(:,20) 3385 xmij(:,6) = -fwr * fres(:,15) + fres(:,21) 3386 3387 3388 xlij(:,1) = fres(:,4) - fres(:,1) * fres(:,1) 3389 xlij(:,2) = fres(:,5) - fres(:,2) * fres(:,2) 3390 xlij(:,3) = fres(:,6) - fres(:,3) * fres(:,3) 3391 xlij(:,4) = fres(:,7) - fres(:,1) * fres(:,2) 3392 xlij(:,5) = fres(:,8) - fres(:,1) * fres(:,3) 3393 xlij(:,6) = fres(:,9) - fres(:,2) * fres(:,3) 3394 3395 xnum = xlij(:,1) * xmij(:,1) + xlij(:,2) * xmij(:,2) 3396 & + xlij(:,3) * xmij(:,3) 3397 & + two * (xlij(:,4) * xmij(:,4) + xlij(:,5) * xmij(:,5) 3398 & + xlij(:,6) * xmij(:,6)) 3399 xden = xmij(:,1) * xmij(:,1) + xmij(:,2) * xmij(:,2) 3400 & + xmij(:,3) * xmij(:,3) 3401 & + two * (xmij(:,4) * xmij(:,4) + xmij(:,5) * xmij(:,5) 3402 & + xmij(:,6) * xmij(:,6)) 3403 xden = two * xden 3404 3405c... For collectection of statistics on dyn. model components 3406 3407 xfac(:,1) = strnrm*strnrm*( fres(:,10)**2 + fres(:,11)**2 + 3408 & fres(:,12)**2 3409 & + two*( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) ) 3410 3411 xfac(:,2) = strnrm*( xlij(:,1)*fres(:,10) + xlij(:,2)*fres(:,11) 3412 & + xlij(:,3)*fres(:,12) + 3413 & two*(xlij(:,4)*fres(:,13) + xlij(:,5)*fres(:,14) + 3414 & xlij(:,6)*fres(:,15)) ) 3415 3416 xfac(:,3) = strnrm*( fres(:,10)*fres(:,16) + fres(:,11)*fres(:,17) 3417 & + fres(:,12)*fres(:,18) + 3418 & two*(fres(:,13)*fres(:,19) + fres(:,14)*fres(:,20) + 3419 & fres(:,15)*fres(:,21)) ) 3420 3421 xfac(:,4) = xlij(:,1)*fres(:,16) + xlij(:,2)*fres(:,17) 3422 & + xlij(:,3)*fres(:,18) + 3423 & two*(xlij(:,4)*fres(:,19) + xlij(:,5)*fres(:,20) + 3424 & xlij(:,6)*fres(:,21)) 3425 3426 xfac(:,5) = fres(:,16)*fres(:,16) + fres(:,17)*fres(:,17) 3427 & + fres(:,18)*fres(:,18) + 3428 & two*(fres(:,19)*fres(:,19) + fres(:,20)*fres(:,20) + 3429 & fres(:,21)*fres(:,21)) 3430 3431c zero on processor periodic nodes so that they will not be added twice 3432 do j = 1,numnp 3433 i = iper(j) 3434 if (i .ne. j) then 3435 xnum(j) = zero 3436 xden(j) = zero 3437 xfac(j,:) = zero 3438 xmij(j,:) = zero 3439 xlij(j,:) = zero 3440 fres(j,:) = zero 3441 strnrm(j) = zero 3442 endif 3443 enddo 3444 3445 if (numpe.gt.1) then 3446 3447 numtask = ilwork(1) 3448 itkbeg = 1 3449 3450c zero the nodes that are "solved" on the other processors 3451 do itask = 1, numtask 3452 3453 iacc = ilwork (itkbeg + 2) 3454 numseg = ilwork (itkbeg + 4) 3455 3456 if (iacc .eq. 0) then 3457 do is = 1,numseg 3458 isgbeg = ilwork (itkbeg + 3 + 2*is) 3459 lenseg = ilwork (itkbeg + 4 + 2*is) 3460 isgend = isgbeg + lenseg - 1 3461 xnum(isgbeg:isgend) = zero 3462 xden(isgbeg:isgend) = zero 3463 strnrm(isgbeg:isgend) = zero 3464 xfac(isgbeg:isgend,:) = zero 3465 xmij(isgbeg:isgend,:) = zero 3466 xlij(isgbeg:isgend,:) = zero 3467 fres(isgbeg:isgend,:) = zero 3468 enddo 3469 endif 3470 3471 itkbeg = itkbeg + 4 + 2*numseg 3472 3473 enddo 3474 3475 endif 3476c 3477c Description of arrays. Each processor has an array of length equal 3478c to the total number of fathers times 2 xnude(nfathers,2). One to collect 3479c the numerator and one to collect the denominator. There is also an array 3480c of length nshg on each processor which tells the father number of each 3481c on processor node, ifath(nnshg). Finally, there is an arry of length 3482c nfathers to tell the total (on all processors combined) number of sons 3483c for each father. 3484c 3485c Now loop over nodes and accumlate the numerator and the denominator 3486c to the father nodes. Only on processor addition at this point. 3487c Note that serrogate fathers are collect some for the case where some 3488c sons are on another processor 3489c 3490 xnude = zero 3491 ynude = zero 3492 xm = zero 3493 xl = zero 3494 xl1 = zero 3495 xl2 = zero 3496 ui = zero 3497 snorm = zero 3498 3499 do i = 1,nshg 3500 xnude(ifath(i),1) = xnude(ifath(i),1) + xnum(i) 3501 xnude(ifath(i),2) = xnude(ifath(i),2) + xden(i) 3502 3503 ynude(ifath(i),1) = ynude(ifath(i),1) + xfac(i,1) 3504 ynude(ifath(i),2) = ynude(ifath(i),2) + xfac(i,2) 3505 ynude(ifath(i),3) = ynude(ifath(i),3) + xfac(i,3) 3506 ynude(ifath(i),4) = ynude(ifath(i),4) + xfac(i,4) 3507 ynude(ifath(i),5) = ynude(ifath(i),5) + xfac(i,5) 3508 3509 xm(ifath(i),1) = xm(ifath(i),1) + xmij(i,1) 3510 xm(ifath(i),2) = xm(ifath(i),2) + xmij(i,2) 3511 xm(ifath(i),3) = xm(ifath(i),3) + xmij(i,3) 3512 xm(ifath(i),4) = xm(ifath(i),4) + xmij(i,4) 3513 xm(ifath(i),5) = xm(ifath(i),5) + xmij(i,5) 3514 xm(ifath(i),6) = xm(ifath(i),6) + xmij(i,6) 3515 3516 xl(ifath(i),1) = xl(ifath(i),1) + xlij(i,1) 3517 xl(ifath(i),2) = xl(ifath(i),2) + xlij(i,2) 3518 xl(ifath(i),3) = xl(ifath(i),3) + xlij(i,3) 3519 xl(ifath(i),4) = xl(ifath(i),4) + xlij(i,4) 3520 xl(ifath(i),5) = xl(ifath(i),5) + xlij(i,5) 3521 xl(ifath(i),6) = xl(ifath(i),6) + xlij(i,6) 3522 3523 xl1(ifath(i),1) = xl1(ifath(i),1) + fres(i,4) 3524 xl1(ifath(i),2) = xl1(ifath(i),2) + fres(i,5) 3525 xl1(ifath(i),3) = xl1(ifath(i),3) + fres(i,6) 3526 xl1(ifath(i),4) = xl1(ifath(i),4) + fres(i,7) 3527 xl1(ifath(i),5) = xl1(ifath(i),5) + fres(i,8) 3528 xl1(ifath(i),6) = xl1(ifath(i),6) + fres(i,9) 3529 3530 xl2(ifath(i),1) = xl2(ifath(i),1) + fres(i,1)*fres(i,1) 3531 xl2(ifath(i),2) = xl2(ifath(i),2) + fres(i,2)*fres(i,2) 3532 xl2(ifath(i),3) = xl2(ifath(i),3) + fres(i,3)*fres(i,3) 3533 xl2(ifath(i),4) = xl2(ifath(i),4) + fres(i,1)*fres(i,2) 3534 xl2(ifath(i),5) = xl2(ifath(i),5) + fres(i,1)*fres(i,3) 3535 xl2(ifath(i),6) = xl2(ifath(i),6) + fres(i,2)*fres(i,3) 3536 3537 ui(ifath(i),1) = ui(ifath(i),1) + fres(i,1) 3538 ui(ifath(i),2) = ui(ifath(i),2) + fres(i,2) 3539 ui(ifath(i),3) = ui(ifath(i),3) + fres(i,3) 3540 3541 snorm(ifath(i)) = snorm(ifath(i)) + strnrm(i) 3542 3543 enddo 3544 3545c 3546c Now the true fathers and serrogates combine results and update 3547c each other. 3548c 3549 if(numpe .gt. 1)then 3550 call drvAllreduce(xnude, xnuder,2*nfath) 3551 call drvAllreduce(ynude, ynuder,6*nfath) 3552 call drvAllreduce(xm, xmr,6*nfath) 3553 call drvAllreduce(xl, xlr,6*nfath) 3554 call drvAllreduce(xl1, xl1r,6*nfath) 3555 call drvAllreduce(xl2, xl2r,6*nfath) 3556 call drvAllreduce(ui, uir,3*nfath) 3557 call drvAllreduce(snorm, snormr,nfath) 3558 3559 do i = 1, nfath 3560 ynuder(i,6) = ( ynuder(i,4) - fwr1*ynuder(i,2) ) / 3561 & ( two*ynuder(i,5) - four*fwr1*ynuder(i,3) 3562 & + two*fwr1*fwr1*ynuder(i,1) ) 3563 enddo 3564 3565 cdelsq2(:) = ynuder(ifath(:),6) ! For comparison w/ cdelsq 3566c 3567c xnude is the sum of the sons for each father on this processor 3568c 3569c xnuder is the sum of the sons for each father on all processor combined 3570c (the same as if we had not partitioned the mesh for each processor) 3571c 3572c For each father we have precomputed the number of sons (including 3573c the sons off processor). 3574c 3575c Now divide by number of sons to get the average (not really necessary 3576c for dynamic model since ratio will cancel nsons at each father) 3577c 3578 xnuder(:,1) = xnuder(:,1) / nsons(:) 3579 xnuder(:,2) = xnuder(:,2) / nsons(:) 3580 3581 do m = 1, 5 3582 ynuder(:,m) = ynuder(:,m)/nsons(:) 3583 enddo 3584 do m = 1,6 3585 xmr(:,m) = xmr(:,m)/nsons(:) 3586 xlr(:,m) = xlr(:,m)/nsons(:) 3587 xl1r(:,m) = xl1r(:,m)/nsons(:) 3588 xl2r(:,m) = xl2r(:,m)/nsons(:) 3589 enddo 3590 3591 uir(:,1) = uir(:,1)/nsons(:) 3592 uir(:,2) = uir(:,2)/nsons(:) 3593 uir(:,3) = uir(:,3)/nsons(:) 3594 3595 snormr(:) = snormr(:)/nsons(:) 3596 3597c 3598cc the next line is c \Delta^2 3599cc 3600cc xnuder(:,1) = xnuder(:,1) / (xnuder(:,2) + 1.d-09) 3601cc do i = 1,nshg 3602cc cdelsq(i) = xnuder(ifath(i),1) 3603cc enddo 3604 3605 numNden(:,1) = whist*numNden(:,1)+wcur*xnuder(ifath(:),1) 3606 numNden(:,2) = whist*numNden(:,2)+wcur*xnuder(ifath(:),2) 3607 cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09) 3608 3609c cdelsq(:) = xnuder(ifath(:),1)/(xnuder(ifath(:),2)+1.d-09) 3610 3611 xnd(:,1) = xnd(:,1) + xnuder(:,1) 3612 xnd(:,2) = xnd(:,2) + xnuder(:,2) 3613 3614 xmodcomp(:,1) = xmodcomp(:,1)+ynuder(:,1) 3615 xmodcomp(:,2) = xmodcomp(:,2)+ynuder(:,2) 3616 xmodcomp(:,3) = xmodcomp(:,3)+ynuder(:,3) 3617 xmodcomp(:,4) = xmodcomp(:,4)+ynuder(:,4) 3618 xmodcomp(:,5) = xmodcomp(:,5)+ynuder(:,5) 3619 3620 xmcomp(:,:) = xmcomp(:,:)+xmr(:,:) 3621 xlcomp(:,:) = xlcomp(:,:)+xlr(:,:) 3622 3623 xl1comp(:,:) = xl1comp(:,:)+xl1r(:,:) 3624 xl2comp(:,:) = xl2comp(:,:)+xl2r(:,:) 3625 3626 ucomp(:,:) = ucomp(:,:)+uir(:,:) 3627 u1 = uir(32,1) 3628 scomp(:) = scomp(:)+snormr(:) 3629 3630 else 3631 3632 xnude(:,1) = xnude(:,1)/nsons(:) 3633 xnude(:,2) = xnude(:,2)/nsons(:) 3634 3635 do m = 1, 5 3636 ynude(:,m) = ynude(:,m)/nsons(:) 3637 enddo 3638 do m = 1,6 3639 xm(:,m) = xm(:,m)/nsons(:) 3640 xl(:,m) = xl(:,m)/nsons(:) 3641 xl1(:,m) = xl1(:,m)/nsons(:) 3642 xl2(:,m) = xl2(:,m)/nsons(:) 3643 enddo 3644 3645 ui(:,1) = ui(:,1)/nsons(:) 3646 ui(:,2) = ui(:,2)/nsons(:) 3647 ui(:,3) = ui(:,3)/nsons(:) 3648 3649 snorm(:) = snorm(:)/nsons(:) 3650c 3651c the next line is c \Delta^2, not nu_T but we want to save the 3652c memory 3653c 3654 3655cc xnude(:,1) = xnude(:,1) / (xnude(:,2) + 1.d-09) 3656cc do i = 1,nshg 3657cc cdelsq(i) = xnude(ifath(i),1) 3658cc enddo 3659cc endif 3660 3661 do i = 1, nfath 3662 ynude(i,6) = ( ynude(i,4) - fwr1*ynude(i,2) ) / 3663 & ( two*ynude(i,5) - four*fwr1*ynude(i,3) 3664 & + fwr1*fwr1*ynude(i,1) ) 3665 enddo 3666 3667 numNden(:,1) = whist*numNden(:,1)+wcur*xnude(ifath(:),1) 3668 numNden(:,2) = whist*numNden(:,2)+wcur*xnude(ifath(:),2) 3669 3670 xnd(:,1) = xnd(:,1)+xnude(:,1) 3671 xnd(:,2) = xnd(:,2)+xnude(:,2) 3672 3673 cdelsq(:) = numNden(:,1) / (numNden(:,2)) ! + 1.d-09) 3674 3675c cdelsq(:) = xnude(ifath(:),1)/(xnude(ifath(:),2))!+1.d-09) 3676 3677 3678 cdelsq2(:) = ynude(ifath(:),6) ! For comparison w/ cdelsq 3679 3680 xmodcomp(:,1) = xmodcomp(:,1)+ynude(:,1) 3681 xmodcomp(:,2) = xmodcomp(:,2)+ynude(:,2) 3682 xmodcomp(:,3) = xmodcomp(:,3)+ynude(:,3) 3683 xmodcomp(:,4) = xmodcomp(:,4)+ynude(:,4) 3684 xmodcomp(:,5) = xmodcomp(:,5)+ynude(:,5) 3685 3686 xmcomp(:,:) = xmcomp(:,:)+xm(:,:) 3687 xlcomp(:,:) = xlcomp(:,:)+xl(:,:) 3688 3689 xl1comp(:,:) = xl1comp(:,:)+xl1(:,:) 3690 xl2comp(:,:) = xl2comp(:,:)+xl2(:,:) 3691 3692 ucomp(:,:) = ucomp(:,:)+ui(:,:) 3693 u1 = ui(32,1) 3694 scomp(:) = scomp(:)+snorm(:) 3695 3696 endif 3697 3698 3699c do i = 1, nfath 3700c xmodcomp(i,:) = xmodcomp(i,:)/nsons(i) 3701c xmcomp(i,:) = xmcomp(i,:)/nsons(i) 3702c xlcomp(i,:) = xlcomp(i,:)/nsons(i) 3703c xl2comp(i,:) = xl2comp(i,:)/nsons(i) 3704c xl1comp(i,:) = xl1comp(i,:)/nsons(i) 3705c xnd(i,:) = xnd(i,:)/nsons(i) 3706c scomp(i) = scomp(i)/nsons(i) 3707c ucomp(i,:) = ucomp(i,:)/nsons(i) 3708c enddo 3709 3710 if ( istep .eq. (nstep(1)-1) ) then 3711 if ( myrank .eq. master) then 3712 3713 do i = 1, nfath 3714 write(365,*)xmodcomp(i,1),xmodcomp(i,2),xmodcomp(i,3), 3715 & xmodcomp(i,4),xmodcomp(i,5) 3716 3717 write(366,*)xmcomp(i,1),xmcomp(i,2),xmcomp(i,3) 3718 write(367,*)xmcomp(i,4),xmcomp(i,5),xmcomp(i,6) 3719 3720 write(368,*)xlcomp(i,1),xlcomp(i,2),xlcomp(i,3) 3721 write(369,*)xlcomp(i,4),xlcomp(i,5),xlcomp(i,6) 3722 3723 write(370,*)xl1comp(i,1),xl1comp(i,2),xl1comp(i,3) 3724 write(371,*)xl1comp(i,4),xl1comp(i,5),xl1comp(i,6) 3725 3726 write(372,*)xl2comp(i,1),xl2comp(i,2),xl2comp(i,3) 3727 write(373,*)xl2comp(i,4),xl2comp(i,5),xl2comp(i,6) 3728 3729 write(374,*)xnd(i,1),xnd(i,2),scomp(i) 3730 write(375,*)ucomp(i,1),ucomp(i,2),ucomp(i,3) 3731 3732c write(*,*)'uit uic=', ucomp(32,1),u1 3733 enddo 3734 3735 3736 call flush(365) 3737 call flush(366) 3738 call flush(367) 3739 call flush(368) 3740 call flush(369) 3741 call flush(370) 3742 call flush(371) 3743 call flush(372) 3744 call flush(373) 3745 call flush(374) 3746 call flush(375) 3747 3748c if (myrank .eq. master) then 3749c write(*,*)'uit uic=', ucomp(32,1),u1 3750c endif 3751 3752 3753c close(852) 3754c close(853) 3755c close(854) 3756 3757 endif 3758 endif 3759 3760 if (myrank .eq. master) then 3761 write(*,*)'uit uic=', ucomp(32,1),u1 3762 endif 3763 3764 3765 555 format(e14.7,4(2x,e14.7)) 3766 556 format(e14.7,5(2x,e14.7)) 3767 3768c close(849) 3769c close(850) 3770c close(851) 3771c close(852) 3772c close(853) 3773c close(854) 3774 3775c $$$$$$$$$$$$$$$$$$$$$$$$$$$ 3776 tmp1 = MINVAL(cdelsq) 3777 tmp2 = MAXVAL(cdelsq) 3778 if(numpe>1) then 3779 call MPI_REDUCE (tmp1, tmp3, 1,MPI_DOUBLE_PRECISION, 3780 & MPI_MIN, master, MPI_COMM_WORLD, ierr) 3781 call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION, 3782 & MPI_MAX, master, MPI_COMM_WORLD, ierr) 3783 tmp1=tmp3 3784 tmp2=tmp4 3785 endif 3786 if (myrank .EQ. master) then !print CDelta^2 range 3787 write(34,*)lstep,tmp1,tmp2 3788 call flush(34) 3789 endif 3790c $$$$$$$$$$$$$$$$$$$$$$$$$$$ 3791 3792 if (myrank .eq. master) then 3793 write(*,*) 'cdelsq=', cdelsq(1),cdelsq(2) 3794 write(*,*) 'cdelsq=', cdelsq2(1),cdelsq2(2) 3795 write(22,*) lstep, cdelsq(1) 3796 call flush(22) 3797 endif 3798 3799 do iblk = 1,nelblk 3800 lcsyst = lcblk(3,iblk) 3801 iel = lcblk(1,iblk) 3802 npro = lcblk(1,iblk+1) - iel 3803 lelCat = lcblk(2,iblk) 3804 inum = iel + npro - 1 3805 3806 ngauss = nint(lcsyst) 3807 3808 call scatnu (mien(iblk)%p, strl(iel:inum,:), 3809 & mxmudmi(iblk)%p,cdelsq,shp(lcsyst,1:nshl,:)) 3810 enddo 3811c $$$$$$$$$$$$$$$$$$$$$$$$$$$ 3812c$$$ tmp1 = MINVAL(xmudmi) 3813c$$$ tmp2 = MAXVAL(xmudmi) 3814c$$$ if(numpe>1) then 3815c$$$ call MPI_REDUCE (tmp1, tmp3, 1, MPI_DOUBLE_PRECISION, 3816c$$$ & MPI_MIN, master, MPI_COMM_WORLD, ierr) 3817c$$$ call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION, 3818c$$$ & MPI_MAX, master, MPI_COMM_WORLD, ierr) 3819c$$$ tmp1=tmp3 3820c$$$ tmp2=tmp4 3821c$$$ endif 3822c$$$ if (myrank .EQ. master) then 3823c$$$ write(35,*) lstep,tmp1,tmp2 3824c$$$ call flush(35) 3825c$$$ endif 3826c $$$$$$$$$$$$$$$$$$$$$$$$$$$ 3827 3828c 3829c if flag set, write a restart file with info (reuse xmij's memory) 3830c 3831 if(irs.eq.11) then 3832 lstep=999 3833 xmij(:,1)=xnum(:) 3834 xmij(:,2)=xden(:) 3835 xmij(:,3)=cdelsq(:) 3836 xmij(:,5)=xlij(:,4) !leave M_{12} in 4 and put L_{12} here 3837 call restar('out ',xmij,xlij) !also dump all of L_{ij} in ac 3838 stop 3839 endif 3840c 3841c local clipping moved to scatnu with the creation of mxmudmi pointers 3842c 3843c$$$ rmu=datmat(1,2,1) 3844c$$$ xmudmi=min(xmudmi,1000.0*rmu) !don't let it get larger than 1000 mu 3845c$$$ xmudmi=max(xmudmi, -rmu) ! don't let (xmudmi + mu) < 0 3846c stop !uncomment to test dmod 3847c 3848 3849 3850c write out the nodal values of xnut (estimate since we don't calc strain 3851c there and must use the filtered strain). 3852c 3853 3854 if ((irs .ge. 1) .and. (mod(lstep, ntout) .eq. 0)) then 3855c 3856c collect the average strain into xnude(2) 3857c 3858 xnude(:,2) = zero 3859 do i = 1,numnp 3860 xnude(ifath(i),2) = xnude(ifath(i),2) + strnrm(i) 3861 enddo 3862 3863 if(numpe .gt. 1) then 3864 call drvAllreduce(xnude(:,2), xnuder(:,2),nfath) 3865 else 3866 xnuder=xnude 3867 endif 3868c 3869c nut= cdelsq * |S| 3870c 3871 xnutf=xnuder(:,1)*xnuder(:,2)/nsons(:) 3872c 3873c collect the x and y coords into xnude 3874c 3875 xnude = zero 3876 do i = 1,numnp 3877 xnude(ifath(i),1) = xnude(ifath(i),1) + x(i,1) 3878 xnude(ifath(i),2) = xnude(ifath(i),2) + x(i,2) 3879 enddo 3880 3881 if(numpe .gt. 1) 3882 & call drvAllreduce(xnude, xnuder,2*nfath) 3883 xnuder(:,1)=xnuder(:,1)/nsons(:) 3884 xnuder(:,2)=xnuder(:,2)/nsons(:) 3885c 3886c xnude is the sum of the sons for each father on this processor 3887c 3888 if((myrank.eq.master)) then 3889 do i=1,nfath ! cdelsq * |S| 3890 write(444,*) xnuder(i,1),xnuder(i,2),xnutf(i) 3891 enddo 3892 call flush(444) 3893 endif 3894 endif 3895 3896 return 3897 end 3898