xref: /phasta/phSolver/incompressible/advLES.f (revision 595995161822a203c8467e0e4a253d7bd7d6df32)
1
2c---------------------------------------------------------------------------
3
4      subroutine SUPGstress (y, ac, x, qres, ien, xmudmi,
5     &     cdelsq, shgl, shp, Qwtf, shglo, shpo, stress, diss, vol)
6
7      use stats
8      use rlssave   ! Use the resolved Leonard stresses at the nodes.
9
10      include "common.h"
11
12      dimension y(nshg,5),                  ac(nshg,5),
13     &          x(numnp,nsd),               ien(npro,nshl),
14     &          shp(nshl,ngauss),            shpfun(npro,nshl),
15     &          shgl(nsd,nshl,ngauss),       shg(npro,nshl,nsd),
16     &          shglo(nsd,nshl,ngauss),      shpo(nshl,ngauss),
17     &          Qwtf(ngaussf),              acl(npro,nshl,ndof),
18     &          yl(npro,nshl,ndof),         xl(npro,nenl,nsd)
19      dimension stress(nshg,9),             stressl(npro,9),
20     &          stressli(npro,9),
21     &          dxdxi(npro,nsd,nsd),        dxidx(npro,nsd,nsd),
22     &          WdetJ(npro),                rho(npro),
23     &          tmp(npro),                  aci(npro,nsd),
24     &          pres(npro),                 u1(npro),
25     &          u2(npro),                   u3(npro)
26      dimension qres(nshg,nsd*nsd),         ql(npro,nshl,nsd*nsd),
27     &          g1yi(npro,ndof),            g2yi(npro,ndof),
28     &          g3yi(npro,ndof),            divqi(npro,3),
29     &          src(npro,nsd),             Temp(npro),
30     &          xx(npro,nsd),
31     &          rlsl(npro,nshl,6),         rlsli(npro,6),
32     &          rLui(npro,3),
33     &          tauC(npro),                tauM(npro),
34     &          tauBar(npro),              uBar(npro,nsd)
35      dimension Sij(npro,6),               Snorm(npro),
36     &          Snorm2(npro),              cdelsq(nshg),
37     &          xmudmi(npro,ngauss),        xmudmif(npro,ngauss),
38     &          dissi(npro,3),             dissl(npro,3),
39     &          voli(npro),                voll(npro),
40     &          vol(nshg),                 diss(nshg,3),
41     &          rmu(npro)
42
43      real*8    omega(3), divu(npro)
44
45c.... Note that the xmudmi passed in here is
46c.... evaluated at quadrature points of the flow. xmudmif will
47c...  be evaluated at the ngaussf quad. pts.
48
49      xmudmif = zero
50
51c.... Debuggin
52
53c      xmudmi = zero
54
55c.... Localization
56
57      call localy(y,      yl,     ien,    ndofl,  'gather  ')
58      call localy(ac,    acl,     ien,    ndofl,  'gather  ')
59      call localx(x,      xl,     ien,    nsd,    'gather  ')
60
61      if (idiff==1 .or. idiff==3) then ! global reconstruction of qdiff
62         call local (qres,   ql,     ien, nsd*nsd, 'gather  ')
63      endif
64
65      if ( idiff==2 .and. ires .eq. 1 ) then
66         call e3ql (yl,        shpo,       shglo,
67     &              xl,        ql,        xmudmi,
68     &              sgn)
69      endif
70
71      if( (iLES.gt.10).and.(iLES.lt.20)) then ! bardina
72         call local (rls, rlsl,     ien,       6, 'gather  ')
73      else
74         rlsl = zero
75      endif
76
77c... Now that everything is localized, begin loop over ngaussf quad. pts.
78
79
80      stressl = zero
81      dissl   = zero
82      voll    = zero
83
84      do intp = 1, ngaussf
85
86c
87c.... ------------->  Primitive variables at int. point  <--------------
88c
89       pres = zero
90       u1   = zero
91       u2   = zero
92       u3   = zero
93c
94       do n = 1, nshl
95          pres(:) = pres(:) + shp(n,intp) * yl(:,n,1)
96          u1(:)   = u1(:)   + shp(n,intp) * yl(:,n,2)
97          u2(:)   = u2(:)   + shp(n,intp) * yl(:,n,3)
98          u3(:)   = u3(:)   + shp(n,intp) * yl(:,n,4)
99       enddo
100
101c
102c.... ----------------------->  accel. at int. point  <----------------------
103c
104       aci = zero
105       do n = 1, nshl
106          aci(:,1) = aci(:,1) + shp(n,intp) * acl(:,n,2)
107          aci(:,2) = aci(:,2) + shp(n,intp) * acl(:,n,3)
108          aci(:,3) = aci(:,3) + shp(n,intp) * acl(:,n,4)
109       enddo
110
111
112c
113c.... --------------->  Element Metrics at int. point <-------------
114c
115c.... compute the deformation gradient
116c
117        dxdxi = zero
118c
119          do n = 1, nenl
120            dxdxi(:,1,1) = dxdxi(:,1,1) + xl(:,n,1) * shgl(1,n,intp)
121            dxdxi(:,1,2) = dxdxi(:,1,2) + xl(:,n,1) * shgl(2,n,intp)
122            dxdxi(:,1,3) = dxdxi(:,1,3) + xl(:,n,1) * shgl(3,n,intp)
123            dxdxi(:,2,1) = dxdxi(:,2,1) + xl(:,n,2) * shgl(1,n,intp)
124            dxdxi(:,2,2) = dxdxi(:,2,2) + xl(:,n,2) * shgl(2,n,intp)
125            dxdxi(:,2,3) = dxdxi(:,2,3) + xl(:,n,2) * shgl(3,n,intp)
126            dxdxi(:,3,1) = dxdxi(:,3,1) + xl(:,n,3) * shgl(1,n,intp)
127            dxdxi(:,3,2) = dxdxi(:,3,2) + xl(:,n,3) * shgl(2,n,intp)
128            dxdxi(:,3,3) = dxdxi(:,3,3) + xl(:,n,3) * shgl(3,n,intp)
129          enddo
130c
131c.... compute the inverse of deformation gradient
132c
133        dxidx(:,1,1) =   dxdxi(:,2,2) * dxdxi(:,3,3)
134     &                 - dxdxi(:,3,2) * dxdxi(:,2,3)
135        dxidx(:,1,2) =   dxdxi(:,3,2) * dxdxi(:,1,3)
136     &                 - dxdxi(:,1,2) * dxdxi(:,3,3)
137        dxidx(:,1,3) =   dxdxi(:,1,2) * dxdxi(:,2,3)
138     &                 - dxdxi(:,1,3) * dxdxi(:,2,2)
139        tmp          = one / ( dxidx(:,1,1) * dxdxi(:,1,1)
140     &                       + dxidx(:,1,2) * dxdxi(:,2,1)
141     &                       + dxidx(:,1,3) * dxdxi(:,3,1) )
142        dxidx(:,1,1) = dxidx(:,1,1) * tmp
143        dxidx(:,1,2) = dxidx(:,1,2) * tmp
144        dxidx(:,1,3) = dxidx(:,1,3) * tmp
145        dxidx(:,2,1) = (dxdxi(:,2,3) * dxdxi(:,3,1)
146     &                - dxdxi(:,2,1) * dxdxi(:,3,3)) * tmp
147        dxidx(:,2,2) = (dxdxi(:,1,1) * dxdxi(:,3,3)
148     &                - dxdxi(:,3,1) * dxdxi(:,1,3)) * tmp
149        dxidx(:,2,3) = (dxdxi(:,2,1) * dxdxi(:,1,3)
150     &                - dxdxi(:,1,1) * dxdxi(:,2,3)) * tmp
151        dxidx(:,3,1) = (dxdxi(:,2,1) * dxdxi(:,3,2)
152     &                - dxdxi(:,2,2) * dxdxi(:,3,1)) * tmp
153        dxidx(:,3,2) = (dxdxi(:,3,1) * dxdxi(:,1,2)
154     &                - dxdxi(:,1,1) * dxdxi(:,3,2)) * tmp
155        dxidx(:,3,3) = (dxdxi(:,1,1) * dxdxi(:,2,2)
156     &                - dxdxi(:,1,2) * dxdxi(:,2,1)) * tmp
157c
158
159        wght=Qwtf(intp)
160        WdetJ = wght / tmp
161
162c    Obtain the global gradient of the shape functions at current qpt.
163
164      do n = 1,nshl
165        shg(:,n,1) = (shgl(1,n,intp) * dxidx(:,1,1)
166     &              + shgl(2,n,intp) * dxidx(:,2,1)
167     &              + shgl(3,n,intp) * dxidx(:,3,1))
168        shg(:,n,2) = (shgl(1,n,intp) * dxidx(:,1,2)
169     &              + shgl(2,n,intp) * dxidx(:,2,2)
170     &              + shgl(3,n,intp) * dxidx(:,3,2))
171        shg(:,n,3) = (shgl(1,n,intp) * dxidx(:,1,3)
172     &              + shgl(2,n,intp) * dxidx(:,2,3)
173     &              + shgl(3,n,intp) * dxidx(:,3,3))
174      enddo
175
176c
177c.... compute the global gradient of u and P
178c
179c
180       g1yi = zero
181       g2yi = zero
182       g3yi = zero
183       do n = 1, nshl
184          g1yi(:,1) = g1yi(:,1) + shg(:,n,1) * yl(:,n,1)
185          g1yi(:,2) = g1yi(:,2) + shg(:,n,1) * yl(:,n,2)
186          g1yi(:,3) = g1yi(:,3) + shg(:,n,1) * yl(:,n,3)
187          g1yi(:,4) = g1yi(:,4) + shg(:,n,1) * yl(:,n,4)
188c
189          g2yi(:,1) = g2yi(:,1) + shg(:,n,2) * yl(:,n,1)
190          g2yi(:,2) = g2yi(:,2) + shg(:,n,2) * yl(:,n,2)
191          g2yi(:,3) = g2yi(:,3) + shg(:,n,2) * yl(:,n,3)
192          g2yi(:,4) = g2yi(:,4) + shg(:,n,2) * yl(:,n,4)
193c
194          g3yi(:,1) = g3yi(:,1) + shg(:,n,3) * yl(:,n,1)
195          g3yi(:,2) = g3yi(:,2) + shg(:,n,3) * yl(:,n,2)
196          g3yi(:,3) = g3yi(:,3) + shg(:,n,3) * yl(:,n,3)
197          g3yi(:,4) = g3yi(:,4) + shg(:,n,3) * yl(:,n,4)
198       enddo
199
200c.... Let us build the Sij tensor and its norms
201
202       Sij(:,1) = g1yi(:,2)
203       Sij(:,2) = g2yi(:,3)
204       Sij(:,3) = g3yi(:,4)
205       Sij(:,4) = (g2yi(:,2)+g1yi(:,3))*pt5
206       Sij(:,5) = (g3yi(:,2)+g1yi(:,4))*pt5
207       Sij(:,6) = (g3yi(:,3)+g2yi(:,4))*pt5
208
209       Snorm(:) = Sij(:,1)**2 + Sij(:,2)**2 + Sij(:,3)**2
210     &      + two*(Sij(:,4)**2 + Sij(:,5)**2 + Sij(:,6)**2)
211
212       Snorm2(:) = sqrt( two*(Sij(:,1)**2 + Sij(:,2)**2 + Sij(:,3)**2)
213     &      + four*(Sij(:,4)**2 + Sij(:,5)**2 + Sij(:,6)**2) )
214
215c... Let us build xmudmif at current quad pt. a la scatnu.f
216
217       do n = 1,nshl
218          xmudmif(:,intp) = xmudmif(:,intp) +
219     &         cdelsq(ien(:,n)) * Snorm2(:)*shp(n,intp)
220       enddo
221
222      rmu=datmat(1,2,1)
223      xmudmif(:,intp)=min(xmudmif(:,intp),1000.0*rmu(:)) !
224c                                don't let it get larger than 1000 mu
225      xmudmif(:,intp)=max(xmudmif(:,intp), zero) ! don't let (xmudmi) < 0
226
227c.... Debugging
228
229c      xmudmif(:,intp) = rmu(:)
230
231
232c
233c.... get necessary fluid properties (including the updated viscosity)
234c
235       do i = 1, npro
236          do n = 1, nshl
237             shpfun(i,n) = shp(n,intp)
238          enddo
239       enddo
240
241        call getdiff(yl, shpfun, xmudmif,xl, rmu, rho)
242
243
244       divqi = zero
245       if ( idiff >= 1 ) then
246c
247c.... compute divergence of diffusive flux vector, qi,i
248c
249          do n=1, nshl
250             divqi(:,1) = divqi(:,1) + shg(:,n,1)*ql(:,n,1 )
251     &                               + shg(:,n,2)*ql(:,n,4 )
252     &                               + shg(:,n,3)*ql(:,n,7 )
253
254             divqi(:,2) = divqi(:,2) + shg(:,n,1)*ql(:,n,2 )
255     &                               + shg(:,n,2)*ql(:,n,5 )
256     &                               + shg(:,n,3)*ql(:,n,8)
257
258             divqi(:,3) = divqi(:,3) + shg(:,n,1)*ql(:,n,3 )
259     &                               + shg(:,n,2)*ql(:,n,6 )
260     &                               + shg(:,n,3)*ql(:,n,9 )
261
262          enddo
263
264       endif                    ! diffusive flux computation
265
266c
267c.... take care of the body force term here
268c
269       src = zero
270       if(matflg(5,1) .ge. 1) then
271c
272         bfx      = datmat(1,5,1) ! Boussinesq, g*alfap
273         bfy      = datmat(2,5,1)
274         bfz      = datmat(3,5,1)
275
276         select case ( matflg(5,1) )
277            case ( 1 )               ! standard linear body force
278               src(:,1) = bfx
279               src(:,2) = bfy
280               src(:,3) = bfz
281            case ( 2 )               ! boussinesq body force
282               Temp = zero
283               do n = 1, nshl
284                  Temp = Temp + shp(n,intp) * yl(:,n,5)
285               enddo
286               Tref = datmat(2,2,1)
287               src(:,1) = bfx * (Temp(:)-Tref)
288               src(:,2) = bfy * (Temp(:)-Tref)
289               src(:,3) = bfz * (Temp(:)-Tref)
290            case ( 3 )               ! user specified f(x,y,z)
291               xx = zero
292               do n  = 1,nenl
293                  xx(:,1) = xx(:,1)  + shp(n,intp) * xl(:,n,1)
294                  xx(:,2) = xx(:,2)  + shp(n,intp) * xl(:,n,2)
295                  xx(:,3) = xx(:,3)  + shp(n,intp) * xl(:,n,3)
296               enddo
297
298               call e3source(xx, src)
299          end select
300
301       endif
302c
303c.... -------------------> Coriolis force  <-----------------
304c
305      omag=datmat(3,5,1)  ! frame rotation rate
306       if(omag.ne.0) then
307c
308c.... unit vector of axis of rotation currently selecting the i,j,k
309c
310          e1 = one/sqrt(3.0d0)
311          e2 = e1
312          e3 = e1
313
314          omega(1)=omag*e1
315          omega(2)=omag*e2
316          omega(3)=omag*e3
317
318          if(matflg(5,1) .ne. 3) then ! we need to calculate the int pt. coords
319             xx = zero
320             do n  = 1,nenl
321                xx(:,1) = xx(:,1)  + shp(n,intp) * xl(:,n,1)
322                xx(:,2) = xx(:,2)  + shp(n,intp) * xl(:,n,2)
323                xx(:,3) = xx(:,3)  + shp(n,intp) * xl(:,n,3)
324             enddo
325
326          endif
327c
328c  note that we calculate f as if it contains the usual source
329c  plus the Coriolis and the centrifugal forces taken to the rhs (sign change)
330c  as long as we are doing SUPG with no accounting for these terms in the
331c  LHS this is the only change (which will find its way to the RHS momentum
332c  equation (both Galerkin and SUPG parts)).
333c
334c  uncomment later if you want rotation always about z axis
335c                 orig_src - om x om x r       - two om x u
336c
337c$$$          src(:,1)=src(:,1)+omega(3)*omega(3)*xx(:,1)+two*omega(3)*u2
338c$$$          src(:,2)=src(:,2)+omega(3)*omega(3)*xx(:,2)-two*omega(3)*u1
339c
340c more general for testing
341c
342          src(:,1)=src(:,1)
343     &            -(omega(2)*(omega(1)*xx(:,2)-omega(2)*xx(:,1))
344     &             -omega(3)*(omega(3)*xx(:,1)-omega(1)*xx(:,3)))
345     &            -two*(omega(2)*u3-omega(3)*u2)
346          src(:,2)=src(:,2)
347     &            -(omega(3)*(omega(2)*xx(:,3)-omega(3)*xx(:,2))
348     &             -omega(1)*(omega(1)*xx(:,2)-omega(2)*xx(:,1)))
349     &            -two*(omega(3)*u1-omega(1)*u3)
350          src(:,3)=src(:,3)
351     &            -(omega(1)*(omega(3)*xx(:,1)-omega(1)*xx(:,3))
352     &             -omega(2)*(omega(2)*xx(:,3)-omega(3)*xx(:,2)))
353     &            -two*(omega(1)*u2-omega(2)*u1)
354       endif
355c
356c.... -------------------> momentum residual  <-----------------
357c
358       rLui(:,1) =(aci(:,1) + u1 * g1yi(:,2)
359     &                      + u2 * g2yi(:,2)
360     &                      + u3 * g3yi(:,2) - src(:,1) ) * rho
361     &           + g1yi(:,1)
362     &           - divqi(:,1)
363       rLui(:,2) =(aci(:,2) + u1 * g1yi(:,3)
364     &                      + u2 * g2yi(:,3)
365     &                      + u3 * g3yi(:,3) - src(:,2) ) * rho
366     &           + g2yi(:,1)
367     &           - divqi(:,2)
368       rLui(:,3) =(aci(:,3) + u1 * g1yi(:,4)
369     &                      + u2 * g2yi(:,4)
370     &                      + u3 * g3yi(:,4) - src(:,3) ) * rho
371     &           + g3yi(:,1)
372     &           - divqi(:,3)
373       if(iconvflow.eq.1) then
374          divu(:)  = (g1yi(:,2) + g2yi(:,3) + g3yi(:,4))*rho
375          rLui(:,1)=rlui(:,1)+u1*divu
376          rLui(:,2)=rlui(:,2)+u2*divu
377          rLui(:,3)=rlui(:,3)+u3*divu
378       endif
379
380c
381c.... compute the stabilization terms
382c
383        call e3stab (rho,          u1,       u2,
384     &               u3,           dxidx,    rLui,
385     &               rmu,          tauC,     tauM,
386     &               tauBar,       uBar )
387c
388c... Compute the SUPG stress at the current quad point multiplied
389c... by the quadrature point weight.
390c
391        stressli(:,1) = u1(:)*rLui(:,1)
392        stressli(:,2) = u1(:)*rLui(:,2)
393        stressli(:,3) = u1(:)*rLui(:,3)
394        stressli(:,4) = u2(:)*rLui(:,1)
395        stressli(:,5) = u2(:)*rLui(:,2)
396        stressli(:,6) = u2(:)*rLui(:,3)
397        stressli(:,7) = u3(:)*rLui(:,1)
398        stressli(:,8) = u3(:)*rLui(:,2)
399        stressli(:,9) = u3(:)*rLui(:,3)
400
401        if (iconvflow .eq. 1) then
402           stressli(:,1) = stressli(:,1) + u1(:)*rLui(:,1)
403           stressli(:,2) = stressli(:,2) + u2(:)*rLui(:,1)
404           stressli(:,3) = stressli(:,3) + u3(:)*rLui(:,1)
405           stressli(:,4) = stressli(:,4) + u1(:)*rLui(:,2)
406           stressli(:,5) = stressli(:,5) + u2(:)*rLui(:,2)
407           stressli(:,6) = stressli(:,6) + u3(:)*rLui(:,2)
408           stressli(:,7) = stressli(:,7) + u1(:)*rLui(:,3)
409           stressli(:,8) = stressli(:,8) + u2(:)*rLui(:,3)
410           stressli(:,9) = stressli(:,9) + u3(:)*rLui(:,3)
411        endif
412
413c.... Debugging
414
415c        stressli = two
416c        tauM     = one
417
418c.... Multiply  ui*Luj times tauM and times WdetJ
419
420        do l = 1, 9
421           do k = 1, npro
422              stressli(k,l) = stressli(k,l)*WdetJ(k)*tauM(k)
423           enddo
424        enddo
425
426c.... Obtain the SUPG energy dissipation (tau_{ij} S_{ij}) at the
427c.... current qpt.
428
429        dissi(:,1) = stressli(:,1)*Sij(:,1) + stressli(:,5)*Sij(:,2)
430     &       + stressli(:,9)*Sij(:,3) + stressli(:,4)*Sij(:,4)
431     &       + stressli(:,7)*Sij(:,5) + stressli(:,8)*Sij(:,6)
432     &       + stressli(:,2)*Sij(:,4) + stressli(:,3)*Sij(:,5)
433     &       + stressli(:,6)*Sij(:,6)
434
435c.... Obtain the eddy viscosity dissipation multiplied by WdetJ
436
437        dissi(:,2) = xmudmif(:,intp)*Snorm(:)*rho(:)*WdetJ(:)
438        dissi(:,3) = rmu(:)*Snorm(:)*rho(:)*WdetJ(:) ! Total dissipation
439c                                             from molec. and eddy
440
441c.... Debugging
442
443c        dissi(:,1) = two*WdetJ(:)
444c        dissi(:,2) = two*WdetJ(:)
445c        dissi(:,3) = two*WdetJ(:)
446
447c..... Volume of element
448
449        voli = WdetJ  ! Volume of element patch
450c
451c.... For debugging purposes let us keep track of rLui
452
453c        rLui(:,1) = rLui(:,1)*WdetJ(:)
454c        rLui(:,1) = rLui(:,2)*WdetJ(:)
455c        rLui(:,1) = rLui(:,3)*WdetJ(:)
456
457c.... Acumulate integration point contributions for each each element
458
459        do l = 1, 9
460           stressl(:,l) = stressl(:,l) + stressli(:,l)
461        enddo
462
463        do l = 1, 3
464           dissl(:,l) = dissl(:,l) + dissi(:,l)
465        enddo
466
467        voll = voll + voli
468
469      enddo    ! End loop over quadrature points.
470
471      do j = 1,nshl
472      do nel = 1,npro
473        stress(ien(nel,j),:) = stress(ien(nel,j),:) + stressl(nel,:)
474      enddo
475      enddo
476
477      do j = 1,nshl
478      do nel = 1,npro
479        diss(ien(nel,j),:) = diss(ien(nel,j),:) + dissl(nel,:)
480      enddo
481      enddo
482
483      do j = 1,nshl
484      do nel = 1,npro
485         vol(ien(nel,j)) = vol(ien(nel,j)) + voll(nel)
486      enddo
487      enddo
488
489      return
490      end
491      subroutine cpjdmcnoi (y,      shgl,      shp,
492     &                   iper,   ilwork,       x,
493     &                   rowp,   colm,
494     &                   iBC,    BC)
495
496      use pointer_data
497
498      use lhsGkeep ! This module stores the mass (Gram) matrix.
499
500      use quadfilt   ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf),
501c                    shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf).
502c                    Shpf and shglf are the shape funciotns and their
503c                    gradient evaluated using the quadrature rule desired
504c                    for computing the dmod. Qwt contains the weights of the
505c                    quad. points.
506
507      include "common.h"
508      include "mpif.h"
509      include "auxmpi.h"
510
511c
512      dimension fres(nshg,24),         fwr(nshg),
513     &          strnrm(nshg),         cdelsq(nshg),
514     &          xnum(nshg),           xden(nshg),
515     &          xmij(nshg,6),         xlij(nshg,6),
516     &          xnude(nfath,2),        xnuder(nfath,2),
517     &          strl(numel,ngauss),
518     &          y(nshg,5),            yold(nshg,5),
519     &          iper(nshg),
520     &          ilwork(nlwork),
521     &          x(numnp,3),
522     &          shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT),
523     &          pfres(nshg,22),                ifath(nshg),
524     &          nsons(nshg),                   iBC(nshg),
525     &          BC(nshg,ndofBC),               xnutf(nfath),
526     &          xnut(nshg)
527
528        integer   rowp(nshg*nnz),         colm(nshg+1)
529
530        real*8, allocatable, dimension(:,:,:) :: em
531
532
533      denom=max(1.0d0*(lstep),one)
534      if(dtavei.lt.0) then
535         wcur=one/denom
536      else
537         wcur=dtavei
538      endif
539      whist=1.0-wcur
540
541      if (istep .eq. 0) then
542         lhsG = zero
543      endif
544
545      fres = zero
546      yold(:,1)=y(:,4)
547      yold(:,2:4)=y(:,1:3)
548c
549c  hack in an interesting velocity field (uncomment to test dmod)
550c
551c      do i = 1, nshg  ! No periodicity for testing
552c      iper(i) = i
553c      enddo
554c      yold(:,5) = 1.0
555c      yold(:,2) = 3.0d0
556c      yold(:,2) = 2.0*x(:,1) - 3*x(:,2)
557c      yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2)
558c      yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3)
559c      yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable
560c                               suitable for the
561
562
563      intrul=intg(1,itseq)
564      intind=intpt(intrul)
565
566      do iblk = 1,nelblk
567        lcsyst = lcblk(3,iblk)
568        iel  = lcblk(1,iblk) !Element number where this block begins
569        npro = lcblk(1,iblk+1) - iel
570        lelCat = lcblk(2,iblk)
571        nenl = lcblk(5,iblk)
572        nshl = lcblk(10,iblk)
573        inum  = iel + npro - 1
574
575        ngauss = nint(lcsyst)
576        ngaussf = nintf(lcsyst)
577
578        call hfilterBB (yold, x, mien(iblk)%p, fres,
579     &               shglf(lcsyst,:,1:nshl,:),
580     &               shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
581
582
583        if ( istep.eq.0 ) then
584
585           allocate ( em(npro,nshl,nshl) )
586
587           call getgram2 (x, mien(iblk)%p,
588     &          shgl(lcsyst,:,1:nshl,:),  shp(lcsyst,1:nshl,:),
589     &          shglf(lcsyst,:,1:nshl,:), shpf(lcsyst,1:nshl,:), em,
590     &          Qwtf(lcsyst,1:ngaussf))
591
592           call fillsparseSclr (mien(iblk)%p,
593     &                          em,            lhsG,
594     &                          rowp,          colm)
595
596
597           deallocate ( em )
598
599        endif
600
601      enddo   ! End loop over element blocks
602c
603
604c      write(*,*)'Im here'
605
606      if(numpe>1) call commu (fres, ilwork, 24, 'in ')
607c
608c account for periodicity in filtered variables
609c
610      do j = 1,nshg
611        i = iper(j)
612        if (i .ne. j) then
613           fres(i,:) = fres(i,:) + fres(j,:)
614        endif
615      enddo
616
617      do j = 1,nshg
618        i = iper(j)
619        if (i .ne. j) then
620           fres(j,:) = zero
621        endif
622      enddo
623
624c     Need to zero off-processor slaves as well.
625
626      if (numpe.gt.1) then
627
628         numtask = ilwork(1)
629         itkbeg = 1
630
631c zero the nodes that are "solved" on the other processors
632
633         do itask = 1, numtask
634
635            iacc   = ilwork (itkbeg + 2)
636            numseg = ilwork (itkbeg + 4)
637
638            if (iacc .eq. 0) then
639               do is = 1,numseg
640                  isgbeg = ilwork (itkbeg + 3 + 2*is)
641                  lenseg = ilwork (itkbeg + 4 + 2*is)
642                  isgend = isgbeg + lenseg - 1
643                  fres(isgbeg:isgend,:) = zero
644               enddo
645            endif
646
647            itkbeg = itkbeg + 4 + 2*numseg
648
649         enddo
650
651      endif
652
653c... At this point fres has the right hand side vector (b) and lhsG has
654c... the Gram matrix (M_{AB}) (in sparse storage). Now we need to solve
655c... Ax = b using the conjugate gradient method to finish off the
656c... L2-projection.
657
658
659      do i = 1, 21
660         call sparseCG (fres(:,i), pfres(:,i), lhsG,
661     &        rowp, colm, iper, ilwork,
662     &        iBC,  BC)
663      enddo
664
665
666      write(*,*)'Done with least-squares projection'
667
668      do i = 1, 21
669         fres(:,i) = pfres(:,i)
670      enddo
671
672      fres(:,22) = one
673
674      do iblk = 1,nelblk
675        lcsyst = lcblk(3,iblk)
676        iel  = lcblk(1,iblk) !Element number where this block begins
677        npro = lcblk(1,iblk+1) - iel
678        lelCat = lcblk(2,iblk)
679        nenl = lcblk(5,iblk)
680        nshl = lcblk(10,iblk)
681        inum  = iel + npro - 1
682
683        ngauss = nint(lcsyst)
684
685        call getstrl (yold, x,      mien(iblk)%p,
686     &               strl(iel:inum,:), shgl(lcsyst,:,1:nshl,:),
687     &               shp(lcsyst,1:nshl,:))
688
689      enddo
690
691
692      strnrm = sqrt(
693     &  two * (fres(:,10)**2 + fres(:,11)**2 + fres(:,12)**2)
694     &  + four * ( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) )
695
696      fwr = fwr1 * fres(:,22) * strnrm
697
698      xmij(:,1) = -fwr
699     &             * fres(:,10) + fres(:,16)
700      xmij(:,2) = -fwr
701     &             * fres(:,11) + fres(:,17)
702      xmij(:,3) = -fwr
703     &             * fres(:,12) + fres(:,18)
704
705      xmij(:,4) = -fwr * fres(:,13) + fres(:,19)
706      xmij(:,5) = -fwr * fres(:,14) + fres(:,20)
707      xmij(:,6) = -fwr * fres(:,15) + fres(:,21)
708
709      fres(:,22) = one / fres(:,22)
710
711      xlij(:,1) = fres(:,4) - fres(:,1) * fres(:,1) * fres(:,22)
712      xlij(:,2) = fres(:,5) - fres(:,2) * fres(:,2) * fres(:,22)
713      xlij(:,3) = fres(:,6) - fres(:,3) * fres(:,3) * fres(:,22)
714      xlij(:,4) = fres(:,7) - fres(:,1) * fres(:,2) * fres(:,22)
715      xlij(:,5) = fres(:,8) - fres(:,1) * fres(:,3) * fres(:,22)
716      xlij(:,6) = fres(:,9) - fres(:,2) * fres(:,3) * fres(:,22)
717
718      xnum =        xlij(:,1) * xmij(:,1) + xlij(:,2) * xmij(:,2)
719     &                                    + xlij(:,3) * xmij(:,3)
720     &     + two * (xlij(:,4) * xmij(:,4) + xlij(:,5) * xmij(:,5)
721     &                                    + xlij(:,6) * xmij(:,6))
722      xden =        xmij(:,1) * xmij(:,1) + xmij(:,2) * xmij(:,2)
723     &                                    + xmij(:,3) * xmij(:,3)
724     &     + two * (xmij(:,4) * xmij(:,4) + xmij(:,5) * xmij(:,5)
725     &                                    + xmij(:,6) * xmij(:,6))
726      xden = two * xden
727
728c
729c don't account for periodic nodes twice
730c
731      do j = 1,numnp
732        i = iper(j)
733        if (i .ne. j) then
734           xden(j) = zero
735           xnum(j) = zero
736        endif
737      enddo
738
739         if(numpe.gt.1) then
740c
741c.... nodes treated on another processor are eliminated
742c
743            numtask = ilwork(1)
744            itkbeg = 1
745
746            do itask = 1, numtask
747
748               iacc   = ilwork (itkbeg + 2)
749               numseg = ilwork (itkbeg + 4)
750
751               if (iacc .eq. 0) then
752                  do is = 1,numseg
753                     isgbeg = ilwork (itkbeg + 3 + 2*is)
754                     lenseg = ilwork (itkbeg + 4 + 2*is)
755                     isgend = isgbeg + lenseg - 1
756                     xnum(isgbeg:isgend) = zero
757                     xden(isgbeg:isgend) = zero
758                  enddo
759               endif
760
761               itkbeg = itkbeg + 4 + 2*numseg
762
763            enddo
764
765c            if (myrank.eq.0)then
766c               do i = 1, numnp
767c                  write(253,*)xnum(i),xden(i),myrank
768c               enddo
769c            endif
770c            if (myrank.eq.1)then
771c               do i = 1, numnp
772c                  write(254,*)xnum(i),xden(i),myrank
773c               enddo
774c            endif
775
776c            xnuml = sum(xnum)
777c            xdenl = sum(xden)
778
779            xnuml = zero
780            xdenl = zero
781            do i = 1, numnp
782               xnuml = xnuml + xnum(i)
783               xdenl = xdenl + xden(i)
784            enddo
785
786c            write(*,*)xnuml,xdenl,myrank
787
788            call drvAllreducesclr ( xnuml, xnumt )
789            call drvAllreducesclr ( xdenl, xdent )
790cd
791         else
792
793c            xnumt = sum(xnum)
794c            xdent = sum(xden)
795            xnumt = zero
796            xdent = zero
797            do i = 1, numnp
798               xnumt = xnumt + xnum(i)
799               xdent = xdent + xden(i)
800            enddo
801
802
803         endif
804
805         scalar = xnumt / (xdent + 1.d-09)
806         xnut = scalar
807
808
809      if (myrank .eq. 0)then
810         write(*,*) 'xnut=', xnut(100)
811      endif
812c      do i = 1, numnp
813c         write(*,*)xnumt/xdent,myrank
814c      enddo
815c
816      do iblk = 1,nelblk
817         lcsyst = lcblk(3,iblk)
818         iel  = lcblk(1,iblk)
819         npro = lcblk(1,iblk+1) - iel
820         lelCat = lcblk(2,iblk)
821         inum  = iel + npro - 1
822
823         ngauss = nint(lcsyst)
824
825         call scatnu (mien(iblk)%p, strl(iel:inum,:),
826     &        mxmudmi(iblk)%p,cdelsq,shp(lcsyst,1:nshl,:))
827      enddo
828c     $$$$$$$$$$$$$$$$$$$$$$$$$$$
829c$$$  tmp1 =  MINVAL(xmudmi)
830c$$$  tmp2 =  MAXVAL(xmudmi)
831c$$$  if(numpe>1) then
832c$$$  call MPI_REDUCE (tmp1, tmp3, 1, MPI_DOUBLE_PRECISION,
833c$$$  &                 MPI_MIN, master, MPI_COMM_WORLD, ierr)
834c$$$  call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION,
835c$$$  &                 MPI_MAX, master, MPI_COMM_WORLD, ierr)
836c$$$      tmp1=tmp3
837c$$$  tmp2=tmp4
838c$$$  endif
839c$$$  if (myrank .EQ. master) then
840c$$$  write(35,*) lstep,tmp1,tmp2
841c$$$  call flush(35)
842c$$$  endif
843c $$$$$$$$$$$$$$$$$$$$$$$$$$$
844
845c
846c  if flag set, write a restart file with info (reuse xmij's memory)
847c
848      if(irs.eq.11) then
849         lstep=999
850         xmij(:,1)=xnum(:)
851         xmij(:,2)=xden(:)
852         xmij(:,3)=cdelsq(:)
853         xmij(:,5)=xlij(:,4)    !leave M_{12} in 4 and put L_{12} here
854         call restar('out ',xmij,xlij) !also dump all of L_{ij} in ac
855         stop
856      endif
857c
858c  local clipping moved to scatnu with the creation of mxmudmi pointers
859c
860c$$$      rmu=datmat(1,2,1)
861c$$$      xmudmi=min(xmudmi,1000.0*rmu) !don't let it get larger than 1000 mu
862c$$$      xmudmi=max(xmudmi, -rmu) ! don't let (xmudmi + mu) < 0
863c      stop !uncomment to test dmod
864c
865
866
867c  write out the nodal values of xnut (estimate since we don't calc strain
868c  there and must use the filtered strain).
869c
870
871
872
873      return
874      end
875
876c-----------------------------------------------------
877
878      subroutine getgram (x, ien, shgl, shp, em, Qwtf)
879
880      include "common.h"
881
882      dimension x(numnp,nsd),            xl(npro,nenl,nsd)
883      dimension ien(npro,nshl),
884     &          shgl(nsd,nshl,ngauss),    shp(nshl,ngauss),
885     &          em(npro,nshl,nshl),      Qwtf(ngaussf)
886
887      call localx(x,      xl,     ien,    nsd,    'gather  ')
888
889      call cmass(shp,shgl,xl,em)
890
891
892      return
893
894      end
895
896c----------------------------------------------------------------------
897
898
899      subroutine getgram2 (x, ien, shgl, shp, shglf, shpf, em, Qwtf)
900
901      include "common.h"
902
903      dimension x(numnp,nsd),            xl(npro,nenl,nsd)
904      dimension ien(npro,nshl),
905     &          shgl(nsd,nshl,ngauss),    shp(nshl,ngauss),
906     &          shglf(nsd,nshl,ngauss),   shpf(nshl,ngauss),
907     &          em(npro,nshl,nshl),      Qwtf(ngaussf)
908
909
910      call localx(x,      xl,     ien,    nsd,    'gather  ')
911
912      call cmassl(shp,shgl,shpf,shglf,xl,em,Qwtf)
913
914
915      return
916
917      end
918
919c-----------------------------------------------------------------------
920
921      subroutine getgram3 (x, ien, shgl, shp, shglf, shpf, em, Qwtf)
922
923      include "common.h"
924
925      dimension x(numnp,nsd),            xl(npro,nenl,nsd)
926      dimension ien(npro,nshl),
927     &          shgl(nsd,nshl,ngauss),    shp(nshl,ngauss),
928     &          shglf(nsd,nshl,ngauss),   shpf(nshl,ngauss),
929     &          em(npro,nshl,nshl),      Qwtf(ngaussf)
930
931
932      call localx(x,      xl,     ien,    nsd,    'gather  ')
933
934      call cmasstl(shp,shgl,shpf,shglf,xl,em,Qwtf)
935
936
937      return
938
939      end
940      subroutine cdelBHsq (y,      shgl,      shp,
941     &                   iper,   ilwork,
942     &                   nsons,  ifath,     x, cdelsq1)
943
944      use pointer_data
945
946      use quadfilt   ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf),
947c                    shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf).
948c                    Shpf and shglf are the shape funciotns and their
949c                    gradient evaluated using the quadrature rule desired
950c                    for computing the dmod. Qwtf contains the weights of the
951c                    quad. points.
952
953      include "common.h"
954      include "mpif.h"
955      include "auxmpi.h"
956
957c
958      dimension fres(nshg,33),         fwr(nshg),
959     &          strnrm(nshg),         cdelsq1(nfath),
960     &          xnum(nshg),           xden(nshg),
961     &          xmij(nshg,6),         xlij(nshg,6),
962     &          xnude(nfath,2),        xnuder(nfath,2),
963     &          nsons(nshg),
964     &          strl(numel,ngauss),
965     &          y(nshg,5),            yold(nshg,5),
966     &          ifath(nshg),          iper(nshg),
967     &          ilwork(nlwork),
968     &          x(numnp,3),
969     &          shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT),
970     &          xnutf(nfath),
971     &          hfres(nshg,16)
972
973c
974
975      fres = zero
976      hfres = zero
977
978      yold(:,1)=y(:,4)
979      yold(:,2:4)=y(:,1:3)
980
981c
982c  hack in an interesting velocity field (uncomment to test dmod)
983c
984c      yold(:,5) = 1.0  ! Debugging
985c      yold(:,2) = 2.0*x(:,1) - 3.0*x(:,2)
986c      yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2)
987c      yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3)
988c      yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable
989c                               suitable for the
990
991      do iblk = 1,nelblk
992        lcsyst = lcblk(3,iblk)
993        iel  = lcblk(1,iblk) !Element number where this block begins
994        npro = lcblk(1,iblk+1) - iel
995        lelCat = lcblk(2,iblk)
996        nenl = lcblk(5,iblk)
997        nshl = lcblk(10,iblk)
998        inum  = iel + npro - 1
999
1000        ngauss = nint(lcsyst)
1001        ngaussf = nintf(lcsyst)
1002
1003c        call hfilterB (yold, x, mien(iblk)%p, hfres,
1004c     &               shglf(lcsyst,:,1:nshl,:),
1005c     &               shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
1006
1007        call hfilterC (yold, x, mien(iblk)%p, hfres,
1008     &               shglf(lcsyst,:,1:nshl,:),
1009     &               shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
1010
1011      enddo
1012
1013      if(numpe>1) call commu (hfres, ilwork, 16, 'in ')
1014c
1015c... account for periodicity in filtered variables
1016c
1017      do j = 1,nshg  !    Add on-processor slave contribution to masters
1018        i = iper(j)
1019        if (i .ne. j) then
1020           hfres(i,:) = hfres(i,:) + hfres(j,:)
1021        endif
1022      enddo
1023      do j = 1,nshg ! Set on-processor slaves to be the same as masters
1024        i = iper(j)
1025        if (i .ne. j) then
1026           hfres(j,:) = hfres(i,:)
1027        endif
1028      enddo
1029
1030c... Set off-processor slaves to be the same as their masters
1031
1032      if(numpe>1)   call commu (hfres, ilwork, 16, 'out')
1033
1034
1035      hfres(:,16) = one / hfres(:,16) ! one/(volume of hat filter kernel)
1036
1037      do j = 1, 15
1038	hfres(:,j) = hfres(:,j) * hfres(:,16)
1039      enddo
1040
1041c... For debugging
1042
1043c      hfres(:,1) = 2.0*x(:,1) - 3.0*x(:,2)
1044c      hfres(:,2) = 3.0*x(:,1) + 4.0*x(:,2)
1045c      hfres(:,3) = 4.0*x(:,1) + x(:,2) + x(:,3)
1046
1047c... Done w/ h-filtering. Begin 2h-filtering.
1048
1049      do iblk = 1,nelblk
1050        lcsyst = lcblk(3,iblk)
1051        iel  = lcblk(1,iblk) !Element number where this block begins
1052        npro = lcblk(1,iblk+1) - iel
1053        lelCat = lcblk(2,iblk)
1054        nenl = lcblk(5,iblk)
1055        nshl = lcblk(10,iblk)
1056        inum  = iel + npro - 1
1057
1058        ngauss = nint(lcsyst)
1059        ngaussf = nintf(lcsyst)
1060
1061        call twohfilterB (yold, x, strl(iel:inum,:), mien(iblk)%p,
1062     &               fres, hfres, shgl(lcsyst,:,1:nshl,:),
1063     &               shp(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
1064
1065      enddo
1066c
1067
1068
1069      if(numpe>1) call commu (fres, ilwork, 33, 'in ')
1070c
1071c account for periodicity in filtered variables
1072c
1073      do j = 1,nshg
1074        i = iper(j)
1075        if (i .ne. j) then
1076           fres(i,:) = fres(i,:) + fres(j,:)
1077        endif
1078      enddo
1079
1080      do j = 1,nshg
1081        i = iper(j)
1082        if (i .ne. j) then
1083           fres(j,:) = fres(i,:)
1084        endif
1085      enddo
1086
1087      if(numpe>1)then
1088         call commu (fres, ilwork, 33, 'out')
1089      endif
1090
1091      fres(:,22) = one / fres(:,22)
1092      do j = 1,21
1093        fres(:,j) = fres(:,j) * fres(:,22)
1094      enddo
1095      do j = 23,33
1096        fres(:,j) = fres(:,j) * fres(:,22)
1097      enddo
1098
1099
1100      do iblk = 1,nelblk
1101        lcsyst = lcblk(3,iblk)
1102        iel  = lcblk(1,iblk) !Element number where this block begins
1103        npro = lcblk(1,iblk+1) - iel
1104        lelCat = lcblk(2,iblk)
1105        nenl = lcblk(5,iblk)
1106        nshl = lcblk(10,iblk)
1107        inum  = iel + npro - 1
1108
1109        ngauss = nint(lcsyst)
1110
1111        call getstrl (yold, x,      mien(iblk)%p,
1112     &               strl(iel:inum,:), shgl(lcsyst,:,1:nshl,:),
1113     &               shp(lcsyst,1:nshl,:))
1114
1115      enddo
1116
1117c
1118c... Obtain the hat-tilde strain rate norm at the nodes
1119c
1120
1121      strnrm = sqrt(
1122     &  two * (fres(:,10)**2 + fres(:,11)**2 + fres(:,12)**2)
1123     &  + four * ( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) )
1124
1125      fwr = fwr1 * strnrm
1126
1127      xmij(:,1) = -fwr
1128     &             * fres(:,10) + fres(:,16)
1129      xmij(:,2) = -fwr
1130     &             * fres(:,11) + fres(:,17)
1131      xmij(:,3) = -fwr
1132     &             * fres(:,12) + fres(:,18)
1133
1134      xmij(:,4) = -fwr * fres(:,13) + fres(:,19)
1135      xmij(:,5) = -fwr * fres(:,14) + fres(:,20)
1136      xmij(:,6) = -fwr * fres(:,15) + fres(:,21)
1137
1138
1139      xlij(:,1) = fres(:,4) - fres(:,1) * fres(:,1)
1140      xlij(:,2) = fres(:,5) - fres(:,2) * fres(:,2)
1141      xlij(:,3) = fres(:,6) - fres(:,3) * fres(:,3)
1142      xlij(:,4) = fres(:,7) - fres(:,1) * fres(:,2)
1143      xlij(:,5) = fres(:,8) - fres(:,1) * fres(:,3)
1144      xlij(:,6) = fres(:,9) - fres(:,2) * fres(:,3)
1145
1146      xnum =        xlij(:,1) * xmij(:,1) + xlij(:,2) * xmij(:,2)
1147     &                                    + xlij(:,3) * xmij(:,3)
1148     &     + two * (xlij(:,4) * xmij(:,4) + xlij(:,5) * xmij(:,5)
1149     &                                    + xlij(:,6) * xmij(:,6))
1150      xden =        xmij(:,1) * xmij(:,1) + xmij(:,2) * xmij(:,2)
1151     &                                    + xmij(:,3) * xmij(:,3)
1152     &     + two * (xmij(:,4) * xmij(:,4) + xmij(:,5) * xmij(:,5)
1153     &                                    + xmij(:,6) * xmij(:,6))
1154      xden = two * xden
1155
1156c  zero on processor periodic nodes so that they will not be added twice
1157        do j = 1,numnp
1158          i = iper(j)
1159          if (i .ne. j) then
1160            xnum(j) = zero
1161            xden(j) = zero
1162          endif
1163        enddo
1164
1165      if (numpe.gt.1) then
1166
1167         numtask = ilwork(1)
1168         itkbeg = 1
1169
1170c zero the nodes that are "solved" on the other processors
1171         do itask = 1, numtask
1172
1173            iacc   = ilwork (itkbeg + 2)
1174            numseg = ilwork (itkbeg + 4)
1175
1176            if (iacc .eq. 0) then
1177               do is = 1,numseg
1178                  isgbeg = ilwork (itkbeg + 3 + 2*is)
1179                  lenseg = ilwork (itkbeg + 4 + 2*is)
1180                  isgend = isgbeg + lenseg - 1
1181                  xnum(isgbeg:isgend) = zero
1182                  xden(isgbeg:isgend) = zero
1183               enddo
1184            endif
1185
1186            itkbeg = itkbeg + 4 + 2*numseg
1187
1188         enddo
1189
1190      endif
1191c
1192c Description of arrays.   Each processor has an array of length equal
1193c to the total number of fathers times 2 xnude(nfathers,2). One to collect
1194c the numerator and one to collect the denominator.  There is also an array
1195c of length nshg on each processor which tells the father number of each
1196c on processor node, ifath(nnshg).  Finally, there is an arry of length
1197c nfathers to tell the total (on all processors combined) number of sons
1198c for each father.
1199c
1200c  Now loop over nodes and accumlate the numerator and the denominator
1201c  to the father nodes.  Only on processor addition at this point.
1202c  Note that serrogate fathers are collect some for the case where some
1203c  sons are on another processor
1204c
1205      xnude = zero
1206      do i = 1,nshg
1207         xnude(ifath(i),1) = xnude(ifath(i),1) + xnum(i)
1208         xnude(ifath(i),2) = xnude(ifath(i),2) + xden(i)
1209      enddo
1210
1211c
1212c Now  the true fathers and serrogates combine results and update
1213c each other.
1214c
1215      if(numpe .gt. 1)then
1216         call drvAllreduce(xnude, xnuder,2*nfath)
1217c
1218c  xnude is the sum of the sons for each father on this processor
1219c
1220c  xnuder is the sum of the sons for each father on all processor combined
1221c  (the same as if we had not partitioned the mesh for each processor)
1222c
1223c   For each father we have precomputed the number of sons (including
1224c   the sons off processor).
1225c
1226c   Now divide by number of sons to get the average (not really necessary
1227c   for dynamic model since ratio will cancel nsons at each father)
1228c
1229c         xnuder(:,1) = xnuder(:,1) ! / nsons(:)
1230c         xnuder(:,2) = xnuder(:,2) ! / nsons(:)
1231c
1232c  the next line is c \Delta^2
1233c
1234         xnuder(:,1) = xnuder(:,1) / (xnuder(:,2) + 1.d-09)
1235         do i = 1,nfath
1236            cdelsq1(i) = xnuder(i,1)
1237         enddo
1238      else
1239c
1240c     the next line is c \Delta^2, not nu_T but we want to save the
1241c     memory
1242c
1243         xnude(:,1) = xnude(:,1) / (xnude(:,2) + 1.d-09)
1244         do i = 1,nfath
1245            cdelsq1(i) = xnude(i,1)
1246         enddo
1247      endif
1248
1249      if (myrank .eq. master) then
1250         if (numpe .gt. 1) then
1251            do i = 1, nfath
1252               write(22,*)i, xnuder(i,1)
1253            enddo
1254         else
1255            do i = 1, nfath
1256               write(22,*)i, xnude(i,1)
1257            enddo
1258         endif
1259      endif
1260      call flush(22)
1261
1262      do i = 1, nfath
1263         if (cdelsq1(i) .lt. zero) then
1264            cdelsq1(i) = zero
1265         endif
1266      enddo
1267
1268      return
1269      end
1270      subroutine SUPGdis (y,           ac,         shgl,
1271     &                  shp,         iper,       ilwork,
1272     &                  nsons,       ifath,      x,
1273     &                  iBC,    BC,  stabdis,    xavegt)
1274
1275
1276      use stats            !
1277      use pointer_data     ! brings in the pointers for the blocked arrays
1278      use local_mass
1279      use rlssave  ! Use the resolved Leonard stresses at the nodes.
1280      use quadfilt ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf),
1281c                    shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf).
1282c                    Shpf and shglf are the shape funciotns and their
1283c                    gradient evaluated using the quadrature rule desired
1284c                    for computing the dmod. Qwt contains the weights of the
1285c                    quad. points.
1286
1287
1288
1289      include "common.h"
1290      include "mpif.h"
1291      include "auxmpi.h"
1292
1293
1294      dimension y(nshg,ndof),                  ac(nshg,ndof),
1295     &          yold(nshg,ndof),
1296     &          ifath(nshg),                   nsons(nshg),
1297     &          iper(nshg),                    ilwork(nlwork),
1298     &          shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT),
1299     &          x(numnp,3),
1300     &          qres(nshg,nsd*nsd),             rmass(nshg),
1301     &          iBC(nshg),                      BC(nshg,ndofBC),
1302     &          cdelsq(nshg),                   vol(nshg),
1303     &          stress(nshg,9),                 diss(nshg,3),
1304     &          xave(nshg,12),                  xaveg(nfath,12),
1305     &          xavegr(nfath,12),               stabdis(nfath),
1306     &          dmodc(nfath),                   strl(numel,ngauss),
1307     &          xavegt(nfath,12)
1308
1309      character*5  cname
1310      character*30 fname
1311
1312      yold(:,1)=y(:,4)
1313      yold(:,2:4)=y(:,1:3)
1314
1315c
1316c  hack in an interesting velocity field (uncomment to test dmod)
1317c
1318c      yold(:,5) = 1.0  ! Debugging
1319c      yold(:,2) = 2.0*x(:,1) - 3.0*x(:,2)
1320c      yold(:,2) = 2.0
1321c      yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2)
1322c      yold(:,3) = 3.0
1323c      yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3)
1324c      yold(:,4) = 4.0
1325c      yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable
1326c                               suitable for the
1327
1328c.... First let us obtain cdelsq at each node in the domain.
1329c.... We use numNden which lives in the quadfilt module.
1330
1331      if ( (istep .eq. 0) ) then
1332         fname =  'dmodc.dat' // cname (myrank+1)
1333         open (99,file=fname,form='unformatted',status='unknown')
1334         read(99) dmodc
1335         close(99)
1336         cdelsq(:) = dmodc(ifath(:))
1337      else
1338         cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09)
1339      endif
1340
1341c      if (myrank .eq. master) then
1342c         do i = 1, nfath
1343c            write(*,*)'dmod=', dmodc(i)
1344c         enddo
1345c      endif
1346
1347      if ( istep .eq. (nstep(1)-1) ) then
1348         dmodc(ifath(:)) = cdelsq(:)
1349         fname =  'dmodc.dat' // cname (myrank+1)
1350         open (99,file=fname,form='unformatted', status='replace')
1351         write(99) dmodc
1352         close(99)
1353c         if (myrank .eq. master) then
1354c            do i = 1, nfath
1355c               write(*,*)'dmod=', dmodc(i)
1356c            enddo
1357c         endif
1358
1359      endif
1360
1361c      if (istep .eq. 0)
1362      do iblk = 1,nelblk
1363        lcsyst = lcblk(3,iblk)
1364        iel  = lcblk(1,iblk) !Element number where this block begins
1365        npro = lcblk(1,iblk+1) - iel
1366        lelCat = lcblk(2,iblk)
1367        nenl = lcblk(5,iblk)
1368        nshl = lcblk(10,iblk)
1369        inum  = iel + npro - 1
1370
1371        ngauss = nint(lcsyst)
1372
1373        call getstrl (yold, x,      mien(iblk)%p,
1374     &               strl(iel:inum,:), shgl(lcsyst,:,1:nshl,:),
1375     &               shp(lcsyst,1:nshl,:))
1376
1377      enddo
1378
1379      do iblk = 1,nelblk
1380         lcsyst = lcblk(3,iblk)
1381         iel  = lcblk(1,iblk)
1382         npro = lcblk(1,iblk+1) - iel
1383         lelCat = lcblk(2,iblk)
1384         inum  = iel + npro - 1
1385
1386         ngauss = nint(lcsyst)
1387
1388         call scatnu (mien(iblk)%p, strl(iel:inum,:),
1389     &        mxmudmi(iblk)%p,cdelsq,shp(lcsyst,1:nshl,:))
1390      enddo
1391c      endif
1392
1393
1394
1395        if (idiff==1 .or. idiff==3) then ! global reconstruction of qdiff
1396c
1397c loop over element blocks for the global reconstruction
1398c of the diffusive flux vector, q, and lumped mass matrix, rmass
1399c
1400           qres = zero
1401           rmass = zero
1402
1403           do iblk = 1, nelblk
1404              iel    = lcblk(1,iblk)
1405              lelCat = lcblk(2,iblk)
1406              lcsyst = lcblk(3,iblk)
1407              iorder = lcblk(4,iblk)
1408              nenl   = lcblk(5,iblk) ! no. of vertices per element
1409              nshl   = lcblk(10,iblk)
1410              mattyp = lcblk(7,iblk)
1411              ndofl  = lcblk(8,iblk)
1412              nsymdl = lcblk(9,iblk)
1413              npro   = lcblk(1,iblk+1) - iel
1414              ngauss = nint(lcsyst)
1415c
1416c.... compute and assemble diffusive flux vector residual, qres,
1417c     and lumped mass matrix, rmass
1418
1419              call AsIq (y,                x,
1420     &                   shp(lcsyst,1:nshl,:),
1421     &                   shgl(lcsyst,:,1:nshl,:),
1422     &                   mien(iblk)%p,     mxmudmi(iblk)%p,
1423     &                   qres,             rmass )
1424           enddo
1425
1426c
1427c.... form the diffusive flux approximation
1428c
1429           call qpbc( rmass, qres, iBC, BC, iper, ilwork )
1430c
1431        endif
1432
1433
1434c.... form the SUPG stresses well as dissipation due to eddy viscosity,
1435c...  and SUPG stabilization.
1436
1437
1438        stress = zero
1439        vol    = zero
1440        diss   = zero
1441
1442      do iblk = 1,nelblk
1443        lcsyst = lcblk(3,iblk)
1444        iel  = lcblk(1,iblk) !Element number where this block begins
1445        npro = lcblk(1,iblk+1) - iel
1446        lelCat = lcblk(2,iblk)
1447        nenl = lcblk(5,iblk)
1448        nshl = lcblk(10,iblk)
1449        inum  = iel + npro - 1
1450
1451        ngauss = nint(lcsyst)
1452        ngaussf = nintf(lcsyst)
1453
1454        call SUPGstress (y, ac, x, qres, mien(iblk)%p, mxmudmi(iblk)%p,
1455     &                   cdelsq, shglf(lcsyst,:,1:nshl,:),
1456     &                   shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf),
1457     &                   shgl(lcsyst,:,1:nshl,:), shp(lcsyst,1:nshl,:),
1458     &                   stress, diss, vol)
1459
1460      enddo
1461
1462      if(numpe>1) call commu (stress, ilwork, 9, 'in ')
1463      if(numpe>1) call commu (diss, ilwork, 3, 'in ')
1464      if(numpe>1) call commu (vol, ilwork, 1, 'in ')
1465
1466c
1467c account for periodicity
1468c
1469      do j = 1,nshg
1470        i = iper(j)
1471        if (i .ne. j) then
1472           stress(i,:) = stress(i,:) + stress(j,:)
1473           diss(i,:)   = diss(i,:)   + diss(j,:)
1474           vol(i)      = vol(i)      + vol(j)
1475        endif
1476      enddo
1477
1478      do j = 1,nshg
1479        i = iper(j)
1480        if (i .ne. j) then
1481           stress(j,:) = stress(i,:)
1482           diss(j,:)   = diss(i,:)
1483           vol(j)      = vol(i)
1484        endif
1485      enddo
1486
1487      if(numpe>1) call commu (stress, ilwork, 9, 'out ')
1488      if(numpe>1) call commu (diss, ilwork, 3, 'out ')
1489      if(numpe>1) call commu (vol, ilwork, 1, 'out ')
1490
1491      vol = one / vol
1492      do i = 1, 9
1493         stress(:,i) = stress(:,i)*vol(:)
1494      enddo
1495      do i = 1, 3
1496         diss(:,i) = diss(:,i)*vol(:)
1497      enddo
1498
1499c---------- > Begin averaging dissipations and SUPG stress <--------------
1500
1501      do i = 1, 9
1502         xave(:,i) = stress(:,i)
1503      enddo
1504      xave(:,10) = diss(:,1)
1505      xave(:,11) = diss(:,2)
1506      xave(:,12) = diss(:,3)
1507
1508c  zero on processor periodic nodes so that they will not be added twice
1509        do j = 1,numnp
1510          i = iper(j)
1511          if (i .ne. j) then
1512            xave(j,:) = zero
1513          endif
1514        enddo
1515
1516      if (numpe.gt.1) then
1517
1518         numtask = ilwork(1)
1519         itkbeg = 1
1520
1521c zero the nodes that are "solved" on the other processors
1522         do itask = 1, numtask
1523
1524            iacc   = ilwork (itkbeg + 2)
1525            numseg = ilwork (itkbeg + 4)
1526
1527            if (iacc .eq. 0) then
1528               do is = 1,numseg
1529                  isgbeg = ilwork (itkbeg + 3 + 2*is)
1530                  lenseg = ilwork (itkbeg + 4 + 2*is)
1531                  isgend = isgbeg + lenseg - 1
1532                  xave(isgbeg:isgend,:) = zero
1533               enddo
1534            endif
1535
1536            itkbeg = itkbeg + 4 + 2*numseg
1537
1538         enddo
1539
1540      endif
1541c
1542
1543      xaveg = zero
1544      do i = 1,nshg
1545         xaveg(ifath(i),:) = xaveg(ifath(i),:) + xave(i,:)
1546      enddo
1547
1548      if(numpe .gt. 1)then
1549         call drvAllreduce(xaveg, xavegr,12*nfath)
1550
1551         do m = 1, 12
1552            xavegr(:,m) = xavegr(:,m)/nsons(:)
1553         enddo
1554
1555c         if (myrank .eq. master) then
1556c            write(*,*)'diss=', xavegt(14,11), xavegr(14,11)
1557c         endif
1558
1559         do m = 1, 12
1560            xavegt(:,m) = xavegt(:,m) + xavegr(:,m)
1561         enddo
1562
1563         stabdis(:) = xavegr(:,10)
1564
1565      else
1566
1567         do m = 1, 12
1568            xaveg(:,m) = xaveg(:,m)/nsons(:)
1569         enddo
1570
1571         do m = 1, 12
1572            xavegt(:,m) = xavegt(:,m) + xaveg(:,m)
1573         enddo
1574
1575         stabdis(:) = xaveg(:,10)
1576
1577      endif
1578
1579c      if (myrank .eq. master) then
1580c         write(*,*)'diss=', xavegt(14,11), xavegr(14,11)
1581c      endif
1582
1583       if ( istep .eq. (nstep(1)-1) ) then
1584          if ( myrank .eq. master) then
1585
1586             do i = 1, nfath
1587c               write(376,*)xavegt(i,1),xavegt(i,2),xavegt(i,3)
1588c               write(377,*)xavegt(i,4),xavegt(i,5),xavegt(i,6)
1589c               write(378,*)xavegt(i,7),xavegt(i,8),xavegt(i,9)
1590                write(380,*)xavegt(i,10),xavegt(i,11),xavegt(i,12)
1591            enddo
1592
1593c            call flush(376)
1594c            call flush(377)
1595c            call flush(378)
1596            call flush(380)
1597
1598         endif
1599      endif
1600
1601
1602      return
1603
1604      end
1605      subroutine dmcSUPG(y,           ac,         shgl,
1606     &                  shp,         iper,       ilwork,
1607     &                  nsons,       ifath,      x,
1608     &                  iBC,    BC,  rowp,       colm,
1609     &                  xavegt, stabdis)
1610
1611      use lhsGkeep ! This module stores the mass (Gram) matrix.
1612      use stats            !
1613      use pointer_data     ! brings in the pointers for the blocked arrays
1614      use local_mass
1615      use rlssave  ! Use the resolved Leonard stresses at the nodes.
1616      use quadfilt ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf),
1617c                    shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf).
1618c                    Shpf and shglf are the shape funciotns and their
1619c                    gradient evaluated using the quadrature rule desired
1620c                    for computing the dmod. Qwt contains the weights of the
1621c                    quad. points.
1622
1623
1624
1625      include "common.h"
1626      include "mpif.h"
1627      include "auxmpi.h"
1628
1629
1630      dimension y(nshg,ndof),                  ac(nshg,ndof),
1631     &          ifath(nshg),                   nsons(nshg),
1632     &          iper(nshg),                    ilwork(nlwork),
1633     &          shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT),
1634     &          x(numnp,3),
1635     &          qres(nshg,nsd*nsd),             rmass(nshg),
1636     &          iBC(nshg),                      BC(nshg,ndofBC),
1637     &          cdelsq(nshg),                   vol(nshg),
1638     &          stress(nshg,9),                 diss(nshg,3),
1639     &          xave(nshg,12),                  xaveg(nfath,12),
1640     &          xavegr(nfath,12),               stabdis(nfath),
1641     &          yold(nshg,ndof),                xavegt(nfath,12),
1642     &          fres(nshg,24),                  pfres(nshg,24),
1643     &          cdel(nfath),                    xnume(nfath),
1644     &          xdeno(nfath),                    strl(numel,ngauss),
1645     &          rden(nshg),                     rnum(nshg)
1646
1647
1648      integer   rowp(nshg*nnz),         colm(nshg+1)
1649
1650      real*8, allocatable, dimension(:,:,:) :: em
1651
1652      real*8, allocatable, dimension(:,:) :: fakexmu
1653
1654
1655      yold(:,1)=y(:,4)
1656      yold(:,2:4)=y(:,1:3)
1657      fres = zero
1658
1659c
1660c  hack in an interesting velocity field (uncomment to test dmod)
1661c
1662c      yold(:,5) = 1.0  ! Debugging
1663c      yold(:,2) = 2.0*x(:,1) - 3.0*x(:,2)
1664c      yold(:,2) = 2.0
1665c      yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2)
1666c      yold(:,3) = 3.0
1667c      yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3)
1668c      yold(:,4) = 4.0
1669c      yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable
1670c                               suitable for the
1671
1672
1673      intrul=intg(1,itseq)
1674      intind=intpt(intrul)
1675
1676      do iblk = 1,nelblk
1677        lcsyst = lcblk(3,iblk)
1678        iel  = lcblk(1,iblk) !Element number where this block begins
1679        npro = lcblk(1,iblk+1) - iel
1680        lelCat = lcblk(2,iblk)
1681        nenl = lcblk(5,iblk)
1682        nshl = lcblk(10,iblk)
1683        inum  = iel + npro - 1
1684
1685        ngauss = nint(lcsyst)
1686        ngaussf = nintf(lcsyst)
1687
1688        call resSij (yold, x, mien(iblk)%p, fres,
1689     &               shglf(lcsyst,:,1:nshl,:),
1690     &               shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
1691
1692        if ( istep.eq.0 ) then
1693
1694           allocate ( em(npro,nshl,nshl) )
1695
1696           call getgram2 (x, mien(iblk)%p,
1697     &          shgl(lcsyst,:,1:nshl,:),  shp(lcsyst,1:nshl,:),
1698     &          shglf(lcsyst,:,1:nshl,:), shpf(lcsyst,1:nshl,:), em,
1699     &          Qwtf(lcsyst,1:ngaussf))
1700
1701c           call getgram (x, mien(iblk)%p,
1702c     &          shgl(lcsyst,:,1:nshl,:),  shp(lcsyst,1:nshl,:),
1703c     &          em, Qwtf(lcsyst,1:ngaussf))
1704
1705           call fillsparseSclr (mien(iblk)%p,
1706     &                          em,            lhsG,
1707     &                          rowp,          colm)
1708
1709
1710           deallocate ( em )
1711
1712        endif
1713
1714      enddo   ! End loop over element blocks
1715c
1716
1717      if(numpe>1) call commu (fres, ilwork, 24, 'in ')
1718c
1719c account for periodicity in filtered variables
1720c
1721      do j = 1,nshg
1722        i = iper(j)
1723        if (i .ne. j) then
1724           fres(i,:) = fres(i,:) + fres(j,:)
1725        endif
1726      enddo
1727      do j = 1,nshg
1728        i = iper(j)
1729        if (i .ne. j) then
1730           fres(j,:) = fres(i,:)
1731        endif
1732      enddo
1733
1734      if(numpe>1)   call commu (fres, ilwork, 24, 'out')
1735
1736      fres(:,22) = one / fres(:,22)
1737      do j = 1,21
1738        fres(:,j) = fres(:,j) * fres(:,22)
1739      enddo
1740      pfres = fres
1741
1742c---- Needed for consistent projection
1743
1744c      if(numpe>1) call commu (fres, ilwork, 24, 'in ')
1745c
1746c account for periodicity in filtered variables
1747c
1748c      do j = 1,nshg
1749c        i = iper(j)
1750c        if (i .ne. j) then
1751c           fres(i,:) = fres(i,:) + fres(j,:)
1752c        endif
1753c      enddo
1754
1755c      do j = 1,nshg
1756c        i = iper(j)
1757c        if (i .ne. j) then
1758c           fres(j,:) = zero
1759c        endif
1760c      enddo
1761
1762c     Need to zero off-processor slaves as well.
1763
1764c      if (numpe.gt.1 .and. nsons(1).gt.1) then
1765
1766c         numtask = ilwork(1)
1767c         itkbeg = 1
1768
1769c zero the nodes that are "solved" on the other processors
1770
1771c         do itask = 1, numtask
1772
1773c            iacc   = ilwork (itkbeg + 2)
1774c            numseg = ilwork (itkbeg + 4)
1775
1776c            if (iacc .eq. 0) then
1777c               do is = 1,numseg
1778c                  isgbeg = ilwork (itkbeg + 3 + 2*is)
1779c                  lenseg = ilwork (itkbeg + 4 + 2*is)
1780c                  isgend = isgbeg + lenseg - 1
1781c                  fres(isgbeg:isgend,:) = zero
1782c               enddo
1783c            endif
1784
1785c            itkbeg = itkbeg + 4 + 2*numseg
1786
1787c         enddo
1788
1789c      endif
1790
1791c... At this point fres has the right hand side vector (b) and lhsG has
1792c... the Gram matrix (M_{AB}) (in sparse storage). Now we need to solve
1793c... Ax = b using the conjugate gradient method to finish off the
1794c... L2-projection.
1795
1796
1797c      do i = 16, 16
1798c         call sparseCG (fres(:,i), pfres(:,i), lhsG,
1799c     &        rowp, colm, iper, ilwork,
1800c     &        iBC,  BC)
1801c         write(*,*) 'i=', i
1802c      enddo
1803
1804
1805c      write(*,*)'Done with least-squares projection'
1806
1807
1808
1809
1810
1811c.... First let us obtain cdelsq at each node in the domain.
1812c.... We use numNden which lives in the quadfilt module.
1813
1814      cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09)
1815c      cdelsq(:) = zero ! Debugging
1816
1817      if (istep .eq. 0) then
1818         xavegt = zero  ! For averaging dissipations and SUPG stresses
1819      endif
1820
1821        if (idiff==1 .or. idiff==3) then ! global reconstruction of qdiff
1822c
1823c loop over element blocks for the global reconstruction
1824c of the diffusive flux vector, q, and lumped mass matrix, rmass
1825c
1826           qres = zero
1827           rmass = zero
1828
1829           do iblk = 1, nelblk
1830              iel    = lcblk(1,iblk)
1831              lelCat = lcblk(2,iblk)
1832              lcsyst = lcblk(3,iblk)
1833              iorder = lcblk(4,iblk)
1834              nenl   = lcblk(5,iblk) ! no. of vertices per element
1835              nshl   = lcblk(10,iblk)
1836              mattyp = lcblk(7,iblk)
1837              ndofl  = lcblk(8,iblk)
1838              nsymdl = lcblk(9,iblk)
1839              npro   = lcblk(1,iblk+1) - iel
1840              ngauss = nint(lcsyst)
1841
1842              allocate ( fakexmu(npro,ngauss) )
1843              fakexmu = zero
1844
1845c
1846c.... compute and assemble diffusive flux vector residual, qres,
1847c     and lumped mass matrix, rmass
1848
1849              call AsIq (y,                x,
1850     &                   shp(lcsyst,1:nshl,:),
1851     &                   shgl(lcsyst,:,1:nshl,:),
1852     &                   mien(iblk)%p,     mxmudmi(iblk)%p,
1853     &                   qres,             rmass )
1854
1855              deallocate ( fakexmu )
1856           enddo
1857
1858c
1859c.... form the diffusive flux approximation
1860c
1861           call qpbc( rmass, qres, iBC, BC, iper, ilwork )
1862c
1863        endif
1864
1865
1866c.... form the SUPG stresses well as dissipation due to eddy viscosity,
1867c...  and SUPG stabilization.
1868
1869
1870        stress = zero
1871        vol    = zero
1872        diss   = zero
1873
1874      do iblk = 1,nelblk
1875        lcsyst = lcblk(3,iblk)
1876        iel  = lcblk(1,iblk) !Element number where this block begins
1877        npro = lcblk(1,iblk+1) - iel
1878        lelCat = lcblk(2,iblk)
1879        nenl = lcblk(5,iblk)
1880        nshl = lcblk(10,iblk)
1881        inum  = iel + npro - 1
1882
1883        ngauss = nint(lcsyst)
1884        ngaussf = nintf(lcsyst)
1885
1886        allocate ( fakexmu(npro,ngauss) )
1887        fakexmu = zero
1888
1889        call SUPGstress (y, ac, x, qres, mien(iblk)%p, fakexmu,
1890     &                   cdelsq, shglf(lcsyst,:,1:nshl,:),
1891     &                   shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf),
1892     &                   shgl(lcsyst,:,1:nshl,:), shp(lcsyst,1:nshl,:),
1893     &                   stress, diss, vol)
1894
1895        deallocate ( fakexmu )
1896      enddo
1897
1898      if(numpe>1) call commu (stress, ilwork, 9, 'in ')
1899      if(numpe>1) call commu (diss, ilwork, 3, 'in ')
1900      if(numpe>1) call commu (vol, ilwork, 1, 'in ')
1901
1902c
1903c account for periodicity
1904c
1905      do j = 1,nshg
1906        i = iper(j)
1907        if (i .ne. j) then
1908           stress(i,:) = stress(i,:) + stress(j,:)
1909           diss(i,:)   = diss(i,:)   + diss(j,:)
1910           vol(i)      = vol(i)      + vol(j)
1911        endif
1912      enddo
1913
1914      do j = 1,nshg
1915        i = iper(j)
1916        if (i .ne. j) then
1917           stress(j,:) = stress(i,:)
1918           diss(j,:)   = diss(i,:)
1919           vol(j)      = vol(i)
1920        endif
1921      enddo
1922
1923      if(numpe>1) call commu (stress, ilwork, 9, 'out ')
1924      if(numpe>1) call commu (diss, ilwork, 3, 'out ')
1925      if(numpe>1) call commu (vol, ilwork, 1, 'out ')
1926
1927      vol = one / vol
1928      do i = 1, 9
1929         stress(:,i) = stress(:,i)*vol(:)
1930      enddo
1931      do i = 1, 3
1932         diss(:,i) = diss(:,i)*vol(:)
1933      enddo
1934
1935c---------- > Begin averaging dissipations and SUPG stress <--------------
1936
1937      do i = 1, 9
1938         xave(:,i) = stress(:,i)
1939      enddo
1940      xave(:,10) = diss(:,1)
1941      xave(:,11) = diss(:,2)
1942      xave(:,12) = pfres(:,16)
1943
1944c  zero on processor periodic nodes so that they will not be added twice
1945        do j = 1,numnp
1946          i = iper(j)
1947          if (i .ne. j) then
1948            xave(j,:) = zero
1949          endif
1950        enddo
1951
1952      if (numpe.gt.1) then
1953
1954         numtask = ilwork(1)
1955         itkbeg = 1
1956
1957c zero the nodes that are "solved" on the other processors
1958         do itask = 1, numtask
1959
1960            iacc   = ilwork (itkbeg + 2)
1961            numseg = ilwork (itkbeg + 4)
1962
1963            if (iacc .eq. 0) then
1964               do is = 1,numseg
1965                  isgbeg = ilwork (itkbeg + 3 + 2*is)
1966                  lenseg = ilwork (itkbeg + 4 + 2*is)
1967                  isgend = isgbeg + lenseg - 1
1968                  xave(isgbeg:isgend,:) = zero
1969               enddo
1970            endif
1971
1972            itkbeg = itkbeg + 4 + 2*numseg
1973
1974         enddo
1975
1976      endif
1977c
1978
1979      xaveg = zero
1980      do i = 1,nshg
1981         xaveg(ifath(i),:) = xaveg(ifath(i),:) + xave(i,:)
1982      enddo
1983
1984      if(numpe .gt. 1)then
1985         call drvAllreduce(xaveg, xavegr,12*nfath)
1986
1987         do m = 1, 12
1988            xavegr(:,m) = xavegr(:,m)/nsons(:)
1989         enddo
1990
1991c         if (myrank .eq. master) then
1992c            write(*,*)'diss=', xavegt(14,11), xavegr(14,11)
1993c         endif
1994
1995         do m = 1, 12
1996            xavegt(:,m) = xavegt(:,m) + xavegr(:,m)
1997         enddo
1998
1999      else
2000
2001         do m = 1, 12
2002            xaveg(:,m) = xaveg(:,m)/nsons(:)
2003         enddo
2004
2005         do m = 1, 12
2006            xavegt(:,m) = xavegt(:,m) + xaveg(:,m)
2007         enddo
2008
2009      endif
2010
2011      if (myrank .eq. master) then
2012         write(*,*)'diss0=', xavegt(14,11), xavegr(14,11)
2013      endif
2014
2015      if ( istep .eq. (nstep(1)-1) ) then
2016         if ( myrank .eq. master) then
2017
2018            do i = 1, nfath
2019c               write(376,*)xavegt(i,1),xavegt(i,2),xavegt(i,3)
2020c               write(377,*)xavegt(i,4),xavegt(i,5),xavegt(i,6)
2021c               write(378,*)xavegt(i,7),xavegt(i,8),xavegt(i,9)
2022               write(381,*)xavegt(i,10),xavegt(i,11),xavegt(i,12)
2023            enddo
2024
2025c            call flush(376)
2026c            call flush(377)
2027c            call flush(378)
2028c            call flush(379)
2029            call flush(381)
2030         endif
2031      endif
2032
2033      rnum(ifath(:)) = numNden(:,1)
2034      rden(ifath(:)) = numNden(:,2)
2035
2036      if (numpe .gt. 1) then
2037      do i = 1, nfath
2038         if (stabdis(i) .gt. zero) then
2039            cdel(i) = (two*xavegr(i,11)-stabdis(i))/xavegr(i,12)
2040            xnume(i) = two*xavegr(i,11)-stabdis(i)
2041            xdeno(i) = xavegr(i,12)
2042         else
2043            xnume(i) = rnum(i)
2044            xdeno(i) = rden(i)
2045         endif
2046      enddo
2047      else
2048      do i = 1, nfath
2049         if (stabdis(i) .gt. zero) then
2050            cdel(i) = (two*xaveg(i,11)-stabdis(i))/xaveg(i,12)
2051            xnume(i) = two*xaveg(i,11)-stabdis(i)
2052            xdeno(i) = xaveg(i,12)
2053         else
2054            xnume(i) = rnum(i)
2055            xdeno(i) = rden(i)
2056         endif
2057      enddo
2058      endif
2059
2060      do i = 1, nfath
2061         if (xnume(i) .lt. zero) then
2062            xnume(i) = rnum(i)
2063            xdeno(i) = rden(i)
2064         endif
2065      enddo
2066
2067      do i = 1, nshg
2068            numNden(i,1) = xnume(ifath(i))
2069            numNden(i,2) = xdeno(ifath(i))
2070      enddo
2071
2072      cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09)
2073
2074      do iblk = 1,nelblk
2075        lcsyst = lcblk(3,iblk)
2076        iel  = lcblk(1,iblk) !Element number where this block begins
2077        npro = lcblk(1,iblk+1) - iel
2078        lelCat = lcblk(2,iblk)
2079        nenl = lcblk(5,iblk)
2080        nshl = lcblk(10,iblk)
2081        inum  = iel + npro - 1
2082
2083        ngauss = nint(lcsyst)
2084
2085        call getstrl (yold, x,      mien(iblk)%p,
2086     &               strl(iel:inum,:), shgl(lcsyst,:,1:nshl,:),
2087     &               shp(lcsyst,1:nshl,:))
2088
2089      enddo
2090
2091
2092      do iblk = 1,nelblk
2093         lcsyst = lcblk(3,iblk)
2094         iel  = lcblk(1,iblk)
2095         npro = lcblk(1,iblk+1) - iel
2096         lelCat = lcblk(2,iblk)
2097         inum  = iel + npro - 1
2098
2099         ngauss = nint(lcsyst)
2100
2101         call scatnu (mien(iblk)%p, strl(iel:inum,:),
2102     &        mxmudmi(iblk)%p,cdelsq,shp(lcsyst,1:nshl,:))
2103      enddo
2104
2105      return
2106
2107      end
2108      subroutine FiltRat (y,      shgl,      shp,
2109     &                   iper,   ilwork,
2110     &                   nsons,  ifath,     x,   cdelsq1, fwr4,
2111     &                   fwr3)
2112
2113      use pointer_data
2114
2115      use quadfilt   ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf),
2116c                    shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf).
2117c                    Shpf and shglf are the shape funciotns and their
2118c                    gradient evaluated using the quadrature rule desired
2119c                    for computing the dmod. Qwt contains the weights of the
2120c                    quad. points.
2121
2122      include "common.h"
2123      include "mpif.h"
2124      include "auxmpi.h"
2125
2126c
2127      dimension fres(nshg,24),         fwr(nshg),
2128     &          strnrm(nshg),         cdelsq1(nfath),
2129     &          xnum(nshg),           xden(nshg),
2130     &          xmij(nshg,6),         xlij(nshg,6),
2131     &          xnude(nfath,5),        xnuder(nfath,5),
2132     &          nsons(nshg),           xfac(nshg,5),
2133     &          strl(numel,ngauss),     xa(nfath,3),
2134     &          y(nshg,5),            yold(nshg,5),
2135     &          ifath(nshg),          iper(nshg),
2136     &          ilwork(nlwork),!        xmudmi(numel,ngauss),
2137     &          x(numnp,3),
2138     &          shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT),
2139     &          xnutf(nfath),          xkap(nfath),
2140     &          fwr2(nshg),            fwr3(nshg),
2141     &          xlamb1(nfath),         xlamb2(nfath),
2142     &          fwr4(nshg)
2143c
2144
2145      fres = zero
2146      yold(:,1)=y(:,4)
2147      yold(:,2:4)=y(:,1:3)
2148c
2149c
2150c  hack in an interesting velocity field (uncomment to test dmod)
2151c
2152c      yold(:,5) = 1.0  ! Debugging
2153c      yold(:,2) = 2.0*x(:,1) - 3.0*x(:,2)
2154c      yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2)
2155c      yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3)
2156c      yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable
2157c                               suitable for the
2158
2159
2160      intrul=intg(1,itseq)
2161      intind=intpt(intrul)
2162
2163      do iblk = 1,nelblk
2164        lcsyst = lcblk(3,iblk)
2165        iel  = lcblk(1,iblk) !Element number where this block begins
2166        npro = lcblk(1,iblk+1) - iel
2167        lelCat = lcblk(2,iblk)
2168        nenl = lcblk(5,iblk)
2169        nshl = lcblk(10,iblk)
2170        inum  = iel + npro - 1
2171
2172        ngauss = nint(lcsyst)
2173        ngaussf = nintf(lcsyst)
2174
2175        call asithf (yold, x, strl(iel:inum,:), mien(iblk)%p, fres,
2176     &               shglf(lcsyst,:,1:nshl,:),
2177     &               shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
2178
2179      enddo
2180c
2181
2182      if(numpe>1) call commu (fres, ilwork, 24, 'in ')
2183c
2184c account for periodicity in filtered variables
2185c
2186      do j = 1,nshg
2187        i = iper(j)
2188        if (i .ne. j) then
2189           fres(i,:) = fres(i,:) + fres(j,:)
2190        endif
2191      enddo
2192      do j = 1,nshg
2193        i = iper(j)
2194        if (i .ne. j) then
2195           fres(j,:) = fres(i,:)
2196        endif
2197      enddo
2198
2199      if(numpe>1)   call commu (fres, ilwork, 24, 'out')
2200
2201      fres(:,23) = one / fres(:,23)
2202      do j = 1,22
2203        fres(:,j) = fres(:,j) * fres(:,23)
2204      enddo
2205c
2206c.....at this point fres is really all of our filtered quantities
2207c     at the nodes
2208c
2209
2210      xlij(:,1) = fres(:,4) - fres(:,1)*fres(:,1)
2211      xlij(:,2) = fres(:,5) - fres(:,2)*fres(:,2)
2212      xlij(:,3) = fres(:,6) - fres(:,3)*fres(:,3)
2213      xlij(:,4) = fres(:,7) - fres(:,1)*fres(:,2)
2214      xlij(:,5) = fres(:,8) - fres(:,1)*fres(:,3)
2215      xlij(:,6) = fres(:,9) - fres(:,2)*fres(:,3)
2216
2217      strnrm = sqrt(
2218     &  two * (fres(:,10)**2 + fres(:,11)**2 + fres(:,12)**2)
2219     &  + four * ( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) )
2220
2221      xfac(:,1) = strnrm*strnrm*( fres(:,10)**2 + fres(:,11)**2 +
2222     &     fres(:,12)**2
2223     &     + two*( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) )
2224
2225      xfac(:,2) = strnrm*( xlij(:,1)*fres(:,10) + xlij(:,2)*fres(:,11)
2226     &     + xlij(:,3)*fres(:,12) +
2227     &     two*(xlij(:,4)*fres(:,13) + xlij(:,5)*fres(:,14) +
2228     &     xlij(:,6)*fres(:,15)) )
2229
2230      xfac(:,3) = strnrm*( fres(:,10)*fres(:,16) + fres(:,11)*fres(:,17)
2231     &     + fres(:,12)*fres(:,18) +
2232     &     two*(fres(:,13)*fres(:,19) + fres(:,14)*fres(:,20) +
2233     &     fres(:,15)*fres(:,21)) )
2234
2235      xfac(:,4) = xlij(:,1)*fres(:,16) + xlij(:,2)*fres(:,17)
2236     &     + xlij(:,3)*fres(:,18) +
2237     &     two*(xlij(:,4)*fres(:,19) + xlij(:,5)*fres(:,20) +
2238     &     xlij(:,6)*fres(:,21))
2239
2240      xfac(:,5) = fres(:,16)*fres(:,16) + fres(:,17)*fres(:,17)
2241     &     + fres(:,18)*fres(:,18) +
2242     &     two*(fres(:,19)*fres(:,19) + fres(:,20)*fres(:,20) +
2243     &     fres(:,21)*fres(:,21))
2244
2245
2246c      xfac(:,1) = one ! Debugging
2247c      xfac(:,2) = one
2248c      xfac(:,3) = two
2249c      xfac(:,4) = one
2250c      xfac(:,5) = one
2251
2252c  zero on processor periodic nodes so that they will not be added twice
2253
2254      do j = 1, nshg
2255          i = iper(j)
2256          if (i .ne. j) then
2257            xfac(j,1) = zero
2258            xfac(j,2) = zero
2259            xfac(j,3) = zero
2260            xfac(j,4) = zero
2261            xfac(j,5) = zero
2262          endif
2263       enddo
2264
2265      if (numpe.gt.1) then
2266
2267         numtask = ilwork(1)
2268         itkbeg = 1
2269
2270c zero the nodes that are "solved" on the other processors
2271         do itask = 1, numtask
2272
2273            iacc   = ilwork (itkbeg + 2)
2274            numseg = ilwork (itkbeg + 4)
2275
2276            if (iacc .eq. 0) then
2277               do is = 1,numseg
2278                  isgbeg = ilwork (itkbeg + 3 + 2*is)
2279                  lenseg = ilwork (itkbeg + 4 + 2*is)
2280                  isgend = isgbeg + lenseg - 1
2281                  xfac(isgbeg:isgend,:) = zero
2282               enddo
2283            endif
2284
2285            itkbeg = itkbeg + 4 + 2*numseg
2286
2287         enddo
2288
2289      endif
2290
2291c... Debugging
2292
2293      xatm1 = sum(xfac(:,1))
2294      xatm2 = sum(xfac(:,2))
2295      xatm3 = sum(xfac(:,3))
2296      xatm4 = sum(xfac(:,4))
2297      xatm5 = sum(xfac(:,5))
2298
2299
2300c
2301c Description of arrays.   Each processor has an array of length equal
2302c to the total number of fathers times 2 xnude(nfathers,2). One to collect
2303c the numerator and one to collect the denominator.  There is also an array
2304c of length nshg on each processor which tells the father number of each
2305c on processor node, ifath(nnshg).  Finally, there is an arry of length
2306c nfathers to tell the total (on all processors combined) number of sons
2307c for each father.
2308c
2309c  Now loop over nodes and accumlate the numerator and the denominator
2310c  to the father nodes.  Only on processor addition at this point.
2311c  Note that serrogate fathers are collect some for the case where some
2312c  sons are on another processor
2313c
2314      xnude = zero
2315      do i = 1,nshg
2316         xnude(ifath(i),1) = xnude(ifath(i),1) + xfac(i,1)
2317         xnude(ifath(i),2) = xnude(ifath(i),2) + xfac(i,2)
2318         xnude(ifath(i),3) = xnude(ifath(i),3) + xfac(i,3)
2319         xnude(ifath(i),4) = xnude(ifath(i),4) + xfac(i,4)
2320         xnude(ifath(i),5) = xnude(ifath(i),5) + xfac(i,5)
2321      enddo
2322
2323c
2324c Now  the true fathers and serrogates combine results and update
2325c each other.
2326c
2327      if(numpe .gt. 1)then
2328         call drvAllreduce(xnude, xnuder,5*nfath)
2329c
2330c  xnude is the sum of the sons for each father on this processor
2331c
2332c  xnuder is the sum of the sons for each father on all processor combined
2333c  (the same as if we had not partitioned the mesh for each processor)
2334c
2335c   For each father we have precomputed the number of sons (including
2336c   the sons off processor).
2337c
2338c   Now divide by number of sons to get the average (not really necessary
2339c   for dynamic model since ratio will cancel nsons at each father)
2340c
2341c         xnuder(:,1) = xnuder(:,1)  / nsons(:)
2342c         xnuder(:,2) = xnuder(:,2)  / nsons(:)
2343c         xnuder(:,3) = xnuder(:,3)  / nsons(:)
2344c         xnuder(:,4) = xnuder(:,4)  / nsons(:)
2345c         xnuder(:,5) = xnuder(:,5)  / nsons(:)
2346c
2347c  the next line are the  a, b, c coefficients in the quadratic eq.
2348c
2349
2350         do i = 1,nfath
2351            xa(i,1) = two*cdelsq1(i)*xnuder(i,1) +
2352     &           xnuder(i,2)
2353            xa(i,2) = four*cdelsq1(i)*xnuder(i,3) +
2354     &           xnuder(i,4)
2355            xa(i,3) = two*cdelsq1(i)*xnuder(i,5)
2356
2357c            xa(i,1) = xnuder(ifath(i),1) + ! Debugging
2358c     &           xnuder(ifath(i),2)
2359c            xa(i,2) = xnuder(ifath(i),3) +
2360c     &           xnuder(ifath(i),4)
2361c            xa(i,3) = xnuder(ifath(i),5)
2362
2363
2364         enddo
2365      else
2366
2367c         xnude(:,1) = xnude(:,1)  / nsons(:)
2368c         xnude(:,2) = xnude(:,2)  / nsons(:)
2369c         xnude(:,3) = xnude(:,3)  / nsons(:)
2370c         xnude(:,4) = xnude(:,4)  / nsons(:)
2371c         xnude(:,5) = xnude(:,5)  / nsons(:)
2372
2373         do i = 1,nfath
2374            xa(i,1) = two*cdelsq1(i)*xnude(i,1) +
2375     &           xnude(i,2)
2376            xa(i,2) = four*cdelsq1(i)*xnude(i,3) +
2377     &           xnude(i,4)
2378            xa(i,3) = two*cdelsq1(i)*xnude(i,5)
2379
2380c            xa(i,1) = xnude(ifath(i),1) + ! Debugging
2381c     &           xnude(ifath(i),2)
2382c            xa(i,2) = xnude(ifath(i),3) +
2383c     &           xnude(ifath(i),4)
2384c            xa(i,3) = xnude(ifath(i),5)
2385
2386         enddo
2387      endif
2388
2389c... Solve a*x*x - b*x + c
2390
2391
2392      do i = 1, nfath
2393
2394      xdisc = xa(i,2)**2 - four*xa(i,1)*xa(i,3)
2395
2396      if (xdisc .lt. zero) then
2397         write(*,*) '*********Warning on filter width ratio********'
2398      xlamb1(i) = fwr1
2399      xlamb2(i) = fwr1
2400      if (xdisc .lt. -0.5d0) then
2401         write(*,*) '*********Warning on filter width ratio********'
2402      endif
2403      endif
2404
2405      if (xdisc .eq. zero) then
2406      xlamb1(i) = xa(i,2) / (two*xa(i,1))
2407      xlamb2(i) = xa(i,2) / (two*xa(i,1))
2408      endif
2409
2410      if (xdisc .gt. zero) then
2411      xlamb1(i)= ( xa(i,2) + sqrt( xa(i,2)**2 - four*xa(i,1)*xa(i,3) ) )
2412     &     / (two*xa(i,1))
2413      xlamb2(i)= ( xa(i,2) - sqrt( xa(i,2)**2 - four*xa(i,1)*xa(i,3) ) )
2414     &     / (two*xa(i,1))
2415      endif
2416
2417      enddo
2418
2419      do i = 1, nshg
2420         fwr2(i) = xlamb1(ifath(i))
2421         fwr3(i) = xlamb2(ifath(i))
2422      enddo
2423
2424      if (myrank .eq. master) then
2425         do i = 1, nfath
2426            write(23,*)i,xlamb1(i), xlamb2(i)
2427         enddo
2428      endif
2429      call flush(23)
2430
2431
2432
2433      do i = 1, nfath
2434         xkap(i) = cdelsq1(i) / xlamb2(i)
2435         xa(i,1) = two*xkap(i)*xnuder(i,1)
2436         xa(i,2) = four*xkap(i)*xnuder(i,3) - xnuder(i,2)
2437         xa(i,3) = two*xkap(i)*xnuder(i,5) - xnuder(i,4)
2438
2439         xlamb1(i)= ( xa(i,2) + sqrt( xa(i,2)**2 - four*xa(i,1)*xa(i,3)
2440     &        ) )/ (two*xa(i,1))
2441         xlamb2(i)= ( xa(i,2) - sqrt( xa(i,2)**2 - four*xa(i,1)*xa(i,3)
2442     &        ) )/ (two*xa(i,1))
2443
2444      enddo
2445
2446
2447      if (myrank .eq. master) then
2448         do i = 1, nfath
2449            write(255,*)i, xlamb1(i), xlamb2(i)
2450         enddo
2451      endif
2452      call flush(255)
2453
2454      fwr4(:) = xlamb1(ifath(:))
2455
2456      return
2457      end
2458      subroutine DFWRsfdmc (y,      shgl,      shp,
2459     &                   iper,   ilwork,
2460     &                   nsons,  ifath,     x, fwr2, fwr3)
2461
2462      use pointer_data
2463
2464      use quadfilt   ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf),
2465c                    shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf).
2466c                    Shpf and shglf are the shape funciotns and their
2467c                    gradient evaluated using the quadrature rule desired
2468c                    for computing the dmod. Qwt contains the weights of the
2469c                    quad. points.
2470
2471
2472
2473      include "common.h"
2474      include "mpif.h"
2475      include "auxmpi.h"
2476
2477c
2478      dimension fres(nshg,24),         fwr(nshg),
2479     &          strnrm(nshg),         cdelsq(nshg),
2480     &          cdelsq2(nshg),
2481     &          xnum(nshg),           xden(nshg),
2482     &          xmij(nshg,6),         xlij(nshg,6),
2483     &          xnude(nfath,2),        xnuder(nfath,2),
2484     &          ynude(nfath,6),        ynuder(nfath,6),
2485     &          ui(nfath,3),           snorm(nfath),
2486     &          uir(nfath,3),          snormr(nfath),
2487     &          xm(nfath,6),           xl(nfath,6)
2488      dimension xl1(nfath,6),          xl2(nfath,6),
2489     &          xl1r(nfath,6),         xl2r(nfath,6),
2490     &          xmr(nfath,6),          xlr(nfath,6),
2491     &          nsons(nshg),
2492     &          strl(numel,ngauss),
2493     &          y(nshg,5),            yold(nshg,5),
2494     &          ifath(nshg),          iper(nshg),
2495     &          ilwork(nlwork),!        xmudmi(numel,ngauss),
2496     &          x(numnp,3)
2497      dimension shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT),
2498     &          xnutf(nfath),         xfac(nshg,5),
2499     &          fwr2(nshg),           fwr3(nshg)
2500
2501      character*10 cname
2502      character*30 fname1, fname2, fname3, fname4, fname5, fname6,
2503     &             fname0
2504c
2505c
2506c   setup the weights for time averaging of cdelsq (now in quadfilt module)
2507c
2508      denom=max(1.0d0*(lstep),one)
2509      if(dtavei.lt.0) then
2510         wcur=one/denom
2511      else
2512         wcur=dtavei
2513      endif
2514      whist=1.0-wcur
2515
2516      if (istep .eq. 0) then
2517         xnd      = zero
2518         xmodcomp = zero
2519         xmcomp  = zero
2520         xlcomp  = zero
2521         xl1comp  = zero
2522         xl2comp  = zero
2523         ucomp    = zero
2524         scomp    = zero
2525      endif
2526
2527
2528      fres = zero
2529      yold(:,1)=y(:,4)
2530      yold(:,2:4)=y(:,1:3)
2531c
2532
2533c
2534c  hack in an interesting velocity field (uncomment to test dmod)
2535c
2536c      yold(:,5) = 1.0  ! Debugging
2537c      yold(:,2) = 2.0*x(:,1) - 3.0*x(:,2)
2538c      yold(:,2) = 2.0
2539c      yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2)
2540c      yold(:,3) = 3.0
2541c      yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3)
2542c      yold(:,4) = 4.0
2543c      yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable
2544c                               suitable for the
2545
2546
2547
2548      intrul=intg(1,itseq)
2549      intind=intpt(intrul)
2550
2551      do iblk = 1,nelblk
2552        lcsyst = lcblk(3,iblk)
2553        iel  = lcblk(1,iblk) !Element number where this block begins
2554        npro = lcblk(1,iblk+1) - iel
2555        lelCat = lcblk(2,iblk)
2556        nenl = lcblk(5,iblk)
2557        nshl = lcblk(10,iblk)
2558        inum  = iel + npro - 1
2559
2560        ngauss = nint(lcsyst)
2561
2562        call asithf (yold, x, strl(iel:inum,:), mien(iblk)%p, fres,
2563     &               shglf(lcsyst,:,1:nshl,:),
2564     &               shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
2565
2566      enddo
2567c
2568
2569      if (ngaussf .ne. ngauss) then
2570      do iblk = 1,nelblk
2571        lcsyst = lcblk(3,iblk)
2572        iel  = lcblk(1,iblk) !Element number where this block begins
2573        npro = lcblk(1,iblk+1) - iel
2574        lelCat = lcblk(2,iblk)
2575        nenl = lcblk(5,iblk)
2576        nshl = lcblk(10,iblk)
2577        inum  = iel + npro - 1
2578
2579        ngauss = nint(lcsyst)
2580
2581        call getstrl (yold, x,      mien(iblk)%p,
2582     &               strl(iel:inum,:), shgl(lcsyst,:,1:nshl,:),
2583     &               shp(lcsyst,1:nshl,:))
2584
2585      enddo
2586      endif
2587c
2588c
2589C must fix for abc and dynamic model
2590c      if(iabc==1)   !are there any axisym bc's
2591c     &      call rotabc(res, iBC, BC,nflow, 'in ')
2592c
2593      if(numpe>1) call commu (fres, ilwork, 24, 'in ')
2594c
2595c account for periodicity in filtered variables
2596c
2597      do j = 1,nshg
2598        i = iper(j)
2599        if (i .ne. j) then
2600           fres(i,:) = fres(i,:) + fres(j,:)
2601        endif
2602      enddo
2603      do j = 1,nshg
2604        i = iper(j)
2605        if (i .ne. j) then
2606           fres(j,:) = fres(i,:)
2607        endif
2608      enddo
2609
2610      if(numpe>1)   call commu (fres, ilwork, 24, 'out')
2611
2612      fres(:,23) = one / fres(:,23)
2613      do j = 1,22
2614        fres(:,j) = fres(:,j) * fres(:,23)
2615      enddo
2616c     fres(:,24) = fres(:,24) * fres(:,23)
2617c
2618c.....at this point fres is really all of our filtered quantities
2619c     at the nodes
2620c
2621
2622      strnrm = sqrt(
2623     &  two * (fres(:,10)**2 + fres(:,11)**2 + fres(:,12)**2)
2624     &  + four * ( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) )
2625
2626c      fwr = fwr1 * fres(:,22) * strnrm
2627      fwr = fwr3 * fres(:,22) * strnrm
2628
2629      xmij(:,1) = -fwr
2630     &             * fres(:,10) + fres(:,16)
2631      xmij(:,2) = -fwr
2632     &             * fres(:,11) + fres(:,17)
2633      xmij(:,3) = -fwr
2634     &             * fres(:,12) + fres(:,18)
2635
2636      xmij(:,4) = -fwr * fres(:,13) + fres(:,19)
2637      xmij(:,5) = -fwr * fres(:,14) + fres(:,20)
2638      xmij(:,6) = -fwr * fres(:,15) + fres(:,21)
2639
2640      fres(:,22) = one / fres(:,22)
2641
2642      xlij(:,1) = fres(:,4) - fres(:,1) * fres(:,1) * fres(:,22)
2643      xlij(:,2) = fres(:,5) - fres(:,2) * fres(:,2) * fres(:,22)
2644      xlij(:,3) = fres(:,6) - fres(:,3) * fres(:,3) * fres(:,22)
2645      xlij(:,4) = fres(:,7) - fres(:,1) * fres(:,2) * fres(:,22)
2646      xlij(:,5) = fres(:,8) - fres(:,1) * fres(:,3) * fres(:,22)
2647      xlij(:,6) = fres(:,9) - fres(:,2) * fres(:,3) * fres(:,22)
2648
2649      xnum =        xlij(:,1) * xmij(:,1) + xlij(:,2) * xmij(:,2)
2650     &                                    + xlij(:,3) * xmij(:,3)
2651     &     + two * (xlij(:,4) * xmij(:,4) + xlij(:,5) * xmij(:,5)
2652     &                                    + xlij(:,6) * xmij(:,6))
2653      xden =        xmij(:,1) * xmij(:,1) + xmij(:,2) * xmij(:,2)
2654     &                                    + xmij(:,3) * xmij(:,3)
2655     &     + two * (xmij(:,4) * xmij(:,4) + xmij(:,5) * xmij(:,5)
2656     &                                    + xmij(:,6) * xmij(:,6))
2657      xden = two * xden
2658
2659c... For collectection of statistics on dyn. model components
2660
2661      xfac(:,1) = strnrm*strnrm*( fres(:,10)**2 + fres(:,11)**2 +
2662     &     fres(:,12)**2
2663     &     + two*( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) )
2664
2665      xfac(:,2) = strnrm*( xlij(:,1)*fres(:,10) + xlij(:,2)*fres(:,11)
2666     &     + xlij(:,3)*fres(:,12) +
2667     &     two*(xlij(:,4)*fres(:,13) + xlij(:,5)*fres(:,14) +
2668     &     xlij(:,6)*fres(:,15)) )
2669
2670      xfac(:,3) = strnrm*( fres(:,10)*fres(:,16) + fres(:,11)*fres(:,17)
2671     &     + fres(:,12)*fres(:,18) +
2672     &     two*(fres(:,13)*fres(:,19) + fres(:,14)*fres(:,20) +
2673     &     fres(:,15)*fres(:,21)) )
2674
2675      xfac(:,4) = xlij(:,1)*fres(:,16) + xlij(:,2)*fres(:,17)
2676     &     + xlij(:,3)*fres(:,18) +
2677     &     two*(xlij(:,4)*fres(:,19) + xlij(:,5)*fres(:,20) +
2678     &     xlij(:,6)*fres(:,21))
2679
2680      xfac(:,5) = fres(:,16)*fres(:,16) + fres(:,17)*fres(:,17)
2681     &     + fres(:,18)*fres(:,18) +
2682     &     two*(fres(:,19)*fres(:,19) + fres(:,20)*fres(:,20) +
2683     &     fres(:,21)*fres(:,21))
2684
2685c  zero on processor periodic nodes so that they will not be added twice
2686        do j = 1,numnp
2687          i = iper(j)
2688          if (i .ne. j) then
2689            xnum(j) = zero
2690            xden(j) = zero
2691            xfac(j,:) = zero
2692            xmij(j,:) = zero
2693            xlij(j,:) = zero
2694            fres(j,:) = zero
2695            strnrm(j) = zero
2696          endif
2697        enddo
2698
2699      if (numpe.gt.1) then
2700
2701         numtask = ilwork(1)
2702         itkbeg = 1
2703
2704c zero the nodes that are "solved" on the other processors
2705         do itask = 1, numtask
2706
2707            iacc   = ilwork (itkbeg + 2)
2708            numseg = ilwork (itkbeg + 4)
2709
2710            if (iacc .eq. 0) then
2711               do is = 1,numseg
2712                  isgbeg = ilwork (itkbeg + 3 + 2*is)
2713                  lenseg = ilwork (itkbeg + 4 + 2*is)
2714                  isgend = isgbeg + lenseg - 1
2715                  xnum(isgbeg:isgend) = zero
2716                  xden(isgbeg:isgend) = zero
2717                  strnrm(isgbeg:isgend) = zero
2718                  xfac(isgbeg:isgend,:) = zero
2719                  xmij(isgbeg:isgend,:) = zero
2720                  xlij(isgbeg:isgend,:) = zero
2721                  fres(isgbeg:isgend,:) = zero
2722               enddo
2723            endif
2724
2725            itkbeg = itkbeg + 4 + 2*numseg
2726
2727         enddo
2728
2729      endif
2730c
2731c Description of arrays.   Each processor has an array of length equal
2732c to the total number of fathers times 2 xnude(nfathers,2). One to collect
2733c the numerator and one to collect the denominator.  There is also an array
2734c of length nshg on each processor which tells the father number of each
2735c on processor node, ifath(nnshg).  Finally, there is an arry of length
2736c nfathers to tell the total (on all processors combined) number of sons
2737c for each father.
2738c
2739c  Now loop over nodes and accumlate the numerator and the denominator
2740c  to the father nodes.  Only on processor addition at this point.
2741c  Note that serrogate fathers are collect some for the case where some
2742c  sons are on another processor
2743c
2744      xnude = zero
2745      ynude = zero
2746      xm    = zero
2747      xl    = zero
2748      xl1   = zero
2749      xl2   = zero
2750      ui    = zero
2751      snorm = zero
2752
2753      do i = 1,nshg
2754         xnude(ifath(i),1) = xnude(ifath(i),1) + xnum(i)
2755         xnude(ifath(i),2) = xnude(ifath(i),2) + xden(i)
2756
2757         ynude(ifath(i),1) = ynude(ifath(i),1) + xfac(i,1)
2758         ynude(ifath(i),2) = ynude(ifath(i),2) + xfac(i,2)
2759         ynude(ifath(i),3) = ynude(ifath(i),3) + xfac(i,3)
2760         ynude(ifath(i),4) = ynude(ifath(i),4) + xfac(i,4)
2761         ynude(ifath(i),5) = ynude(ifath(i),5) + xfac(i,5)
2762
2763         xm(ifath(i),1) = xm(ifath(i),1) + xmij(i,1)
2764         xm(ifath(i),2) = xm(ifath(i),2) + xmij(i,2)
2765         xm(ifath(i),3) = xm(ifath(i),3) + xmij(i,3)
2766         xm(ifath(i),4) = xm(ifath(i),4) + xmij(i,4)
2767         xm(ifath(i),5) = xm(ifath(i),5) + xmij(i,5)
2768         xm(ifath(i),6) = xm(ifath(i),6) + xmij(i,6)
2769
2770         xl(ifath(i),1) = xl(ifath(i),1) + xlij(i,1)
2771         xl(ifath(i),2) = xl(ifath(i),2) + xlij(i,2)
2772         xl(ifath(i),3) = xl(ifath(i),3) + xlij(i,3)
2773         xl(ifath(i),4) = xl(ifath(i),4) + xlij(i,4)
2774         xl(ifath(i),5) = xl(ifath(i),5) + xlij(i,5)
2775         xl(ifath(i),6) = xl(ifath(i),6) + xlij(i,6)
2776
2777         xl1(ifath(i),1) = xl1(ifath(i),1) + fres(i,4)
2778         xl1(ifath(i),2) = xl1(ifath(i),2) + fres(i,5)
2779         xl1(ifath(i),3) = xl1(ifath(i),3) + fres(i,6)
2780         xl1(ifath(i),4) = xl1(ifath(i),4) + fres(i,7)
2781         xl1(ifath(i),5) = xl1(ifath(i),5) + fres(i,8)
2782         xl1(ifath(i),6) = xl1(ifath(i),6) + fres(i,9)
2783
2784         xl2(ifath(i),1) = xl2(ifath(i),1) + fres(i,1)*fres(i,1)
2785         xl2(ifath(i),2) = xl2(ifath(i),2) + fres(i,2)*fres(i,2)
2786         xl2(ifath(i),3) = xl2(ifath(i),3) + fres(i,3)*fres(i,3)
2787         xl2(ifath(i),4) = xl2(ifath(i),4) + fres(i,1)*fres(i,2)
2788         xl2(ifath(i),5) = xl2(ifath(i),5) + fres(i,1)*fres(i,3)
2789         xl2(ifath(i),6) = xl2(ifath(i),6) + fres(i,2)*fres(i,3)
2790
2791         ui(ifath(i),1) = ui(ifath(i),1) + fres(i,1)
2792         ui(ifath(i),2) = ui(ifath(i),2) + fres(i,2)
2793         ui(ifath(i),3) = ui(ifath(i),3) + fres(i,3)
2794
2795         snorm(ifath(i)) = snorm(ifath(i)) + strnrm(i)
2796
2797      enddo
2798
2799c
2800c Now  the true fathers and serrogates combine results and update
2801c each other.
2802c
2803      if(numpe .gt. 1)then
2804         call drvAllreduce(xnude, xnuder,2*nfath)
2805         call drvAllreduce(ynude, ynuder,6*nfath)
2806         call drvAllreduce(xm, xmr,6*nfath)
2807         call drvAllreduce(xl, xlr,6*nfath)
2808         call drvAllreduce(xl1, xl1r,6*nfath)
2809         call drvAllreduce(xl2, xl2r,6*nfath)
2810         call drvAllreduce(ui, uir,3*nfath)
2811         call drvAllreduce(snorm, snormr,nfath)
2812
2813         do i = 1, nfath
2814            ynuder(i,6) = ( ynuder(i,4) - fwr1*ynuder(i,2) ) /
2815     &           ( two*ynuder(i,5) - four*fwr1*ynuder(i,3)
2816     &           + two*fwr1*fwr1*ynuder(i,1) )
2817         enddo
2818
2819         cdelsq2(:) = ynuder(ifath(:),6)  ! For comparison w/ cdelsq
2820c
2821c  xnude is the sum of the sons for each father on this processor
2822c
2823c  xnuder is the sum of the sons for each father on all processor combined
2824c  (the same as if we had not partitioned the mesh for each processor)
2825c
2826c   For each father we have precomputed the number of sons (including
2827c   the sons off processor).
2828c
2829c   Now divide by number of sons to get the average (not really necessary
2830c   for dynamic model since ratio will cancel nsons at each father)
2831c
2832         xnuder(:,1) = xnuder(:,1) / nsons(:)
2833         xnuder(:,2) = xnuder(:,2) / nsons(:)
2834
2835         do m = 1, 5
2836         ynuder(:,m) = ynuder(:,m)/nsons(:)
2837         enddo
2838         do m = 1,6
2839         xmr(:,m) = xmr(:,m)/nsons(:)
2840         xlr(:,m) = xlr(:,m)/nsons(:)
2841         xl1r(:,m) = xl1r(:,m)/nsons(:)
2842         xl2r(:,m) = xl2r(:,m)/nsons(:)
2843         enddo
2844
2845         uir(:,1) = uir(:,1)/nsons(:)
2846         uir(:,2) = uir(:,2)/nsons(:)
2847         uir(:,3) = uir(:,3)/nsons(:)
2848
2849         snormr(:) = snormr(:)/nsons(:)
2850c
2851cc  the next line is c \Delta^2
2852cc
2853cc         xnuder(:,1) = xnuder(:,1) / (xnuder(:,2) + 1.d-09)
2854cc         do i = 1,nshg
2855cc            cdelsq(i) = xnuder(ifath(i),1)
2856cc         enddo
2857
2858            numNden(:,1) = whist*numNden(:,1)+wcur*xnuder(ifath(:),1)
2859            numNden(:,2) = whist*numNden(:,2)+wcur*xnuder(ifath(:),2)
2860            cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09)
2861
2862c            cdelsq(:) = xnuder(ifath(:),1)/(xnuder(ifath(:),2)+1.d-09)
2863
2864            xnd(:,1) = xnd(:,1) + xnuder(:,1)
2865            xnd(:,2) = xnd(:,2) + xnuder(:,2)
2866
2867            xmodcomp(:,1) = xmodcomp(:,1)+ynuder(:,1)
2868            xmodcomp(:,2) = xmodcomp(:,2)+ynuder(:,2)
2869            xmodcomp(:,3) = xmodcomp(:,3)+ynuder(:,3)
2870            xmodcomp(:,4) = xmodcomp(:,4)+ynuder(:,4)
2871            xmodcomp(:,5) = xmodcomp(:,5)+ynuder(:,5)
2872
2873            xmcomp(:,:) = xmcomp(:,:)+xmr(:,:)
2874            xlcomp(:,:) = xlcomp(:,:)+xlr(:,:)
2875
2876            xl1comp(:,:) = xl1comp(:,:)+xl1r(:,:)
2877            xl2comp(:,:) = xl2comp(:,:)+xl2r(:,:)
2878
2879            ucomp(:,:) = ucomp(:,:)+uir(:,:)
2880            u1 = uir(32,1)
2881            scomp(:)   = scomp(:)+snormr(:)
2882
2883      else
2884
2885         xnude(:,1) = xnude(:,1)/nsons(:)
2886         xnude(:,2) = xnude(:,2)/nsons(:)
2887
2888         do m = 1, 5
2889         ynude(:,m) = ynude(:,m)/nsons(:)
2890         enddo
2891         do m = 1,6
2892         xm(:,m) = xm(:,m)/nsons(:)
2893         xl(:,m) = xl(:,m)/nsons(:)
2894         xl1(:,m) = xl1(:,m)/nsons(:)
2895         xl2(:,m) = xl2(:,m)/nsons(:)
2896         enddo
2897
2898         ui(:,1) = ui(:,1)/nsons(:)
2899         ui(:,2) = ui(:,2)/nsons(:)
2900         ui(:,3) = ui(:,3)/nsons(:)
2901
2902         snorm(:) = snorm(:)/nsons(:)
2903
2904c
2905c     the next line is c \Delta^2, not nu_T but we want to save the
2906c     memory
2907c
2908
2909cc         xnude(:,1) = xnude(:,1) / (xnude(:,2) + 1.d-09)
2910cc        do i = 1,nshg
2911cc            cdelsq(i) = xnude(ifath(i),1)
2912cc         enddo
2913cc      endif
2914
2915         do i = 1, nfath
2916            ynude(i,6) = ( ynude(i,4) - fwr1*ynude(i,2) ) /
2917     &           ( two*ynude(i,5) - four*fwr1*ynude(i,3)
2918     &           + fwr1*fwr1*ynude(i,1) )
2919         enddo
2920
2921            numNden(:,1) = whist*numNden(:,1)+wcur*xnude(ifath(:),1)
2922            numNden(:,2) = whist*numNden(:,2)+wcur*xnude(ifath(:),2)
2923
2924            xnd(:,1) = xnd(:,1)+xnude(:,1)
2925            xnd(:,2) = xnd(:,2)+xnude(:,2)
2926
2927            cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09)
2928
2929c            cdelsq(:) = xnude(ifath(:),1)/(xnude(ifath(:),2))!+1.d-09)
2930
2931
2932          cdelsq2(:) = ynude(ifath(:),6)  ! For comparison w/ cdelsq
2933
2934            xmodcomp(:,1) = xmodcomp(:,1)+ynude(:,1)
2935            xmodcomp(:,2) = xmodcomp(:,2)+ynude(:,2)
2936            xmodcomp(:,3) = xmodcomp(:,3)+ynude(:,3)
2937            xmodcomp(:,4) = xmodcomp(:,4)+ynude(:,4)
2938            xmodcomp(:,5) = xmodcomp(:,5)+ynude(:,5)
2939
2940            xmcomp(:,:) = xmcomp(:,:)+xm(:,:)
2941            xlcomp(:,:) = xlcomp(:,:)+xl(:,:)
2942
2943            xl1comp(:,:) = xl1comp(:,:)+xl1(:,:)
2944            xl2comp(:,:) = xl2comp(:,:)+xl2(:,:)
2945
2946            ucomp(:,:) = ucomp(:,:)+ui(:,:)
2947            scomp(:)   = scomp(:)+snorm(:)
2948
2949         endif
2950
2951c         do i = 1, nfath
2952c            xmodcomp(i,:) = xmodcomp(i,:)/nsons(i)
2953c            xmcomp(i,:) = xmcomp(i,:)/nsons(i)
2954c            xlcomp(i,:) = xlcomp(i,:)/nsons(i)
2955c            xl2comp(i,:) = xl2comp(i,:)/nsons(i)
2956c            xl1comp(i,:) = xl1comp(i,:)/nsons(i)
2957c            xnd(i,:) = xnd(i,:)/nsons(i)
2958c            scomp(i) = scomp(i)/nsons(i)
2959c            ucomp(i,:) = ucomp(i,:)/nsons(i)
2960c         enddo
2961
2962         if (myrank .eq. master) then
2963            write(*,*)'istep, nstep=', istep, nstep(1)
2964         endif
2965
2966         if ( istep .eq. (nstep(1)-1) ) then
2967         if ( myrank .eq. master) then
2968
2969            do i = 1, nfath
2970            write(365,*)xmodcomp(i,1),xmodcomp(i,2),xmodcomp(i,3),
2971     &              xmodcomp(i,4),xmodcomp(i,5)
2972
2973            write(366,*)xmcomp(i,1),xmcomp(i,2),xmcomp(i,3)
2974            write(367,*)xmcomp(i,4),xmcomp(i,5),xmcomp(i,6)
2975
2976            write(368,*)xlcomp(i,1),xlcomp(i,2),xlcomp(i,3)
2977            write(369,*)xlcomp(i,4),xlcomp(i,5),xlcomp(i,6)
2978
2979            write(370,*)xl1comp(i,1),xl1comp(i,2),xl1comp(i,3)
2980            write(371,*)xl1comp(i,4),xl1comp(i,5),xl1comp(i,6)
2981
2982            write(372,*)xl2comp(i,1),xl2comp(i,2),xl2comp(i,3)
2983            write(373,*)xl2comp(i,4),xl2comp(i,5),xl2comp(i,6)
2984
2985            write(374,*)xnd(i,1),xnd(i,2),scomp(i)
2986            write(375,*)ucomp(i,1),ucomp(i,2),ucomp(i,3)
2987            enddo
2988
2989            call flush(365)
2990            call flush(366)
2991            call flush(367)
2992            call flush(368)
2993            call flush(369)
2994            call flush(370)
2995            call flush(371)
2996            call flush(372)
2997            call flush(373)
2998            call flush(374)
2999            call flush(375)
3000
3001c            close(852)
3002c            close(853)
3003c            close(854)
3004
3005         endif
3006         endif
3007
3008            if (myrank .eq. master) then
3009               write(*,*)'uit uic=', ucomp(32,1),u1
3010            endif
3011
3012 555     format(e14.7,4(2x,e14.7))
3013 556     format(e14.7,5(2x,e14.7))
3014
3015
3016
3017
3018c $$$$$$$$$$$$$$$$$$$$$$$$$$$
3019      tmp1 =  MINVAL(cdelsq)
3020      tmp2 =  MAXVAL(cdelsq)
3021      if(numpe>1) then
3022         call MPI_REDUCE (tmp1, tmp3, 1,MPI_DOUBLE_PRECISION,
3023     &        MPI_MIN, master, MPI_COMM_WORLD, ierr)
3024         call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION,
3025     &        MPI_MAX, master, MPI_COMM_WORLD, ierr)
3026         tmp1=tmp3
3027         tmp2=tmp4
3028      endif
3029      if (myrank .EQ. master) then !print CDelta^2 range
3030         write(34,*)lstep,tmp1,tmp2
3031         call flush(34)
3032      endif
3033c $$$$$$$$$$$$$$$$$$$$$$$$$$$
3034
3035      if (myrank .eq. master) then
3036         write(*,*) 'cdelsq=', cdelsq(1),cdelsq(2)
3037         write(*,*) 'cdelsq=', cdelsq2(1),cdelsq2(2)
3038         write(22,*) lstep, cdelsq(1)
3039         call flush(22)
3040      endif
3041
3042      do iblk = 1,nelblk
3043         lcsyst = lcblk(3,iblk)
3044         iel  = lcblk(1,iblk)
3045         npro = lcblk(1,iblk+1) - iel
3046         lelCat = lcblk(2,iblk)
3047         inum  = iel + npro - 1
3048
3049         ngauss = nint(lcsyst)
3050
3051         call scatnu (mien(iblk)%p, strl(iel:inum,:),
3052     &        mxmudmi(iblk)%p,cdelsq,shp(lcsyst,1:nshl,:))
3053      enddo
3054c     $$$$$$$$$$$$$$$$$$$$$$$$$$$
3055c$$$  tmp1 =  MINVAL(xmudmi)
3056c$$$  tmp2 =  MAXVAL(xmudmi)
3057c$$$  if(numpe>1) then
3058c$$$  call MPI_REDUCE (tmp1, tmp3, 1, MPI_DOUBLE_PRECISION,
3059c$$$  &                 MPI_MIN, master, MPI_COMM_WORLD, ierr)
3060c$$$  call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION,
3061c$$$  &                 MPI_MAX, master, MPI_COMM_WORLD, ierr)
3062c$$$      tmp1=tmp3
3063c$$$  tmp2=tmp4
3064c$$$  endif
3065c$$$  if (myrank .EQ. master) then
3066c$$$  write(35,*) lstep,tmp1,tmp2
3067c$$$  call flush(35)
3068c$$$  endif
3069c $$$$$$$$$$$$$$$$$$$$$$$$$$$
3070
3071c
3072c  if flag set, write a restart file with info (reuse xmij's memory)
3073c
3074      if(irs.eq.11) then
3075         lstep=999
3076         xmij(:,1)=xnum(:)
3077         xmij(:,2)=xden(:)
3078         xmij(:,3)=cdelsq(:)
3079         xmij(:,5)=xlij(:,4)    !leave M_{12} in 4 and put L_{12} here
3080         call restar('out ',xmij,xlij) !also dump all of L_{ij} in ac
3081         stop
3082      endif
3083c
3084c  local clipping moved to scatnu with the creation of mxmudmi pointers
3085c
3086c$$$      rmu=datmat(1,2,1)
3087c$$$      xmudmi=min(xmudmi,1000.0*rmu) !don't let it get larger than 1000 mu
3088c$$$      xmudmi=max(xmudmi, -rmu) ! don't let (xmudmi + mu) < 0
3089c      stop !uncomment to test dmod
3090c
3091
3092
3093c  write out the nodal values of xnut (estimate since we don't calc strain
3094c  there and must use the filtered strain).
3095c
3096
3097      if ((irs .ge. 1) .and. (mod(lstep, ntout) .eq. 0)) then
3098c
3099c  collect the average strain into xnude(2)
3100c
3101         xnude(:,2) = zero
3102         do i = 1,numnp
3103            xnude(ifath(i),2) = xnude(ifath(i),2) + strnrm(i)
3104         enddo
3105
3106         if(numpe .gt. 1) then
3107             call drvAllreduce(xnude(:,2), xnuder(:,2),nfath)
3108          else
3109             xnuder=xnude
3110          endif
3111c
3112c          nut= cdelsq    * |S|
3113c
3114         xnutf=xnuder(:,1)*xnuder(:,2)/nsons(:)
3115c
3116c  collect the x and y coords into xnude
3117c
3118         xnude = zero
3119         do i = 1,numnp
3120            xnude(ifath(i),1) = xnude(ifath(i),1) + x(i,1)
3121            xnude(ifath(i),2) = xnude(ifath(i),2) + x(i,2)
3122         enddo
3123
3124         if(numpe .gt. 1)
3125     &        call drvAllreduce(xnude, xnuder,2*nfath)
3126         xnuder(:,1)=xnuder(:,1)/nsons(:)
3127         xnuder(:,2)=xnuder(:,2)/nsons(:)
3128c
3129c  xnude is the sum of the sons for each father on this processor
3130c
3131         if((myrank.eq.master)) then
3132            do i=1,nfath      ! cdelsq   * |S|
3133               write(444,*) xnuder(i,1),xnuder(i,2),xnutf(i)
3134            enddo
3135            call flush(444)
3136         endif
3137      endif
3138
3139      return
3140      end
3141      subroutine DFWRwfdmc (y,      shgl,      shp,
3142     &                   iper,   ilwork,
3143     &                   nsons,  ifath,     x,    fwr2, fwr3)
3144
3145      use pointer_data
3146
3147      use quadfilt   ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf),
3148c                    shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf).
3149c                    Shpf and shglf are the shape funciotns and their
3150c                    gradient evaluated using the quadrature rule desired
3151c                    for computing the dmod. Qwtf contains the weights of the
3152c                    quad. points.
3153
3154      include "common.h"
3155      include "mpif.h"
3156      include "auxmpi.h"
3157
3158c
3159      dimension fres(nshg,33),         fwr(nshg),
3160     &          strnrm(nshg),         cdelsq(nshg),
3161     &          cdelsq2(nshg),
3162     &          xnum(nshg),           xden(nshg),
3163     &          xmij(nshg,6),         xlij(nshg,6),
3164     &          xnude(nfath,2),        xnuder(nfath,2),
3165     &          ynude(nfath,6),        ynuder(nfath,6),
3166     &          ui(nfath,3),           snorm(nfath),
3167     &          uir(nfath,3),          snormr(nfath)
3168      dimension xm(nfath,6),           xl(nfath,6),
3169     &          xl1(nfath,6),          xl2(nfath,6),
3170     &          xl1r(nfath,6),         xl2r(nfath,6),
3171     &          xmr(nfath,6),          xlr(nfath,6),
3172     &          nsons(nshg),
3173     &          strl(numel,ngauss),
3174     &          y(nshg,5),            yold(nshg,5),
3175     &          ifath(nshg),          iper(nshg),
3176     &          ilwork(nlwork),
3177     &          x(numnp,3),
3178     &          shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT),
3179     &          xnutf(nfath),
3180     &          hfres(nshg,22),
3181     &          xfac(nshg,5),         fwr2(nshg),
3182     &          fwr3(nshg)
3183
3184      real*8 u1
3185
3186      character*10 cname
3187      character*30 fname1, fname2, fname3, fname4, fname5, fname6
3188c
3189
3190c
3191c
3192c   setup the weights for time averaging of cdelsq (now in quadfilt module)
3193c
3194
3195      denom=max(1.0d0*(lstep),one)
3196      if(dtavei.lt.0) then
3197         wcur=one/denom
3198      else
3199         wcur=dtavei
3200      endif
3201      whist=1.0-wcur
3202
3203      if (myrank .eq. master) then
3204         write(*,*)'istep=', istep
3205      endif
3206
3207      if (istep .eq. 0) then
3208         xnd      = zero
3209         xmodcomp = zero
3210         xmcomp  = zero
3211         xlcomp  = zero
3212         xl1comp  = zero
3213         xl2comp  = zero
3214         ucomp    = zero
3215         scomp    = zero
3216      endif
3217
3218
3219      fres = zero
3220      hfres = zero
3221
3222      yold(:,1)=y(:,4)
3223      yold(:,2:4)=y(:,1:3)
3224
3225c
3226c  hack in an interesting velocity field (uncomment to test dmod)
3227c
3228c      yold(:,5) = 1.0  ! Debugging
3229c      yold(:,2) = 2.0*x(:,1) - 3.0*x(:,2)
3230c      yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2)
3231c      yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3)
3232c      yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable
3233c                               suitable for the
3234
3235      do iblk = 1,nelblk
3236        lcsyst = lcblk(3,iblk)
3237        iel  = lcblk(1,iblk) !Element number where this block begins
3238        npro = lcblk(1,iblk+1) - iel
3239        lelCat = lcblk(2,iblk)
3240        nenl = lcblk(5,iblk)
3241        nshl = lcblk(10,iblk)
3242        inum  = iel + npro - 1
3243
3244        ngauss = nint(lcsyst)
3245        ngaussf = nintf(lcsyst)
3246
3247c        call hfilterBB (yold, x, mien(iblk)%p, hfres,
3248c     &               shglf(lcsyst,:,1:nshl,:),
3249c     &               shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
3250
3251        call hfilterCC (yold, x, mien(iblk)%p, hfres,
3252     &               shglf(lcsyst,:,1:nshl,:),
3253     &               shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
3254
3255      enddo
3256
3257      if(numpe>1) call commu (hfres, ilwork, 22, 'in ')
3258c
3259c... account for periodicity in filtered variables
3260c
3261      do j = 1,nshg  !    Add on-processor slave contribution to masters
3262        i = iper(j)
3263        if (i .ne. j) then
3264           hfres(i,:) = hfres(i,:) + hfres(j,:)
3265        endif
3266      enddo
3267      do j = 1,nshg ! Set on-processor slaves to be the same as masters
3268        i = iper(j)
3269        if (i .ne. j) then
3270           hfres(j,:) = hfres(i,:)
3271        endif
3272      enddo
3273
3274c... Set off-processor slaves to be the same as their masters
3275
3276      if(numpe>1)   call commu (hfres, ilwork, 22, 'out')
3277
3278
3279      hfres(:,16) = one / hfres(:,16) ! one/(volume filter kernel)
3280
3281      do j = 1, 15
3282	hfres(:,j) = hfres(:,j) * hfres(:,16)
3283      enddo
3284      do j = 17, 22
3285	hfres(:,j) = hfres(:,j) * hfres(:,16)
3286      enddo
3287
3288c... For debugging
3289
3290c      hfres(:,1) = 2.0*x(:,1) - 3.0*x(:,2)
3291c      hfres(:,2) = 3.0*x(:,1) + 4.0*x(:,2)
3292c      hfres(:,3) = 4.0*x(:,1) + x(:,2) + x(:,3)
3293
3294c... Done w/ h-filtering. Begin 2h-filtering.
3295
3296      do iblk = 1,nelblk
3297        lcsyst = lcblk(3,iblk)
3298        iel  = lcblk(1,iblk) !Element number where this block begins
3299        npro = lcblk(1,iblk+1) - iel
3300        lelCat = lcblk(2,iblk)
3301        nenl = lcblk(5,iblk)
3302        nshl = lcblk(10,iblk)
3303        inum  = iel + npro - 1
3304
3305        ngauss = nint(lcsyst)
3306        ngaussf = nintf(lcsyst)
3307
3308        call twohfilterBB (yold, x, strl(iel:inum,:), mien(iblk)%p,
3309     &               fres, hfres, shglf(lcsyst,:,1:nshl,:),
3310     &               shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
3311
3312      enddo
3313c
3314
3315
3316      if(numpe>1) call commu (fres, ilwork, 33, 'in ')
3317c
3318c account for periodicity in filtered variables
3319c
3320      do j = 1,nshg
3321        i = iper(j)
3322        if (i .ne. j) then
3323           fres(i,:) = fres(i,:) + fres(j,:)
3324        endif
3325      enddo
3326
3327      do j = 1,nshg
3328        i = iper(j)
3329        if (i .ne. j) then
3330           fres(j,:) = fres(i,:)
3331        endif
3332      enddo
3333
3334      if(numpe>1)then
3335         call commu (fres, ilwork, 33, 'out')
3336      endif
3337
3338      fres(:,22) = one / fres(:,22)
3339      do j = 1,21
3340        fres(:,j) = fres(:,j) * fres(:,22)
3341      enddo
3342      do j = 23,33
3343        fres(:,j) = fres(:,j) * fres(:,22)
3344      enddo
3345
3346
3347      do iblk = 1,nelblk
3348        lcsyst = lcblk(3,iblk)
3349        iel  = lcblk(1,iblk) !Element number where this block begins
3350        npro = lcblk(1,iblk+1) - iel
3351        lelCat = lcblk(2,iblk)
3352        nenl = lcblk(5,iblk)
3353        nshl = lcblk(10,iblk)
3354        inum  = iel + npro - 1
3355
3356        ngauss = nint(lcsyst)
3357
3358        call getstrl (yold, x,      mien(iblk)%p,
3359     &               strl(iel:inum,:), shgl(lcsyst,:,1:nshl,:),
3360     &               shp(lcsyst,1:nshl,:))
3361
3362      enddo
3363
3364c
3365c... Obtain the hat-tilde strain rate norm at the nodes
3366c
3367
3368      strnrm = sqrt(
3369     &  two * (fres(:,10)**2 + fres(:,11)**2 + fres(:,12)**2)
3370     &  + four * ( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) )
3371
3372c      fwr = fwr1 * strnrm
3373
3374      fwr = fwr1 * fwr3 * strnrm
3375
3376      xmij(:,1) = -fwr
3377     &             * fres(:,10) + fres(:,16)
3378      xmij(:,2) = -fwr
3379     &             * fres(:,11) + fres(:,17)
3380      xmij(:,3) = -fwr
3381     &             * fres(:,12) + fres(:,18)
3382
3383      xmij(:,4) = -fwr * fres(:,13) + fres(:,19)
3384      xmij(:,5) = -fwr * fres(:,14) + fres(:,20)
3385      xmij(:,6) = -fwr * fres(:,15) + fres(:,21)
3386
3387
3388      xlij(:,1) = fres(:,4) - fres(:,1) * fres(:,1)
3389      xlij(:,2) = fres(:,5) - fres(:,2) * fres(:,2)
3390      xlij(:,3) = fres(:,6) - fres(:,3) * fres(:,3)
3391      xlij(:,4) = fres(:,7) - fres(:,1) * fres(:,2)
3392      xlij(:,5) = fres(:,8) - fres(:,1) * fres(:,3)
3393      xlij(:,6) = fres(:,9) - fres(:,2) * fres(:,3)
3394
3395      xnum =        xlij(:,1) * xmij(:,1) + xlij(:,2) * xmij(:,2)
3396     &                                    + xlij(:,3) * xmij(:,3)
3397     &     + two * (xlij(:,4) * xmij(:,4) + xlij(:,5) * xmij(:,5)
3398     &                                    + xlij(:,6) * xmij(:,6))
3399      xden =        xmij(:,1) * xmij(:,1) + xmij(:,2) * xmij(:,2)
3400     &                                    + xmij(:,3) * xmij(:,3)
3401     &     + two * (xmij(:,4) * xmij(:,4) + xmij(:,5) * xmij(:,5)
3402     &                                    + xmij(:,6) * xmij(:,6))
3403      xden = two * xden
3404
3405c... For collectection of statistics on dyn. model components
3406
3407      xfac(:,1) = strnrm*strnrm*( fres(:,10)**2 + fres(:,11)**2 +
3408     &     fres(:,12)**2
3409     &     + two*( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) )
3410
3411      xfac(:,2) = strnrm*( xlij(:,1)*fres(:,10) + xlij(:,2)*fres(:,11)
3412     &     + xlij(:,3)*fres(:,12) +
3413     &     two*(xlij(:,4)*fres(:,13) + xlij(:,5)*fres(:,14) +
3414     &     xlij(:,6)*fres(:,15)) )
3415
3416      xfac(:,3) = strnrm*( fres(:,10)*fres(:,16) + fres(:,11)*fres(:,17)
3417     &     + fres(:,12)*fres(:,18) +
3418     &     two*(fres(:,13)*fres(:,19) + fres(:,14)*fres(:,20) +
3419     &     fres(:,15)*fres(:,21)) )
3420
3421      xfac(:,4) = xlij(:,1)*fres(:,16) + xlij(:,2)*fres(:,17)
3422     &     + xlij(:,3)*fres(:,18) +
3423     &     two*(xlij(:,4)*fres(:,19) + xlij(:,5)*fres(:,20) +
3424     &     xlij(:,6)*fres(:,21))
3425
3426      xfac(:,5) = fres(:,16)*fres(:,16) + fres(:,17)*fres(:,17)
3427     &     + fres(:,18)*fres(:,18) +
3428     &     two*(fres(:,19)*fres(:,19) + fres(:,20)*fres(:,20) +
3429     &     fres(:,21)*fres(:,21))
3430
3431c  zero on processor periodic nodes so that they will not be added twice
3432        do j = 1,numnp
3433          i = iper(j)
3434          if (i .ne. j) then
3435            xnum(j) = zero
3436            xden(j) = zero
3437            xfac(j,:) = zero
3438            xmij(j,:) = zero
3439            xlij(j,:) = zero
3440            fres(j,:) = zero
3441            strnrm(j) = zero
3442          endif
3443        enddo
3444
3445      if (numpe.gt.1) then
3446
3447         numtask = ilwork(1)
3448         itkbeg = 1
3449
3450c zero the nodes that are "solved" on the other processors
3451         do itask = 1, numtask
3452
3453            iacc   = ilwork (itkbeg + 2)
3454            numseg = ilwork (itkbeg + 4)
3455
3456            if (iacc .eq. 0) then
3457               do is = 1,numseg
3458                  isgbeg = ilwork (itkbeg + 3 + 2*is)
3459                  lenseg = ilwork (itkbeg + 4 + 2*is)
3460                  isgend = isgbeg + lenseg - 1
3461                  xnum(isgbeg:isgend) = zero
3462                  xden(isgbeg:isgend) = zero
3463                  strnrm(isgbeg:isgend) = zero
3464                  xfac(isgbeg:isgend,:) = zero
3465                  xmij(isgbeg:isgend,:) = zero
3466                  xlij(isgbeg:isgend,:) = zero
3467                  fres(isgbeg:isgend,:) = zero
3468               enddo
3469            endif
3470
3471            itkbeg = itkbeg + 4 + 2*numseg
3472
3473         enddo
3474
3475      endif
3476c
3477c Description of arrays.   Each processor has an array of length equal
3478c to the total number of fathers times 2 xnude(nfathers,2). One to collect
3479c the numerator and one to collect the denominator.  There is also an array
3480c of length nshg on each processor which tells the father number of each
3481c on processor node, ifath(nnshg).  Finally, there is an arry of length
3482c nfathers to tell the total (on all processors combined) number of sons
3483c for each father.
3484c
3485c  Now loop over nodes and accumlate the numerator and the denominator
3486c  to the father nodes.  Only on processor addition at this point.
3487c  Note that serrogate fathers are collect some for the case where some
3488c  sons are on another processor
3489c
3490      xnude = zero
3491      ynude = zero
3492      xm    = zero
3493      xl    = zero
3494      xl1   = zero
3495      xl2   = zero
3496      ui    = zero
3497      snorm = zero
3498
3499      do i = 1,nshg
3500         xnude(ifath(i),1) = xnude(ifath(i),1) + xnum(i)
3501         xnude(ifath(i),2) = xnude(ifath(i),2) + xden(i)
3502
3503         ynude(ifath(i),1) = ynude(ifath(i),1) + xfac(i,1)
3504         ynude(ifath(i),2) = ynude(ifath(i),2) + xfac(i,2)
3505         ynude(ifath(i),3) = ynude(ifath(i),3) + xfac(i,3)
3506         ynude(ifath(i),4) = ynude(ifath(i),4) + xfac(i,4)
3507         ynude(ifath(i),5) = ynude(ifath(i),5) + xfac(i,5)
3508
3509         xm(ifath(i),1) = xm(ifath(i),1) + xmij(i,1)
3510         xm(ifath(i),2) = xm(ifath(i),2) + xmij(i,2)
3511         xm(ifath(i),3) = xm(ifath(i),3) + xmij(i,3)
3512         xm(ifath(i),4) = xm(ifath(i),4) + xmij(i,4)
3513         xm(ifath(i),5) = xm(ifath(i),5) + xmij(i,5)
3514         xm(ifath(i),6) = xm(ifath(i),6) + xmij(i,6)
3515
3516         xl(ifath(i),1) = xl(ifath(i),1) + xlij(i,1)
3517         xl(ifath(i),2) = xl(ifath(i),2) + xlij(i,2)
3518         xl(ifath(i),3) = xl(ifath(i),3) + xlij(i,3)
3519         xl(ifath(i),4) = xl(ifath(i),4) + xlij(i,4)
3520         xl(ifath(i),5) = xl(ifath(i),5) + xlij(i,5)
3521         xl(ifath(i),6) = xl(ifath(i),6) + xlij(i,6)
3522
3523         xl1(ifath(i),1) = xl1(ifath(i),1) + fres(i,4)
3524         xl1(ifath(i),2) = xl1(ifath(i),2) + fres(i,5)
3525         xl1(ifath(i),3) = xl1(ifath(i),3) + fres(i,6)
3526         xl1(ifath(i),4) = xl1(ifath(i),4) + fres(i,7)
3527         xl1(ifath(i),5) = xl1(ifath(i),5) + fres(i,8)
3528         xl1(ifath(i),6) = xl1(ifath(i),6) + fres(i,9)
3529
3530         xl2(ifath(i),1) = xl2(ifath(i),1) + fres(i,1)*fres(i,1)
3531         xl2(ifath(i),2) = xl2(ifath(i),2) + fres(i,2)*fres(i,2)
3532         xl2(ifath(i),3) = xl2(ifath(i),3) + fres(i,3)*fres(i,3)
3533         xl2(ifath(i),4) = xl2(ifath(i),4) + fres(i,1)*fres(i,2)
3534         xl2(ifath(i),5) = xl2(ifath(i),5) + fres(i,1)*fres(i,3)
3535         xl2(ifath(i),6) = xl2(ifath(i),6) + fres(i,2)*fres(i,3)
3536
3537         ui(ifath(i),1) = ui(ifath(i),1) + fres(i,1)
3538         ui(ifath(i),2) = ui(ifath(i),2) + fres(i,2)
3539         ui(ifath(i),3) = ui(ifath(i),3) + fres(i,3)
3540
3541         snorm(ifath(i)) = snorm(ifath(i)) + strnrm(i)
3542
3543      enddo
3544
3545c
3546c Now  the true fathers and serrogates combine results and update
3547c each other.
3548c
3549      if(numpe .gt. 1)then
3550         call drvAllreduce(xnude, xnuder,2*nfath)
3551         call drvAllreduce(ynude, ynuder,6*nfath)
3552         call drvAllreduce(xm, xmr,6*nfath)
3553         call drvAllreduce(xl, xlr,6*nfath)
3554         call drvAllreduce(xl1, xl1r,6*nfath)
3555         call drvAllreduce(xl2, xl2r,6*nfath)
3556         call drvAllreduce(ui, uir,3*nfath)
3557         call drvAllreduce(snorm, snormr,nfath)
3558
3559         do i = 1, nfath
3560            ynuder(i,6) = ( ynuder(i,4) - fwr1*ynuder(i,2) ) /
3561     &           ( two*ynuder(i,5) - four*fwr1*ynuder(i,3)
3562     &           + two*fwr1*fwr1*ynuder(i,1) )
3563         enddo
3564
3565         cdelsq2(:) = ynuder(ifath(:),6)  ! For comparison w/ cdelsq
3566c
3567c  xnude is the sum of the sons for each father on this processor
3568c
3569c  xnuder is the sum of the sons for each father on all processor combined
3570c  (the same as if we had not partitioned the mesh for each processor)
3571c
3572c   For each father we have precomputed the number of sons (including
3573c   the sons off processor).
3574c
3575c   Now divide by number of sons to get the average (not really necessary
3576c   for dynamic model since ratio will cancel nsons at each father)
3577c
3578         xnuder(:,1) = xnuder(:,1) / nsons(:)
3579         xnuder(:,2) = xnuder(:,2) / nsons(:)
3580
3581         do m = 1, 5
3582         ynuder(:,m) = ynuder(:,m)/nsons(:)
3583         enddo
3584         do m = 1,6
3585         xmr(:,m) = xmr(:,m)/nsons(:)
3586         xlr(:,m) = xlr(:,m)/nsons(:)
3587         xl1r(:,m) = xl1r(:,m)/nsons(:)
3588         xl2r(:,m) = xl2r(:,m)/nsons(:)
3589         enddo
3590
3591         uir(:,1) = uir(:,1)/nsons(:)
3592         uir(:,2) = uir(:,2)/nsons(:)
3593         uir(:,3) = uir(:,3)/nsons(:)
3594
3595         snormr(:) = snormr(:)/nsons(:)
3596
3597c
3598cc  the next line is c \Delta^2
3599cc
3600cc         xnuder(:,1) = xnuder(:,1) / (xnuder(:,2) + 1.d-09)
3601cc         do i = 1,nshg
3602cc            cdelsq(i) = xnuder(ifath(i),1)
3603cc         enddo
3604
3605            numNden(:,1) = whist*numNden(:,1)+wcur*xnuder(ifath(:),1)
3606            numNden(:,2) = whist*numNden(:,2)+wcur*xnuder(ifath(:),2)
3607            cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09)
3608
3609c            cdelsq(:) = xnuder(ifath(:),1)/(xnuder(ifath(:),2)+1.d-09)
3610
3611            xnd(:,1) = xnd(:,1) + xnuder(:,1)
3612            xnd(:,2) = xnd(:,2) + xnuder(:,2)
3613
3614            xmodcomp(:,1) = xmodcomp(:,1)+ynuder(:,1)
3615            xmodcomp(:,2) = xmodcomp(:,2)+ynuder(:,2)
3616            xmodcomp(:,3) = xmodcomp(:,3)+ynuder(:,3)
3617            xmodcomp(:,4) = xmodcomp(:,4)+ynuder(:,4)
3618            xmodcomp(:,5) = xmodcomp(:,5)+ynuder(:,5)
3619
3620            xmcomp(:,:) = xmcomp(:,:)+xmr(:,:)
3621            xlcomp(:,:) = xlcomp(:,:)+xlr(:,:)
3622
3623            xl1comp(:,:) = xl1comp(:,:)+xl1r(:,:)
3624            xl2comp(:,:) = xl2comp(:,:)+xl2r(:,:)
3625
3626            ucomp(:,:) = ucomp(:,:)+uir(:,:)
3627            u1 = uir(32,1)
3628            scomp(:)   = scomp(:)+snormr(:)
3629
3630      else
3631
3632         xnude(:,1) = xnude(:,1)/nsons(:)
3633         xnude(:,2) = xnude(:,2)/nsons(:)
3634
3635         do m = 1, 5
3636         ynude(:,m) = ynude(:,m)/nsons(:)
3637         enddo
3638         do m = 1,6
3639         xm(:,m) = xm(:,m)/nsons(:)
3640         xl(:,m) = xl(:,m)/nsons(:)
3641         xl1(:,m) = xl1(:,m)/nsons(:)
3642         xl2(:,m) = xl2(:,m)/nsons(:)
3643         enddo
3644
3645         ui(:,1) = ui(:,1)/nsons(:)
3646         ui(:,2) = ui(:,2)/nsons(:)
3647         ui(:,3) = ui(:,3)/nsons(:)
3648
3649         snorm(:) = snorm(:)/nsons(:)
3650c
3651c     the next line is c \Delta^2, not nu_T but we want to save the
3652c     memory
3653c
3654
3655cc         xnude(:,1) = xnude(:,1) / (xnude(:,2) + 1.d-09)
3656cc        do i = 1,nshg
3657cc            cdelsq(i) = xnude(ifath(i),1)
3658cc         enddo
3659cc      endif
3660
3661         do i = 1, nfath
3662            ynude(i,6) = ( ynude(i,4) - fwr1*ynude(i,2) ) /
3663     &           ( two*ynude(i,5) - four*fwr1*ynude(i,3)
3664     &           + fwr1*fwr1*ynude(i,1) )
3665         enddo
3666
3667            numNden(:,1) = whist*numNden(:,1)+wcur*xnude(ifath(:),1)
3668            numNden(:,2) = whist*numNden(:,2)+wcur*xnude(ifath(:),2)
3669
3670            xnd(:,1) = xnd(:,1)+xnude(:,1)
3671            xnd(:,2) = xnd(:,2)+xnude(:,2)
3672
3673            cdelsq(:) = numNden(:,1) / (numNden(:,2)) ! + 1.d-09)
3674
3675c            cdelsq(:) = xnude(ifath(:),1)/(xnude(ifath(:),2))!+1.d-09)
3676
3677
3678          cdelsq2(:) = ynude(ifath(:),6)  ! For comparison w/ cdelsq
3679
3680            xmodcomp(:,1) = xmodcomp(:,1)+ynude(:,1)
3681            xmodcomp(:,2) = xmodcomp(:,2)+ynude(:,2)
3682            xmodcomp(:,3) = xmodcomp(:,3)+ynude(:,3)
3683            xmodcomp(:,4) = xmodcomp(:,4)+ynude(:,4)
3684            xmodcomp(:,5) = xmodcomp(:,5)+ynude(:,5)
3685
3686            xmcomp(:,:) = xmcomp(:,:)+xm(:,:)
3687            xlcomp(:,:) = xlcomp(:,:)+xl(:,:)
3688
3689            xl1comp(:,:) = xl1comp(:,:)+xl1(:,:)
3690            xl2comp(:,:) = xl2comp(:,:)+xl2(:,:)
3691
3692            ucomp(:,:) = ucomp(:,:)+ui(:,:)
3693            u1 = ui(32,1)
3694            scomp(:)   = scomp(:)+snorm(:)
3695
3696         endif
3697
3698
3699c         do i = 1, nfath
3700c            xmodcomp(i,:) = xmodcomp(i,:)/nsons(i)
3701c            xmcomp(i,:) = xmcomp(i,:)/nsons(i)
3702c            xlcomp(i,:) = xlcomp(i,:)/nsons(i)
3703c            xl2comp(i,:) = xl2comp(i,:)/nsons(i)
3704c            xl1comp(i,:) = xl1comp(i,:)/nsons(i)
3705c            xnd(i,:) = xnd(i,:)/nsons(i)
3706c            scomp(i) = scomp(i)/nsons(i)
3707c            ucomp(i,:) = ucomp(i,:)/nsons(i)
3708c         enddo
3709
3710         if ( istep .eq. (nstep(1)-1) ) then
3711         if ( myrank .eq. master) then
3712
3713            do i = 1, nfath
3714            write(365,*)xmodcomp(i,1),xmodcomp(i,2),xmodcomp(i,3),
3715     &              xmodcomp(i,4),xmodcomp(i,5)
3716
3717            write(366,*)xmcomp(i,1),xmcomp(i,2),xmcomp(i,3)
3718            write(367,*)xmcomp(i,4),xmcomp(i,5),xmcomp(i,6)
3719
3720            write(368,*)xlcomp(i,1),xlcomp(i,2),xlcomp(i,3)
3721            write(369,*)xlcomp(i,4),xlcomp(i,5),xlcomp(i,6)
3722
3723            write(370,*)xl1comp(i,1),xl1comp(i,2),xl1comp(i,3)
3724            write(371,*)xl1comp(i,4),xl1comp(i,5),xl1comp(i,6)
3725
3726            write(372,*)xl2comp(i,1),xl2comp(i,2),xl2comp(i,3)
3727            write(373,*)xl2comp(i,4),xl2comp(i,5),xl2comp(i,6)
3728
3729            write(374,*)xnd(i,1),xnd(i,2),scomp(i)
3730            write(375,*)ucomp(i,1),ucomp(i,2),ucomp(i,3)
3731
3732c            write(*,*)'uit uic=', ucomp(32,1),u1
3733            enddo
3734
3735
3736            call flush(365)
3737            call flush(366)
3738            call flush(367)
3739            call flush(368)
3740            call flush(369)
3741            call flush(370)
3742            call flush(371)
3743            call flush(372)
3744            call flush(373)
3745            call flush(374)
3746            call flush(375)
3747
3748c            if (myrank .eq. master) then
3749c               write(*,*)'uit uic=', ucomp(32,1),u1
3750c            endif
3751
3752
3753c            close(852)
3754c            close(853)
3755c            close(854)
3756
3757         endif
3758         endif
3759
3760            if (myrank .eq. master) then
3761               write(*,*)'uit uic=', ucomp(32,1),u1
3762            endif
3763
3764
3765 555     format(e14.7,4(2x,e14.7))
3766 556     format(e14.7,5(2x,e14.7))
3767
3768c         close(849)
3769c         close(850)
3770c         close(851)
3771c         close(852)
3772c         close(853)
3773c         close(854)
3774
3775c $$$$$$$$$$$$$$$$$$$$$$$$$$$
3776      tmp1 =  MINVAL(cdelsq)
3777      tmp2 =  MAXVAL(cdelsq)
3778      if(numpe>1) then
3779         call MPI_REDUCE (tmp1, tmp3, 1,MPI_DOUBLE_PRECISION,
3780     &        MPI_MIN, master, MPI_COMM_WORLD, ierr)
3781         call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION,
3782     &        MPI_MAX, master, MPI_COMM_WORLD, ierr)
3783         tmp1=tmp3
3784         tmp2=tmp4
3785      endif
3786      if (myrank .EQ. master) then !print CDelta^2 range
3787         write(34,*)lstep,tmp1,tmp2
3788         call flush(34)
3789      endif
3790c $$$$$$$$$$$$$$$$$$$$$$$$$$$
3791
3792      if (myrank .eq. master) then
3793         write(*,*) 'cdelsq=', cdelsq(1),cdelsq(2)
3794         write(*,*) 'cdelsq=', cdelsq2(1),cdelsq2(2)
3795         write(22,*) lstep, cdelsq(1)
3796         call flush(22)
3797      endif
3798
3799      do iblk = 1,nelblk
3800         lcsyst = lcblk(3,iblk)
3801         iel  = lcblk(1,iblk)
3802         npro = lcblk(1,iblk+1) - iel
3803         lelCat = lcblk(2,iblk)
3804         inum  = iel + npro - 1
3805
3806         ngauss = nint(lcsyst)
3807
3808         call scatnu (mien(iblk)%p, strl(iel:inum,:),
3809     &        mxmudmi(iblk)%p,cdelsq,shp(lcsyst,1:nshl,:))
3810      enddo
3811c     $$$$$$$$$$$$$$$$$$$$$$$$$$$
3812c$$$  tmp1 =  MINVAL(xmudmi)
3813c$$$  tmp2 =  MAXVAL(xmudmi)
3814c$$$  if(numpe>1) then
3815c$$$  call MPI_REDUCE (tmp1, tmp3, 1, MPI_DOUBLE_PRECISION,
3816c$$$  &                 MPI_MIN, master, MPI_COMM_WORLD, ierr)
3817c$$$  call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION,
3818c$$$  &                 MPI_MAX, master, MPI_COMM_WORLD, ierr)
3819c$$$      tmp1=tmp3
3820c$$$  tmp2=tmp4
3821c$$$  endif
3822c$$$  if (myrank .EQ. master) then
3823c$$$  write(35,*) lstep,tmp1,tmp2
3824c$$$  call flush(35)
3825c$$$  endif
3826c $$$$$$$$$$$$$$$$$$$$$$$$$$$
3827
3828c
3829c  if flag set, write a restart file with info (reuse xmij's memory)
3830c
3831      if(irs.eq.11) then
3832         lstep=999
3833         xmij(:,1)=xnum(:)
3834         xmij(:,2)=xden(:)
3835         xmij(:,3)=cdelsq(:)
3836         xmij(:,5)=xlij(:,4)    !leave M_{12} in 4 and put L_{12} here
3837         call restar('out ',xmij,xlij) !also dump all of L_{ij} in ac
3838         stop
3839      endif
3840c
3841c  local clipping moved to scatnu with the creation of mxmudmi pointers
3842c
3843c$$$      rmu=datmat(1,2,1)
3844c$$$      xmudmi=min(xmudmi,1000.0*rmu) !don't let it get larger than 1000 mu
3845c$$$      xmudmi=max(xmudmi, -rmu) ! don't let (xmudmi + mu) < 0
3846c      stop !uncomment to test dmod
3847c
3848
3849
3850c  write out the nodal values of xnut (estimate since we don't calc strain
3851c  there and must use the filtered strain).
3852c
3853
3854      if ((irs .ge. 1) .and. (mod(lstep, ntout) .eq. 0)) then
3855c
3856c  collect the average strain into xnude(2)
3857c
3858         xnude(:,2) = zero
3859         do i = 1,numnp
3860            xnude(ifath(i),2) = xnude(ifath(i),2) + strnrm(i)
3861         enddo
3862
3863         if(numpe .gt. 1) then
3864             call drvAllreduce(xnude(:,2), xnuder(:,2),nfath)
3865          else
3866             xnuder=xnude
3867          endif
3868c
3869c          nut= cdelsq    * |S|
3870c
3871         xnutf=xnuder(:,1)*xnuder(:,2)/nsons(:)
3872c
3873c  collect the x and y coords into xnude
3874c
3875         xnude = zero
3876         do i = 1,numnp
3877            xnude(ifath(i),1) = xnude(ifath(i),1) + x(i,1)
3878            xnude(ifath(i),2) = xnude(ifath(i),2) + x(i,2)
3879         enddo
3880
3881         if(numpe .gt. 1)
3882     &        call drvAllreduce(xnude, xnuder,2*nfath)
3883         xnuder(:,1)=xnuder(:,1)/nsons(:)
3884         xnuder(:,2)=xnuder(:,2)/nsons(:)
3885c
3886c  xnude is the sum of the sons for each father on this processor
3887c
3888         if((myrank.eq.master)) then
3889            do i=1,nfath      ! cdelsq   * |S|
3890               write(444,*) xnuder(i,1),xnuder(i,2),xnutf(i)
3891            enddo
3892            call flush(444)
3893         endif
3894      endif
3895
3896      return
3897      end
3898