xref: /phasta/phSolver/incompressible/filters.f (revision 712d3df0b59ebebaaeaea358162c8d2c043c6e08)
1      subroutine hfilterC (y, x, ien, hfres, shgl, shp, Qwtf)
2
3c...  The filter operator used here uses the generalized box
4c...  kernel
5
6
7      include "common.h"
8
9      dimension y(nshg,5),             hfres(nshg,16)
10      dimension x(numnp,3),            xl(npro,nenl,3)
11      dimension ien(npro,nshl),        yl(npro,nshl,5),
12     &          fresl(npro,16),        WdetJ(npro),
13     &          u1(npro),              u2(npro),
14     &          u3(npro),
15     &          S11(npro),             S22(npro),
16     &          S33(npro),             S12(npro),
17     &          S13(npro),             S23(npro),
18     &          dxdxi(npro,nsd,nsd),   dxidx(npro,nsd,nsd),
19     &          shgl(nsd,nshl,ngauss),  shg(npro,nshl,nsd),
20     &          shp(nshl,ngauss),
21     &          fresli(npro,16),       Qwtf(ngaussf)
22
23      dimension tmp(npro)
24
25      call local (y,      yl,     ien,    5,  'gather  ')
26      call localx (x,      xl,     ien,    3,  'gather  ')
27c
28
29      fresl = zero
30
31c
32      do intp = 1, ngaussf   ! Loop over quadrature points
33
34c  calculate the metrics
35c
36c
37c.... --------------------->  Element Metrics  <-----------------------
38c
39c.... compute the deformation gradient
40c
41         dxdxi = zero
42c
43         do n = 1, nenl
44            dxdxi(:,1,1) = dxdxi(:,1,1) + xl(:,n,1) * shgl(1,n,intp)
45            dxdxi(:,1,2) = dxdxi(:,1,2) + xl(:,n,1) * shgl(2,n,intp)
46            dxdxi(:,1,3) = dxdxi(:,1,3) + xl(:,n,1) * shgl(3,n,intp)
47            dxdxi(:,2,1) = dxdxi(:,2,1) + xl(:,n,2) * shgl(1,n,intp)
48            dxdxi(:,2,2) = dxdxi(:,2,2) + xl(:,n,2) * shgl(2,n,intp)
49            dxdxi(:,2,3) = dxdxi(:,2,3) + xl(:,n,2) * shgl(3,n,intp)
50            dxdxi(:,3,1) = dxdxi(:,3,1) + xl(:,n,3) * shgl(1,n,intp)
51            dxdxi(:,3,2) = dxdxi(:,3,2) + xl(:,n,3) * shgl(2,n,intp)
52            dxdxi(:,3,3) = dxdxi(:,3,3) + xl(:,n,3) * shgl(3,n,intp)
53         enddo
54c
55c.... compute the inverse of deformation gradient
56c
57         dxidx(:,1,1) =   dxdxi(:,2,2) * dxdxi(:,3,3)
58     &        - dxdxi(:,3,2) * dxdxi(:,2,3)
59         dxidx(:,1,2) =   dxdxi(:,3,2) * dxdxi(:,1,3)
60     &        - dxdxi(:,1,2) * dxdxi(:,3,3)
61         dxidx(:,1,3) =   dxdxi(:,1,2) * dxdxi(:,2,3)
62     &        - dxdxi(:,1,3) * dxdxi(:,2,2)
63         tmp          = one / ( dxidx(:,1,1) * dxdxi(:,1,1)
64     &        + dxidx(:,1,2) * dxdxi(:,2,1)
65     &        + dxidx(:,1,3) * dxdxi(:,3,1) )
66         dxidx(:,1,1) = dxidx(:,1,1) * tmp
67         dxidx(:,1,2) = dxidx(:,1,2) * tmp
68         dxidx(:,1,3) = dxidx(:,1,3) * tmp
69         dxidx(:,2,1) = (dxdxi(:,2,3) * dxdxi(:,3,1)
70     &        - dxdxi(:,2,1) * dxdxi(:,3,3)) * tmp
71         dxidx(:,2,2) = (dxdxi(:,1,1) * dxdxi(:,3,3)
72     &        - dxdxi(:,3,1) * dxdxi(:,1,3)) * tmp
73         dxidx(:,2,3) = (dxdxi(:,2,1) * dxdxi(:,1,3)
74     &        - dxdxi(:,1,1) * dxdxi(:,2,3)) * tmp
75         dxidx(:,3,1) = (dxdxi(:,2,1) * dxdxi(:,3,2)
76     &        - dxdxi(:,2,2) * dxdxi(:,3,1)) * tmp
77         dxidx(:,3,2) = (dxdxi(:,3,1) * dxdxi(:,1,2)
78     &        - dxdxi(:,1,1) * dxdxi(:,3,2)) * tmp
79         dxidx(:,3,3) = (dxdxi(:,1,1) * dxdxi(:,2,2)
80     &        - dxdxi(:,1,2) * dxdxi(:,2,1)) * tmp
81c
82c        wght=Qwt(lcsyst,intp)  ! may be different now
83         wght=Qwtf(intp)
84         WdetJ(:) = wght / tmp(:)
85
86
87c... compute the gradient of shape functions at the quad. point.
88
89
90      do n = 1,nshl
91        shg(:,n,1) = (shgl(1,n,intp) * dxidx(:,1,1)
92     &              + shgl(2,n,intp) * dxidx(:,2,1)
93     &              + shgl(3,n,intp) * dxidx(:,3,1))
94        shg(:,n,2) = (shgl(1,n,intp) * dxidx(:,1,2)
95     &              + shgl(2,n,intp) * dxidx(:,2,2)
96     &              + shgl(3,n,intp) * dxidx(:,3,2))
97        shg(:,n,3) = (shgl(1,n,intp) * dxidx(:,1,3)
98     &              + shgl(2,n,intp) * dxidx(:,2,3)
99     &              + shgl(3,n,intp) * dxidx(:,3,3))
100      enddo
101
102
103c... compute the velocities and the strain rate tensor at the quad. point
104
105
106         u1  = zero
107         u2  = zero
108         u3  = zero
109         S11 = zero
110         S22 = zero
111         S33 = zero
112         S12 = zero
113         S13 = zero
114         S23 = zero
115         do i=1,nshl
116            u1 = u1 + shp(i,intp)*yl(:,i,2)
117            u2 = u2 + shp(i,intp)*yl(:,i,3)
118            u3 = u3 + shp(i,intp)*yl(:,i,4)
119
120            S11 = S11 + shg(:,i,1)*yl(:,i,2)
121            S22 = S22 + shg(:,i,2)*yl(:,i,3)
122            S33 = S33 + shg(:,i,3)*yl(:,i,4)
123
124            S12 = S12 + shg(:,i,2)*yl(:,i,2)
125     &                       +shg(:,i,1)*yl(:,i,3)
126            S13 = S13 + shg(:,i,3)*yl(:,i,2)
127     &                       +shg(:,i,1)*yl(:,i,4)
128            S23 = S23 + shg(:,i,3)*yl(:,i,3)
129     &                       +shg(:,i,2)*yl(:,i,4)
130         enddo
131         S12 = pt5 * S12
132         S13 = pt5 * S13
133         S23 = pt5 * S23
134
135
136         fresli(:,1) = WdetJ * u1 !G * u1 * WdetJ
137         fresli(:,2) = WdetJ * u2 !G * u2 * WdetJ
138         fresli(:,3) = WdetJ * u3 !G * u3 * WdetJ
139
140         fresli(:,4) = WdetJ * u1 * u1 ! G*u1*u1*WdetJ
141         fresli(:,5) = WdetJ * u2 * u2 ! G*u2*u2*WdetJ
142         fresli(:,6) = WdetJ * u3 * u3 ! G*u3*u3*WdetJ
143         fresli(:,7) = WdetJ * u1 * u2 ! G*u1*u2*WdetJ
144         fresli(:,8) = WdetJ * u1 * u3 ! G*u1*u3*WdetJ
145         fresli(:,9) = WdetJ * u2 * u3 ! G*u2*u3*WdetJ
146
147         fresli(:,10) = S11 * WdetJ ! G*S_{1,1}*WdetJ
148         fresli(:,11) = S22 * WdetJ ! G*S_{2,2}*WdetJ
149         fresli(:,12) = S33 * WdetJ ! G*S_{3,3}*WdetJ
150         fresli(:,13) = S12 * WdetJ ! G*S_{1,1}*WdetJ
151         fresli(:,14) = S13 * WdetJ ! G*S_{1,3}*WdetJ
152         fresli(:,15) = S23 * WdetJ ! G*S_{2,3}*WdetJ
153
154         fresli(:,16) = WdetJ !Integral of filter kernel, G,
155c                                               over the element
156
157         do i = 1, 16           ! Add contribution of each quad. point
158            fresl(:,i) = fresl(:,i) + fresli(:,i)
159         enddo
160
161      enddo                     !end of loop over integration points
162c
163
164      do j = 1,nshl
165      do nel = 1,npro
166        hfres(ien(nel,j),:) = hfres(ien(nel,j),:) + fresl(nel,:)
167      enddo
168      enddo
169
170
171      return
172      end
173
174c... Here, the filter operation (denoted w/ a tilde) uses the generalized
175c... box kernel.
176
177      subroutine twohfilterB (y, x, strnrm, ien, fres,
178     &     hfres, shgl, shp, Qwtf)
179
180      include "common.h"
181
182      dimension y(nshg,ndof),            fres(nshg,33)
183      dimension x(numnp,nsd),            xl(npro,nenl,nsd)
184      dimension ien(npro,nshl),        yl(npro,nshl,ndof),
185     &          fresl(npro,33),        WdetJ(npro),
186     &          u1(npro),              u2(npro),
187     &          u3(npro),              dxdxi(npro,nsd,nsd),
188     &          strnrm(npro,ngauss),    dxidx(npro,nsd,nsd),
189     &          shgl(nsd,nshl,ngauss),       shg(npro,nshl,nsd),
190     &          shp(nshl,ngauss),
191     &          fresli(npro,33),       Qwtf(ngaussf),
192     &          hfres(nshg,16),        hfresl(npro,nshl,16),
193     &          S(npro,nshl),          SS11(npro,nshl),
194     &          SS22(npro,nshl),       SS33(npro,nshl),
195     &          SS12(npro,nshl),       SS13(npro,nshl),
196     &          SS23(npro,nshl),
197     &          S11p(npro),            S22p(npro),
198     &          S33p(npro),            S12p(npro),
199     &          S13p(npro),            S23p(npro)
200
201      dimension tmp(npro)
202
203      call local (y,      yl,     ien,    5,  'gather  ')
204      call localx (x,      xl,     ien,    3,  'gather  ')
205      call local (hfres,  hfresl, ien,   16,  'gather  ')
206
207      S(:,:) = sqrt(
208     &     two*(hfresl(:,:,10)**2 + hfresl(:,:,11)**2 +
209     &     hfresl(:,:,12)**2) + four*(
210     &     hfresl(:,:,13)**2 + hfresl(:,:,14)**2 +
211     &     hfresl(:,:,15)**2) )
212
213      SS11(:,:) = S(:,:)*hfresl(:,:,10)
214      SS22(:,:) = S(:,:)*hfresl(:,:,11)
215      SS33(:,:) = S(:,:)*hfresl(:,:,12)
216      SS12(:,:) = S(:,:)*hfresl(:,:,13)
217      SS13(:,:) = S(:,:)*hfresl(:,:,14)
218      SS23(:,:) = S(:,:)*hfresl(:,:,15)
219
220      fresl = zero
221
222      do intp = 1, ngauss
223
224
225c  calculate the metrics
226c
227c
228c.... --------------------->  Element Metrics  <-----------------------
229c
230c.... compute the deformation gradient
231c
232        dxdxi = zero
233c
234          do n = 1, nenl
235            dxdxi(:,1,1) = dxdxi(:,1,1) + xl(:,n,1) * shgl(1,n,intp)
236            dxdxi(:,1,2) = dxdxi(:,1,2) + xl(:,n,1) * shgl(2,n,intp)
237            dxdxi(:,1,3) = dxdxi(:,1,3) + xl(:,n,1) * shgl(3,n,intp)
238            dxdxi(:,2,1) = dxdxi(:,2,1) + xl(:,n,2) * shgl(1,n,intp)
239            dxdxi(:,2,2) = dxdxi(:,2,2) + xl(:,n,2) * shgl(2,n,intp)
240            dxdxi(:,2,3) = dxdxi(:,2,3) + xl(:,n,2) * shgl(3,n,intp)
241            dxdxi(:,3,1) = dxdxi(:,3,1) + xl(:,n,3) * shgl(1,n,intp)
242            dxdxi(:,3,2) = dxdxi(:,3,2) + xl(:,n,3) * shgl(2,n,intp)
243            dxdxi(:,3,3) = dxdxi(:,3,3) + xl(:,n,3) * shgl(3,n,intp)
244          enddo
245c
246c.... compute the inverse of deformation gradient
247c
248        dxidx(:,1,1) =   dxdxi(:,2,2) * dxdxi(:,3,3)
249     &                 - dxdxi(:,3,2) * dxdxi(:,2,3)
250        dxidx(:,1,2) =   dxdxi(:,3,2) * dxdxi(:,1,3)
251     &                 - dxdxi(:,1,2) * dxdxi(:,3,3)
252        dxidx(:,1,3) =   dxdxi(:,1,2) * dxdxi(:,2,3)
253     &                 - dxdxi(:,1,3) * dxdxi(:,2,2)
254        tmp          = one / ( dxidx(:,1,1) * dxdxi(:,1,1)
255     &                       + dxidx(:,1,2) * dxdxi(:,2,1)
256     &                       + dxidx(:,1,3) * dxdxi(:,3,1) )
257        dxidx(:,1,1) = dxidx(:,1,1) * tmp
258        dxidx(:,1,2) = dxidx(:,1,2) * tmp
259        dxidx(:,1,3) = dxidx(:,1,3) * tmp
260        dxidx(:,2,1) = (dxdxi(:,2,3) * dxdxi(:,3,1)
261     &                - dxdxi(:,2,1) * dxdxi(:,3,3)) * tmp
262        dxidx(:,2,2) = (dxdxi(:,1,1) * dxdxi(:,3,3)
263     &                - dxdxi(:,3,1) * dxdxi(:,1,3)) * tmp
264        dxidx(:,2,3) = (dxdxi(:,2,1) * dxdxi(:,1,3)
265     &                - dxdxi(:,1,1) * dxdxi(:,2,3)) * tmp
266        dxidx(:,3,1) = (dxdxi(:,2,1) * dxdxi(:,3,2)
267     &                - dxdxi(:,2,2) * dxdxi(:,3,1)) * tmp
268        dxidx(:,3,2) = (dxdxi(:,3,1) * dxdxi(:,1,2)
269     &                - dxdxi(:,1,1) * dxdxi(:,3,2)) * tmp
270        dxidx(:,3,3) = (dxdxi(:,1,1) * dxdxi(:,2,2)
271     &                - dxdxi(:,1,2) * dxdxi(:,2,1)) * tmp
272c
273        wght=Qwt(lcsyst,intp)  ! may be different now
274c        wght=Qwtf(intp)
275        WdetJ = wght / tmp
276c
277
278      do n = 1,nshl
279        shg(:,n,1) = (shgl(1,n,intp) * dxidx(:,1,1)
280     &              + shgl(2,n,intp) * dxidx(:,2,1)
281     &              + shgl(3,n,intp) * dxidx(:,3,1))
282        shg(:,n,2) = (shgl(1,n,intp) * dxidx(:,1,2)
283     &              + shgl(2,n,intp) * dxidx(:,2,2)
284     &              + shgl(3,n,intp) * dxidx(:,3,2))
285        shg(:,n,3) = (shgl(1,n,intp) * dxidx(:,1,3)
286     &              + shgl(2,n,intp) * dxidx(:,2,3)
287     &              + shgl(3,n,intp) * dxidx(:,3,3))
288      enddo
289
290c... compute filtered quantities at the hat level evaluated at the quad. pt.
291c... This should really
292c... be the bar-hat level, but the bar filter is implicit so we don't
293c... bother to mention it.
294
295      fresli = zero
296      S11p   = zero
297      S22p   = zero
298      S33p   = zero
299      S12p   = zero
300      S13p   = zero
301      S23p   = zero
302
303      do i = 1, nenl
304         fresli(:,1) = fresli(:,1) + shp(i,intp)*hfresl(:,i,1)  ! hat{u1}
305         fresli(:,2) = fresli(:,2) + shp(i,intp)*hfresl(:,i,2)  ! hat{u2}
306         fresli(:,3) = fresli(:,3) + shp(i,intp)*hfresl(:,i,3)  ! hat{u3}
307
308         fresli(:,4) = fresli(:,4) + shp(i,intp)*hfresl(:,i,1)*
309     &        hfresl(:,i,1)     ! hat{u1}*hat{u1}
310         fresli(:,5) = fresli(:,5) + shp(i,intp)*hfresl(:,i,2)*
311     &        hfresl(:,i,2)     ! hat{u2}*hat{u2}
312         fresli(:,6) = fresli(:,6) + shp(i,intp)*hfresl(:,i,3)*
313     &        hfresl(:,i,3)     ! hat{u3}*hat{u3}
314         fresli(:,7) = fresli(:,7) + shp(i,intp)*hfresl(:,i,1)*
315     &        hfresl(:,i,2)     ! hat{u1}*hat{u2}
316         fresli(:,8) = fresli(:,8) + shp(i,intp)*hfresl(:,i,1)*
317     &        hfresl(:,i,3)     ! hat{u1}*hat{u3}
318         fresli(:,9) = fresli(:,9) + shp(i,intp)*hfresl(:,i,2)*
319     &        hfresl(:,i,3)     ! hat{u2}*hat{u3}
320
321         fresli(:,10) = fresli(:,10) + shp(i,intp)*hfresl(:,i,10)  ! hat{S11}
322         fresli(:,11) = fresli(:,11) + shp(i,intp)*hfresl(:,i,11)  ! hat{S22}
323         fresli(:,12) = fresli(:,12) + shp(i,intp)*hfresl(:,i,12)  ! hat{S33}
324         fresli(:,13) = fresli(:,13) + shp(i,intp)*hfresl(:,i,13)  ! hat{S12}
325         fresli(:,14) = fresli(:,14) + shp(i,intp)*hfresl(:,i,14)  ! hat{S13}
326         fresli(:,15) = fresli(:,15) + shp(i,intp)*hfresl(:,i,15)  ! hat{S23}
327
328         S11p = S11p + shg(:,i,1)*hfresl(:,i,1)
329         S22p = S22p + shg(:,i,2)*hfresl(:,i,2)
330         S33p = S33p + shg(:,i,3)*hfresl(:,i,3)
331
332         S12p = S12p + shg(:,i,2)*hfresl(:,i,1) +
333     &        shg(:,i,1)*hfresl(:,i,2)
334         S13p = S13p + shg(:,i,3)*hfresl(:,i,1) +
335     &        shg(:,i,1)*hfresl(:,i,3)
336         S23p = S23p + shg(:,i,3)*hfresl(:,i,2) +
337     &        shg(:,i,2)*hfresl(:,i,3)
338
339      enddo
340
341c... get the strain rate tensor norm at the quad. pt.
342
343c      strnrm(:,intp) = sqrt(
344c     &   two * (fresli(:,10)**2 + fresli(:,11)**2 + fresli(:,12)**2)
345c     &  + four * ( fresli(:,13)**2 + fresli(:,14)**2 +
346c     &    fresli(:,15)**2 ) )
347
348c      strnrm2(:,intp) = zero
349c      do i = 1, nenl
350c         strnrm2(:,intp) = strnrm2(:,intp) + S(:,i)*shp(i,intp)
351c      enddo
352
353c      strnrm3(:,intp) = sqrt(
354c     &     two * (S11p(:)**2 + S22p(:)**2 + S33p(:)**2)
355c     &    + four * ( S12p(:)**2 + S13p(:)**2 + S23p(:)**2) )
356
357c... compute |hat{S}| * hat{Sij}
358
359      do i = 1, nenl
360         fresli(:,16) =fresli(:,16)+shp(i,intp)*SS11(:,i)! |hat{S}|*hat{S11}
361         fresli(:,17) =fresli(:,17)+shp(i,intp)*SS22(:,i)! |hat{S}|*hat{S22}
362         fresli(:,18) =fresli(:,18)+shp(i,intp)*SS33(:,i)! |hat{S}|*hat{S33}
363         fresli(:,19) =fresli(:,19)+shp(i,intp)*SS12(:,i)! |hat{S}|*hat{S12}
364         fresli(:,20) =fresli(:,20)+shp(i,intp)*SS13(:,i)! |hat{S}|*hat{S13}
365         fresli(:,21) =fresli(:,21)+shp(i,intp)*SS23(:,i)! |hat{S}|*hat{S23}
366      enddo
367
368c... multiply fresli by WdetJ so that when we finish looping over
369c... quad. pts. and add the contributions from all the quad. points
370c... we get filtered quantities at the hat-tilde level, secretly the
371c... bar-hat-tilde level.
372
373      do j = 1, 21
374         fresli(:,j) = fresli(:,j)*WdetJ(:)
375      enddo
376
377c... compute volume of box kernel
378
379      fresli(:,22) = WdetJ
380
381c... add contributions from each quad pt to current element
382
383      do i = 1, 22
384         fresl(:,i) = fresl(:,i) + fresli(:,i)
385      enddo
386
387      enddo ! end of loop over integration points
388
389
390c... scatter locally filtered quantities to the global nodes
391
392      do j = 1,nshl
393      do nel = 1,npro
394        fres(ien(nel,j),:) = fres(ien(nel,j),:) + fresl(nel,:)
395      enddo
396      enddo
397
398      return
399      end
400
401c...  The filter operator used here uses the generalized box
402c...  kernel
403
404      subroutine hfilterCC (y, x, ien, hfres, shgl, shp, Qwtf)
405
406      include "common.h"
407
408      dimension y(nshg,5),             hfres(nshg,22)
409      dimension x(numnp,3),            xl(npro,nenl,3)
410      dimension ien(npro,nshl),        yl(npro,nshl,5),
411     &          fresl(npro,22),        WdetJ(npro),
412     &          u1(npro),              u2(npro),
413     &          u3(npro),
414     &          S11(npro),             S22(npro),
415     &          S33(npro),             S12(npro),
416     &          S13(npro),             S23(npro),
417     &          dxdxi(npro,nsd,nsd),   dxidx(npro,nsd,nsd),
418     &          shgl(nsd,nshl,ngauss),  shg(npro,nshl,nsd),
419     &          shp(nshl,ngauss),
420     &          fresli(npro,22),       Qwtf(ngaussf),
421     &          strnrmi(npro)
422
423      dimension tmp(npro)
424
425      call local (y,      yl,     ien,    5,  'gather  ')
426      call localx (x,      xl,     ien,    3,  'gather  ')
427c
428
429      fresl = zero
430
431c
432      do intp = 1, ngaussf   ! Loop over quadrature points
433
434c  calculate the metrics
435c
436c
437c.... --------------------->  Element Metrics  <-----------------------
438c
439c.... compute the deformation gradient
440c
441         dxdxi = zero
442c
443         do n = 1, nenl
444            dxdxi(:,1,1) = dxdxi(:,1,1) + xl(:,n,1) * shgl(1,n,intp)
445            dxdxi(:,1,2) = dxdxi(:,1,2) + xl(:,n,1) * shgl(2,n,intp)
446            dxdxi(:,1,3) = dxdxi(:,1,3) + xl(:,n,1) * shgl(3,n,intp)
447            dxdxi(:,2,1) = dxdxi(:,2,1) + xl(:,n,2) * shgl(1,n,intp)
448            dxdxi(:,2,2) = dxdxi(:,2,2) + xl(:,n,2) * shgl(2,n,intp)
449            dxdxi(:,2,3) = dxdxi(:,2,3) + xl(:,n,2) * shgl(3,n,intp)
450            dxdxi(:,3,1) = dxdxi(:,3,1) + xl(:,n,3) * shgl(1,n,intp)
451            dxdxi(:,3,2) = dxdxi(:,3,2) + xl(:,n,3) * shgl(2,n,intp)
452            dxdxi(:,3,3) = dxdxi(:,3,3) + xl(:,n,3) * shgl(3,n,intp)
453         enddo
454c
455c.... compute the inverse of deformation gradient
456c
457         dxidx(:,1,1) =   dxdxi(:,2,2) * dxdxi(:,3,3)
458     &        - dxdxi(:,3,2) * dxdxi(:,2,3)
459         dxidx(:,1,2) =   dxdxi(:,3,2) * dxdxi(:,1,3)
460     &        - dxdxi(:,1,2) * dxdxi(:,3,3)
461         dxidx(:,1,3) =   dxdxi(:,1,2) * dxdxi(:,2,3)
462     &        - dxdxi(:,1,3) * dxdxi(:,2,2)
463         tmp          = one / ( dxidx(:,1,1) * dxdxi(:,1,1)
464     &        + dxidx(:,1,2) * dxdxi(:,2,1)
465     &        + dxidx(:,1,3) * dxdxi(:,3,1) )
466         dxidx(:,1,1) = dxidx(:,1,1) * tmp
467         dxidx(:,1,2) = dxidx(:,1,2) * tmp
468         dxidx(:,1,3) = dxidx(:,1,3) * tmp
469         dxidx(:,2,1) = (dxdxi(:,2,3) * dxdxi(:,3,1)
470     &        - dxdxi(:,2,1) * dxdxi(:,3,3)) * tmp
471         dxidx(:,2,2) = (dxdxi(:,1,1) * dxdxi(:,3,3)
472     &        - dxdxi(:,3,1) * dxdxi(:,1,3)) * tmp
473         dxidx(:,2,3) = (dxdxi(:,2,1) * dxdxi(:,1,3)
474     &        - dxdxi(:,1,1) * dxdxi(:,2,3)) * tmp
475         dxidx(:,3,1) = (dxdxi(:,2,1) * dxdxi(:,3,2)
476     &        - dxdxi(:,2,2) * dxdxi(:,3,1)) * tmp
477         dxidx(:,3,2) = (dxdxi(:,3,1) * dxdxi(:,1,2)
478     &        - dxdxi(:,1,1) * dxdxi(:,3,2)) * tmp
479         dxidx(:,3,3) = (dxdxi(:,1,1) * dxdxi(:,2,2)
480     &        - dxdxi(:,1,2) * dxdxi(:,2,1)) * tmp
481c
482c         wght=Qwt(lcsyst,intp)  ! may be different now
483         wght=Qwtf(intp)
484         WdetJ(:) = wght / tmp(:)
485
486
487c... compute the gradient of shape functions at the quad. point.
488
489
490      do n = 1,nshl
491        shg(:,n,1) = (shgl(1,n,intp) * dxidx(:,1,1)
492     &              + shgl(2,n,intp) * dxidx(:,2,1)
493     &              + shgl(3,n,intp) * dxidx(:,3,1))
494        shg(:,n,2) = (shgl(1,n,intp) * dxidx(:,1,2)
495     &              + shgl(2,n,intp) * dxidx(:,2,2)
496     &              + shgl(3,n,intp) * dxidx(:,3,2))
497        shg(:,n,3) = (shgl(1,n,intp) * dxidx(:,1,3)
498     &              + shgl(2,n,intp) * dxidx(:,2,3)
499     &              + shgl(3,n,intp) * dxidx(:,3,3))
500      enddo
501
502
503c... compute the velocities and the strain rate tensor at the quad. point
504
505
506         u1  = zero
507         u2  = zero
508         u3  = zero
509         S11 = zero
510         S22 = zero
511         S33 = zero
512         S12 = zero
513         S13 = zero
514         S23 = zero
515         do i=1,nshl
516            u1 = u1 + shp(i,intp)*yl(:,i,2)
517            u2 = u2 + shp(i,intp)*yl(:,i,3)
518            u3 = u3 + shp(i,intp)*yl(:,i,4)
519
520            S11 = S11 + shg(:,i,1)*yl(:,i,2)
521            S22 = S22 + shg(:,i,2)*yl(:,i,3)
522            S33 = S33 + shg(:,i,3)*yl(:,i,4)
523
524            S12 = S12 + shg(:,i,2)*yl(:,i,2)
525     &                       +shg(:,i,1)*yl(:,i,3)
526            S13 = S13 + shg(:,i,3)*yl(:,i,2)
527     &                       +shg(:,i,1)*yl(:,i,4)
528            S23 = S23 + shg(:,i,3)*yl(:,i,3)
529     &                       +shg(:,i,2)*yl(:,i,4)
530         enddo
531         S12 = pt5 * S12
532         S13 = pt5 * S13
533         S23 = pt5 * S23
534
535c... Get the strain rate norm at the quad pts
536
537         strnrmi = sqrt( two*(S11**2 + S22**2 + S33**2)
538     &        + four*(S12**2 + S13**2 + S23**2) )
539
540
541c... Multiply quantities to be filtered by the box kernel
542
543         fresli(:,1) = WdetJ * u1 !G * u1 * WdetJ
544         fresli(:,2) = WdetJ * u2 !G * u2 * WdetJ
545         fresli(:,3) = WdetJ * u3 !G * u3 * WdetJ
546
547         fresli(:,4) = WdetJ * u1 * u1 ! G*u1*u1*WdetJ
548         fresli(:,5) = WdetJ * u2 * u2 ! G*u2*u2*WdetJ
549         fresli(:,6) = WdetJ * u3 * u3 ! G*u3*u3*WdetJ
550         fresli(:,7) = WdetJ * u1 * u2 ! G*u1*u2*WdetJ
551         fresli(:,8) = WdetJ * u1 * u3 ! G*u1*u3*WdetJ
552         fresli(:,9) = WdetJ * u2 * u3 ! G*u2*u3*WdetJ
553
554         fresli(:,10) = S11 * WdetJ ! G*S_{1,1}*WdetJ
555         fresli(:,11) = S22 * WdetJ ! G*S_{2,2}*WdetJ
556         fresli(:,12) = S33 * WdetJ ! G*S_{3,3}*WdetJ
557         fresli(:,13) = S12 * WdetJ ! G*S_{1,1}*WdetJ
558         fresli(:,14) = S13 * WdetJ ! G*S_{1,3}*WdetJ
559         fresli(:,15) = S23 * WdetJ ! G*S_{2,3}*WdetJ
560
561         fresli(:,16) = WdetJ !Integral of filter kernel, G,
562c                                               over the element
563
564         fresli(:,17) = S11 * strnrmi * WdetJ
565         fresli(:,18) = S22 * strnrmi * WdetJ
566         fresli(:,19) = S33 * strnrmi * WdetJ
567         fresli(:,20) = S12 * strnrmi * WdetJ
568         fresli(:,21) = S13 * strnrmi * WdetJ
569         fresli(:,22) = S23 * strnrmi * WdetJ
570
571
572         do i = 1, 22           ! Add contribution of each quad. point
573            fresl(:,i) = fresl(:,i) + fresli(:,i)
574         enddo
575
576      enddo                     !end of loop over integration points
577c
578
579      do j = 1,nshl
580      do nel = 1,npro
581        hfres(ien(nel,j),:) = hfres(ien(nel,j),:) + fresl(nel,:)
582      enddo
583      enddo
584
585
586      return
587      end
588
589
590c... Here, the filter operation (denoted w/ a tilde) uses the generalized
591c... box kernel.
592
593      subroutine twohfilterBB (y, x, strnrm, ien, fres,
594     &     hfres, shgl, shp, Qwtf)
595
596      include "common.h"
597
598      dimension y(nshg,ndof),            fres(nshg,33)
599      dimension x(numnp,nsd),            xl(npro,nenl,nsd)
600      dimension ien(npro,nshl),        yl(npro,nshl,ndof),
601     &          fresl(npro,33),        WdetJ(npro),
602     &          u1(npro),              u2(npro),
603     &          u3(npro),              dxdxi(npro,nsd,nsd),
604     &          strnrm(npro,ngauss),    dxidx(npro,nsd,nsd),
605     &          shgl(nsd,nshl,ngauss),       shg(npro,nshl,nsd),
606     &          shp(nshl,ngauss),
607     &          fresli(npro,33),       Qwtf(ngaussf),
608     &          hfres(nshg,22),        hfresl(npro,nshl,22),
609     &          S(npro,nshl),          SS11(npro,nshl),
610     &          SS22(npro,nshl),       SS33(npro,nshl),
611     &          SS12(npro,nshl),       SS13(npro,nshl),
612     &          SS23(npro,nshl)
613
614      dimension tmp(npro)
615
616      call local (y,      yl,     ien,    5,  'gather  ')
617      call localx (x,      xl,     ien,    3,  'gather  ')
618      call local (hfres,  hfresl, ien,   22,  'gather  ')
619
620      S(:,:) = sqrt(
621     &     two*(hfresl(:,:,10)**2 + hfresl(:,:,11)**2 +
622     &     hfresl(:,:,12)**2) + four*(
623     &     hfresl(:,:,13)**2 + hfresl(:,:,14)**2 +
624     &     hfresl(:,:,15)**2) )
625
626      SS11(:,:) = S(:,:)*hfresl(:,:,10)
627      SS22(:,:) = S(:,:)*hfresl(:,:,11)
628      SS33(:,:) = S(:,:)*hfresl(:,:,12)
629      SS12(:,:) = S(:,:)*hfresl(:,:,13)
630      SS13(:,:) = S(:,:)*hfresl(:,:,14)
631      SS23(:,:) = S(:,:)*hfresl(:,:,15)
632
633      fresl = zero
634
635      do intp = 1, ngaussf
636
637
638c  calculate the metrics
639c
640c
641c.... --------------------->  Element Metrics  <-----------------------
642c
643c.... compute the deformation gradient
644c
645        dxdxi = zero
646c
647          do n = 1, nenl
648            dxdxi(:,1,1) = dxdxi(:,1,1) + xl(:,n,1) * shgl(1,n,intp)
649            dxdxi(:,1,2) = dxdxi(:,1,2) + xl(:,n,1) * shgl(2,n,intp)
650            dxdxi(:,1,3) = dxdxi(:,1,3) + xl(:,n,1) * shgl(3,n,intp)
651            dxdxi(:,2,1) = dxdxi(:,2,1) + xl(:,n,2) * shgl(1,n,intp)
652            dxdxi(:,2,2) = dxdxi(:,2,2) + xl(:,n,2) * shgl(2,n,intp)
653            dxdxi(:,2,3) = dxdxi(:,2,3) + xl(:,n,2) * shgl(3,n,intp)
654            dxdxi(:,3,1) = dxdxi(:,3,1) + xl(:,n,3) * shgl(1,n,intp)
655            dxdxi(:,3,2) = dxdxi(:,3,2) + xl(:,n,3) * shgl(2,n,intp)
656            dxdxi(:,3,3) = dxdxi(:,3,3) + xl(:,n,3) * shgl(3,n,intp)
657          enddo
658c
659c.... compute the inverse of deformation gradient
660c
661        dxidx(:,1,1) =   dxdxi(:,2,2) * dxdxi(:,3,3)
662     &                 - dxdxi(:,3,2) * dxdxi(:,2,3)
663        dxidx(:,1,2) =   dxdxi(:,3,2) * dxdxi(:,1,3)
664     &                 - dxdxi(:,1,2) * dxdxi(:,3,3)
665        dxidx(:,1,3) =   dxdxi(:,1,2) * dxdxi(:,2,3)
666     &                 - dxdxi(:,1,3) * dxdxi(:,2,2)
667        tmp          = one / ( dxidx(:,1,1) * dxdxi(:,1,1)
668     &                       + dxidx(:,1,2) * dxdxi(:,2,1)
669     &                       + dxidx(:,1,3) * dxdxi(:,3,1) )
670        dxidx(:,1,1) = dxidx(:,1,1) * tmp
671        dxidx(:,1,2) = dxidx(:,1,2) * tmp
672        dxidx(:,1,3) = dxidx(:,1,3) * tmp
673        dxidx(:,2,1) = (dxdxi(:,2,3) * dxdxi(:,3,1)
674     &                - dxdxi(:,2,1) * dxdxi(:,3,3)) * tmp
675        dxidx(:,2,2) = (dxdxi(:,1,1) * dxdxi(:,3,3)
676     &                - dxdxi(:,3,1) * dxdxi(:,1,3)) * tmp
677        dxidx(:,2,3) = (dxdxi(:,2,1) * dxdxi(:,1,3)
678     &                - dxdxi(:,1,1) * dxdxi(:,2,3)) * tmp
679        dxidx(:,3,1) = (dxdxi(:,2,1) * dxdxi(:,3,2)
680     &                - dxdxi(:,2,2) * dxdxi(:,3,1)) * tmp
681        dxidx(:,3,2) = (dxdxi(:,3,1) * dxdxi(:,1,2)
682     &                - dxdxi(:,1,1) * dxdxi(:,3,2)) * tmp
683        dxidx(:,3,3) = (dxdxi(:,1,1) * dxdxi(:,2,2)
684     &                - dxdxi(:,1,2) * dxdxi(:,2,1)) * tmp
685c
686c        wght=Qwt(lcsyst,intp)  ! may be different now
687        wght=Qwtf(intp)
688        WdetJ = wght / tmp
689c
690
691      do n = 1,nshl
692        shg(:,n,1) = (shgl(1,n,intp) * dxidx(:,1,1)
693     &              + shgl(2,n,intp) * dxidx(:,2,1)
694     &              + shgl(3,n,intp) * dxidx(:,3,1))
695        shg(:,n,2) = (shgl(1,n,intp) * dxidx(:,1,2)
696     &              + shgl(2,n,intp) * dxidx(:,2,2)
697     &              + shgl(3,n,intp) * dxidx(:,3,2))
698        shg(:,n,3) = (shgl(1,n,intp) * dxidx(:,1,3)
699     &              + shgl(2,n,intp) * dxidx(:,2,3)
700     &              + shgl(3,n,intp) * dxidx(:,3,3))
701      enddo
702
703c... compute filtered quantities at the hat level evaluated at the quad. pt.
704c... This should really
705c... be the bar-hat level, but the bar filter is implicit so we don't
706c... bother to mention it.
707
708      fresli = zero
709      S11p   = zero
710      S22p   = zero
711      S33p   = zero
712      S12p   = zero
713      S13p   = zero
714      S23p   = zero
715
716      do i = 1, nenl
717         fresli(:,1) = fresli(:,1) + shp(i,intp)*hfresl(:,i,1)  ! hat{u1}
718         fresli(:,2) = fresli(:,2) + shp(i,intp)*hfresl(:,i,2)  ! hat{u2}
719         fresli(:,3) = fresli(:,3) + shp(i,intp)*hfresl(:,i,3)  ! hat{u3}
720
721         fresli(:,4) = fresli(:,4) + shp(i,intp)*hfresl(:,i,4) ! hat{u1*u1}
722         fresli(:,5) = fresli(:,5) + shp(i,intp)*hfresl(:,i,5) ! hat{u2*u2}
723         fresli(:,6) = fresli(:,6) + shp(i,intp)*hfresl(:,i,6) ! hat{u3*u3}
724         fresli(:,7) = fresli(:,7) + shp(i,intp)*hfresl(:,i,7) ! hat{u1*u2}
725         fresli(:,8) = fresli(:,8) + shp(i,intp)*hfresl(:,i,8) ! hat{u1*u3}
726         fresli(:,9) = fresli(:,9) + shp(i,intp)*hfresl(:,i,9) ! hat{u2*u3}
727
728         fresli(:,10) = fresli(:,10) + shp(i,intp)*hfresl(:,i,10)  ! hat{S11}
729         fresli(:,11) = fresli(:,11) + shp(i,intp)*hfresl(:,i,11)  ! hat{S22}
730         fresli(:,12) = fresli(:,12) + shp(i,intp)*hfresl(:,i,12)  ! hat{S33}
731         fresli(:,13) = fresli(:,13) + shp(i,intp)*hfresl(:,i,13)  ! hat{S12}
732         fresli(:,14) = fresli(:,14) + shp(i,intp)*hfresl(:,i,14)  ! hat{S13}
733         fresli(:,15) = fresli(:,15) + shp(i,intp)*hfresl(:,i,15)  ! hat{S23}
734
735         fresli(:,16) = fresli(:,16) +shp(i,intp)*hfresl(:,i,17)! hat{S11*|S|}
736         fresli(:,17) = fresli(:,17) +shp(i,intp)*hfresl(:,i,18)! hat{S22*|S|}
737         fresli(:,18) = fresli(:,18) +shp(i,intp)*hfresl(:,i,19)! hat{S33*|S|}
738         fresli(:,19) = fresli(:,19) +shp(i,intp)*hfresl(:,i,20)! hat{S12*|S|}
739         fresli(:,20) = fresli(:,20) +shp(i,intp)*hfresl(:,i,21)! hat{S13*|S|}
740         fresli(:,21) = fresli(:,21) +shp(i,intp)*hfresl(:,i,22)! hat{S23*|S|}
741
742
743      enddo
744
745c... multiply fresli by WdetJ so that when we finish looping over
746c... quad. pts. and add the contributions from all the quad. points
747c... we get filtered quantities at the hat-tilde level, secretly the
748c... bar-hat-tilde level.
749
750      do j = 1, 21
751         fresli(:,j) = fresli(:,j)*WdetJ(:)
752      enddo
753
754c... compute volume of box kernel
755
756      fresli(:,22) = WdetJ
757
758c... add contributions from each quad pt to current element
759
760      do i = 1, 22
761         fresl(:,i) = fresl(:,i) + fresli(:,i)
762      enddo
763
764      enddo ! end of loop over integration points
765
766
767c... scatter locally filtered quantities to the global nodes
768
769      do j = 1,nshl
770      do nel = 1,npro
771        fres(ien(nel,j),:) = fres(ien(nel,j),:) + fresl(nel,:)
772      enddo
773      enddo
774
775      return
776      end
777
778
779c...  The filter operator used here uses the generalized hat (witch hat)
780c...  kernel
781
782      subroutine hfilterBB (y, x, ien, hfres, shgl, shp, Qwtf)
783
784      include "common.h"
785
786      dimension y(nshg,5),             hfres(nshg,24)
787      dimension x(numnp,3),            xl(npro,nenl,3)
788      dimension ien(npro,nshl),        yl(npro,nshl,5),
789     &          fresl(npro,nshl,24),        WdetJ(npro),
790     &          u1(npro),              u2(npro),
791     &          u3(npro),              rho(npro),
792     &          S11(npro),             S22(npro),
793     &          S33(npro),             S12(npro),
794     &          S13(npro),             S23(npro),
795     &          dxdxi(npro,nsd,nsd),   dxidx(npro,nsd,nsd),
796     &          shgl(nsd,nshl,ngauss),       shg(npro,nshl,nsd),
797     &          shp(nshl,ngauss),
798     &          fresli(npro,nshl,24),   Qwtf(ngaussf),
799     &          strnrmi(npro)
800
801
802      dimension tmp(npro)
803
804      call local (y,      yl,     ien,    5,  'gather  ')
805      call localx (x,      xl,     ien,    3,  'gather  ')
806c
807
808      fresl = zero
809
810c
811      do intp = 1, ngaussf   ! Loop over quadrature points
812
813c  calculate the metrics
814c
815c
816c.... --------------------->  Element Metrics  <-----------------------
817c
818c.... compute the deformation gradient
819c
820         dxdxi = zero
821c
822         do n = 1, nenl
823            dxdxi(:,1,1) = dxdxi(:,1,1) + xl(:,n,1) * shgl(1,n,intp)
824            dxdxi(:,1,2) = dxdxi(:,1,2) + xl(:,n,1) * shgl(2,n,intp)
825            dxdxi(:,1,3) = dxdxi(:,1,3) + xl(:,n,1) * shgl(3,n,intp)
826            dxdxi(:,2,1) = dxdxi(:,2,1) + xl(:,n,2) * shgl(1,n,intp)
827            dxdxi(:,2,2) = dxdxi(:,2,2) + xl(:,n,2) * shgl(2,n,intp)
828            dxdxi(:,2,3) = dxdxi(:,2,3) + xl(:,n,2) * shgl(3,n,intp)
829            dxdxi(:,3,1) = dxdxi(:,3,1) + xl(:,n,3) * shgl(1,n,intp)
830            dxdxi(:,3,2) = dxdxi(:,3,2) + xl(:,n,3) * shgl(2,n,intp)
831            dxdxi(:,3,3) = dxdxi(:,3,3) + xl(:,n,3) * shgl(3,n,intp)
832         enddo
833c
834c.... compute the inverse of deformation gradient
835c
836         dxidx(:,1,1) =   dxdxi(:,2,2) * dxdxi(:,3,3)
837     &        - dxdxi(:,3,2) * dxdxi(:,2,3)
838         dxidx(:,1,2) =   dxdxi(:,3,2) * dxdxi(:,1,3)
839     &        - dxdxi(:,1,2) * dxdxi(:,3,3)
840         dxidx(:,1,3) =   dxdxi(:,1,2) * dxdxi(:,2,3)
841     &        - dxdxi(:,1,3) * dxdxi(:,2,2)
842         tmp          = one / ( dxidx(:,1,1) * dxdxi(:,1,1)
843     &        + dxidx(:,1,2) * dxdxi(:,2,1)
844     &        + dxidx(:,1,3) * dxdxi(:,3,1) )
845         dxidx(:,1,1) = dxidx(:,1,1) * tmp
846         dxidx(:,1,2) = dxidx(:,1,2) * tmp
847         dxidx(:,1,3) = dxidx(:,1,3) * tmp
848         dxidx(:,2,1) = (dxdxi(:,2,3) * dxdxi(:,3,1)
849     &        - dxdxi(:,2,1) * dxdxi(:,3,3)) * tmp
850         dxidx(:,2,2) = (dxdxi(:,1,1) * dxdxi(:,3,3)
851     &        - dxdxi(:,3,1) * dxdxi(:,1,3)) * tmp
852         dxidx(:,2,3) = (dxdxi(:,2,1) * dxdxi(:,1,3)
853     &        - dxdxi(:,1,1) * dxdxi(:,2,3)) * tmp
854         dxidx(:,3,1) = (dxdxi(:,2,1) * dxdxi(:,3,2)
855     &        - dxdxi(:,2,2) * dxdxi(:,3,1)) * tmp
856         dxidx(:,3,2) = (dxdxi(:,3,1) * dxdxi(:,1,2)
857     &        - dxdxi(:,1,1) * dxdxi(:,3,2)) * tmp
858         dxidx(:,3,3) = (dxdxi(:,1,1) * dxdxi(:,2,2)
859     &        - dxdxi(:,1,2) * dxdxi(:,2,1)) * tmp
860c
861c        wght=Qwt(lcsyst,intp)  ! may be different now
862         wght=Qwtf(intp)
863         WdetJ(:) = wght / tmp(:)
864
865
866c... compute the gradient of shape functions at the quad. point.
867
868
869      do n = 1,nshl
870        shg(:,n,1) = (shgl(1,n,intp) * dxidx(:,1,1)
871     &              + shgl(2,n,intp) * dxidx(:,2,1)
872     &              + shgl(3,n,intp) * dxidx(:,3,1))
873        shg(:,n,2) = (shgl(1,n,intp) * dxidx(:,1,2)
874     &              + shgl(2,n,intp) * dxidx(:,2,2)
875     &              + shgl(3,n,intp) * dxidx(:,3,2))
876        shg(:,n,3) = (shgl(1,n,intp) * dxidx(:,1,3)
877     &              + shgl(2,n,intp) * dxidx(:,2,3)
878     &              + shgl(3,n,intp) * dxidx(:,3,3))
879      enddo
880
881
882c... compute the velocities and the strain rate tensor at the quad. point
883
884
885         u1  = zero
886         u2  = zero
887         u3  = zero
888         S11 = zero
889         S22 = zero
890         S33 = zero
891         S12 = zero
892         S13 = zero
893         S23 = zero
894         do i=1,nshl
895            u1 = u1 + shp(i,intp)*yl(:,i,2)
896            u2 = u2 + shp(i,intp)*yl(:,i,3)
897            u3 = u3 + shp(i,intp)*yl(:,i,4)
898
899            S11 = S11 + shg(:,i,1)*yl(:,i,2)
900            S22 = S22 + shg(:,i,2)*yl(:,i,3)
901            S33 = S33 + shg(:,i,3)*yl(:,i,4)
902
903            S12 = S12 + shg(:,i,2)*yl(:,i,2)
904     &                       +shg(:,i,1)*yl(:,i,3)
905            S13 = S13 + shg(:,i,3)*yl(:,i,2)
906     &                       +shg(:,i,1)*yl(:,i,4)
907            S23 = S23 + shg(:,i,3)*yl(:,i,3)
908     &                       +shg(:,i,2)*yl(:,i,4)
909         enddo
910         S12 = pt5 * S12
911         S13 = pt5 * S13
912         S23 = pt5 * S23
913
914c... Get the strain rate norm at the quad pts
915
916         strnrmi = sqrt( two*(S11**2 + S22**2 + S33**2)
917     &        + four*(S12**2 + S13**2 + S23**2) )
918
919
920c... Loop over element nodes and multiply u_{i} and S_{i,j} by the
921c... hat kernel and the Jacobian over the current element evaluated at
922c... the current quad. point.
923
924         do i = 1, nenl   ! Loop over element nodes
925
926            fresli(:,i,1) = WdetJ * u1 * shp(i,intp) !G * u1 * WdetJ
927            fresli(:,i,2) = WdetJ * u2 * shp(i,intp) !G * u2 * WdetJ
928            fresli(:,i,3) = WdetJ * u3 * shp(i,intp) !G * u3 * WdetJ
929
930            fresli(:,i,4) = WdetJ * u1 * u1 * shp(i,intp) ! G*u1*u1*WdetJ
931            fresli(:,i,5) = WdetJ * u2 * u2 * shp(i,intp) ! G*u2*u2*WdetJ
932            fresli(:,i,6) = WdetJ * u3 * u3 * shp(i,intp) ! G*u3*u3*WdetJ
933            fresli(:,i,7) = WdetJ * u1 * u2 * shp(i,intp) ! G*u1*u2*WdetJ
934            fresli(:,i,8) = WdetJ * u1 * u3 * shp(i,intp) ! G*u1*u3*WdetJ
935            fresli(:,i,9) = WdetJ * u2 * u3 * shp(i,intp) ! G*u2*u3*WdetJ
936
937            fresli(:,i,10) = S11 * shp(i,intp) * WdetJ ! G*S_{1,1}*WdetJ
938            fresli(:,i,11) = S22 * shp(i,intp) * WdetJ ! G*S_{2,2}*WdetJ
939            fresli(:,i,12) = S33 * shp(i,intp) * WdetJ ! G*S_{3,3}*WdetJ
940            fresli(:,i,13) = S12 * shp(i,intp) * WdetJ ! G*S_{1,1}*WdetJ
941            fresli(:,i,14) = S13 * shp(i,intp) * WdetJ ! G*S_{1,3}*WdetJ
942            fresli(:,i,15) = S23 * shp(i,intp) * WdetJ ! G*S_{2,3}*WdetJ
943
944            fresli(:,i,22) = WdetJ*shp(i,intp) !Integral of filter kernel, G,
945c                                               over the element
946
947c...   Get G*|S|*S_{i,j}*WdetJ
948
949            fresli(:,i,16) = S11 * strnrmi * shp(i,intp) * WdetJ
950            fresli(:,i,17) = S22 * strnrmi * shp(i,intp) * WdetJ
951            fresli(:,i,18) = S33 * strnrmi * shp(i,intp) * WdetJ
952            fresli(:,i,19) = S12 * strnrmi * shp(i,intp) * WdetJ
953            fresli(:,i,20) = S13 * strnrmi * shp(i,intp) * WdetJ
954            fresli(:,i,21) = S23 * strnrmi * shp(i,intp) * WdetJ
955
956            do j = 1, 22 ! Add contribution of each quad. point for each
957c                          element node
958               fresl(:,i,j) = fresl(:,i,j) + fresli(:,i,j)
959            enddo
960
961         enddo                  !end loop over element nodes
962
963      enddo                     !end of loop over integration points
964
965
966      call local (hfres, fresl, ien, 24, 'scatter ')
967
968      return
969      end
970
971      subroutine ediss (y,           ac,         shgl,
972     &                  shp,         iper,       ilwork,
973     &                  nsons,       ifath,      x,
974     &                  iBC,    BC,  xavegt)
975
976
977      use pvsQbi           ! brings in NABI
978      use stats            !
979      use pointer_data     ! brings in the pointers for the blocked arrays
980      use local_mass
981      use rlssave  ! Use the resolved Leonard stresses at the nodes.
982      use quadfilt ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf),
983c                    shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf).
984c                    Shpf and shglf are the shape funciotns and their
985c                    gradient evaluated using the quadrature rule desired
986c                    for computing the dmod. Qwt contains the weights of the
987c                    quad. points.
988
989
990
991      include "common.h"
992      include "mpif.h"
993      include "auxmpi.h"
994
995
996      dimension y(nshg,ndof),                  ac(nshg,ndof),
997     &          ifath(nshg),                   nsons(nshg),
998     &          iper(nshg),                    ilwork(nlwork),
999     &          shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT),
1000     &          x(numnp,3),
1001     &          qres(nshg,nsd*nsd),             rmass(nshg),
1002     &          iBC(nshg),                      BC(nshg,ndofBC),
1003     &          cdelsq(nshg),                   vol(nshg),
1004     &          stress(nshg,9),                 diss(nshg,3),
1005     &          xave(nshg,12),                  xaveg(nfath,12),
1006     &          xavegr(nfath,12),           xavegt(nfath,12),
1007     &          rnum(nfath),                rden(nfath)
1008
1009
1010c.... First let us obtain cdelsq at each node in the domain.
1011c.... We use numNden which lives in the quadfilt module.
1012
1013      rnum(ifath(:)) = numNden(:,1)
1014      rden(ifath(:)) = numNden(:,2)
1015
1016c      if (myrank .eq. master) then
1017c         write(*,*) 'rnum25=', rnum(25), rden(25)
1018c         write(*,*) 'rnum26=', rnum(26), rden(26)
1019c      endif
1020
1021      cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09)
1022c      cdelsq(:) = zero ! Debugging
1023
1024      if (istep .eq. 0) then
1025         xavegt = zero  ! For averaging dissipations and SUPG stresses
1026      endif
1027
1028        if (idiff==1 .or. idiff==3) then ! global reconstruction of qdiff
1029c
1030c loop over element blocks for the global reconstruction
1031c of the diffusive flux vector, q, and lumped mass matrix, rmass
1032c
1033           qres = zero
1034           rmass = zero
1035
1036           do iblk = 1, nelblk
1037              iel    = lcblk(1,iblk)
1038              lelCat = lcblk(2,iblk)
1039              lcsyst = lcblk(3,iblk)
1040              iorder = lcblk(4,iblk)
1041              nenl   = lcblk(5,iblk) ! no. of vertices per element
1042              nshl   = lcblk(10,iblk)
1043              mattyp = lcblk(7,iblk)
1044              ndofl  = lcblk(8,iblk)
1045              nsymdl = lcblk(9,iblk)
1046              npro   = lcblk(1,iblk+1) - iel
1047              ngauss = nint(lcsyst)
1048c
1049c.... compute and assemble diffusive flux vector residual, qres,
1050c     and lumped mass matrix, rmass
1051
1052              call AsIq (y,                x,
1053     &                   shp(lcsyst,1:nshl,:),
1054     &                   shgl(lcsyst,:,1:nshl,:),
1055     &                   mien(iblk)%p,     mxmudmi(iblk)%p,
1056     &                   qres,             rmass )
1057           enddo
1058
1059c
1060c.... form the diffusive flux approximation
1061c
1062           call qpbc( rmass, qres, iBC, BC, iper, ilwork )
1063c
1064        endif
1065
1066
1067c.... form the SUPG stresses well as dissipation due to eddy viscosity,
1068c...  and SUPG stabilization.
1069
1070
1071        stress = zero
1072        vol    = zero
1073        diss   = zero
1074
1075      do iblk = 1,nelblk
1076        lcsyst = lcblk(3,iblk)
1077        iel  = lcblk(1,iblk) !Element number where this block begins
1078        npro = lcblk(1,iblk+1) - iel
1079        lelCat = lcblk(2,iblk)
1080        nenl = lcblk(5,iblk)
1081        nshl = lcblk(10,iblk)
1082        inum  = iel + npro - 1
1083
1084        ngauss = nint(lcsyst)
1085        ngaussf = nintf(lcsyst)
1086
1087        call SUPGstress (y, ac, x, qres, mien(iblk)%p, mxmudmi(iblk)%p,
1088     &                   cdelsq, shglf(lcsyst,:,1:nshl,:),
1089     &                   shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf),
1090     &                   shgl(lcsyst,:,1:nshl,:), shp(lcsyst,1:nshl,:),
1091     &                   stress, diss, vol)
1092
1093      enddo
1094
1095      if(numpe>1) call commu (stress, ilwork, 9, 'in ')
1096      if(numpe>1) call commu (diss, ilwork, 3, 'in ')
1097      if(numpe>1) call commu (vol, ilwork, 1, 'in ')
1098
1099c
1100c account for periodicity
1101c
1102      do j = 1,nshg
1103        i = iper(j)
1104        if (i .ne. j) then
1105           stress(i,:) = stress(i,:) + stress(j,:)
1106           diss(i,:)   = diss(i,:)   + diss(j,:)
1107           vol(i)      = vol(i)      + vol(j)
1108        endif
1109      enddo
1110
1111      do j = 1,nshg
1112        i = iper(j)
1113        if (i .ne. j) then
1114           stress(j,:) = stress(i,:)
1115           diss(j,:)   = diss(i,:)
1116           vol(j)      = vol(i)
1117        endif
1118      enddo
1119
1120      if(numpe>1) call commu (stress, ilwork, 9, 'out ')
1121      if(numpe>1) call commu (diss, ilwork, 3, 'out ')
1122      if(numpe>1) call commu (vol, ilwork, 1, 'out ')
1123
1124      vol = one / vol
1125      do i = 1, 9
1126         stress(:,i) = stress(:,i)*vol(:)
1127      enddo
1128      do i = 1, 3
1129         diss(:,i) = diss(:,i)*vol(:)
1130      enddo
1131
1132c---------- > Begin averaging dissipations and SUPG stress <--------------
1133
1134      do i = 1, 9
1135         xave(:,i) = stress(:,i)
1136      enddo
1137      xave(:,10) = diss(:,1)
1138      xave(:,11) = diss(:,2)
1139      xave(:,12) = diss(:,3)
1140
1141c  zero on processor periodic nodes so that they will not be added twice
1142        do j = 1,numnp
1143          i = iper(j)
1144          if (i .ne. j) then
1145            xave(j,:) = zero
1146          endif
1147        enddo
1148
1149      if (numpe.gt.1) then
1150
1151         numtask = ilwork(1)
1152         itkbeg = 1
1153
1154c zero the nodes that are "solved" on the other processors
1155         do itask = 1, numtask
1156
1157            iacc   = ilwork (itkbeg + 2)
1158            numseg = ilwork (itkbeg + 4)
1159
1160            if (iacc .eq. 0) then
1161               do is = 1,numseg
1162                  isgbeg = ilwork (itkbeg + 3 + 2*is)
1163                  lenseg = ilwork (itkbeg + 4 + 2*is)
1164                  isgend = isgbeg + lenseg - 1
1165                  xave(isgbeg:isgend,:) = zero
1166               enddo
1167            endif
1168
1169            itkbeg = itkbeg + 4 + 2*numseg
1170
1171         enddo
1172
1173      endif
1174c
1175
1176      xaveg = zero
1177      do i = 1,nshg
1178         xaveg(ifath(i),:) = xaveg(ifath(i),:) + xave(i,:)
1179      enddo
1180
1181      if(numpe .gt. 1)then
1182         call drvAllreduce(xaveg, xavegr,12*nfath)
1183
1184         do m = 1, 12
1185            xavegr(:,m) = xavegr(:,m)/nsons(:)
1186         enddo
1187
1188         if (myrank .eq. master) then
1189            write(*,*)'diss=', xavegt(14,11), xavegr(14,11)
1190         endif
1191
1192         do m = 1, 12
1193            xavegt(:,m) = xavegt(:,m) + xavegr(:,m)
1194         enddo
1195
1196      else
1197
1198         do m = 1, 12
1199            xaveg(:,m) = xaveg(:,m)/nsons(:)
1200         enddo
1201
1202         do m = 1, 12
1203            xavegt(:,m) = xavegt(:,m) + xaveg(:,m)
1204         enddo
1205
1206      endif
1207
1208      if (myrank .eq. master) then
1209         write(*,*)'diss=', xavegt(14,11), xavegr(14,11)
1210      endif
1211
1212      if ( istep .eq. (nstep(1)-1) ) then
1213         if ( myrank .eq. master) then
1214
1215            do i = 1, nfath
1216               write(376,*)xavegt(i,1),xavegt(i,2),xavegt(i,3)
1217               write(377,*)xavegt(i,4),xavegt(i,5),xavegt(i,6)
1218               write(378,*)xavegt(i,7),xavegt(i,8),xavegt(i,9)
1219               write(379,*)xavegt(i,10),xavegt(i,11),xavegt(i,12)
1220            enddo
1221
1222            call flush(376)
1223            call flush(377)
1224            call flush(378)
1225            call flush(379)
1226
1227         endif
1228      endif
1229
1230
1231      return
1232
1233      end
1234
1235
1236      subroutine resSij (y, x, ien, hfres, shgl, shp, Qwtf)
1237
1238      include "common.h"
1239
1240      dimension y(nshg,5),             hfres(nshg,24)
1241      dimension x(numnp,3),            xl(npro,nenl,3)
1242      dimension ien(npro,nshl),        yl(npro,nshl,5),
1243     &          fresl(npro,nshl,24),        WdetJ(npro),
1244     &          u1(npro),              u2(npro),
1245     &          u3(npro),              rho(npro),
1246     &          S11(npro),             S22(npro),
1247     &          S33(npro),             S12(npro),
1248     &          S13(npro),             S23(npro),
1249     &          dxdxi(npro,nsd,nsd),   dxidx(npro,nsd,nsd),
1250     &          shgl(nsd,nshl,ngauss),       shg(npro,nshl,nsd),
1251     &          shp(nshl,ngauss),
1252     &          fresli(npro,nshl,24),   Qwtf(ngaussf),
1253     &          strnrmi(npro)
1254
1255
1256      dimension tmp(npro)
1257
1258      call local (y,      yl,     ien,    5,  'gather  ')
1259      call localx (x,      xl,     ien,    3,  'gather  ')
1260c
1261
1262      fresl = zero
1263
1264c
1265      do intp = 1, ngaussf   ! Loop over quadrature points
1266
1267c  calculate the metrics
1268c
1269c
1270c.... --------------------->  Element Metrics  <-----------------------
1271c
1272c.... compute the deformation gradient
1273c
1274         dxdxi = zero
1275c
1276         do n = 1, nenl
1277            dxdxi(:,1,1) = dxdxi(:,1,1) + xl(:,n,1) * shgl(1,n,intp)
1278            dxdxi(:,1,2) = dxdxi(:,1,2) + xl(:,n,1) * shgl(2,n,intp)
1279            dxdxi(:,1,3) = dxdxi(:,1,3) + xl(:,n,1) * shgl(3,n,intp)
1280            dxdxi(:,2,1) = dxdxi(:,2,1) + xl(:,n,2) * shgl(1,n,intp)
1281            dxdxi(:,2,2) = dxdxi(:,2,2) + xl(:,n,2) * shgl(2,n,intp)
1282            dxdxi(:,2,3) = dxdxi(:,2,3) + xl(:,n,2) * shgl(3,n,intp)
1283            dxdxi(:,3,1) = dxdxi(:,3,1) + xl(:,n,3) * shgl(1,n,intp)
1284            dxdxi(:,3,2) = dxdxi(:,3,2) + xl(:,n,3) * shgl(2,n,intp)
1285            dxdxi(:,3,3) = dxdxi(:,3,3) + xl(:,n,3) * shgl(3,n,intp)
1286         enddo
1287c
1288c.... compute the inverse of deformation gradient
1289c
1290         dxidx(:,1,1) =   dxdxi(:,2,2) * dxdxi(:,3,3)
1291     &        - dxdxi(:,3,2) * dxdxi(:,2,3)
1292         dxidx(:,1,2) =   dxdxi(:,3,2) * dxdxi(:,1,3)
1293     &        - dxdxi(:,1,2) * dxdxi(:,3,3)
1294         dxidx(:,1,3) =   dxdxi(:,1,2) * dxdxi(:,2,3)
1295     &        - dxdxi(:,1,3) * dxdxi(:,2,2)
1296         tmp          = one / ( dxidx(:,1,1) * dxdxi(:,1,1)
1297     &        + dxidx(:,1,2) * dxdxi(:,2,1)
1298     &        + dxidx(:,1,3) * dxdxi(:,3,1) )
1299         dxidx(:,1,1) = dxidx(:,1,1) * tmp
1300         dxidx(:,1,2) = dxidx(:,1,2) * tmp
1301         dxidx(:,1,3) = dxidx(:,1,3) * tmp
1302         dxidx(:,2,1) = (dxdxi(:,2,3) * dxdxi(:,3,1)
1303     &        - dxdxi(:,2,1) * dxdxi(:,3,3)) * tmp
1304         dxidx(:,2,2) = (dxdxi(:,1,1) * dxdxi(:,3,3)
1305     &        - dxdxi(:,3,1) * dxdxi(:,1,3)) * tmp
1306         dxidx(:,2,3) = (dxdxi(:,2,1) * dxdxi(:,1,3)
1307     &        - dxdxi(:,1,1) * dxdxi(:,2,3)) * tmp
1308         dxidx(:,3,1) = (dxdxi(:,2,1) * dxdxi(:,3,2)
1309     &        - dxdxi(:,2,2) * dxdxi(:,3,1)) * tmp
1310         dxidx(:,3,2) = (dxdxi(:,3,1) * dxdxi(:,1,2)
1311     &        - dxdxi(:,1,1) * dxdxi(:,3,2)) * tmp
1312         dxidx(:,3,3) = (dxdxi(:,1,1) * dxdxi(:,2,2)
1313     &        - dxdxi(:,1,2) * dxdxi(:,2,1)) * tmp
1314c
1315c        wght=Qwt(lcsyst,intp)  ! may be different now
1316         wght=Qwtf(intp)
1317         WdetJ(:) = wght / tmp(:)
1318
1319
1320c... compute the gradient of shape functions at the quad. point.
1321
1322
1323      do n = 1,nshl
1324        shg(:,n,1) = (shgl(1,n,intp) * dxidx(:,1,1)
1325     &              + shgl(2,n,intp) * dxidx(:,2,1)
1326     &              + shgl(3,n,intp) * dxidx(:,3,1))
1327        shg(:,n,2) = (shgl(1,n,intp) * dxidx(:,1,2)
1328     &              + shgl(2,n,intp) * dxidx(:,2,2)
1329     &              + shgl(3,n,intp) * dxidx(:,3,2))
1330        shg(:,n,3) = (shgl(1,n,intp) * dxidx(:,1,3)
1331     &              + shgl(2,n,intp) * dxidx(:,2,3)
1332     &              + shgl(3,n,intp) * dxidx(:,3,3))
1333      enddo
1334
1335
1336c... compute the velocities and the strain rate tensor at the quad. point
1337
1338
1339         u1  = zero
1340         u2  = zero
1341         u3  = zero
1342         S11 = zero
1343         S22 = zero
1344         S33 = zero
1345         S12 = zero
1346         S13 = zero
1347         S23 = zero
1348         do i=1,nshl
1349            u1 = u1 + shp(i,intp)*yl(:,i,2)
1350            u2 = u2 + shp(i,intp)*yl(:,i,3)
1351            u3 = u3 + shp(i,intp)*yl(:,i,4)
1352
1353            S11 = S11 + shg(:,i,1)*yl(:,i,2)
1354            S22 = S22 + shg(:,i,2)*yl(:,i,3)
1355            S33 = S33 + shg(:,i,3)*yl(:,i,4)
1356
1357            S12 = S12 + shg(:,i,2)*yl(:,i,2)
1358     &                       +shg(:,i,1)*yl(:,i,3)
1359            S13 = S13 + shg(:,i,3)*yl(:,i,2)
1360     &                       +shg(:,i,1)*yl(:,i,4)
1361            S23 = S23 + shg(:,i,3)*yl(:,i,3)
1362     &                       +shg(:,i,2)*yl(:,i,4)
1363         enddo
1364         S12 = pt5 * S12
1365         S13 = pt5 * S13
1366         S23 = pt5 * S23
1367
1368c... Get the strain rate norm at the quad pts
1369
1370         strnrmi = sqrt( two*(S11**2 + S22**2 + S33**2)
1371     &        + four*(S12**2 + S13**2 + S23**2) )
1372
1373
1374c... Loop over element nodes and multiply u_{i} and S_{i,j} by the
1375c... hat kernel and the Jacobian over the current element evaluated at
1376c... the current quad. point.
1377
1378         do i = 1, nenl   ! Loop over element nodes
1379
1380            fresli(:,i,1) = WdetJ * u1 * shp(i,intp) !G * u1 * WdetJ
1381            fresli(:,i,2) = WdetJ * u2 * shp(i,intp) !G * u2 * WdetJ
1382            fresli(:,i,3) = WdetJ * u3 * shp(i,intp) !G * u3 * WdetJ
1383
1384            fresli(:,i,4) = WdetJ * u1 * u1 * shp(i,intp) ! G*u1*u1*WdetJ
1385            fresli(:,i,5) = WdetJ * u2 * u2 * shp(i,intp) ! G*u2*u2*WdetJ
1386            fresli(:,i,6) = WdetJ * u3 * u3 * shp(i,intp) ! G*u3*u3*WdetJ
1387            fresli(:,i,7) = WdetJ * u1 * u2 * shp(i,intp) ! G*u1*u2*WdetJ
1388            fresli(:,i,8) = WdetJ * u1 * u3 * shp(i,intp) ! G*u1*u3*WdetJ
1389            fresli(:,i,9) = WdetJ * u2 * u3 * shp(i,intp) ! G*u2*u3*WdetJ
1390
1391            fresli(:,i,10) = S11 * shp(i,intp) * WdetJ ! G*S_{1,1}*WdetJ
1392            fresli(:,i,11) = S22 * shp(i,intp) * WdetJ ! G*S_{2,2}*WdetJ
1393            fresli(:,i,12) = S33 * shp(i,intp) * WdetJ ! G*S_{3,3}*WdetJ
1394            fresli(:,i,13) = S12 * shp(i,intp) * WdetJ ! G*S_{1,1}*WdetJ
1395            fresli(:,i,14) = S13 * shp(i,intp) * WdetJ ! G*S_{1,3}*WdetJ
1396            fresli(:,i,15) = S23 * shp(i,intp) * WdetJ ! G*S_{2,3}*WdetJ
1397
1398
1399            fresli(:,i,16) = strnrmi*strnrmi*strnrmi*shp(i,intp)*WdetJ
1400
1401            fresli(:,i,22) = WdetJ*shp(i,intp) !Integral of filter kernel, G,
1402c                                               over the element
1403
1404c...   Get G*|S|*S_{i,j}*WdetJ
1405
1406c            fresli(:,i,16) = S11 * strnrmi * shp(i,intp) * WdetJ
1407            fresli(:,i,17) = S22 * strnrmi * shp(i,intp) * WdetJ
1408            fresli(:,i,18) = S33 * strnrmi * shp(i,intp) * WdetJ
1409            fresli(:,i,19) = S12 * strnrmi * shp(i,intp) * WdetJ
1410            fresli(:,i,20) = S13 * strnrmi * shp(i,intp) * WdetJ
1411            fresli(:,i,21) = S23 * strnrmi * shp(i,intp) * WdetJ
1412
1413            do j = 1, 22 ! Add contribution of each quad. point for each
1414c                          element node
1415               fresl(:,i,j) = fresl(:,i,j) + fresli(:,i,j)
1416            enddo
1417
1418         enddo                  !end loop over element nodes
1419
1420      enddo                     !end of loop over integration points
1421
1422
1423      call local (hfres, fresl, ien, 24, 'scatter ')
1424
1425      return
1426      end
1427
1428
1429
1430	subroutine sparseCG (rhsorig, trx, lhsM, row, col, iper,
1431     &     ilwork, iBC, BC)
1432c
1433c------------------------------------------------------------------------
1434c
1435c  This subroutine uses Conjugate Gradient,
1436c to solve the system of equations.
1437c
1438c Farzin Shakib,  Summer 1987.
1439c------------------------------------------------------------------------
1440c
1441	include "common.h"
1442c
1443	dimension rhsorig(nshg), trx(nshg)
1444c
1445	dimension d(nshg),     p(nshg),
1446     &            q(nshg),     ilwork(nlwork),
1447     &		  Dinv(nshg),  rhs(nshg),
1448     &            pp(nshg),
1449     &            iBC(nshg),
1450     &            BC(nshg)
1451
1452
1453        integer   row(nshg*nnz),         col(nshg+1)
1454	integer   iBCdumb(nshg),         iper(nshg)
1455
1456	real*8 BCdumb(nshg,ndofBC)
1457
1458	real*8	lhsM(nnz*nshg)
1459c
1460	data nitercf / 100 /
1461c
1462c
1463	BCdumb  = one
1464	iBCdumb = 1
1465c
1466	rhs(:)=rhsorig(:)
1467c
1468c.... Calculate the inverse of the diagonal of the mass matrix
1469c
1470c       call CFDinv (Dinv, emass)
1471c
1472c.... Left precondition the mass matrix and the RHS
1473c
1474c	call CFPre (Dinv, emass, rhs)
1475c
1476c Initialize. We have a system Ax=b We have made A as
1477c well conditionedand as we're willing to go.  Since
1478c we used left preconditioning (on the old A and old b),
1479c we don't need to do any back-preconditioning later.
1480c
1481	rr = 0
1482	do n = 1, nshg
1483	   rr  = rr + rhs(n)*rhs(n)
1484	enddo
1485c
1486c  if parallel the above is only this processors part of the dot product.
1487c  get the total result
1488c
1489	   dotTot=zero
1490	   call drvAllreduce(rr,dotTot,1)
1491	   rr=dotTot
1492	rr0 = rr
1493c
1494	trx(:) = zero ! x_{0}=0
1495c                   ! r_{0}=b
1496c
1497c                   ! beta_{1}=0
1498	p(:) = rhs(:) ! p_{1}=r_{0}=b
1499c
1500c.... Iterate
1501c
1502	do iter = 1, nitercf      ! 15  ! nitercf
1503c
1504c.... calculate alpha
1505c
1506	   pp=p   ! make a vector that we can copy masters onto slaves
1507		  ! and thereby make ready for an accurate Ap product
1508
1509#ifdef HAVE_LESLIB
1510	   call commOut(pp, ilwork, 1,iper,iBCdumb,BCdumb)  !slaves= master
1511
1512	   call fLesSparseApSclr(	col,	row,	lhsM,
1513     &					pp,	q,	nshg,
1514     &                                  nnz)
1515
1516	   call commIn(q, ilwork, 1,iper,iBC,BC) ! masters=masters+slaves
1517							 ! slaves zeroed
1518#else
1519           if(myrank.eq.0) write(*,*) 'need alt solver in filter.f'
1520           call error('filter   ','noleslib',1)
1521#endif
1522c	   call CFAp (p,  q) ! put Ap product in q
1523
1524c	   if(nump>1) call commu (q, ilwork, 1, 'in ')
1525
1526c	   do j = 1,nshg
1527c	      i = iper(j)
1528c	      if (i .ne. j) then
1529c		 q(i) = q(i) + q(j)
1530c	      endif
1531c	   enddo
1532
1533c	   do j = 1,nshg
1534c	      i = iper(j)
1535c	      if (i .ne. j) then
1536c		 q(j) = zero
1537c	      endif
1538c	   enddo
1539
1540c     Need to zero off-processor slaves as well.
1541
1542c      if (numpe.gt.1 .and. nsons(1).gt.1) then
1543
1544c         numtask = ilwork(1)
1545c         itkbeg = 1
1546
1547c zero the nodes that are "solved" on the other processors
1548
1549c         do itask = 1, numtask
1550
1551c            iacc   = ilwork (itkbeg + 2)
1552c            numseg = ilwork (itkbeg + 4)
1553
1554c            if (iacc .eq. 0) then
1555c               do is = 1,numseg
1556c                  isgbeg = ilwork (itkbeg + 3 + 2*is)
1557c                  lenseg = ilwork (itkbeg + 4 + 2*is)
1558c                  isgend = isgbeg + lenseg - 1
1559c                  q(isgbeg:isgend) = zero
1560c               enddo
1561c           endif
1562
1563c            itkbeg = itkbeg + 4 + 2*numseg
1564
1565c         enddo
1566
1567c	endif
1568
1569
1570
1571	   pap = 0
1572	   do  n = 1, nshg
1573	      pap = pap + p(n) * q(n)
1574	   enddo
1575c
1576c  if parallel the above is only this processors part of the dot product.
1577c  get the total result
1578c
1579           dotTot=zero
1580	   call drvAllreduce(pap,dotTot,1)
1581	   pap=dotTot
1582	   alpha = rr / pap
1583c
1584c.... calculate the next guess
1585c
1586	   trx(:) = trx(:) + alpha * p(:)
1587c
1588c.... calculate the new residual
1589c
1590c
1591	   rhs(:) = rhs(:) - alpha * q(:)
1592	   tmp = rr
1593	   rr = 0
1594	   do n = 1, nshg
1595	      rr = rr + rhs(n)*rhs(n)
1596	   enddo
1597c
1598c  if parallel the above is only this processors part of the dot product.
1599c  get the total result
1600c
1601           dotTot=zero
1602	   call drvAllreduce(rr,dotTot,1)
1603	   rr=dotTot
1604c
1605c.... check for convergence
1606c
1607	   if(rr.lt.100.*epsM**2) goto 6000
1608c
1609c.... calculate a new search direction
1610c
1611	   beta = rr / tmp
1612	   p(:) = rhs(:) + beta * p(:)
1613c
1614c.... end of iteration
1615c
1616	enddo
1617c
1618c.... if converged
1619c
16206000	continue
1621
1622c need a commu(out) on solution (TRX) to get slaves the correct solution AND
1623c on processor slaves = on processor masters
1624
1625	if(numpe>1) call commu (trx, ilwork, 1, 'out ')
1626	trx(:) = trx(iper(:))
1627
1628c
1629	write(*,9000) iter, rr / rr0
1630c	write(16,9000) iter, rr / rr0
1631c
1632c.... return
1633c
1634	return
16359000	format(20x,'  number of iterations:', i10,/,
1636     &	       20x,'    residual reduction:', 2x,e10.2)
1637	end
1638
1639      subroutine stdfdmc (y,      shgl,      shp,
1640     &                   iper,   ilwork,
1641     &                   nsons,  ifath,     x)
1642
1643      use pointer_data
1644
1645      use quadfilt   ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf),
1646c                    shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf).
1647c                    Shpf and shglf are the shape funciotns and their
1648c                    gradient evaluated using the quadrature rule desired
1649c                    for computing the dmod. Qwt contains the weights of the
1650c                    quad. points.
1651
1652
1653
1654      include "common.h"
1655      include "mpif.h"
1656      include "auxmpi.h"
1657
1658c
1659      dimension fres(nshg,24),         fwr(nshg),
1660     &          strnrm(nshg),         cdelsq(nshg),
1661     &          cdelsq2(nshg),
1662     &          xnum(nshg),           xden(nshg),
1663     &          xmij(nshg,6),         xlij(nshg,6),
1664     &          xnude(nfath,2),        xnuder(nfath,2),
1665     &          ynude(nfath,6),        ynuder(nfath,6)
1666      dimension ui(nfath,3),           snorm(nfath),
1667     &          uir(nfath,3),          snormr(nfath),
1668     &          xm(nfath,6),           xl(nfath,6),
1669     &          xl1(nfath,6),          xl2(nfath,6),
1670     &          xl1r(nfath,6),         xl2r(nfath,6),
1671     &          xmr(nfath,6),          xlr(nfath,6),
1672     &          nsons(nshg),
1673     &          strl(numel,ngauss)
1674      dimension y(nshg,5),            yold(nshg,5),
1675     &          ifath(nshg),          iper(nshg),
1676     &          ilwork(nlwork),!        xmudmi(numel,ngauss),
1677     &          x(numnp,3),
1678     &          shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT),
1679     &          xnutf(nfath),         xfac(nshg,5)
1680
1681      character*10 cname
1682      character*30 fname1, fname2, fname3, fname4, fname5, fname6,
1683     &             fname0
1684c
1685c
1686c   setup the weights for time averaging of cdelsq (now in quadfilt module)
1687c
1688      denom=max(1.0d0*(lstep),one)
1689      if(dtavei.lt.0) then
1690         wcur=one/denom
1691      else
1692         wcur=dtavei
1693      endif
1694      whist=1.0-wcur
1695
1696      if (istep .eq. 0) then
1697         xnd      = zero
1698         xmodcomp = zero
1699         xmcomp  = zero
1700         xlcomp  = zero
1701         xl1comp  = zero
1702         xl2comp  = zero
1703         ucomp    = zero
1704         scomp    = zero
1705      endif
1706
1707
1708      fres = zero
1709      yold(:,1)=y(:,4)
1710      yold(:,2:4)=y(:,1:3)
1711c
1712
1713c
1714c  hack in an interesting velocity field (uncomment to test dmod)
1715c
1716c      yold(:,5) = 1.0  ! Debugging
1717c      yold(:,2) = 2.0*x(:,1) - 3.0*x(:,2)
1718c      yold(:,2) = 2.0
1719c      yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2)
1720c      yold(:,3) = 3.0
1721c      yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3)
1722c      yold(:,4) = 4.0
1723c      yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable
1724c                               suitable for the
1725
1726
1727
1728      intrul=intg(1,itseq)
1729      intind=intpt(intrul)
1730
1731      do iblk = 1,nelblk
1732        lcsyst = lcblk(3,iblk)
1733        iel  = lcblk(1,iblk) !Element number where this block begins
1734        npro = lcblk(1,iblk+1) - iel
1735        lelCat = lcblk(2,iblk)
1736        nenl = lcblk(5,iblk)
1737        nshl = lcblk(10,iblk)
1738        inum  = iel + npro - 1
1739
1740        ngauss = nint(lcsyst)
1741        ngaussf = nintf(lcsyst)
1742
1743        call asithf (yold, x, strl(iel:inum,:), mien(iblk)%p, fres,
1744     &               shglf(lcsyst,:,1:nshl,:),
1745     &               shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
1746
1747      enddo
1748c
1749
1750c      if (ngaussf .ne. ngauss) then
1751      do iblk = 1,nelblk
1752        lcsyst = lcblk(3,iblk)
1753        iel  = lcblk(1,iblk) !Element number where this block begins
1754        npro = lcblk(1,iblk+1) - iel
1755        lelCat = lcblk(2,iblk)
1756        nenl = lcblk(5,iblk)
1757        nshl = lcblk(10,iblk)
1758        inum  = iel + npro - 1
1759
1760        ngauss = nint(lcsyst)
1761        ngaussf = nintf(lcsyst)
1762
1763        if (ngaussf .ne. ngauss) then
1764
1765        call getstrl (yold, x,      mien(iblk)%p,
1766     &               strl(iel:inum,:), shgl(lcsyst,:,1:nshl,:),
1767     &               shp(lcsyst,1:nshl,:))
1768
1769        endif
1770
1771      enddo
1772c      endif
1773c
1774c
1775C must fix for abc and dynamic model
1776c      if(iabc==1)   !are there any axisym bc's
1777c     &      call rotabc(res, iBC, BC,nflow, 'in ')
1778c
1779      if(numpe>1) call commu (fres, ilwork, 24, 'in ')
1780c
1781c account for periodicity in filtered variables
1782c
1783      do j = 1,nshg
1784        i = iper(j)
1785        if (i .ne. j) then
1786           fres(i,:) = fres(i,:) + fres(j,:)
1787        endif
1788      enddo
1789      do j = 1,nshg
1790        i = iper(j)
1791        if (i .ne. j) then
1792           fres(j,:) = fres(i,:)
1793        endif
1794      enddo
1795
1796      if(numpe>1)   call commu (fres, ilwork, 24, 'out')
1797
1798      fres(:,23) = one / fres(:,23)
1799      do j = 1,22
1800        fres(:,j) = fres(:,j) * fres(:,23)
1801      enddo
1802c     fres(:,24) = fres(:,24) * fres(:,23)
1803c
1804c.....at this point fres is really all of our filtered quantities
1805c     at the nodes
1806c
1807
1808      strnrm = sqrt(
1809     &  two * (fres(:,10)**2 + fres(:,11)**2 + fres(:,12)**2)
1810     &  + four * ( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) )
1811
1812      fwr = fwr1 * fres(:,22) * strnrm
1813
1814      xmij(:,1) = -fwr
1815     &             * fres(:,10) + fres(:,16)
1816      xmij(:,2) = -fwr
1817     &             * fres(:,11) + fres(:,17)
1818      xmij(:,3) = -fwr
1819     &             * fres(:,12) + fres(:,18)
1820
1821      xmij(:,4) = -fwr * fres(:,13) + fres(:,19)
1822      xmij(:,5) = -fwr * fres(:,14) + fres(:,20)
1823      xmij(:,6) = -fwr * fres(:,15) + fres(:,21)
1824
1825      fres(:,22) = one / fres(:,22)
1826
1827      xlij(:,1) = fres(:,4) - fres(:,1) * fres(:,1) * fres(:,22)
1828      xlij(:,2) = fres(:,5) - fres(:,2) * fres(:,2) * fres(:,22)
1829      xlij(:,3) = fres(:,6) - fres(:,3) * fres(:,3) * fres(:,22)
1830      xlij(:,4) = fres(:,7) - fres(:,1) * fres(:,2) * fres(:,22)
1831      xlij(:,5) = fres(:,8) - fres(:,1) * fres(:,3) * fres(:,22)
1832      xlij(:,6) = fres(:,9) - fres(:,2) * fres(:,3) * fres(:,22)
1833
1834      xnum =        xlij(:,1) * xmij(:,1) + xlij(:,2) * xmij(:,2)
1835     &                                    + xlij(:,3) * xmij(:,3)
1836     &     + two * (xlij(:,4) * xmij(:,4) + xlij(:,5) * xmij(:,5)
1837     &                                    + xlij(:,6) * xmij(:,6))
1838      xden =        xmij(:,1) * xmij(:,1) + xmij(:,2) * xmij(:,2)
1839     &                                    + xmij(:,3) * xmij(:,3)
1840     &     + two * (xmij(:,4) * xmij(:,4) + xmij(:,5) * xmij(:,5)
1841     &                                    + xmij(:,6) * xmij(:,6))
1842      xden = two * xden
1843
1844c... For collectection of statistics on dyn. model components
1845
1846      xfac(:,1) = strnrm*strnrm*( fres(:,10)**2 + fres(:,11)**2 +
1847     &     fres(:,12)**2
1848     &     + two*( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) )
1849
1850      xfac(:,2) = strnrm*( xlij(:,1)*fres(:,10) + xlij(:,2)*fres(:,11)
1851     &     + xlij(:,3)*fres(:,12) +
1852     &     two*(xlij(:,4)*fres(:,13) + xlij(:,5)*fres(:,14) +
1853     &     xlij(:,6)*fres(:,15)) )
1854
1855      xfac(:,3) = strnrm*( fres(:,10)*fres(:,16) + fres(:,11)*fres(:,17)
1856     &     + fres(:,12)*fres(:,18) +
1857     &     two*(fres(:,13)*fres(:,19) + fres(:,14)*fres(:,20) +
1858     &     fres(:,15)*fres(:,21)) )
1859
1860      xfac(:,4) = xlij(:,1)*fres(:,16) + xlij(:,2)*fres(:,17)
1861     &     + xlij(:,3)*fres(:,18) +
1862     &     two*(xlij(:,4)*fres(:,19) + xlij(:,5)*fres(:,20) +
1863     &     xlij(:,6)*fres(:,21))
1864
1865      xfac(:,5) = fres(:,16)*fres(:,16) + fres(:,17)*fres(:,17)
1866     &     + fres(:,18)*fres(:,18) +
1867     &     two*(fres(:,19)*fres(:,19) + fres(:,20)*fres(:,20) +
1868     &     fres(:,21)*fres(:,21))
1869
1870c  zero on processor periodic nodes so that they will not be added twice
1871        do j = 1,numnp
1872          i = iper(j)
1873          if (i .ne. j) then
1874            xnum(j) = zero
1875            xden(j) = zero
1876            xfac(j,:) = zero
1877            xmij(j,:) = zero
1878            xlij(j,:) = zero
1879            fres(j,:) = zero
1880            strnrm(j) = zero
1881          endif
1882        enddo
1883
1884      if (numpe.gt.1) then
1885
1886         numtask = ilwork(1)
1887         itkbeg = 1
1888
1889c zero the nodes that are "solved" on the other processors
1890         do itask = 1, numtask
1891
1892            iacc   = ilwork (itkbeg + 2)
1893            numseg = ilwork (itkbeg + 4)
1894
1895            if (iacc .eq. 0) then
1896               do is = 1,numseg
1897                  isgbeg = ilwork (itkbeg + 3 + 2*is)
1898                  lenseg = ilwork (itkbeg + 4 + 2*is)
1899                  isgend = isgbeg + lenseg - 1
1900                  xnum(isgbeg:isgend) = zero
1901                  xden(isgbeg:isgend) = zero
1902                  strnrm(isgbeg:isgend) = zero
1903                  xfac(isgbeg:isgend,:) = zero
1904                  xmij(isgbeg:isgend,:) = zero
1905                  xlij(isgbeg:isgend,:) = zero
1906                  fres(isgbeg:isgend,:) = zero
1907               enddo
1908            endif
1909
1910            itkbeg = itkbeg + 4 + 2*numseg
1911
1912         enddo
1913
1914      endif
1915c
1916c Description of arrays.   Each processor has an array of length equal
1917c to the total number of fathers times 2 xnude(nfathers,2). One to collect
1918c the numerator and one to collect the denominator.  There is also an array
1919c of length nshg on each processor which tells the father number of each
1920c on processor node, ifath(nnshg).  Finally, there is an arry of length
1921c nfathers to tell the total (on all processors combined) number of sons
1922c for each father.
1923c
1924c  Now loop over nodes and accumlate the numerator and the denominator
1925c  to the father nodes.  Only on processor addition at this point.
1926c  Note that serrogate fathers are collect some for the case where some
1927c  sons are on another processor
1928c
1929      xnude = zero
1930      ynude = zero
1931      xm    = zero
1932      xl    = zero
1933      xl1   = zero
1934      xl2   = zero
1935      ui    = zero
1936      snorm = zero
1937
1938      do i = 1,nshg
1939         xnude(ifath(i),1) = xnude(ifath(i),1) + xnum(i)
1940         xnude(ifath(i),2) = xnude(ifath(i),2) + xden(i)
1941
1942         ynude(ifath(i),1) = ynude(ifath(i),1) + xfac(i,1)
1943         ynude(ifath(i),2) = ynude(ifath(i),2) + xfac(i,2)
1944         ynude(ifath(i),3) = ynude(ifath(i),3) + xfac(i,3)
1945         ynude(ifath(i),4) = ynude(ifath(i),4) + xfac(i,4)
1946         ynude(ifath(i),5) = ynude(ifath(i),5) + xfac(i,5)
1947
1948         xm(ifath(i),1) = xm(ifath(i),1) + xmij(i,1)
1949         xm(ifath(i),2) = xm(ifath(i),2) + xmij(i,2)
1950         xm(ifath(i),3) = xm(ifath(i),3) + xmij(i,3)
1951         xm(ifath(i),4) = xm(ifath(i),4) + xmij(i,4)
1952         xm(ifath(i),5) = xm(ifath(i),5) + xmij(i,5)
1953         xm(ifath(i),6) = xm(ifath(i),6) + xmij(i,6)
1954
1955         xl(ifath(i),1) = xl(ifath(i),1) + xlij(i,1)
1956         xl(ifath(i),2) = xl(ifath(i),2) + xlij(i,2)
1957         xl(ifath(i),3) = xl(ifath(i),3) + xlij(i,3)
1958         xl(ifath(i),4) = xl(ifath(i),4) + xlij(i,4)
1959         xl(ifath(i),5) = xl(ifath(i),5) + xlij(i,5)
1960         xl(ifath(i),6) = xl(ifath(i),6) + xlij(i,6)
1961
1962         xl1(ifath(i),1) = xl1(ifath(i),1) + fres(i,4)
1963         xl1(ifath(i),2) = xl1(ifath(i),2) + fres(i,5)
1964         xl1(ifath(i),3) = xl1(ifath(i),3) + fres(i,6)
1965         xl1(ifath(i),4) = xl1(ifath(i),4) + fres(i,7)
1966         xl1(ifath(i),5) = xl1(ifath(i),5) + fres(i,8)
1967         xl1(ifath(i),6) = xl1(ifath(i),6) + fres(i,9)
1968
1969         xl2(ifath(i),1) = xl2(ifath(i),1) + fres(i,1)*fres(i,1)
1970         xl2(ifath(i),2) = xl2(ifath(i),2) + fres(i,2)*fres(i,2)
1971         xl2(ifath(i),3) = xl2(ifath(i),3) + fres(i,3)*fres(i,3)
1972         xl2(ifath(i),4) = xl2(ifath(i),4) + fres(i,1)*fres(i,2)
1973         xl2(ifath(i),5) = xl2(ifath(i),5) + fres(i,1)*fres(i,3)
1974         xl2(ifath(i),6) = xl2(ifath(i),6) + fres(i,2)*fres(i,3)
1975
1976         ui(ifath(i),1) = ui(ifath(i),1) + fres(i,1)
1977         ui(ifath(i),2) = ui(ifath(i),2) + fres(i,2)
1978         ui(ifath(i),3) = ui(ifath(i),3) + fres(i,3)
1979
1980         snorm(ifath(i)) = snorm(ifath(i)) + strnrm(i)
1981
1982      enddo
1983
1984c
1985c Now  the true fathers and serrogates combine results and update
1986c each other.
1987c
1988      if(numpe .gt. 1)then
1989         call drvAllreduce(xnude, xnuder,2*nfath)
1990         call drvAllreduce(ynude, ynuder,6*nfath)
1991         call drvAllreduce(xm, xmr,6*nfath)
1992         call drvAllreduce(xl, xlr,6*nfath)
1993         call drvAllreduce(xl1, xl1r,6*nfath)
1994         call drvAllreduce(xl2, xl2r,6*nfath)
1995         call drvAllreduce(ui, uir,3*nfath)
1996         call drvAllreduce(snorm, snormr,nfath)
1997
1998         do i = 1, nfath
1999            ynuder(i,6) = ( ynuder(i,4) - fwr1*ynuder(i,2) ) /
2000     &           ( two*ynuder(i,5) - four*fwr1*ynuder(i,3)
2001     &           + two*fwr1*fwr1*ynuder(i,1) )
2002         enddo
2003
2004         cdelsq2(:) = ynuder(ifath(:),6)  ! For comparison w/ cdelsq
2005c
2006c  xnude is the sum of the sons for each father on this processor
2007c
2008c  xnuder is the sum of the sons for each father on all processor combined
2009c  (the same as if we had not partitioned the mesh for each processor)
2010c
2011c   For each father we have precomputed the number of sons (including
2012c   the sons off processor).
2013c
2014c   Now divide by number of sons to get the average (not really necessary
2015c   for dynamic model since ratio will cancel nsons at each father)
2016c
2017         xnuder(:,1) = xnuder(:,1) / nsons(:)
2018         xnuder(:,2) = xnuder(:,2) / nsons(:)
2019
2020         do m = 1, 5
2021         ynuder(:,m) = ynuder(:,m)/nsons(:)
2022         enddo
2023         do m = 1,6
2024         xmr(:,m) = xmr(:,m)/nsons(:)
2025         xlr(:,m) = xlr(:,m)/nsons(:)
2026         xl1r(:,m) = xl1r(:,m)/nsons(:)
2027         xl2r(:,m) = xl2r(:,m)/nsons(:)
2028         enddo
2029
2030         uir(:,1) = uir(:,1)/nsons(:)
2031         uir(:,2) = uir(:,2)/nsons(:)
2032         uir(:,3) = uir(:,3)/nsons(:)
2033
2034         snormr(:) = snormr(:)/nsons(:)
2035c
2036cc  the next line is c \Delta^2
2037cc
2038cc         xnuder(:,1) = xnuder(:,1) / (xnuder(:,2) + 1.d-09)
2039cc         do i = 1,nshg
2040cc            cdelsq(i) = xnuder(ifath(i),1)
2041cc         enddo
2042
2043            numNden(:,1) = whist*numNden(:,1)+wcur*xnuder(ifath(:),1)
2044            numNden(:,2) = whist*numNden(:,2)+wcur*xnuder(ifath(:),2)
2045            cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09)
2046
2047c            cdelsq(:) = xnuder(ifath(:),1)/(xnuder(ifath(:),2)+1.d-09)
2048
2049            xnd(:,1) = xnd(:,1) + xnuder(:,1)
2050            xnd(:,2) = xnd(:,2) + xnuder(:,2)
2051
2052            xmodcomp(:,1) = xmodcomp(:,1)+ynuder(:,1)
2053            xmodcomp(:,2) = xmodcomp(:,2)+ynuder(:,2)
2054            xmodcomp(:,3) = xmodcomp(:,3)+ynuder(:,3)
2055            xmodcomp(:,4) = xmodcomp(:,4)+ynuder(:,4)
2056            xmodcomp(:,5) = xmodcomp(:,5)+ynuder(:,5)
2057
2058            xmcomp(:,:) = xmcomp(:,:)+xmr(:,:)
2059            xlcomp(:,:) = xlcomp(:,:)+xlr(:,:)
2060
2061            xl1comp(:,:) = xl1comp(:,:)+xl1r(:,:)
2062            xl2comp(:,:) = xl2comp(:,:)+xl2r(:,:)
2063
2064            ucomp(:,:) = ucomp(:,:)+uir(:,:)
2065            u1 = uir(32,1)
2066            scomp(:)   = scomp(:)+snormr(:)
2067
2068      else
2069
2070         xnude(:,1) = xnude(:,1)/nsons(:)
2071         xnude(:,2) = xnude(:,2)/nsons(:)
2072
2073         do m = 1, 5
2074         ynude(:,m) = ynude(:,m)/nsons(:)
2075         enddo
2076         do m = 1,6
2077         xm(:,m) = xm(:,m)/nsons(:)
2078         xl(:,m) = xl(:,m)/nsons(:)
2079         xl1(:,m) = xl1(:,m)/nsons(:)
2080         xl2(:,m) = xl2(:,m)/nsons(:)
2081         enddo
2082
2083         ui(:,1) = ui(:,1)/nsons(:)
2084         ui(:,2) = ui(:,2)/nsons(:)
2085         ui(:,3) = ui(:,3)/nsons(:)
2086
2087         snorm(:) = snorm(:)/nsons(:)
2088
2089c
2090c     the next line is c \Delta^2, not nu_T but we want to save the
2091c     memory
2092c
2093
2094cc         xnude(:,1) = xnude(:,1) / (xnude(:,2) + 1.d-09)
2095cc        do i = 1,nshg
2096cc            cdelsq(i) = xnude(ifath(i),1)
2097cc         enddo
2098cc      endif
2099
2100         do i = 1, nfath
2101            ynude(i,6) = ( ynude(i,4) - fwr1*ynude(i,2) ) /
2102     &           ( two*ynude(i,5) - four*fwr1*ynude(i,3)
2103     &           + fwr1*fwr1*ynude(i,1) )
2104         enddo
2105
2106            numNden(:,1) = whist*numNden(:,1)+wcur*xnude(ifath(:),1)
2107            numNden(:,2) = whist*numNden(:,2)+wcur*xnude(ifath(:),2)
2108
2109            xnd(:,1) = xnd(:,1)+xnude(:,1)
2110            xnd(:,2) = xnd(:,2)+xnude(:,2)
2111
2112            cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09)
2113
2114c            cdelsq(:) = xnude(ifath(:),1)/(xnude(ifath(:),2))!+1.d-09)
2115
2116
2117          cdelsq2(:) = ynude(ifath(:),6)  ! For comparison w/ cdelsq
2118
2119            xmodcomp(:,1) = xmodcomp(:,1)+ynude(:,1)
2120            xmodcomp(:,2) = xmodcomp(:,2)+ynude(:,2)
2121            xmodcomp(:,3) = xmodcomp(:,3)+ynude(:,3)
2122            xmodcomp(:,4) = xmodcomp(:,4)+ynude(:,4)
2123            xmodcomp(:,5) = xmodcomp(:,5)+ynude(:,5)
2124
2125            xmcomp(:,:) = xmcomp(:,:)+xm(:,:)
2126            xlcomp(:,:) = xlcomp(:,:)+xl(:,:)
2127
2128            xl1comp(:,:) = xl1comp(:,:)+xl1(:,:)
2129            xl2comp(:,:) = xl2comp(:,:)+xl2(:,:)
2130
2131            ucomp(:,:) = ucomp(:,:)+ui(:,:)
2132            scomp(:)   = scomp(:)+snorm(:)
2133
2134         endif
2135
2136c         do i = 1, nfath
2137c            xmodcomp(i,:) = xmodcomp(i,:)/nsons(i)
2138c            xmcomp(i,:) = xmcomp(i,:)/nsons(i)
2139c            xlcomp(i,:) = xlcomp(i,:)/nsons(i)
2140c            xl2comp(i,:) = xl2comp(i,:)/nsons(i)
2141c            xl1comp(i,:) = xl1comp(i,:)/nsons(i)
2142c            xnd(i,:) = xnd(i,:)/nsons(i)
2143c            scomp(i) = scomp(i)/nsons(i)
2144c            ucomp(i,:) = ucomp(i,:)/nsons(i)
2145c         enddo
2146
2147         if (myrank .eq. master) then
2148            write(*,*)'istep, nstep=', istep, nstep(1)
2149         endif
2150
2151         if ( istep .eq. (nstep(1)-1) ) then
2152         if ( myrank .eq. master) then
2153
2154            do i = 1, nfath
2155            write(365,*)xmodcomp(i,1),xmodcomp(i,2),xmodcomp(i,3),
2156     &              xmodcomp(i,4),xmodcomp(i,5)
2157
2158            write(366,*)xmcomp(i,1),xmcomp(i,2),xmcomp(i,3)
2159            write(367,*)xmcomp(i,4),xmcomp(i,5),xmcomp(i,6)
2160
2161            write(368,*)xlcomp(i,1),xlcomp(i,2),xlcomp(i,3)
2162            write(369,*)xlcomp(i,4),xlcomp(i,5),xlcomp(i,6)
2163
2164            write(370,*)xl1comp(i,1),xl1comp(i,2),xl1comp(i,3)
2165            write(371,*)xl1comp(i,4),xl1comp(i,5),xl1comp(i,6)
2166
2167            write(372,*)xl2comp(i,1),xl2comp(i,2),xl2comp(i,3)
2168            write(373,*)xl2comp(i,4),xl2comp(i,5),xl2comp(i,6)
2169
2170            write(374,*)xnd(i,1),xnd(i,2),scomp(i)
2171            write(375,*)ucomp(i,1),ucomp(i,2),ucomp(i,3)
2172            enddo
2173
2174            call flush(365)
2175            call flush(366)
2176            call flush(367)
2177            call flush(368)
2178            call flush(369)
2179            call flush(370)
2180            call flush(371)
2181            call flush(372)
2182            call flush(373)
2183            call flush(374)
2184            call flush(375)
2185
2186c            close(852)
2187c            close(853)
2188c            close(854)
2189
2190         endif
2191         endif
2192
2193            if (myrank .eq. master) then
2194               write(*,*)'uit uic=', ucomp(32,1),u1
2195            endif
2196
2197 555     format(e14.7,4(2x,e14.7))
2198 556     format(e14.7,5(2x,e14.7))
2199
2200
2201
2202
2203c $$$$$$$$$$$$$$$$$$$$$$$$$$$
2204      tmp1 =  MINVAL(cdelsq)
2205      tmp2 =  MAXVAL(cdelsq)
2206      if(numpe>1) then
2207         call MPI_REDUCE (tmp1, tmp3, 1,MPI_DOUBLE_PRECISION,
2208     &        MPI_MIN, master, MPI_COMM_WORLD, ierr)
2209         call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION,
2210     &        MPI_MAX, master, MPI_COMM_WORLD, ierr)
2211         tmp1=tmp3
2212         tmp2=tmp4
2213      endif
2214      if (myrank .EQ. master) then !print CDelta^2 range
2215         write(34,*)lstep,tmp1,tmp2
2216         call flush(34)
2217      endif
2218c $$$$$$$$$$$$$$$$$$$$$$$$$$$
2219
2220      if (myrank .eq. master) then
2221         write(*,*) 'cdelsq=', cdelsq(1),cdelsq(2)
2222         write(*,*) 'cdelsq=', cdelsq2(1),cdelsq2(2)
2223         write(22,*) lstep, cdelsq(1)
2224         call flush(22)
2225      endif
2226
2227      do iblk = 1,nelblk
2228         lcsyst = lcblk(3,iblk)
2229         iel  = lcblk(1,iblk)
2230         npro = lcblk(1,iblk+1) - iel
2231         lelCat = lcblk(2,iblk)
2232         inum  = iel + npro - 1
2233
2234         ngauss = nint(lcsyst)
2235
2236         call scatnu (mien(iblk)%p, strl(iel:inum,:),
2237     &        mxmudmi(iblk)%p,cdelsq,shp(lcsyst,1:nshl,:))
2238      enddo
2239c     $$$$$$$$$$$$$$$$$$$$$$$$$$$
2240c$$$  tmp1 =  MINVAL(xmudmi)
2241c$$$  tmp2 =  MAXVAL(xmudmi)
2242c$$$  if(numpe>1) then
2243c$$$  call MPI_REDUCE (tmp1, tmp3, 1, MPI_DOUBLE_PRECISION,
2244c$$$  &                 MPI_MIN, master, MPI_COMM_WORLD, ierr)
2245c$$$  call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION,
2246c$$$  &                 MPI_MAX, master, MPI_COMM_WORLD, ierr)
2247c$$$      tmp1=tmp3
2248c$$$  tmp2=tmp4
2249c$$$  endif
2250c$$$  if (myrank .EQ. master) then
2251c$$$  write(35,*) lstep,tmp1,tmp2
2252c$$$  call flush(35)
2253c$$$  endif
2254c $$$$$$$$$$$$$$$$$$$$$$$$$$$
2255
2256c
2257c  if flag set, write a restart file with info (reuse xmij's memory)
2258c
2259      if(irs.eq.11) then
2260         lstep=999
2261         xmij(:,1)=xnum(:)
2262         xmij(:,2)=xden(:)
2263         xmij(:,3)=cdelsq(:)
2264         xmij(:,5)=xlij(:,4)    !leave M_{12} in 4 and put L_{12} here
2265         call restar('out ',xmij,xlij) !also dump all of L_{ij} in ac
2266         stop
2267      endif
2268c
2269c  local clipping moved to scatnu with the creation of mxmudmi pointers
2270c
2271c$$$      rmu=datmat(1,2,1)
2272c$$$      xmudmi=min(xmudmi,1000.0*rmu) !don't let it get larger than 1000 mu
2273c$$$      xmudmi=max(xmudmi, -rmu) ! don't let (xmudmi + mu) < 0
2274c      stop !uncomment to test dmod
2275c
2276
2277
2278c  write out the nodal values of xnut (estimate since we don't calc strain
2279c  there and must use the filtered strain).
2280c
2281
2282      if ((irs .ge. 1) .and. (mod(lstep, ntout) .eq. 0)) then
2283c
2284c  collect the average strain into xnude(2)
2285c
2286         xnude(:,2) = zero
2287         do i = 1,numnp
2288            xnude(ifath(i),2) = xnude(ifath(i),2) + strnrm(i)
2289         enddo
2290
2291         if(numpe .gt. 1) then
2292             call drvAllreduce(xnude(:,2), xnuder(:,2),nfath)
2293          else
2294             xnuder=xnude
2295          endif
2296c
2297c          nut= cdelsq    * |S|
2298c
2299         xnutf=xnuder(:,1)*xnuder(:,2)/nsons(:)
2300c
2301c  collect the x and y coords into xnude
2302c
2303         xnude = zero
2304         do i = 1,numnp
2305            xnude(ifath(i),1) = xnude(ifath(i),1) + x(i,1)
2306            xnude(ifath(i),2) = xnude(ifath(i),2) + x(i,2)
2307         enddo
2308
2309         if(numpe .gt. 1)
2310     &        call drvAllreduce(xnude, xnuder,2*nfath)
2311         xnuder(:,1)=xnuder(:,1)/nsons(:)
2312         xnuder(:,2)=xnuder(:,2)/nsons(:)
2313c
2314c  xnude is the sum of the sons for each father on this processor
2315c
2316         if((myrank.eq.master)) then
2317            do i=1,nfath      ! cdelsq   * |S|
2318               write(444,*) xnuder(i,1),xnuder(i,2),xnutf(i)
2319            enddo
2320            call flush(444)
2321         endif
2322      endif
2323
2324      return
2325      end
2326      subroutine widefdmc (y,      shgl,      shp,
2327     &                   iper,   ilwork,
2328     &                   nsons,  ifath,     x)
2329
2330      use pointer_data
2331
2332      use quadfilt   ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf),
2333c                    shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf).
2334c                    Shpf and shglf are the shape funciotns and their
2335c                    gradient evaluated using the quadrature rule desired
2336c                    for computing the dmod. Qwtf contains the weights of the
2337c                    quad. points.
2338
2339      include "common.h"
2340      include "mpif.h"
2341      include "auxmpi.h"
2342
2343c
2344      dimension fres(nshg,33),         fwr(nshg),
2345     &          strnrm(nshg),         cdelsq(nshg),
2346     &          cdelsq2(nshg),
2347     &          xnum(nshg),           xden(nshg),
2348     &          xmij(nshg,6),         xlij(nshg,6),
2349     &          xnude(nfath,2),        xnuder(nfath,2),
2350     &          ynude(nfath,6),        ynuder(nfath,6),
2351     &          ui(nfath,3),           snorm(nfath),
2352     &          uir(nfath,3),          snormr(nfath)
2353      dimension xm(nfath,6),           xl(nfath,6),
2354     &          xl1(nfath,6),          xl2(nfath,6),
2355     &          xl1r(nfath,6),         xl2r(nfath,6),
2356     &          xmr(nfath,6),          xlr(nfath,6),
2357     &          nsons(nshg),
2358     &          strl(numel,ngauss),
2359     &          y(nshg,5),            yold(nshg,5),
2360     &          ifath(nshg),          iper(nshg),
2361     &          ilwork(nlwork),
2362     &          x(numnp,3)
2363      dimension shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT),
2364     &          xnutf(nfath),
2365     &          hfres(nshg,22),
2366     &          xfac(nshg,5)
2367
2368      character*10 cname
2369      character*30 fname1, fname2, fname3, fname4, fname5, fname6
2370c
2371
2372c
2373c
2374c   setup the weights for time averaging of cdelsq (now in quadfilt module)
2375c
2376
2377      denom=max(1.0d0*(lstep),one)
2378      if(dtavei.lt.0) then
2379         wcur=one/denom
2380      else
2381         wcur=dtavei
2382      endif
2383      whist=1.0-wcur
2384
2385      if (myrank .eq. master) then
2386         write(*,*)'istep=', istep
2387      endif
2388
2389      if (istep .eq. 0) then
2390         xnd      = zero
2391         xmodcomp = zero
2392         xmcomp  = zero
2393         xlcomp  = zero
2394         xl1comp  = zero
2395         xl2comp  = zero
2396         ucomp    = zero
2397         scomp    = zero
2398      endif
2399
2400
2401      fres = zero
2402      hfres = zero
2403
2404      yold(:,1)=y(:,4)
2405      yold(:,2:4)=y(:,1:3)
2406
2407c
2408c  hack in an interesting velocity field (uncomment to test dmod)
2409c
2410c      yold(:,5) = 1.0  ! Debugging
2411c      yold(:,2) = 2.0*x(:,1) - 3.0*x(:,2)
2412c      yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2)
2413c      yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3)
2414c      yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable
2415c                               suitable for the
2416
2417      do iblk = 1,nelblk
2418        lcsyst = lcblk(3,iblk)
2419        iel  = lcblk(1,iblk) !Element number where this block begins
2420        npro = lcblk(1,iblk+1) - iel
2421        lelCat = lcblk(2,iblk)
2422        nenl = lcblk(5,iblk)
2423        nshl = lcblk(10,iblk)
2424        inum  = iel + npro - 1
2425
2426        ngauss = nint(lcsyst)
2427        ngaussf = nintf(lcsyst)
2428
2429c        call hfilterBB (yold, x, mien(iblk)%p, hfres,
2430c     &               shglf(lcsyst,:,1:nshl,:),
2431c     &               shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
2432
2433        call hfilterCC (yold, x, mien(iblk)%p, hfres,
2434     &               shglf(lcsyst,:,1:nshl,:),
2435     &               shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
2436
2437      enddo
2438
2439      if(numpe>1) call commu (hfres, ilwork, 22, 'in ')
2440c
2441c... account for periodicity in filtered variables
2442c
2443      do j = 1,nshg  !    Add on-processor slave contribution to masters
2444        i = iper(j)
2445        if (i .ne. j) then
2446           hfres(i,:) = hfres(i,:) + hfres(j,:)
2447        endif
2448      enddo
2449      do j = 1,nshg ! Set on-processor slaves to be the same as masters
2450        i = iper(j)
2451        if (i .ne. j) then
2452           hfres(j,:) = hfres(i,:)
2453        endif
2454      enddo
2455
2456c... Set off-processor slaves to be the same as their masters
2457
2458      if(numpe>1)   call commu (hfres, ilwork, 22, 'out')
2459
2460
2461      hfres(:,16) = one / hfres(:,16) ! one/(volume filter kernel)
2462
2463      do j = 1, 15
2464	hfres(:,j) = hfres(:,j) * hfres(:,16)
2465      enddo
2466      do j = 17, 22
2467	hfres(:,j) = hfres(:,j) * hfres(:,16)
2468      enddo
2469
2470c... For debugging
2471
2472c      hfres(:,1) = 2.0*x(:,1) - 3.0*x(:,2)
2473c      hfres(:,2) = 3.0*x(:,1) + 4.0*x(:,2)
2474c      hfres(:,3) = 4.0*x(:,1) + x(:,2) + x(:,3)
2475
2476c... Done w/ h-filtering. Begin 2h-filtering.
2477
2478      do iblk = 1,nelblk
2479        lcsyst = lcblk(3,iblk)
2480        iel  = lcblk(1,iblk) !Element number where this block begins
2481        npro = lcblk(1,iblk+1) - iel
2482        lelCat = lcblk(2,iblk)
2483        nenl = lcblk(5,iblk)
2484        nshl = lcblk(10,iblk)
2485        inum  = iel + npro - 1
2486
2487        ngauss = nint(lcsyst)
2488        ngaussf = nintf(lcsyst)
2489
2490        call twohfilterBB (yold, x, strl(iel:inum,:), mien(iblk)%p,
2491     &               fres, hfres, shglf(lcsyst,:,1:nshl,:),
2492     &               shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
2493
2494      enddo
2495c
2496
2497
2498      if(numpe>1) call commu (fres, ilwork, 33, 'in ')
2499c
2500c account for periodicity in filtered variables
2501c
2502      do j = 1,nshg
2503        i = iper(j)
2504        if (i .ne. j) then
2505           fres(i,:) = fres(i,:) + fres(j,:)
2506        endif
2507      enddo
2508
2509      do j = 1,nshg
2510        i = iper(j)
2511        if (i .ne. j) then
2512           fres(j,:) = fres(i,:)
2513        endif
2514      enddo
2515
2516      if(numpe>1)then
2517         call commu (fres, ilwork, 33, 'out')
2518      endif
2519
2520      fres(:,22) = one / fres(:,22)
2521      do j = 1,21
2522        fres(:,j) = fres(:,j) * fres(:,22)
2523      enddo
2524      do j = 23,33
2525        fres(:,j) = fres(:,j) * fres(:,22)
2526      enddo
2527
2528
2529      do iblk = 1,nelblk
2530        lcsyst = lcblk(3,iblk)
2531        iel  = lcblk(1,iblk) !Element number where this block begins
2532        npro = lcblk(1,iblk+1) - iel
2533        lelCat = lcblk(2,iblk)
2534        nenl = lcblk(5,iblk)
2535        nshl = lcblk(10,iblk)
2536        inum  = iel + npro - 1
2537
2538        ngauss = nint(lcsyst)
2539
2540        call getstrl (yold, x,      mien(iblk)%p,
2541     &               strl(iel:inum,:), shgl(lcsyst,:,1:nshl,:),
2542     &               shp(lcsyst,1:nshl,:))
2543
2544      enddo
2545
2546c
2547c... Obtain the hat-tilde strain rate norm at the nodes
2548c
2549
2550      strnrm = sqrt(
2551     &  two * (fres(:,10)**2 + fres(:,11)**2 + fres(:,12)**2)
2552     &  + four * ( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) )
2553
2554      fwr = fwr1 * strnrm
2555
2556      xmij(:,1) = -fwr
2557     &             * fres(:,10) + fres(:,16)
2558      xmij(:,2) = -fwr
2559     &             * fres(:,11) + fres(:,17)
2560      xmij(:,3) = -fwr
2561     &             * fres(:,12) + fres(:,18)
2562
2563      xmij(:,4) = -fwr * fres(:,13) + fres(:,19)
2564      xmij(:,5) = -fwr * fres(:,14) + fres(:,20)
2565      xmij(:,6) = -fwr * fres(:,15) + fres(:,21)
2566
2567
2568      xlij(:,1) = fres(:,4) - fres(:,1) * fres(:,1)
2569      xlij(:,2) = fres(:,5) - fres(:,2) * fres(:,2)
2570      xlij(:,3) = fres(:,6) - fres(:,3) * fres(:,3)
2571      xlij(:,4) = fres(:,7) - fres(:,1) * fres(:,2)
2572      xlij(:,5) = fres(:,8) - fres(:,1) * fres(:,3)
2573      xlij(:,6) = fres(:,9) - fres(:,2) * fres(:,3)
2574
2575      xnum =        xlij(:,1) * xmij(:,1) + xlij(:,2) * xmij(:,2)
2576     &                                    + xlij(:,3) * xmij(:,3)
2577     &     + two * (xlij(:,4) * xmij(:,4) + xlij(:,5) * xmij(:,5)
2578     &                                    + xlij(:,6) * xmij(:,6))
2579      xden =        xmij(:,1) * xmij(:,1) + xmij(:,2) * xmij(:,2)
2580     &                                    + xmij(:,3) * xmij(:,3)
2581     &     + two * (xmij(:,4) * xmij(:,4) + xmij(:,5) * xmij(:,5)
2582     &                                    + xmij(:,6) * xmij(:,6))
2583      xden = two * xden
2584
2585c... For collectection of statistics on dyn. model components
2586
2587      xfac(:,1) = strnrm*strnrm*( fres(:,10)**2 + fres(:,11)**2 +
2588     &     fres(:,12)**2
2589     &     + two*( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) )
2590
2591      xfac(:,2) = strnrm*( xlij(:,1)*fres(:,10) + xlij(:,2)*fres(:,11)
2592     &     + xlij(:,3)*fres(:,12) +
2593     &     two*(xlij(:,4)*fres(:,13) + xlij(:,5)*fres(:,14) +
2594     &     xlij(:,6)*fres(:,15)) )
2595
2596      xfac(:,3) = strnrm*( fres(:,10)*fres(:,16) + fres(:,11)*fres(:,17)
2597     &     + fres(:,12)*fres(:,18) +
2598     &     two*(fres(:,13)*fres(:,19) + fres(:,14)*fres(:,20) +
2599     &     fres(:,15)*fres(:,21)) )
2600
2601      xfac(:,4) = xlij(:,1)*fres(:,16) + xlij(:,2)*fres(:,17)
2602     &     + xlij(:,3)*fres(:,18) +
2603     &     two*(xlij(:,4)*fres(:,19) + xlij(:,5)*fres(:,20) +
2604     &     xlij(:,6)*fres(:,21))
2605
2606      xfac(:,5) = fres(:,16)*fres(:,16) + fres(:,17)*fres(:,17)
2607     &     + fres(:,18)*fres(:,18) +
2608     &     two*(fres(:,19)*fres(:,19) + fres(:,20)*fres(:,20) +
2609     &     fres(:,21)*fres(:,21))
2610
2611c  zero on processor periodic nodes so that they will not be added twice
2612        do j = 1,numnp
2613          i = iper(j)
2614          if (i .ne. j) then
2615            xnum(j) = zero
2616            xden(j) = zero
2617            xfac(j,:) = zero
2618            xmij(j,:) = zero
2619            xlij(j,:) = zero
2620            fres(j,:) = zero
2621            strnrm(j) = zero
2622          endif
2623        enddo
2624
2625      if (numpe.gt.1) then
2626
2627         numtask = ilwork(1)
2628         itkbeg = 1
2629
2630c zero the nodes that are "solved" on the other processors
2631         do itask = 1, numtask
2632
2633            iacc   = ilwork (itkbeg + 2)
2634            numseg = ilwork (itkbeg + 4)
2635
2636            if (iacc .eq. 0) then
2637               do is = 1,numseg
2638                  isgbeg = ilwork (itkbeg + 3 + 2*is)
2639                  lenseg = ilwork (itkbeg + 4 + 2*is)
2640                  isgend = isgbeg + lenseg - 1
2641                  xnum(isgbeg:isgend) = zero
2642                  xden(isgbeg:isgend) = zero
2643                  strnrm(isgbeg:isgend) = zero
2644                  xfac(isgbeg:isgend,:) = zero
2645                  xmij(isgbeg:isgend,:) = zero
2646                  xlij(isgbeg:isgend,:) = zero
2647                  fres(isgbeg:isgend,:) = zero
2648               enddo
2649            endif
2650
2651            itkbeg = itkbeg + 4 + 2*numseg
2652
2653         enddo
2654
2655      endif
2656c
2657c Description of arrays.   Each processor has an array of length equal
2658c to the total number of fathers times 2 xnude(nfathers,2). One to collect
2659c the numerator and one to collect the denominator.  There is also an array
2660c of length nshg on each processor which tells the father number of each
2661c on processor node, ifath(nnshg).  Finally, there is an arry of length
2662c nfathers to tell the total (on all processors combined) number of sons
2663c for each father.
2664c
2665c  Now loop over nodes and accumlate the numerator and the denominator
2666c  to the father nodes.  Only on processor addition at this point.
2667c  Note that serrogate fathers are collect some for the case where some
2668c  sons are on another processor
2669c
2670      xnude = zero
2671      ynude = zero
2672      xm    = zero
2673      xl    = zero
2674      xl1   = zero
2675      xl2   = zero
2676      ui    = zero
2677      snorm = zero
2678
2679      do i = 1,nshg
2680         xnude(ifath(i),1) = xnude(ifath(i),1) + xnum(i)
2681         xnude(ifath(i),2) = xnude(ifath(i),2) + xden(i)
2682
2683         ynude(ifath(i),1) = ynude(ifath(i),1) + xfac(i,1)
2684         ynude(ifath(i),2) = ynude(ifath(i),2) + xfac(i,2)
2685         ynude(ifath(i),3) = ynude(ifath(i),3) + xfac(i,3)
2686         ynude(ifath(i),4) = ynude(ifath(i),4) + xfac(i,4)
2687         ynude(ifath(i),5) = ynude(ifath(i),5) + xfac(i,5)
2688
2689         xm(ifath(i),1) = xm(ifath(i),1) + xmij(i,1)
2690         xm(ifath(i),2) = xm(ifath(i),2) + xmij(i,2)
2691         xm(ifath(i),3) = xm(ifath(i),3) + xmij(i,3)
2692         xm(ifath(i),4) = xm(ifath(i),4) + xmij(i,4)
2693         xm(ifath(i),5) = xm(ifath(i),5) + xmij(i,5)
2694         xm(ifath(i),6) = xm(ifath(i),6) + xmij(i,6)
2695
2696         xl(ifath(i),1) = xl(ifath(i),1) + xlij(i,1)
2697         xl(ifath(i),2) = xl(ifath(i),2) + xlij(i,2)
2698         xl(ifath(i),3) = xl(ifath(i),3) + xlij(i,3)
2699         xl(ifath(i),4) = xl(ifath(i),4) + xlij(i,4)
2700         xl(ifath(i),5) = xl(ifath(i),5) + xlij(i,5)
2701         xl(ifath(i),6) = xl(ifath(i),6) + xlij(i,6)
2702
2703         xl1(ifath(i),1) = xl1(ifath(i),1) + fres(i,4)
2704         xl1(ifath(i),2) = xl1(ifath(i),2) + fres(i,5)
2705         xl1(ifath(i),3) = xl1(ifath(i),3) + fres(i,6)
2706         xl1(ifath(i),4) = xl1(ifath(i),4) + fres(i,7)
2707         xl1(ifath(i),5) = xl1(ifath(i),5) + fres(i,8)
2708         xl1(ifath(i),6) = xl1(ifath(i),6) + fres(i,9)
2709
2710         xl2(ifath(i),1) = xl2(ifath(i),1) + fres(i,1)*fres(i,1)
2711         xl2(ifath(i),2) = xl2(ifath(i),2) + fres(i,2)*fres(i,2)
2712         xl2(ifath(i),3) = xl2(ifath(i),3) + fres(i,3)*fres(i,3)
2713         xl2(ifath(i),4) = xl2(ifath(i),4) + fres(i,1)*fres(i,2)
2714         xl2(ifath(i),5) = xl2(ifath(i),5) + fres(i,1)*fres(i,3)
2715         xl2(ifath(i),6) = xl2(ifath(i),6) + fres(i,2)*fres(i,3)
2716
2717         ui(ifath(i),1) = ui(ifath(i),1) + fres(i,1)
2718         ui(ifath(i),2) = ui(ifath(i),2) + fres(i,2)
2719         ui(ifath(i),3) = ui(ifath(i),3) + fres(i,3)
2720
2721         snorm(ifath(i)) = snorm(ifath(i)) + strnrm(i)
2722
2723      enddo
2724
2725c
2726c Now  the true fathers and serrogates combine results and update
2727c each other.
2728c
2729      if(numpe .gt. 1)then
2730         call drvAllreduce(xnude, xnuder,2*nfath)
2731         call drvAllreduce(ynude, ynuder,6*nfath)
2732         call drvAllreduce(xm, xmr,6*nfath)
2733         call drvAllreduce(xl, xlr,6*nfath)
2734         call drvAllreduce(xl1, xl1r,6*nfath)
2735         call drvAllreduce(xl2, xl2r,6*nfath)
2736         call drvAllreduce(ui, uir,3*nfath)
2737         call drvAllreduce(snorm, snormr,nfath)
2738
2739         do i = 1, nfath
2740            ynuder(i,6) = ( ynuder(i,4) - fwr1*ynuder(i,2) ) /
2741     &           ( two*ynuder(i,5) - four*fwr1*ynuder(i,3)
2742     &           + two*fwr1*fwr1*ynuder(i,1) )
2743         enddo
2744
2745         cdelsq2(:) = ynuder(ifath(:),6)  ! For comparison w/ cdelsq
2746c
2747c  xnude is the sum of the sons for each father on this processor
2748c
2749c  xnuder is the sum of the sons for each father on all processor combined
2750c  (the same as if we had not partitioned the mesh for each processor)
2751c
2752c   For each father we have precomputed the number of sons (including
2753c   the sons off processor).
2754c
2755c   Now divide by number of sons to get the average (not really necessary
2756c   for dynamic model since ratio will cancel nsons at each father)
2757c
2758         xnuder(:,1) = xnuder(:,1) / nsons(:)
2759         xnuder(:,2) = xnuder(:,2) / nsons(:)
2760
2761         do m = 1, 5
2762         ynuder(:,m) = ynuder(:,m)/nsons(:)
2763         enddo
2764         do m = 1,6
2765         xmr(:,m) = xmr(:,m)/nsons(:)
2766         xlr(:,m) = xlr(:,m)/nsons(:)
2767         xl1r(:,m) = xl1r(:,m)/nsons(:)
2768         xl2r(:,m) = xl2r(:,m)/nsons(:)
2769         enddo
2770
2771         uir(:,1) = uir(:,1)/nsons(:)
2772         uir(:,2) = uir(:,2)/nsons(:)
2773         uir(:,3) = uir(:,3)/nsons(:)
2774
2775         snormr(:) = snormr(:)/nsons(:)
2776
2777c
2778cc  the next line is c \Delta^2
2779cc
2780cc         xnuder(:,1) = xnuder(:,1) / (xnuder(:,2) + 1.d-09)
2781cc         do i = 1,nshg
2782cc            cdelsq(i) = xnuder(ifath(i),1)
2783cc         enddo
2784
2785            numNden(:,1) = whist*numNden(:,1)+wcur*xnuder(ifath(:),1)
2786            numNden(:,2) = whist*numNden(:,2)+wcur*xnuder(ifath(:),2)
2787            cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09)
2788
2789c            cdelsq(:) = xnuder(ifath(:),1)/(xnuder(ifath(:),2)+1.d-09)
2790
2791            xnd(:,1) = xnd(:,1) + xnuder(:,1)
2792            xnd(:,2) = xnd(:,2) + xnuder(:,2)
2793
2794            xmodcomp(:,1) = xmodcomp(:,1)+ynuder(:,1)
2795            xmodcomp(:,2) = xmodcomp(:,2)+ynuder(:,2)
2796            xmodcomp(:,3) = xmodcomp(:,3)+ynuder(:,3)
2797            xmodcomp(:,4) = xmodcomp(:,4)+ynuder(:,4)
2798            xmodcomp(:,5) = xmodcomp(:,5)+ynuder(:,5)
2799
2800            xmcomp(:,:) = xmcomp(:,:)+xmr(:,:)
2801            xlcomp(:,:) = xlcomp(:,:)+xlr(:,:)
2802
2803            xl1comp(:,:) = xl1comp(:,:)+xl1r(:,:)
2804            xl2comp(:,:) = xl2comp(:,:)+xl2r(:,:)
2805
2806            ucomp(:,:) = ucomp(:,:)+uir(:,:)
2807            u1 = uir(32,1)
2808            scomp(:)   = scomp(:)+snormr(:)
2809
2810      else
2811
2812         xnude(:,1) = xnude(:,1)/nsons(:)
2813         xnude(:,2) = xnude(:,2)/nsons(:)
2814
2815         do m = 1, 5
2816         ynude(:,m) = ynude(:,m)/nsons(:)
2817         enddo
2818         do m = 1,6
2819         xm(:,m) = xm(:,m)/nsons(:)
2820         xl(:,m) = xl(:,m)/nsons(:)
2821         xl1(:,m) = xl1(:,m)/nsons(:)
2822         xl2(:,m) = xl2(:,m)/nsons(:)
2823         enddo
2824
2825         ui(:,1) = ui(:,1)/nsons(:)
2826         ui(:,2) = ui(:,2)/nsons(:)
2827         ui(:,3) = ui(:,3)/nsons(:)
2828
2829         snorm(:) = snorm(:)/nsons(:)
2830c
2831c     the next line is c \Delta^2, not nu_T but we want to save the
2832c     memory
2833c
2834
2835cc         xnude(:,1) = xnude(:,1) / (xnude(:,2) + 1.d-09)
2836cc        do i = 1,nshg
2837cc            cdelsq(i) = xnude(ifath(i),1)
2838cc         enddo
2839cc      endif
2840
2841         do i = 1, nfath
2842            ynude(i,6) = ( ynude(i,4) - fwr1*ynude(i,2) ) /
2843     &           ( two*ynude(i,5) - four*fwr1*ynude(i,3)
2844     &           + fwr1*fwr1*ynude(i,1) )
2845         enddo
2846
2847            numNden(:,1) = whist*numNden(:,1)+wcur*xnude(ifath(:),1)
2848            numNden(:,2) = whist*numNden(:,2)+wcur*xnude(ifath(:),2)
2849
2850            xnd(:,1) = xnd(:,1)+xnude(:,1)
2851            xnd(:,2) = xnd(:,2)+xnude(:,2)
2852
2853            cdelsq(:) = numNden(:,1) / (numNden(:,2)) ! + 1.d-09)
2854
2855c            cdelsq(:) = xnude(ifath(:),1)/(xnude(ifath(:),2))!+1.d-09)
2856
2857
2858          cdelsq2(:) = ynude(ifath(:),6)  ! For comparison w/ cdelsq
2859
2860            xmodcomp(:,1) = xmodcomp(:,1)+ynude(:,1)
2861            xmodcomp(:,2) = xmodcomp(:,2)+ynude(:,2)
2862            xmodcomp(:,3) = xmodcomp(:,3)+ynude(:,3)
2863            xmodcomp(:,4) = xmodcomp(:,4)+ynude(:,4)
2864            xmodcomp(:,5) = xmodcomp(:,5)+ynude(:,5)
2865
2866            xmcomp(:,:) = xmcomp(:,:)+xm(:,:)
2867            xlcomp(:,:) = xlcomp(:,:)+xl(:,:)
2868
2869            xl1comp(:,:) = xl1comp(:,:)+xl1(:,:)
2870            xl2comp(:,:) = xl2comp(:,:)+xl2(:,:)
2871
2872            ucomp(:,:) = ucomp(:,:)+ui(:,:)
2873            u1 = ui(32,1)
2874            scomp(:)   = scomp(:)+snorm(:)
2875
2876         endif
2877
2878
2879c         do i = 1, nfath
2880c            xmodcomp(i,:) = xmodcomp(i,:)/nsons(i)
2881c            xmcomp(i,:) = xmcomp(i,:)/nsons(i)
2882c            xlcomp(i,:) = xlcomp(i,:)/nsons(i)
2883c            xl2comp(i,:) = xl2comp(i,:)/nsons(i)
2884c            xl1comp(i,:) = xl1comp(i,:)/nsons(i)
2885c            xnd(i,:) = xnd(i,:)/nsons(i)
2886c            scomp(i) = scomp(i)/nsons(i)
2887c            ucomp(i,:) = ucomp(i,:)/nsons(i)
2888c         enddo
2889
2890         if ( istep .eq. (nstep(1)-1) ) then
2891         if ( myrank .eq. master) then
2892
2893            do i = 1, nfath
2894            write(365,*)xmodcomp(i,1),xmodcomp(i,2),xmodcomp(i,3),
2895     &              xmodcomp(i,4),xmodcomp(i,5)
2896
2897            write(366,*)xmcomp(i,1),xmcomp(i,2),xmcomp(i,3)
2898            write(367,*)xmcomp(i,4),xmcomp(i,5),xmcomp(i,6)
2899
2900            write(368,*)xlcomp(i,1),xlcomp(i,2),xlcomp(i,3)
2901            write(369,*)xlcomp(i,4),xlcomp(i,5),xlcomp(i,6)
2902
2903            write(370,*)xl1comp(i,1),xl1comp(i,2),xl1comp(i,3)
2904            write(371,*)xl1comp(i,4),xl1comp(i,5),xl1comp(i,6)
2905
2906            write(372,*)xl2comp(i,1),xl2comp(i,2),xl2comp(i,3)
2907            write(373,*)xl2comp(i,4),xl2comp(i,5),xl2comp(i,6)
2908
2909            write(374,*)xnd(i,1),xnd(i,2),scomp(i)
2910            write(375,*)ucomp(i,1),ucomp(i,2),ucomp(i,3)
2911
2912c            write(*,*)'uit uic=', ucomp(32,1),u1
2913            enddo
2914
2915
2916            call flush(365)
2917            call flush(366)
2918            call flush(367)
2919            call flush(368)
2920            call flush(369)
2921            call flush(370)
2922            call flush(371)
2923            call flush(372)
2924            call flush(373)
2925            call flush(374)
2926            call flush(375)
2927
2928c            if (myrank .eq. master) then
2929c               write(*,*)'uit uic=', ucomp(32,1),u1
2930c            endif
2931
2932
2933c            close(852)
2934c            close(853)
2935c            close(854)
2936
2937         endif
2938         endif
2939
2940            if (myrank .eq. master) then
2941               write(*,*)'uit uic=', ucomp(32,1),u1
2942            endif
2943
2944
2945 555     format(e14.7,4(2x,e14.7))
2946 556     format(e14.7,5(2x,e14.7))
2947
2948c         close(849)
2949c         close(850)
2950c         close(851)
2951c         close(852)
2952c         close(853)
2953c         close(854)
2954
2955c $$$$$$$$$$$$$$$$$$$$$$$$$$$
2956      tmp1 =  MINVAL(cdelsq)
2957      tmp2 =  MAXVAL(cdelsq)
2958      if(numpe>1) then
2959         call MPI_REDUCE (tmp1, tmp3, 1,MPI_DOUBLE_PRECISION,
2960     &        MPI_MIN, master, MPI_COMM_WORLD, ierr)
2961         call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION,
2962     &        MPI_MAX, master, MPI_COMM_WORLD, ierr)
2963         tmp1=tmp3
2964         tmp2=tmp4
2965      endif
2966      if (myrank .EQ. master) then !print CDelta^2 range
2967         write(34,*)lstep,tmp1,tmp2
2968         call flush(34)
2969      endif
2970c $$$$$$$$$$$$$$$$$$$$$$$$$$$
2971
2972      if (myrank .eq. master) then
2973         write(*,*) 'cdelsq=', cdelsq(1),cdelsq(2)
2974         write(*,*) 'cdelsq=', cdelsq2(1),cdelsq2(2)
2975         write(22,*) lstep, cdelsq(1)
2976         call flush(22)
2977      endif
2978
2979      do iblk = 1,nelblk
2980         lcsyst = lcblk(3,iblk)
2981         iel  = lcblk(1,iblk)
2982         npro = lcblk(1,iblk+1) - iel
2983         lelCat = lcblk(2,iblk)
2984         inum  = iel + npro - 1
2985
2986         ngauss = nint(lcsyst)
2987
2988         call scatnu (mien(iblk)%p, strl(iel:inum,:),
2989     &        mxmudmi(iblk)%p,cdelsq,shp(lcsyst,1:nshl,:))
2990      enddo
2991c     $$$$$$$$$$$$$$$$$$$$$$$$$$$
2992c$$$  tmp1 =  MINVAL(xmudmi)
2993c$$$  tmp2 =  MAXVAL(xmudmi)
2994c$$$  if(numpe>1) then
2995c$$$  call MPI_REDUCE (tmp1, tmp3, 1, MPI_DOUBLE_PRECISION,
2996c$$$  &                 MPI_MIN, master, MPI_COMM_WORLD, ierr)
2997c$$$  call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION,
2998c$$$  &                 MPI_MAX, master, MPI_COMM_WORLD, ierr)
2999c$$$      tmp1=tmp3
3000c$$$  tmp2=tmp4
3001c$$$  endif
3002c$$$  if (myrank .EQ. master) then
3003c$$$  write(35,*) lstep,tmp1,tmp2
3004c$$$  call flush(35)
3005c$$$  endif
3006c $$$$$$$$$$$$$$$$$$$$$$$$$$$
3007
3008c
3009c  if flag set, write a restart file with info (reuse xmij's memory)
3010c
3011      if(irs.eq.11) then
3012         lstep=999
3013         xmij(:,1)=xnum(:)
3014         xmij(:,2)=xden(:)
3015         xmij(:,3)=cdelsq(:)
3016         xmij(:,5)=xlij(:,4)    !leave M_{12} in 4 and put L_{12} here
3017         call restar('out ',xmij,xlij) !also dump all of L_{ij} in ac
3018         stop
3019      endif
3020c
3021c  local clipping moved to scatnu with the creation of mxmudmi pointers
3022c
3023c$$$      rmu=datmat(1,2,1)
3024c$$$      xmudmi=min(xmudmi,1000.0*rmu) !don't let it get larger than 1000 mu
3025c$$$      xmudmi=max(xmudmi, -rmu) ! don't let (xmudmi + mu) < 0
3026c      stop !uncomment to test dmod
3027c
3028
3029
3030c  write out the nodal values of xnut (estimate since we don't calc strain
3031c  there and must use the filtered strain).
3032c
3033
3034      if ((irs .ge. 1) .and. (mod(lstep, ntout) .eq. 0)) then
3035c
3036c  collect the average strain into xnude(2)
3037c
3038         xnude(:,2) = zero
3039         do i = 1,numnp
3040            xnude(ifath(i),2) = xnude(ifath(i),2) + strnrm(i)
3041         enddo
3042
3043         if(numpe .gt. 1) then
3044             call drvAllreduce(xnude(:,2), xnuder(:,2),nfath)
3045          else
3046             xnuder=xnude
3047          endif
3048c
3049c          nut= cdelsq    * |S|
3050c
3051         xnutf=xnuder(:,1)*xnuder(:,2)/nsons(:)
3052c
3053c  collect the x and y coords into xnude
3054c
3055         xnude = zero
3056         do i = 1,numnp
3057            xnude(ifath(i),1) = xnude(ifath(i),1) + x(i,1)
3058            xnude(ifath(i),2) = xnude(ifath(i),2) + x(i,2)
3059         enddo
3060
3061         if(numpe .gt. 1)
3062     &        call drvAllreduce(xnude, xnuder,2*nfath)
3063         xnuder(:,1)=xnuder(:,1)/nsons(:)
3064         xnuder(:,2)=xnuder(:,2)/nsons(:)
3065c
3066c  xnude is the sum of the sons for each father on this processor
3067c
3068         if((myrank.eq.master)) then
3069            do i=1,nfath      ! cdelsq   * |S|
3070               write(444,*) xnuder(i,1),xnuder(i,2),xnutf(i)
3071            enddo
3072            call flush(444)
3073         endif
3074      endif
3075
3076      return
3077      end
3078