c xref: /phasta/AcuStat/src/commuMax.f (revision 1e99f302ca5103688ae35115c2fefb7cfa6714f1)
1      subroutine commuMax (global, ilwork,  n,  code)
2c---------------------------------------------------------------------
3c
4c This subroutine is responsible for interprocessor communication of
5c the residual and solution vectors.
6c
7c input:
8c     global(nshg,n): global vector to be communicated. Note that
9c                      this vector is local to the processor, (i.e.
10c                      not distributed across processors)
11c     ilwork(nlwork):  this is the local interprocessor work array.
12c                      This array is local to the processor, (i.e.
13c                      each processor has a unique ilwork array.
14c     n:               second dimension of the array to be communicated
15c     code:            = 'in' for communicating with the residual
16c                      = 'out' for cummunicating the solution
17c
18c---------------------------------------------------------------------
19c
20c The array ilwork describes the details of the communications.
21c Each communication step (call of this routine) consists of a
22c sequence of "tasks", where a task is defined as a communication
23c between two processors where data is exchanged. This would imply
24c that for a given processor, there will be as many tasks as there
25c are processors with which it must communicate. Details of the
26c ilwork array appear below.
27c
28c---------------------------------------------------------------------
29c
30      include "commonAcuStat.h"
31      include "mpif.h"
32      include "auxmpiAcuStat.h"
33      integer status(MPI_STATUS_SIZE), ierr
34      integer stat(MPI_STATUS_SIZE, 2*maxtask), req(2*maxtask)
35      real*8  rDelISend, rDelIRecv, rDelWaitAll
36
37      dimension global(nshg,n),
38     &          rtemp(maxfront*n,maxtask),
39     &          ilwork(nlwork)
40
41      character*3 code
42
43      if(impistat2.eq.1) call MPI_BARRIER (MPI_COMM_WORLD, ierr)
44      if(impistat.eq.1) rDelIRecv = zero
45      if(impistat.eq.1) rDelISend = zero
46      if(impistat.eq.1) rDelWaitAll = zero
47
48
49      if (code .ne. 'in ' .and. code .ne. 'out')
50     &  call error ('commu   ','code    ',0)
51
52
53      if     (n .eq. 1)      then        ! like a scalar
54        kdof = 1
55      elseif (n .eq. nsd)    then        ! like the normal vectors
56        kdof = 2
57      elseif (n .eq. ndof)   then        ! res, y, ac, krylov vectors....
58        kdof = 3
59      elseif (n .eq. nflow*nflow) then     ! bdiag
60        kdof = 4
61      elseif (n .eq. (nflow-1)*nsd) then  ! qres
62        kdof = 5
63      elseif (n .eq. nflow) then
64        kdof = 6
65      elseif (n .eq. 24 ) then
66        kdof = 7
67      elseif (n .eq. 9) then
68        kdof = 8
69      elseif (n .eq. 11 ) then
70        kdof = 9
71      elseif (n .eq. 7 ) then
72        kdof = 10
73!      elseif (n .eq. 33 ) then ! hack
74      elseif (n .eq. 13 ) then ! for error
75         kdof = 11
76      elseif (n .eq. 22 ) then
77         kdof = 12
78      elseif (n .eq. 16 ) then
79         kdof = 13
80      elseif (n .eq. 10 ) then
81         kdof = 14
82      elseif (n .eq. nflow*nsd ) then   !surface tension + qres
83         kdof = 15
84      else
85        call error ('commu   ','n       ',n)
86      endif
87
88c... Note that when adding another kdof to the above set, we must
89c... also make changes in ctypes.f and auxmpi.h
90
91c---------------------------------------------------------------------
92c  ilwork(1): number of tasks
93c
94c  The following information is contained in ilwork for each task:
95c     itag: tag of the communication
96c     iacc: == 0 if task is a send
97c           == 1 if task is a recieve
98c     iother: rank of processor with which this communication occurs
99c     numseg: number of data "segments" to be sent or recieved. A
100c             segment is defined as a continuous section of the global
101c             vector to be communicated, (i.e. a group of nodes (or,
102c             rather, "shape function coefficients") which occur
103c             sequentially in the array global(nshg,n)).
104c     isbeg:  location of the first segment in the array owned by the
105c             current processor.
106c
107c The two types of communication are 'in', where the residual is being
108c communicated, and 'out', where the solution is being communicated.
109c Note that when the type is 'out', senders recieve and recievers send.
110c
111c The following comment pertains to a communication of type 'in':
112c
113c     If the task is a send, then all of the numseg segments are
114c     sent with a single call to MPI_SEND. Where these segments live in
115c     the array is built into the array sevsegtype, which is a common
116c     array constructed in the subroutine "ctypes.f". In other words,
117c     sevsegtype is a data type that describes the indices of the blocks
118c     to be sent, in terms of there beginning index, and the length of
119c     each segment. Using this, we can make a single send to take care of
120c     all the segments for this task.
121c
122c     If the task is a recieve, then once the vector is recieved, the
123c     recieved segments must be added to the correct locations in the
124c     current array. These locations are described in ilwork as the
125c     beginning position, then the length of the segment.
126c
127c---------------------------------------------------------------------
128      numtask = ilwork(1)
129
130      itkbeg = 1
131      m = 0
132      idl=0
133
134      DO itask = 1, numtask
135        m      = m + 1
136        itag   = ilwork (itkbeg + 1)
137        iacc   = ilwork (itkbeg + 2)
138        iother = ilwork (itkbeg + 3)
139        numseg = ilwork (itkbeg + 4)
140        isgbeg = ilwork (itkbeg + 5)
141c
142c.... if iacc == 0, then this task is a send.
143c     slave
144c
145        if (iacc .EQ. 0) then
146c
147c.... residual communication
148c
149          if (code .eq. 'in ') then
150            if(impistat.eq.1) iISend = iISend+1
151            if(impistat.eq.1) rmpitmr = TMRC()
152            call MPI_ISEND(global(isgbeg, 1), 1, sevsegtype(itask,kdof),
153     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
154            if(impistat.eq.1) rDelISend = TMRC()-rmpitmr
155            if(impistat.eq.1) rISend = rISend+rDelISend
156          endif
157c
158c.... solution communication
159c
160          if (code .eq. 'out') then
161            if(impistat.eq.1) iIRecv = iIRecv+1
162            if(impistat.eq.1) rmpitmr = TMRC()
163            call MPI_IRECV(global(isgbeg, 1), 1, sevsegtype(itask,kdof),
164     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
165            if(impistat.eq.1) rDelIRecv = TMRC()-rmpitmr
166            if(impistat.eq.1) rIRecv = rIRecv+rDelIRecv
167          endif
168c
169c.... if iacc == 1, then this task is a recieve.
170c     master
171c
172        else
173          if (code .eq. 'in ') then
174c
175c.... determine the number of total number of nodes involved in this
176c     communication (lfront), including all segments
177c
178            lfront = 0
179            do is = 1,numseg
180              lenseg = ilwork (itkbeg + 4 + 2*is)
181              lfront = lfront + lenseg
182            enddo
183c
184c.... recieve all segments for this task in a single step
185c
186            idl=idl+1 ! stands for i Do Later, the number to fix later
187            if(impistat.eq.1) iIRecv = iIRecv+1
188            if(impistat.eq.1) rmpitmr = TMRC()
189            call MPI_IRECV(rtemp(1,idl), lfront*n, MPI_DOUBLE_PRECISION,
190     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
191            if(impistat.eq.1) rDelIRecv = TMRC()-rmpitmr
192            if(impistat.eq.1) rIRecv = rIRecv+rDelIRecv
193          endif
194          if (code .eq. 'out') then
195            if(impistat.eq.1) iISend = iISend+1
196            if(impistat.eq.1) rmpitmr = TMRC()
197            call MPI_ISEND(global(isgbeg, 1), 1, sevsegtype(itask,kdof),
198     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
199            if(impistat.eq.1) rDelISend = TMRC()-rmpitmr
200            if(impistat.eq.1) rISend = rISend+rDelISend
201          endif
202        endif
203
204        itkbeg = itkbeg + 4 + 2*numseg
205
206      enddo   !! end tasks loop
207
208      if(impistat.eq.1) iWaitAll = iWaitAll+1
209      if(impistat.eq.1) rmpitmr = TMRC()
210      call MPI_WAITALL(m, req, stat, ierr)
211      if(impistat.eq.1) rDelWaitAll = TMRC()-rmpitmr
212      if(impistat.eq.1) rWaitAll = rWaitAll+rDelWaitAll
213      if(impistat.eq.1) rCommu = rCommu+rDelIRecv+rDelISend+rDelWaitAll
214
215c
216c     Stuff added below is a delayed assembly of that which was communicated
217c     above but due to the switch to non-blocking receivves could not be
218c     assembled until after the waitall.  Only necessary for commu "in"
219c
220
221      if(code .eq. 'in ') then
222         itkbeg=1
223         jdl=0
224         do j=1,numtask         ! time to do all the segments that needed to be
225                                ! assembled into the global vector
226
227            iacc   = ilwork (itkbeg + 2)
228            numseg = ilwork (itkbeg + 4)
229            isgbeg = ilwork (itkbeg + 5)
230            if(iacc.eq.1) then
231               jdl=jdl+1  ! keep track of order of rtemp's
232c
233c... add the recieved data to the global array on the current processor.
234c    Note that this involves splitting up the chunk of recieved data
235c    into its correct segment locations for the current processor.
236c
237               itemp = 1
238               do idof = 1,n
239                  do is = 1,numseg
240                 isgbeg = ilwork (itkbeg + 3 + 2*is)
241                 lenseg = ilwork (itkbeg + 4 + 2*is)
242                 isgend = isgbeg + lenseg - 1
243c                 global(isgbeg:isgend,idof) = global(isgbeg:isgend,idof)
244c     &                                + rtemp (itemp:itemp+lenseg-1,jdl)
245                 do k=isgbeg,isgend  ! break this into an explicit loop an max instead of accumulate
246                 global(k,idof) = max(global(k,idof),rtemp (itemp,jdl))
247                 itemp=itemp+1   ! advance this index one at a time instead of in lenseg jumps
248                 enddo
249c                 itemp = itemp + lenseg
250                  enddo
251               enddo
252            endif ! end of receive (iacc=1)
253            itkbeg = itkbeg + 4 + 2*numseg
254         enddo
255      endif  ! commu "in"
256      return
257      end
258
259
260
261