xref: /phasta/M2N/src/commu.f (revision 16223cb9c3f88b34f2cb94151b5cf5ffc1aac5e2)
1      subroutine commu (global, ilwork,  n,  code)
2c---------------------------------------------------------------------
3c
4c This subroutine is responsible for interprocessor communication of
5c the residual and solution vectors.
6c
7c input:
8c     global(nshg,n): global vector to be communicated. Note that
9c                      this vector is local to the processor, (i.e.
10c                      not distributed across processors)
11c     ilwork(nlwork):  this is the local interprocessor work array.
12c                      This array is local to the processor, (i.e.
13c                      each processor has a unique ilwork array.
14c     n:               second dimension of the array to be communicated
15c     code:            = 'in' for communicating with the residual
16c                      = 'out' for cummunicating the solution
17c
18c---------------------------------------------------------------------
19c
20c The array ilwork describes the details of the communications.
21c Each communication step (call of this routine) consists of a
22c sequence of "tasks", where a task is defined as a communication
23c between two processors where data is exchanged. This would imply
24c that for a given processor, there will be as many tasks as there
25c are processors with which it must communicate. Details of the
26c ilwork array appear below.
27c
28c---------------------------------------------------------------------
29c
30      include "commonM2N.h"
31      include "mpif.h"
32      include "auxmpiM2N.h"
33      integer status(MPI_STATUS_SIZE), ierr
34      integer stat(MPI_STATUS_SIZE, 2*maxtask), req(2*maxtask)
35      real*8  rDelISend, rDelIRecv, rDelWaitAll
36
37      dimension global(nshg,n),
38     &          rtemp(maxfront*n,maxtask),
39     &          ilwork(nlwork)
40
41      character*3 code
42
43      if(impistat2.eq.1) call MPI_BARRIER (MPI_COMM_WORLD, ierr)
44      if(impistat.eq.1) rDelIRecv = zero
45      if(impistat.eq.1) rDelISend = zero
46      if(impistat.eq.1) rDelWaitAll = zero
47
48      if (code .ne. 'in ' .and. code .ne. 'out')
49     &  call error ('commu   ','code    ',0)
50
51      if     (n .eq. 1)      then        ! like a scalar
52        kdof = 1
53      elseif (n .eq. nsd)    then        ! like the normal vectors
54        kdof = 2
55      elseif (n .eq. ndof)   then        ! res, y, ac, krylov vectors....
56        kdof = 3
57      elseif (n .eq. nflow*nflow) then     ! bdiag
58        kdof = 4
59      elseif (n .eq. (nflow-1)*nsd) then  ! qres
60        kdof = 5
61      elseif (n .eq. nflow) then
62        kdof = 6
63      elseif (n .eq. 24 ) then
64        kdof = 7
65      elseif (n .eq. 9) then
66        kdof = 8
67      elseif (n .eq. 11 ) then
68        kdof = 9
69      elseif (n .eq. 7 ) then
70        kdof = 10
71      elseif (n .eq. 33 ) then
72         kdof = 11
73      elseif (n .eq. 22 ) then
74         kdof = 12
75      elseif (n .eq. 16 ) then
76         kdof = 13
77      elseif (n .eq. 10 ) then
78         kdof = 14
79       elseif (n .eq. nflow*nsd ) then   !surface tension + qres
80         kdof = 15
81      else
82        call error ('commu   ','n       ',n)
83      endif
84
85c... Note that when adding another kdof to the above set, we must
86c... also make changes in ctypes.f and auxmpi.h
87
88c---------------------------------------------------------------------
89c  ilwork(1): number of tasks
90c
91c  The following information is contained in ilwork for each task:
92c     itag: tag of the communication
93c     iacc: == 0 if task is a send
94c           == 1 if task is a recieve
95c     iother: rank of processor with which this communication occurs
96c     numseg: number of data "segments" to be sent or recieved. A
97c             segment is defined as a continuous section of the global
98c             vector to be communicated, (i.e. a group of nodes (or,
99c             rather, "shape function coefficients") which occur
100c             sequentially in the array global(nshg,n)).
101c     isbeg:  location of the first segment in the array owned by the
102c             current processor.
103c
104c The two types of communication are 'in', where the residual is being
105c communicated, and 'out', where the solution is being communicated.
106c Note that when the type is 'out', senders recieve and recievers send.
107c
108c The following comment pertains to a communication of type 'in':
109c
110c     If the task is a send, then all of the numseg segments are
111c     sent with a single call to MPI_SEND. Where these segments live in
112c     the array is built into the array sevsegtype, which is a common
113c     array constructed in the subroutine "ctypes.f". In other words,
114c     sevsegtype is a data type that describes the indices of the blocks
115c     to be sent, in terms of there beginning index, and the length of
116c     each segment. Using this, we can make a single send to take care of
117c     all the segments for this task.
118c
119c     If the task is a recieve, then once the vector is recieved, the
120c     recieved segments must be added to the correct locations in the
121c     current array. These locations are described in ilwork as the
122c     beginning position, then the length of the segment.
123c
124c---------------------------------------------------------------------
125      numtask = ilwork(1)
126
127      itkbeg = 1
128      m = 0
129      idl=0
130
131      DO itask = 1, numtask
132        m      = m + 1
133        itag   = ilwork (itkbeg + 1)
134        iacc   = ilwork (itkbeg + 2)
135        iother = ilwork (itkbeg + 3)
136        numseg = ilwork (itkbeg + 4)
137        isgbeg = ilwork (itkbeg + 5)
138c
139c.... if iacc == 0, then this task is a send.
140c     slave
141c
142        if (iacc .EQ. 0) then
143c
144c.... residual communication
145c
146          if (code .eq. 'in ') then
147            if(impistat.eq.1) iISend = iISend+1
148            if(impistat.eq.1) rmpitmr = TMRC()
149            call MPI_ISEND(global(isgbeg, 1), 1, sevsegtype(itask,kdof),
150     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
151            if(impistat.eq.1) rDelISend = TMRC()-rmpitmr
152            if(impistat.eq.1) rISend = rISend+rDelISend
153          endif
154c
155c.... solution communication
156c
157          if (code .eq. 'out') then
158            if(impistat.eq.1) iIRecv = iIRecv+1
159            if(impistat.eq.1) rmpitmr = TMRC()
160            call MPI_IRECV(global(isgbeg, 1), 1, sevsegtype(itask,kdof),
161     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
162            if(impistat.eq.1) rDelIRecv = TMRC()-rmpitmr
163            if(impistat.eq.1) rIRecv = rIRecv+rDelIRecv
164          endif
165c
166c.... if iacc == 1, then this task is a recieve.
167c     master
168c
169        else
170          if (code .eq. 'in ') then
171c
172c.... determine the number of total number of nodes involved in this
173c     communication (lfront), including all segments
174c
175            lfront = 0
176            do is = 1,numseg
177              lenseg = ilwork (itkbeg + 4 + 2*is)
178              lfront = lfront + lenseg
179            enddo
180c
181c.... recieve all segments for this task in a single step
182c
183            idl=idl+1 ! stands for i Do Later, the number to fix later
184            if(impistat.eq.1) iIRecv = iIRecv+1
185            if(impistat.eq.1) rmpitmr = TMRC()
186            call MPI_IRECV(rtemp(1,idl), lfront*n, MPI_DOUBLE_PRECISION,
187     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
188            if(impistat.eq.1) rDelIRecv = TMRC()-rmpitmr
189            if(impistat.eq.1) rIRecv = rIRecv+rDelIRecv
190          endif
191          if (code .eq. 'out') then
192            if(impistat.eq.1) iISend = iISend+1
193            if(impistat.eq.1) rmpitmr = TMRC()
194            call MPI_ISEND(global(isgbeg, 1), 1, sevsegtype(itask,kdof),
195     &                     iother, itag, MPI_COMM_WORLD, req(m), ierr)
196            if(impistat.eq.1) rDelISend = TMRC()-rmpitmr
197            if(impistat.eq.1) rISend = rISend+rDelISend
198          endif
199        endif
200
201        itkbeg = itkbeg + 4 + 2*numseg
202
203      enddo   !! end tasks loop
204
205      if(impistat.eq.1) iWaitAll = iWaitAll+1
206      if(impistat.eq.1) rmpitmr = TMRC()
207      call MPI_WAITALL(m, req, stat, ierr)
208      if(impistat.eq.1) rDelWaitAll = TMRC()-rmpitmr
209      if(impistat.eq.1) rWaitAll = rWaitAll+rDelWaitAll
210      if(impistat.eq.1) rCommu = rCommu+rDelIRecv+rDelISend+rDelWaitAll
211
212c
213c     Stuff added below is a delayed assembly of that which was communicated
214c     above but due to the switch to non-blocking receivves could not be
215c     assembled until after the waitall.  Only necessary for commu "in"
216c
217
218      if(code .eq. 'in ') then
219         itkbeg=1
220         jdl=0
221         do j=1,numtask         ! time to do all the segments that needed to be
222                                ! assembled into the global vector
223
224            iacc   = ilwork (itkbeg + 2)
225            numseg = ilwork (itkbeg + 4)
226            isgbeg = ilwork (itkbeg + 5)
227            if(iacc.eq.1) then
228               jdl=jdl+1  ! keep track of order of rtemp's
229c
230c... add the recieved data to the global array on the current processor.
231c    Note that this involves splitting up the chunk of recieved data
232c    into its correct segment locations for the current processor.
233c
234               itemp = 1
235               do idof = 1,n
236                  do is = 1,numseg
237                 isgbeg = ilwork (itkbeg + 3 + 2*is)
238                 lenseg = ilwork (itkbeg + 4 + 2*is)
239                 isgend = isgbeg + lenseg - 1
240c                 global(isgbeg:isgend,idof) = global(isgbeg:isgend,idof)
241c     &                                + rtemp (itemp:itemp+lenseg-1,jdl)
242                 do j=isgbeg,isgend  ! break this into an explicit loop an max instead of accumulate
243                 global(j,idof) = max(global(j,idof),
244     &                                rtemp (itemp,jdl))
245                 itemp=itemp+1   ! advance this index one at a time instead of in lenseg jumps
246                 enddo
247c                 itemp = itemp + lenseg
248                  enddo
249               enddo
250            endif ! end of receive (iacc=1)
251            itkbeg = itkbeg + 4 + 2*numseg
252         enddo
253      endif  ! commu "in"
254      return
255      end
256
257
258
259